goer@sophist.uchicago.EDU (Richard Goerwitz) (02/28/90)
Having received a number of requests for this thing from a number of places, it seemed prudent to clean it up, comment it, fix the bugs, and repost. I had no idea that anyone would really _use_ it. #------------------------------------------------------------------- # # MAPTARFILE # # Map 15+ char. filenames in a tar archive to 14 chars. # Handles both the header blocks and the source itself. # Obviates the need for renaming files and directories # by hand, and for altering source and docs to refer to # the new file and directory names. # # Richard L. Goerwitz, III # # Last modified 2/27/90 # #------------------------------------------------------------------- global filenametbl, chunkset, short_chunkset # see procedure mappiece(s) record hblock(name,junk,size,mtime,chksum, linkflag,linkname,therest) # see readtarhdr(s) procedure main(a) usage := "usage: maptarfile inputfile # output goes to stdout" 0 < *a < 2 | stop("Bad arg count.\n",usage) intext := open(a[1],"r") | stop("maptarfile: can't open ",a[1]) # Run through all the headers in the input file, filling # (global) filenametbl with the names of overlong files; # make_table_of_filenames fails if there are no such files. make_table_of_filenames(intext) | stop("maptarfile: no overlong path names to map") # Now that a table of overlong filenames exists, go back # through the text, remapping all occurrences of these names # to new, 14-char values; also, reset header checksums, and # reformat text into correctly padded 512-byte blocks. Ter- # minate output with 512 nulls. seek(intext,1) every writes(output_mapped_headers_and_texts(intext)) close(intext) write_report() # Record mapped file and dir names for future ref. exit(0) end procedure make_table_of_filenames(intext) local header # chunkset is global # search headers for overlong filenames; for now # ignore everything else while header := readtarhdr(reads(intext,512)) do { # tab upto the next header block tab_nxt_hdr(intext,trim_str(header.size),1) # record overlong filenames in several global tables, sets fixpath(trim_str(header.name)) } *chunkset = 0 & fail return &null end procedure output_mapped_headers_and_texts(intext) # Remember that filenametbl, chunkset, and short_chunkset # (which are used by various procedures below) are global. local header, newtext, full_block, block, lastblock # Read in headers, one at a time. while header := readtarhdr(reads(intext,512)) do { # Replace overlong filenames with shorter ones, according to # the conversions specified in the global hash table filenametbl # (which were generated by fixpath() on the first pass). header.name := left(map_filenams(header.name),100,"\x00") header.linkname := left(map_filenams(header.linkname),100,"\x00") # Use header.size field to determine the size of the subsequent text. # Read in the text as one string. Map overlong filenames found in it # to shorter names as specified in the global hash table filenamtbl. newtext := map_filenams(tab_nxt_hdr(intext,trim_str(header.size))) # Now, find the length of newtext, and insert it into the size field. header.size := right(exbase10(*newtext,8) || " ",12," ") # Calculate the checksum of the newly retouched header. header.chksum := right(exbase10(get_checksum(header),8)||"\x00 ",8," ") # Finally, join all the header fields into a new block and write it out full_block := ""; every full_block ||:= !header suspend left(full_block,512,"\x00") # Now we're ready to write out the text, padding the final block # out to an even 512 bytes if necessary; the next header must start # right at the beginning of a 512-byte block. newtext ? { while block := move(512) do suspend block pos(0) & next lastblock := left(tab(0),512,"\x00") suspend lastblock } } # Write out a final null-filled block. Some tar programs will write # out 1024 nulls at the end. Dunno why. return repl("\x00",512) end procedure trim_str(s) # Knock out spaces, nulls from those crazy tar header # block fields (some of which end in a space and a null, # some just a space, and some just a null [anyone know # why?]). return s ? { (tab(many(' ')) | &null) & trim(tab(find("\x00")|0)) } \ 1 end procedure tab_nxt_hdr(f,size_str,firstpass) # Tab upto the next header block. Return the bypassed text # as a string (this value is not always used). local hs, next_header_offset hs := integer("8r" || size_str) next_header_offset := (hs / 512) * 512 hs % 512 ~= 0 & next_header_offset +:= 512 if 0 = next_header_offset then return "" else { # if this is pass no. 1 don't bother returning a value; we're # just collecting long filenames; if \firstpass then { seek(f,where(f)+next_header_offset) return } else { return reads(f,next_header_offset)[1:hs+1] | stop("maptarfile: error reading in ", string(next_header_offset)," bytes.") } } end procedure fixpath(s) # Fixpath is a misnomer of sorts, since it is used on # the first pass only, and merely examines each filename # in a path, using the procedure mappiece to record any # overlong ones in the global table filenametbl and in # the global sets chunkset and short_chunkset; no fixing # is actually done here. s2 := "" s ? { while piece := tab(find("/")+1) do s2 ||:= mappiece(piece) s2 ||:= mappiece(tab(0)) } return s2 end procedure mappiece(s) # Check s (the name of a file or dir as recorded in the tar header # being examined) to see if it is over 14 chars long. If so, # generate a unique 14-char version of the name, and store # both values in the global hashtable filenametbl. Also store # the original (overlong) file name in chunkset. Store the # first fifteen chars of the original file name in short_chunkset. # Sorry about all of the tables and sets. It actually makes for # a reasonably efficient program. Doing away with both sets, # while possible, causes a tenfold drop in execution speed! # global filenametbl, chunkset, short_chunkset local j, ending initial { filenametbl := table() chunkset := set() short_chunkset := set() } chunk := trim(s,'/') if *chunk > 14 then { i := 0 repeat { # if the file has already been looked at, continue if \filenametbl[chunk] then next # else find a new unique 14-character name for it # preserve important suffixes like ".Z," ".c," etc. if chunk ? (tab(find(".")), ending := move(1) || tab(any(&ascii)), pos(0)) then lchunk := chunk[1:11] || right(string(i+:=1),2,"0") || ending else lchunk := chunk[1:12] || right(string(i+:=1),3,"0") if lchunk == !filenametbl then next else break } # record filename in various global sets and tables filenametbl[chunk] := lchunk insert(chunkset,chunk) insert(short_chunkset,chunk[1:16]) } else lchunk := chunk lchunk ||:= (s[-1] == "/") return lchunk end procedure readtarhdr(s) # Read the silly tar header into a record. Note that, as was # complained about above, some of the fields end in a null, some # in a space, and some in a space and a null. The procedure # trim_str() may (and in fact often _is_) used to remove this # extra garbage. this_block := hblock() s ? { this_block.name := move(100) # <- to be looked at later this_block.junk := move(8+8+8) # skip the permissions, uid, etc. this_block.size := move(12) # <- to be looked at later this_block.mtime := move(12) this_block.chksum := move(8) # <- to be looked at later this_block.linkflag := move(1) this_block.linkname := move(100) # <- to be looked at later this_block.therest := tab(0) } integer(this_block.size) | fail # If it's not an integer, we've hit # the final (null-filled) block. return this_block end procedure map_filenams(s) # Chunkset is global, and contains all the overlong filenames # found in the first pass through the input file; here the aim # is to map these filenames to the shortened variants as stored # in filenametbl (GLOBAL). local s2 s2 := "" s ? { until pos(0) do { # first narrow the possibilities, using short_chunkset if member(short_chunkset,&subject[&pos:&pos+15]) # then try to map from a long to a shorter 14-char filename then s2 ||:= (filenametbl[=!chunkset] | move(1)) else s2 ||:= move(1) } } return s2 end # From the IPL. Thanks, Ralph - # Author: Ralph E. Griswold # Date: June 10, 1988 # exbase10(i,j) convert base-10 integer i to base j # The maximum base allowed is 36. procedure exbase10(i,j) static digits local s, d, sign initial digits := &digits || &lcase if i = 0 then return 0 if i < 0 then { sign := "-" i := -i } else sign := "" s := "" while i > 0 do { d := i % j if d > 9 then d := digits[d + 1] s := d || s i /:= j } return sign || s end # end IPL material procedure get_checksum(r) # Calculates the new value of the checksum field for the # current header block. Note that the specification say # that, when calculating this value, the chksum field must # be blank-filled. sum := 0 r.chksum := " " every field := !r do every sum +:= ord(!field) return sum end procedure write_report() # This procedure writes out a list of filenames which were # remapped (because they exceeded the SysV 14-char limit), # and then notifies the user of the existence of this file. local outtext, stbl, i (outtext := open(fname := "mapping.report","w")) | open(fname := "/tmp/mapping.report","w") | stop("maptarfile: Can't find a place to put mapping.report!") stbl := sort(filenametbl,3) every i := 1 to *stbl -1 by 2 do { write(outtext,left(stbl[i],35," ")," ",stbl[i+1]) } write(&errout,"maptarfile: ",fname," contains the list of changes") close(outtext) return &null end