goer@SOPHIST.UCHICAGO.EDU (Richard Goerwitz) (02/23/90)
Recently I had occasion to install a number of BSD archives on my home (SysV) machine, and I got fed up with having to rename all the directories, and altering all the source to recognize the new and shorter names. It seemed better to create a filter that would take a tar archive and map everything all at once. I started writing the program in C, but I soon realized that doing the job in C would take a couple of evenings. The Icon program took just part of one evening. It's not meant to be pretty, but since it is probably something others would find useful, I'm posting it. It works here at my site. Naturally, I don't guarantee that it will work anywhere else. ---------------------------------------------------------------------- global filenametbl, chunkset, short_chunkset # see procedure mappiece(s) record hblock(name,junk,size,mtime,chksum, linkflag,linkname,therest) # see readtarhdr(s) procedure main(a) usage := "usage: maptarfile inputfile # output goes to stdout" 0 < *a < 2 | stop("Bad arg count.\n",usage) intext := open(a[1],"r") | stop("maptarfile: can't open ",a[1]) # run through all the headers in the input file, filling # (global) filenametbl with the names of overlong files; # make_table_of_filenames fails if there are no such files make_table_of_filenames(intext) | stop("maptarfile: no overlong path names to map") # now that a table of overlong filenames exists, go back # through the text, remapping all occurrences of these names # to new, 14-char values; also, reset header checksums, and # reformat text into correctly padded 512-byte blocks seek(intext,1) output_mapped_headers_and_texts(intext) | stop("maptarfile: error reformatting text") close(intext) write_report() exit(0) end procedure make_table_of_filenames(intext) # global chunkset (set of overlong filenames) local header # read headers for overlong filenames; for now # ignore everything else while header := readtarhdr(reads(intext,512)) do { tab_nxt_hdr(intext,trim_str(header.size)) fixpath(trim_str(header.name)) } *chunkset = 0 & fail return &null end procedure output_mapped_headers_and_texts(intext) # remember that filenametbl, chunkset, and short_chunkset # (which are used by various procedures below) are GLOBAL local header, newtext, full_block # read in headers, one at a time while header := readtarhdr(reads(intext,512)) do { # replace overlong filenames with shorter ones, according to # the conversions specified in the global hash table filenametbl header.name := left(map_filenams(header.name),100,"\x00") header.linkname := left(map_filenams(header.linkname),100,"\x00") # use header.size field to read in and map the subsequent text newtext := trim( map_filenams(tab_nxt_hdr(intext,trim_str(header.size))),'\x00' ) # now, find the length of newtext, and insert it into the size field header.size := right(exbase10(*newtext,8) || " ",12," ") # calculate the checksum of the newly retouched header header.chksum := right(exbase10(get_checksum(header),8)||"\x00 ",8," ") # finally, join all the header fields into a new block and write it out full_block := ""; every full_block ||:= !header writes(left(full_block,512,"\x00")) # now we're ready to write out the text, padding the final block # out to an even 512 bytes if necessary; the next header must start # right at the beginning of a 512 byte block newtext ? { while writes(move(512)) if not pos(0) then writes(left(tab(0),512,"\x00")) | fail } } writes(repl("\x00",512)) return &null end procedure trim_str(s) # knock out spaces, nulls return s ? { (tab(many(' ')) | &null) & trim(tab(find("\x00")|0)) } \ 1 end procedure tab_nxt_hdr(f,size_str) hs := integer("8r" || size_str) next_header_offset := (hs / 512) * 512 hs % 512 ~= 0 & next_header_offset +:= 512 if 0 = next_header_offset then return "" return reads(f,next_header_offset) | stop("maptarfile: error reading in ", string(next_header_offset)," bytes.") end procedure fixpath(s) # fixpath is a misnomer of sorts, since it is used on # the first pass only, and merely examines each filename # in a path, using the procedure mappiece to record any # overlong ones in the global table filenametbl and in # the global sets chunkset and short_chunkset; no fixing # is actually done here s2 := "" s ? { while piece := tab(find("/")+1) do s2 ||:= mappiece(piece) s2 ||:= mappiece(tab(0)) } return s2 end procedure mappiece(s) # global filenametbl, chunkset short_chunkset initial { filenametbl := table() chunkset := set() short_chunkset := set() } chunk := trim(s,'/') if *chunk > 14 then { i := 0 repeat { # if the file has already been looked at, continue if \filenametbl[chunk] then next # else find a new unique 14-character name for it lchunk := chunk[1:12] || right(string(i+:=1),3,"0") if lchunk == !filenametbl then next else break } # record filename in various global sets and tables filenametbl[chunk] := lchunk insert(chunkset,chunk) insert(short_chunkset,chunk[1:16]) } else lchunk := chunk lchunk ||:= (s[-1] == "/") return lchunk end procedure readtarhdr(s) this_block := hblock() s ? { this_block.name := move(100) # <- to be looked at later this_block.junk := move(8+8+8) # skip the permissions, uid, etc. this_block.size := move(12) # <- to be looked at later this_block.mtime := move(12) this_block.chksum := move(8) # <- to be looked at later this_block.linkflag := move(1) this_block.linkname := move(100) # <- to be looked at later this_block.therest := tab(0) } integer(this_block.size) | fail return this_block end procedure map_filenams(s) # chunkset is global, and contains all the overlong filenames # found in the first pass through the input file; here the aim # is to map the filenames to the shortened variants as stored # in filenametbl (which happens to be GLOBAL) local s2 s2 := "" s ? { until pos(0) do { # first narrow the possibilities, then try to map; # short_chunkset, chunkset & filenametbl are global if member(short_chunkset,&subject[&pos:&pos+15]) then s2 ||:= filenametbl[=!chunkset] else s2 ||:= move(1) } } return s2 end # Author: Ralph E. Griswold # Date: June 10, 1988 # exbase10(i,j) convert base-10 integer i to base j # The maximum base allowed is 36. procedure exbase10(i,j) static digits local s, d, sign initial digits := &digits || &lcase if i = 0 then return 0 if i < 0 then { sign := "-" i := -i } else sign := "" s := "" while i > 0 do { d := i % j if d > 9 then d := digits[d + 1] s := d || s i /:= j } return sign || s end procedure get_checksum(r) sum := 0 r.chksum := " " every field := !r do every sum +:= ord(!field) return sum end procedure write_report() # this procedure writes out a list of filenames which were # remapped (because they exceeded the SysV 14-char limit) local outtext, stbl, i (outtext := open(fname := "mapping.report","w")) | open(fname := "/tmp/mapping.report","w") | stop("maptarfile: Can't find a place to put mapping.report!") stbl := sort(filenametbl,3) every i := 1 to *stbl -1 by 2 do { write(outtext,left(stbl[i],35," ")," ",stbl[i+1]) } write(&errout,"maptarfile: ",fname," contains the list of changes") close(outtext) return &null end