[comp.lang.icon] BSD -> SYSV filename mapper

goer@SOPHIST.UCHICAGO.EDU (Richard Goerwitz) (02/23/90)

Recently I had occasion to install a number of BSD archives on my
home (SysV) machine, and I got fed up with having to rename all the
directories, and altering all the source to recognize the new and
shorter names.  It seemed better to create a filter that would take
a tar archive and map everything all at once.

I started writing the program in C, but I soon realized that doing
the job in C would take a couple of evenings.  The Icon program took
just part of one evening.  It's not meant to be pretty, but since it
is probably something others would find useful, I'm posting it.  It
works here at my site.  Naturally, I don't guarantee that it will
work anywhere else.

----------------------------------------------------------------------
global filenametbl, chunkset, short_chunkset        # see procedure mappiece(s)
record hblock(name,junk,size,mtime,chksum,
              linkflag,linkname,therest)            # see readtarhdr(s)


procedure main(a)

    usage := "usage:  maptarfile inputfile      # output goes to stdout"
    0 < *a < 2 | stop("Bad arg count.\n",usage)
    intext := open(a[1],"r") |
	stop("maptarfile:  can't open ",a[1])

    # run through all the headers in the input file, filling
    # (global) filenametbl with the names of overlong files;
    # make_table_of_filenames fails if there are no such files
    make_table_of_filenames(intext) |
	stop("maptarfile:  no overlong path names to map")
  
    # now that a table of overlong filenames exists, go back
    # through the text, remapping all occurrences of these names
    # to new, 14-char values; also, reset header checksums, and
    # reformat text into correctly padded 512-byte blocks
    seek(intext,1)
    output_mapped_headers_and_texts(intext) |
	stop("maptarfile:  error reformatting text")

    close(intext)
    write_report()
    exit(0)
    
end



procedure make_table_of_filenames(intext)

    # global chunkset (set of overlong filenames)
    local header

    # read headers for overlong filenames; for now
    # ignore everything else
    while header := readtarhdr(reads(intext,512)) do {
	tab_nxt_hdr(intext,trim_str(header.size))
	fixpath(trim_str(header.name))
    }
    *chunkset = 0 & fail
    return &null

end



procedure output_mapped_headers_and_texts(intext)

    # remember that filenametbl, chunkset, and short_chunkset
    # (which are used by various procedures below) are GLOBAL
    local header, newtext, full_block 

    # read in headers, one at a time
    while header := readtarhdr(reads(intext,512)) do {

	# replace overlong filenames with shorter ones, according to
	# the conversions specified in the global hash table filenametbl
      	header.name := left(map_filenams(header.name),100,"\x00")
	header.linkname := left(map_filenams(header.linkname),100,"\x00")

	# use header.size field to read in and map the subsequent text
	newtext := trim(
	    map_filenams(tab_nxt_hdr(intext,trim_str(header.size))),'\x00'
	    )

	# now, find the length of newtext, and insert it into the size field
	header.size := right(exbase10(*newtext,8) || " ",12," ")

	# calculate the checksum of the newly retouched header
	header.chksum := right(exbase10(get_checksum(header),8)||"\x00 ",8," ")

	# finally, join all the header fields into a new block and write it out
	full_block := ""; every full_block ||:= !header
	writes(left(full_block,512,"\x00"))

	# now we're ready to write out the text, padding the final block
	# out to an even 512 bytes if necessary; the next header must start
	# right at the beginning of a 512 byte block
	newtext ? {
	    while writes(move(512))
	    if not pos(0)
	    then writes(left(tab(0),512,"\x00")) | fail
	}
    }
    writes(repl("\x00",512))
    return &null

end



procedure trim_str(s)
    # knock out spaces, nulls
    return s ? {
	(tab(many(' ')) | &null) &
	    trim(tab(find("\x00")|0))
    } \ 1
end 



procedure tab_nxt_hdr(f,size_str)

    hs := integer("8r" || size_str)
    next_header_offset := (hs / 512) * 512
    hs % 512 ~= 0 & next_header_offset +:= 512
    if 0 = next_header_offset then return ""
    return reads(f,next_header_offset) |
	stop("maptarfile:  error reading in ",
	     string(next_header_offset)," bytes.")

end



procedure fixpath(s)

    # fixpath is a misnomer of sorts, since it is used on
    # the first pass only, and merely examines each filename
    # in a path, using the procedure mappiece to record any
    # overlong ones in the global table filenametbl and in
    # the global sets chunkset and short_chunkset; no fixing
    # is actually done here
    s2 := ""
    s ? {
	while piece := tab(find("/")+1)
	do s2 ||:= mappiece(piece) 
	s2 ||:= mappiece(tab(0))
    }
    return s2

end



procedure mappiece(s)
    
    # global filenametbl, chunkset short_chunkset
    initial {
	filenametbl := table()
	chunkset := set()
	short_chunkset := set()
    }
    
    chunk := trim(s,'/')
    if *chunk > 14 then {
	i := 0
	repeat {
	# if the file has already been looked at, continue
	    if \filenametbl[chunk] then next
	# else find a new unique 14-character name for it
	    lchunk := chunk[1:12] || right(string(i+:=1),3,"0")
	    if lchunk == !filenametbl
	    then next else break
	}
	# record filename in various global sets and tables
	filenametbl[chunk] := lchunk
	insert(chunkset,chunk)
	insert(short_chunkset,chunk[1:16])
    }
    else lchunk := chunk

    lchunk ||:= (s[-1] == "/")
    return lchunk

end



procedure readtarhdr(s)
    this_block := hblock()
    s ? {
	this_block.name     := move(100)    # <- to be looked at later
	this_block.junk     := move(8+8+8)  # skip the permissions, uid, etc.
	this_block.size     := move(12)     # <- to be looked at later
	this_block.mtime    := move(12)
	this_block.chksum   := move(8)      # <- to be looked at later
	this_block.linkflag := move(1)
	this_block.linkname := move(100)    # <- to be looked at later
	this_block.therest  := tab(0)
    }
    integer(this_block.size) | fail
    return this_block
end



procedure map_filenams(s)

    # chunkset is global, and contains all the overlong filenames
    # found in the first pass through the input file; here the aim
    # is to map the filenames to the shortened variants as stored
    # in filenametbl (which happens to be GLOBAL)

    local s2

    s2 := ""
    s ? {
	until pos(0) do {
	    # first narrow the possibilities, then try to map;
	    # short_chunkset, chunkset & filenametbl are global
	    if member(short_chunkset,&subject[&pos:&pos+15])
	    then s2 ||:= filenametbl[=!chunkset]
	    else s2 ||:= move(1)
	}
    }
    return s2

end



#  Author:  Ralph E. Griswold
#  Date:  June 10, 1988
#  exbase10(i,j) convert base-10 integer i to base j
#  The maximum base allowed is 36.

procedure exbase10(i,j)
   static digits
   local s, d, sign
   initial digits := &digits || &lcase
   if i = 0 then return 0
   if i < 0 then {
      sign := "-"
      i := -i
      }
   else sign := ""
   s := ""
   while i > 0 do {
      d := i % j
      if d > 9 then d := digits[d + 1]
      s := d || s
      i /:= j
      }
   return sign || s
end



procedure get_checksum(r)
    sum := 0
    r.chksum := "        "
    every field := !r
    do every sum +:= ord(!field)
    return sum
end



procedure write_report()

    # this procedure writes out a list of filenames which were
    # remapped (because they exceeded the SysV 14-char limit)

    local outtext, stbl, i

    (outtext := open(fname := "mapping.report","w")) |
	open(fname := "/tmp/mapping.report","w") |
	     stop("maptarfile:  Can't find a place to put mapping.report!")
    stbl := sort(filenametbl,3)
    every i := 1 to *stbl -1 by 2 do {
	write(outtext,left(stbl[i],35," ")," ",stbl[i+1])
    }
    write(&errout,"maptarfile:  ",fname," contains the list of changes")
    close(outtext)
    return &null

end