goer@SOPHIST.UCHICAGO.EDU (Richard Goerwitz) (02/23/90)
Recently I had occasion to install a number of BSD archives on my
home (SysV) machine, and I got fed up with having to rename all the
directories, and altering all the source to recognize the new and
shorter names. It seemed better to create a filter that would take
a tar archive and map everything all at once.
I started writing the program in C, but I soon realized that doing
the job in C would take a couple of evenings. The Icon program took
just part of one evening. It's not meant to be pretty, but since it
is probably something others would find useful, I'm posting it. It
works here at my site. Naturally, I don't guarantee that it will
work anywhere else.
----------------------------------------------------------------------
global filenametbl, chunkset, short_chunkset # see procedure mappiece(s)
record hblock(name,junk,size,mtime,chksum,
linkflag,linkname,therest) # see readtarhdr(s)
procedure main(a)
usage := "usage: maptarfile inputfile # output goes to stdout"
0 < *a < 2 | stop("Bad arg count.\n",usage)
intext := open(a[1],"r") |
stop("maptarfile: can't open ",a[1])
# run through all the headers in the input file, filling
# (global) filenametbl with the names of overlong files;
# make_table_of_filenames fails if there are no such files
make_table_of_filenames(intext) |
stop("maptarfile: no overlong path names to map")
# now that a table of overlong filenames exists, go back
# through the text, remapping all occurrences of these names
# to new, 14-char values; also, reset header checksums, and
# reformat text into correctly padded 512-byte blocks
seek(intext,1)
output_mapped_headers_and_texts(intext) |
stop("maptarfile: error reformatting text")
close(intext)
write_report()
exit(0)
end
procedure make_table_of_filenames(intext)
# global chunkset (set of overlong filenames)
local header
# read headers for overlong filenames; for now
# ignore everything else
while header := readtarhdr(reads(intext,512)) do {
tab_nxt_hdr(intext,trim_str(header.size))
fixpath(trim_str(header.name))
}
*chunkset = 0 & fail
return &null
end
procedure output_mapped_headers_and_texts(intext)
# remember that filenametbl, chunkset, and short_chunkset
# (which are used by various procedures below) are GLOBAL
local header, newtext, full_block
# read in headers, one at a time
while header := readtarhdr(reads(intext,512)) do {
# replace overlong filenames with shorter ones, according to
# the conversions specified in the global hash table filenametbl
header.name := left(map_filenams(header.name),100,"\x00")
header.linkname := left(map_filenams(header.linkname),100,"\x00")
# use header.size field to read in and map the subsequent text
newtext := trim(
map_filenams(tab_nxt_hdr(intext,trim_str(header.size))),'\x00'
)
# now, find the length of newtext, and insert it into the size field
header.size := right(exbase10(*newtext,8) || " ",12," ")
# calculate the checksum of the newly retouched header
header.chksum := right(exbase10(get_checksum(header),8)||"\x00 ",8," ")
# finally, join all the header fields into a new block and write it out
full_block := ""; every full_block ||:= !header
writes(left(full_block,512,"\x00"))
# now we're ready to write out the text, padding the final block
# out to an even 512 bytes if necessary; the next header must start
# right at the beginning of a 512 byte block
newtext ? {
while writes(move(512))
if not pos(0)
then writes(left(tab(0),512,"\x00")) | fail
}
}
writes(repl("\x00",512))
return &null
end
procedure trim_str(s)
# knock out spaces, nulls
return s ? {
(tab(many(' ')) | &null) &
trim(tab(find("\x00")|0))
} \ 1
end
procedure tab_nxt_hdr(f,size_str)
hs := integer("8r" || size_str)
next_header_offset := (hs / 512) * 512
hs % 512 ~= 0 & next_header_offset +:= 512
if 0 = next_header_offset then return ""
return reads(f,next_header_offset) |
stop("maptarfile: error reading in ",
string(next_header_offset)," bytes.")
end
procedure fixpath(s)
# fixpath is a misnomer of sorts, since it is used on
# the first pass only, and merely examines each filename
# in a path, using the procedure mappiece to record any
# overlong ones in the global table filenametbl and in
# the global sets chunkset and short_chunkset; no fixing
# is actually done here
s2 := ""
s ? {
while piece := tab(find("/")+1)
do s2 ||:= mappiece(piece)
s2 ||:= mappiece(tab(0))
}
return s2
end
procedure mappiece(s)
# global filenametbl, chunkset short_chunkset
initial {
filenametbl := table()
chunkset := set()
short_chunkset := set()
}
chunk := trim(s,'/')
if *chunk > 14 then {
i := 0
repeat {
# if the file has already been looked at, continue
if \filenametbl[chunk] then next
# else find a new unique 14-character name for it
lchunk := chunk[1:12] || right(string(i+:=1),3,"0")
if lchunk == !filenametbl
then next else break
}
# record filename in various global sets and tables
filenametbl[chunk] := lchunk
insert(chunkset,chunk)
insert(short_chunkset,chunk[1:16])
}
else lchunk := chunk
lchunk ||:= (s[-1] == "/")
return lchunk
end
procedure readtarhdr(s)
this_block := hblock()
s ? {
this_block.name := move(100) # <- to be looked at later
this_block.junk := move(8+8+8) # skip the permissions, uid, etc.
this_block.size := move(12) # <- to be looked at later
this_block.mtime := move(12)
this_block.chksum := move(8) # <- to be looked at later
this_block.linkflag := move(1)
this_block.linkname := move(100) # <- to be looked at later
this_block.therest := tab(0)
}
integer(this_block.size) | fail
return this_block
end
procedure map_filenams(s)
# chunkset is global, and contains all the overlong filenames
# found in the first pass through the input file; here the aim
# is to map the filenames to the shortened variants as stored
# in filenametbl (which happens to be GLOBAL)
local s2
s2 := ""
s ? {
until pos(0) do {
# first narrow the possibilities, then try to map;
# short_chunkset, chunkset & filenametbl are global
if member(short_chunkset,&subject[&pos:&pos+15])
then s2 ||:= filenametbl[=!chunkset]
else s2 ||:= move(1)
}
}
return s2
end
# Author: Ralph E. Griswold
# Date: June 10, 1988
# exbase10(i,j) convert base-10 integer i to base j
# The maximum base allowed is 36.
procedure exbase10(i,j)
static digits
local s, d, sign
initial digits := &digits || &lcase
if i = 0 then return 0
if i < 0 then {
sign := "-"
i := -i
}
else sign := ""
s := ""
while i > 0 do {
d := i % j
if d > 9 then d := digits[d + 1]
s := d || s
i /:= j
}
return sign || s
end
procedure get_checksum(r)
sum := 0
r.chksum := " "
every field := !r
do every sum +:= ord(!field)
return sum
end
procedure write_report()
# this procedure writes out a list of filenames which were
# remapped (because they exceeded the SysV 14-char limit)
local outtext, stbl, i
(outtext := open(fname := "mapping.report","w")) |
open(fname := "/tmp/mapping.report","w") |
stop("maptarfile: Can't find a place to put mapping.report!")
stbl := sort(filenametbl,3)
every i := 1 to *stbl -1 by 2 do {
write(outtext,left(stbl[i],35," ")," ",stbl[i+1])
}
write(&errout,"maptarfile: ",fname," contains the list of changes")
close(outtext)
return &null
end