goer@sophist.uchicago.EDU (Richard Goerwitz) (02/28/90)
Having received a number of requests for this thing from a number
of places, it seemed prudent to clean it up, comment it, fix the
bugs, and repost.
I had no idea that anyone would really _use_ it.
#-------------------------------------------------------------------
#
# MAPTARFILE
#
# Map 15+ char. filenames in a tar archive to 14 chars.
# Handles both the header blocks and the source itself.
# Obviates the need for renaming files and directories
# by hand, and for altering source and docs to refer to
# the new file and directory names.
#
# Richard L. Goerwitz, III
#
# Last modified 2/27/90
#
#-------------------------------------------------------------------
global filenametbl, chunkset, short_chunkset # see procedure mappiece(s)
record hblock(name,junk,size,mtime,chksum,
linkflag,linkname,therest) # see readtarhdr(s)
procedure main(a)
usage := "usage: maptarfile inputfile # output goes to stdout"
0 < *a < 2 | stop("Bad arg count.\n",usage)
intext := open(a[1],"r") |
stop("maptarfile: can't open ",a[1])
# Run through all the headers in the input file, filling
# (global) filenametbl with the names of overlong files;
# make_table_of_filenames fails if there are no such files.
make_table_of_filenames(intext) |
stop("maptarfile: no overlong path names to map")
# Now that a table of overlong filenames exists, go back
# through the text, remapping all occurrences of these names
# to new, 14-char values; also, reset header checksums, and
# reformat text into correctly padded 512-byte blocks. Ter-
# minate output with 512 nulls.
seek(intext,1)
every writes(output_mapped_headers_and_texts(intext))
close(intext)
write_report() # Record mapped file and dir names for future ref.
exit(0)
end
procedure make_table_of_filenames(intext)
local header # chunkset is global
# search headers for overlong filenames; for now
# ignore everything else
while header := readtarhdr(reads(intext,512)) do {
# tab upto the next header block
tab_nxt_hdr(intext,trim_str(header.size),1)
# record overlong filenames in several global tables, sets
fixpath(trim_str(header.name))
}
*chunkset = 0 & fail
return &null
end
procedure output_mapped_headers_and_texts(intext)
# Remember that filenametbl, chunkset, and short_chunkset
# (which are used by various procedures below) are global.
local header, newtext, full_block, block, lastblock
# Read in headers, one at a time.
while header := readtarhdr(reads(intext,512)) do {
# Replace overlong filenames with shorter ones, according to
# the conversions specified in the global hash table filenametbl
# (which were generated by fixpath() on the first pass).
header.name := left(map_filenams(header.name),100,"\x00")
header.linkname := left(map_filenams(header.linkname),100,"\x00")
# Use header.size field to determine the size of the subsequent text.
# Read in the text as one string. Map overlong filenames found in it
# to shorter names as specified in the global hash table filenamtbl.
newtext := map_filenams(tab_nxt_hdr(intext,trim_str(header.size)))
# Now, find the length of newtext, and insert it into the size field.
header.size := right(exbase10(*newtext,8) || " ",12," ")
# Calculate the checksum of the newly retouched header.
header.chksum := right(exbase10(get_checksum(header),8)||"\x00 ",8," ")
# Finally, join all the header fields into a new block and write it out
full_block := ""; every full_block ||:= !header
suspend left(full_block,512,"\x00")
# Now we're ready to write out the text, padding the final block
# out to an even 512 bytes if necessary; the next header must start
# right at the beginning of a 512-byte block.
newtext ? {
while block := move(512)
do suspend block
pos(0) & next
lastblock := left(tab(0),512,"\x00")
suspend lastblock
}
}
# Write out a final null-filled block. Some tar programs will write
# out 1024 nulls at the end. Dunno why.
return repl("\x00",512)
end
procedure trim_str(s)
# Knock out spaces, nulls from those crazy tar header
# block fields (some of which end in a space and a null,
# some just a space, and some just a null [anyone know
# why?]).
return s ? {
(tab(many(' ')) | &null) &
trim(tab(find("\x00")|0))
} \ 1
end
procedure tab_nxt_hdr(f,size_str,firstpass)
# Tab upto the next header block. Return the bypassed text
# as a string (this value is not always used).
local hs, next_header_offset
hs := integer("8r" || size_str)
next_header_offset := (hs / 512) * 512
hs % 512 ~= 0 & next_header_offset +:= 512
if 0 = next_header_offset then return ""
else {
# if this is pass no. 1 don't bother returning a value; we're
# just collecting long filenames;
if \firstpass then {
seek(f,where(f)+next_header_offset)
return
}
else {
return reads(f,next_header_offset)[1:hs+1] |
stop("maptarfile: error reading in ",
string(next_header_offset)," bytes.")
}
}
end
procedure fixpath(s)
# Fixpath is a misnomer of sorts, since it is used on
# the first pass only, and merely examines each filename
# in a path, using the procedure mappiece to record any
# overlong ones in the global table filenametbl and in
# the global sets chunkset and short_chunkset; no fixing
# is actually done here.
s2 := ""
s ? {
while piece := tab(find("/")+1)
do s2 ||:= mappiece(piece)
s2 ||:= mappiece(tab(0))
}
return s2
end
procedure mappiece(s)
# Check s (the name of a file or dir as recorded in the tar header
# being examined) to see if it is over 14 chars long. If so,
# generate a unique 14-char version of the name, and store
# both values in the global hashtable filenametbl. Also store
# the original (overlong) file name in chunkset. Store the
# first fifteen chars of the original file name in short_chunkset.
# Sorry about all of the tables and sets. It actually makes for
# a reasonably efficient program. Doing away with both sets,
# while possible, causes a tenfold drop in execution speed!
# global filenametbl, chunkset, short_chunkset
local j, ending
initial {
filenametbl := table()
chunkset := set()
short_chunkset := set()
}
chunk := trim(s,'/')
if *chunk > 14 then {
i := 0
repeat {
# if the file has already been looked at, continue
if \filenametbl[chunk] then next
# else find a new unique 14-character name for it
# preserve important suffixes like ".Z," ".c," etc.
if chunk ?
(tab(find(".")), ending := move(1) || tab(any(&ascii)), pos(0))
then lchunk := chunk[1:11] || right(string(i+:=1),2,"0") || ending
else lchunk := chunk[1:12] || right(string(i+:=1),3,"0")
if lchunk == !filenametbl
then next else break
}
# record filename in various global sets and tables
filenametbl[chunk] := lchunk
insert(chunkset,chunk)
insert(short_chunkset,chunk[1:16])
}
else lchunk := chunk
lchunk ||:= (s[-1] == "/")
return lchunk
end
procedure readtarhdr(s)
# Read the silly tar header into a record. Note that, as was
# complained about above, some of the fields end in a null, some
# in a space, and some in a space and a null. The procedure
# trim_str() may (and in fact often _is_) used to remove this
# extra garbage.
this_block := hblock()
s ? {
this_block.name := move(100) # <- to be looked at later
this_block.junk := move(8+8+8) # skip the permissions, uid, etc.
this_block.size := move(12) # <- to be looked at later
this_block.mtime := move(12)
this_block.chksum := move(8) # <- to be looked at later
this_block.linkflag := move(1)
this_block.linkname := move(100) # <- to be looked at later
this_block.therest := tab(0)
}
integer(this_block.size) | fail # If it's not an integer, we've hit
# the final (null-filled) block.
return this_block
end
procedure map_filenams(s)
# Chunkset is global, and contains all the overlong filenames
# found in the first pass through the input file; here the aim
# is to map these filenames to the shortened variants as stored
# in filenametbl (GLOBAL).
local s2
s2 := ""
s ? {
until pos(0) do {
# first narrow the possibilities, using short_chunkset
if member(short_chunkset,&subject[&pos:&pos+15])
# then try to map from a long to a shorter 14-char filename
then s2 ||:= (filenametbl[=!chunkset] | move(1))
else s2 ||:= move(1)
}
}
return s2
end
# From the IPL. Thanks, Ralph -
# Author: Ralph E. Griswold
# Date: June 10, 1988
# exbase10(i,j) convert base-10 integer i to base j
# The maximum base allowed is 36.
procedure exbase10(i,j)
static digits
local s, d, sign
initial digits := &digits || &lcase
if i = 0 then return 0
if i < 0 then {
sign := "-"
i := -i
}
else sign := ""
s := ""
while i > 0 do {
d := i % j
if d > 9 then d := digits[d + 1]
s := d || s
i /:= j
}
return sign || s
end
# end IPL material
procedure get_checksum(r)
# Calculates the new value of the checksum field for the
# current header block. Note that the specification say
# that, when calculating this value, the chksum field must
# be blank-filled.
sum := 0
r.chksum := " "
every field := !r
do every sum +:= ord(!field)
return sum
end
procedure write_report()
# This procedure writes out a list of filenames which were
# remapped (because they exceeded the SysV 14-char limit),
# and then notifies the user of the existence of this file.
local outtext, stbl, i
(outtext := open(fname := "mapping.report","w")) |
open(fname := "/tmp/mapping.report","w") |
stop("maptarfile: Can't find a place to put mapping.report!")
stbl := sort(filenametbl,3)
every i := 1 to *stbl -1 by 2 do {
write(outtext,left(stbl[i],35," ")," ",stbl[i+1])
}
write(&errout,"maptarfile: ",fname," contains the list of changes")
close(outtext)
return &null
end