boneill@hawk.ulowell.edu (SoftXc Coordinator) (03/26/88)
Article 27 of comp.sources.misc:
Path: brl-smoke!brl-adm!umd5!mimsy!seismo!rutgers!clyde!cbatt!cwruecmp!hal!ncoast!allbery
From: allbery@ncoast.UUCP (Brandon S. Allbery)
Newsgroups: comp.sources.misc
Subject: System V ARC bug fixes (2 of 2)
Message-ID: <265@cpsc6b.cpsc6a.ATT.COM>
Date: 16 May 87 06:06:51 GMT
Distribution: usa
Organization: AT&T (CPSC), Oakland, CA
Lines: 1342
Keywords: arc
Approved: allbery@ncoast.UUCP
[ Line eater? Baloney! There's no such animal (IS THERE?) ]
See part 1 for details...
Chris Seaman
..!ihnp4!cpsc6a!crs
---------------------------- CUT HERE ------------------------------
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create:
# arclzw.c
# arcmisc.c
# arcpack.c
# This archive created: Fri May 15 22:57:28 1987
# By: C. R. Seaman ()
export PATH; PATH=/bin:/usr/bin:$PATH
echo shar: "extracting 'arclzw.c'" '(25543 characters)'
if test -f 'arclzw.c'
then
echo shar: "will not over-write existing file 'arclzw.c'"
else
sed 's/^ X//' << \SHAR_EOF > 'arclzw.c'
X/*
X * arclzw.c 1.2
X *
X * Author: Thom Henderson
X * Original System V port: Mike Stump
X * Enhancements, Bug fixes, and cleanup: Chris Seaman
X * Date: Fri Mar 20 09:57:02 1987
X * Last Mod. 5/15/87
X *
X */
X
X/*
X * ARC - Archive utility - ARCLZW
X *
X * Version 1.88, created on 01/20/86 at 16:47:04
X *
X * (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
X *
X * Description:
X * This file contains the routines used to implement Lempel-Zev
X * data compression, which calls for building a coding table on
X * the fly. This form of compression is especially good for encoding
X * files which contain repeated strings, and can often give dramatic
X * improvements over traditional Huffman SQueezing.
X *
X * Programming notes:
X * In this section I am drawing heavily on the COMPRESS program
X * from UNIX. The basic method is taken from "A Technique for High
X * Performance Data Compression", Terry A. Welch, IEEE Computer
X * Vol 17, No 6 (June 1984), pp 8-19. Also see "Knuth's Fundamental
X * Algorithms", Donald Knuth, Vol 3, Section 6.4.
X *
X * As best as I can tell, this method works by tracing down a hash
X * table of code strings where each entry has the property:
X *
X * if <string> <char> is in the table
X * then <string> is in the table.
X */
X
X#include "arc.h"
X
X/* definitions for older style crunching */
X
X#define FALSE 0
X#define TRUE !FALSE
X#define TABSIZE 4096
X#define NO_PRED 0xFFFF
X#define EMPTY 0xFFFF
X#define NOT_FND 0xFFFF
X
X/* Changed unsigned INT to int -- is compared against EOF (-1) -- crs */
Xstatic int inbuf; /* partial input code storage */
Xstatic INT sp; /* current stack pointer */
X
Xstatic struct entry { /* string table entry format */
X char used; /* true when this entry is in use */
X unsigned INT next; /* ptr to next in collision list */
X unsigned INT predecessor; /* code for preceeding string */
X unsigned char follower; /* char following string */
X} string_tab[TABSIZE]; /* the code string table */
X
X
X/* definitions for the new dynamic Lempel-Zev crunching */
X
X#define BITS 12 /* maximum bits per code */
X#define HSIZE 5003 /* 80% occupancy */
X#define INIT_BITS 9 /* initial number of bits/code */
X
Xstatic INT n_bits; /* number of bits/code */
Xstatic INT maxcode; /* maximum code, given n_bits */
X#define MAXCODE(n) ((1<<(n)) - 1) /* maximum code calculation */
Xstatic INT maxcodemax = 1 << BITS; /* largest possible code (+1) */
X
Xstatic unsigned char buf[BITS]; /* input/output buffer */
X
Xstatic unsigned char lmask[9] = { /* left side masks */
X 0xff, 0xfe, 0xfc,
X 0xf8, 0xf0, 0xe0,
X 0xc0, 0x80, 0x00
X};
Xstatic unsigned char rmask[9] = { /* right side masks */
X 0x00, 0x01, 0x03,
X 0x07, 0x0f, 0x1f,
X 0x3f, 0x7f, 0xff
X};
X
Xstatic INT offset; /* byte offset for code output */
Xstatic long in_count; /* length of input */
Xstatic long bytes_out; /* length of compressed output */
Xstatic unsigned INT ent;
X
X/*
X * To save much memory (which we badly need at this point), we overlay
X * the table used by the previous version of Lempel-Zev with those used
X * by the new version. Since no two of these routines will be used
X * together, we can safely do this. Note that the tables used for Huffman
X * squeezing may NOT overlay these, since squeezing and crunching are done
X * in parallel.
X */
X
Xstatic long htab[HSIZE]; /* hash code table (crunch) */
Xstatic unsigned INT codetab[HSIZE]; /* string code table (crunch) */
X
Xstatic unsigned INT *prefix = codetab; /* prefix code table (uncrunch) */
X
Xstatic unsigned char suffix[HSIZE]; /* suffix table (uncrunch) */
Xstatic INT free_ent; /* first unused entry */
Xstatic INT firstcmp; /* true at start of compression */
Xstatic unsigned char stack[HSIZE]; /* local push/pop stack */
X
X/*
X * block compression parameters -- after all codes are used up,
X * and compression rate changes, start over.
X */
X
Xstatic INT clear_flg;
Xstatic long ratio;
X#define CHECK_GAP 10000 /* ratio check interval */
Xstatic long checkpoint;
X
X/*
X * the next two codes should not be changed lightly, as they must not
X * lie within the contiguous general code space.
X */
X
X#define FIRST 257 /* first free entry */
X#define CLEAR 256 /* table clear output code */
X
Xstatic INT cl_block(t) /* table clear for block compress */
XFILE *t; /* our output file */
X{
X long rat;
X INT putcode();
X
X checkpoint = in_count + CHECK_GAP;
X
X if (in_count > 0x007fffff) /* shift will overflow */
X {
X rat = bytes_out >> 8;
X if (rat == 0) /* Don't divide by zero */
X rat = 0x7fffffff;
X else
X rat = in_count / rat;
X }
X else
X rat = (in_count<<8)/bytes_out;/* 8 fractional bits */
X
X if (rat > ratio)
X ratio = rat;
X else
X {
X ratio = 0;
X setmem(htab,HSIZE*sizeof(long),0xff);
X free_ent = FIRST;
X clear_flg = 1;
X putcode(CLEAR,t);
X }
X}
X
X/*
X * Output a given code.
X * Inputs:
X * code: A n_bits-bit integer. If == -1, then EOF. This assumes
X * that n_bits =< (long)wordsize - 1.
X * Outputs:
X * Outputs code to the file.
X * Assumptions:
X * Chars are 8 bits long.
X * Algorithm:
X * Maintain a BITS character long buffer (so that 8 codes will
X * fit in it exactly). When the buffer fills up empty it and start over.
X */
X
Xstatic INT putcode(code,t) /* output a code */
XINT code; /* code to output */
XFILE *t; /* where to put it */
X{
X INT r_off = offset; /* right offset */
X INT bits = n_bits; /* bits to go */
X unsigned char *bp = buf; /* buffer pointer */
X INT n; /* index */
X
X if (code >= 0) /* if a real code */
X {
X bp += (r_off >> 3); /* Get to the first byte. */
X r_off &= 7;
X
X /*
X * Since code is always >= 8 bits, only need to mask the first
X * hunk on the left.
X */
X
X *bp = (*bp&rmask[r_off]) | (code<<r_off) & lmask[r_off];
X bp++;
X bits -= (8 - r_off);
X code >>= (8 - r_off);
X
X /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
X
X if (bits >= 8)
X {
X *bp++ = code;
X code >>= 8;
X bits -= 8;
X }
X
X /* Last bits. */
X
X if (bits)
X *bp = code;
X
X offset += n_bits;
X
X if (offset == (n_bits << 3))
X {
X bp = buf;
X bits = n_bits;
X bytes_out += bits;
X do
X putc_pak(*bp++,t);
X while (--bits);
X offset = 0;
X }
X
X /*
X * If the next entry is going to be too big for the code size,
X * then increase it, if possible.
X */
X
X if (free_ent>maxcode || clear_flg>0)
X {
X /*
X * Write the whole buffer, because the input side won't
X * discover the size increase until after it has read it.
X */
X
X if (offset > 0)
X {
X bp = buf; /* reset pointer for writing */
X bytes_out += n = n_bits;
X while (n--)
X putc_pak(*bp++,t);
X }
X offset = 0;
X
X if (clear_flg) /* reset if clearing */
X {
X maxcode = MAXCODE(n_bits = INIT_BITS);
X clear_flg = 0;
X }
X else /* else use more bits */
X {
X n_bits++;
X if (n_bits == BITS)
X maxcode = maxcodemax;
X else
X maxcode = MAXCODE(n_bits);
X }
X }
X }
X else /* dump the buffer on EOF */
X {
X bytes_out += n = (offset+7) / 8;
X
X if (offset > 0)
X while (n--)
X putc_pak(*bp++,t);
X offset = 0;
X }
X}
X
X/*
X * Read one code from the standard input. If EOF, return -1.
X * Inputs:
X * cmpin
X * Outputs:
X * code or -1 is returned.
X */
X
Xstatic INT getcode(f) /* get a code */
XFILE *f; /* file to get from */
X{
X INT code;
X static INT offset = 0, size = 0;
X INT r_off, bits;
X unsigned char *bp = buf;
X
X if (clear_flg > 0 || offset >= size || free_ent > maxcode)
X {
X /*
X * If the next entry will be too big for the current code
X * size, then we must increase the size. This implies reading
X * a new buffer full, too.
X */
X
X if (free_ent > maxcode)
X {
X n_bits++;
X if (n_bits == BITS)
X maxcode = maxcodemax; /* won't get any bigger now */
X else
X maxcode = MAXCODE(n_bits);
X }
X if (clear_flg > 0)
X {
X maxcode = MAXCODE(n_bits = INIT_BITS);
X clear_flg = 0;
X }
X
X for (size=0; size<n_bits; size++)
X {
X if ((code=getc_unp(f))==EOF)
X break;
X else
X buf[size] = code;
X }
X if (size <= 0)
X return(-1); /* end of file */
X
X offset = 0;
X
X /* Round size down to integral number of codes */
X
X size = (size << 3)-(n_bits - 1);
X }
X r_off = offset;
X bits = n_bits;
X
X /* Get to the first byte. */
X
X bp +=(r_off >> 3);
X r_off &= 7;
X
X /* Get first part (low order bits) */
X
X code = (*bp++ >> r_off);
X bits -= 8 - r_off;
X r_off = 8 - r_off; /* now, offset into code word */
X
X /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
X
X if (bits >= 8)
X {
X code |= *bp++ << r_off;
X r_off += 8;
X bits -= 8;
X }
X
X /* high order bits. */
X
X code |= (*bp & rmask[bits]) << r_off;
X offset += n_bits;
X
X return(code);
X}
X
X/*
X * compress a file
X *
X * Algorithm: use open addressing double hashing (no chaining) on the
X * prefix code / next character combination. We do a variant of Knuth's
X * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
X * secondary probe. Here, the modular division first probe is gives way
X * to a faster exclusive-or manipulation. Also do block compression with
X * an adaptive reset, where the code table is cleared when the compression
X * ratio decreases, but after the table fills. The variable-length output
X * codes are re-sized at this point, and a special CLEAR code is generated
X * for the decompressor.
X */
X
XINT init_cm(f,t) /* initialize for compression */
XFILE *f; /* file we will be compressing */
XFILE *t; /* where we will put it */
X{
X offset = 0;
X bytes_out = 1;
X clear_flg = 0;
X ratio = 0;
X in_count = 1;
X checkpoint = CHECK_GAP;
X maxcode = MAXCODE(n_bits = INIT_BITS);
X free_ent = FIRST;
X setmem(htab,HSIZE*sizeof(long),0xff);
X n_bits = INIT_BITS; /* set starting code size */
X
X putc_pak(BITS,t); /* note our max code length */
X
X firstcmp = 1; /* next byte will be first */
X}
X
XINT putc_cm(c,t) /* compress a character */
Xunsigned char c; /* character to compress */
XFILE *t; /* where to put it */
X{
X static long fcode;
X static INT hshift;
X INT i;
X INT disp;
X
X if (firstcmp) /* special case for first byte */
X {
X ent = c; /* remember first byte */
X
X hshift = 0;
X for (fcode=(long)HSIZE; fcode<65536L; fcode*=2L)
X hshift++;
X hshift = 8 - hshift; /* set hash code range bund */
X
X firstcmp = 0; /* no longer first */
X return;
X }
X
X in_count++;
X fcode =(long)(((long)c << BITS)+ent);
X i = (c<<hshift)^ent; /* xor hashing */
X
X if (htab[i]==fcode)
X {
X ent = codetab[i];
X return;
X }
X else if (htab[i]<0) /* empty slot */
X goto nomatch;
X disp = HSIZE - i; /* secondary hash (after G.Knott) */
X if (i == 0)
X disp = 1;
X
Xprobe:
X if ((i -= disp) < 0)
X i += HSIZE;
X
X if (htab[i] == fcode)
X {
X ent = codetab[i];
X return;
X }
X if (htab[i] > 0)
X goto probe;
X
Xnomatch:
X putcode(ent,t);
X ent = c;
X if (free_ent < maxcodemax)
X {
X codetab[i] = free_ent++; /* code -> hashtable */
X htab[i] = fcode;
X }
X else if ((long)in_count >= checkpoint)
X cl_block(t);
X}
X
Xlong pred_cm(t) /* finish compressing a file */
XFILE *t; /* where to put it */
X{
X putcode(ent,t); /* put out the final code */
X putcode(-1,t); /* tell output we are done */
X
X return(bytes_out); /* say how big it got */
X}
X
X/*
X * Decompress a file. This routine adapts to the codes in the file
X * building the string table on-the-fly; requiring no table to be stored
X * in the compressed file. The tables used herein are shared with those of
X * the compress() routine. See the definitions above.
X */
X
XINT decomp(f,t) /* decompress a file */
XFILE *f; /* file to read codes from */
XFILE *t; /* file to write text to */
X{
X unsigned char *stackp;
X INT finchar;
X INT code, oldcode, incode;
X
X if ((code=getc_unp(f))!=BITS)
X abort("File packed with %d bits, I can only handle %d",code,BITS);
X
X n_bits = INIT_BITS; /* set starting code size */
X clear_flg = 0;
X
X /* As above, initialize the first 256 entries in the table. */
X
X maxcode = MAXCODE(n_bits=INIT_BITS);
X for (code = 255; code >= 0; code--)
X {
X prefix[code] = 0;
X suffix[code] = (unsigned char)code;
X }
X free_ent = FIRST;
X
X finchar = oldcode = getcode(f);
X if (oldcode == -1) /* EOF already? */
X return; /* Get out of here */
X putc_ncr((char)finchar,t); /* first code must be 8 bits=char */
X stackp = stack;
X
X while ((code = getcode(f))> -1)
X {
X if (code==CLEAR)
X {
X for (code = 255; code >= 0; code--)
X prefix[code] = 0;
X clear_flg = 1;
X free_ent = FIRST - 1;
X if ((code=getcode(f))==-1) /* O, untimely death! */
X break;
X }
X incode = code;
X
X /* Special case for KwKwK string. */
X
X if (code >= free_ent)
X {
X *stackp++ = finchar;
X code = oldcode;
X }
X
X /* Generate output characters in reverse order */
X
X while (code >= 256)
X {
X *stackp++ = suffix[code];
X code = prefix[code];
X }
X *stackp++ = finchar = suffix[code];
X
X /* And put them out in forward order */
X
X do
X putc_ncr(*--stackp,t);
X while (stackp > stack);
X
X /* Generate the new entry. */
X
X if ((code=free_ent) < maxcodemax)
X {
X prefix[code] = (unsigned short)oldcode;
X suffix[code] = finchar;
X free_ent = code+1;
X }
X
X /* Remember previous code. */
X
X oldcode = incode;
X }
X}
X
X
X/*
X * Please note how much trouble it can be to maintain upwards
X * compatibility. All that follows is for the sole purpose of unpacking
X * files which were packed using an older method.
X */
X
X
X/*
X * The h() pointer points to the routine to use for calculating a hash
X * value. It is set in the init routines to point to either of oldh()
X * or newh().
X *
X * oldh() calculates a hash value by taking the middle twelve bits
X * of the square of the key.
X *
X * newh() works somewhat differently, and was tried because it makes
X * ARC about 23% faster. This approach was abandoned because dynamic
X * Lempel-Zev (above) works as well, and packs smaller also. However,
X * inadvertent release of a developmental copy forces us to leave this in.
X */
X
Xstatic unsigned INT (*h)(); /* pointer to hash function */
X
Xstatic unsigned INT oldh(pred,foll) /* old hash function */
Xunsigned INT pred; /* code for preceeding string */
Xunsigned char foll; /* value of following char */
X{
X long local; /* local hash value */
X
X local = (pred + foll) | 0x0800; /* create the hash key */
X local *= local; /* square it */
X return((local >> 6) & 0x0FFF); /* return the middle 12 bits */
X}
X
Xstatic unsigned INT newh(pred,foll) /* new hash function */
Xunsigned INT pred; /* code for preceeding string */
Xunsigned char foll; /* value of following char */
X{
X return(((pred+foll)*15073)&0xFFF); /* faster hash */
X}
X
X/*
X * The eolist() function is used to trace down a list of entries with
X * duplicate keys until the last duplicate is found.
X */
X
Xstatic unsigned INT eolist(index) /* find last duplicate */
Xunsigned INT index;
X{
X INT temp;
X
X while (temp=string_tab[index].next) /* while more duplicates */
X index = temp;
X
X return(index);
X}
X
X/*
X * The hash() routine is used to find a spot in the hash table for a new
X * entry. It performs a "hash and linear probe" lookup, using h() to
X * calculate the starting hash value and eolist() to perform the linear
X * probe. This routine DOES NOT detect a table full condition. That
X * MUST be checked for elsewhere.
X */
X
Xstatic unsigned INT hash(pred,foll) /* find spot in the string table */
Xunsigned INT pred; /* code for preceeding string */
Xunsigned char foll; /* char following string */
X{
X unsigned INT local, tempnext; /* scratch storage */
X struct entry *ep; /* allows faster table handling */
X
X local = (*h)(pred,foll); /* get initial hash value */
X
X if (!string_tab[local].used) /* if that spot is free */
X return(local); /* then that's all we need */
X else /* else a collision has occured */
X {
X local = eolist(local); /* move to last duplicate */
X
X /*
X * We must find an empty spot. We start looking 101 places
X * down the table from the last duplicate.
X */
X
X tempnext = (local+101) & 0x0FFF;
X ep = &string_tab[tempnext]; /* initialize pointer */
X
X while (ep->used) /* while empty spot not found */
X {
X if (++tempnext==TABSIZE) /* if we are at the end */
X {
X tempnext = 0; /* wrap to beginning of table*/
X ep = string_tab;
X }
X else
X ++ep; /* point to next element in table */
X }
X
X /*
X * local still has the pointer to the last duplicate, while
X * tempnext has the pointer to the spot we found. We use
X * this to maintain the chain of pointers to duplicates.
X */
X
X string_tab[local].next = tempnext;
X
X return(tempnext);
X }
X}
X
X/*
X * The init_tab() routine is used to initialize our hash table.
X * You realize, of course, that "initialize" is a complete misnomer.
X */
X
Xstatic INT init_tab() /* set ground state in hash table */
X{
X unsigned INT i; /* table index */
X INT upd_tab();
X
X setmem((char *)string_tab,sizeof(string_tab),0);
X
X for (i=0; i<256; i++) /* list all single byte strings */
X upd_tab(NO_PRED,i);
X
X inbuf = EMPTY; /* nothing is in our buffer */
X}
X
X/*
X * The upd_tab routine is used to add a new entry to the string table.
X * As previously stated, no checks are made to ensure that the table
X * has any room. This must be done elsewhere.
X */
X
XINT upd_tab(pred,foll) /* add an entry to the table */
Xunsigned INT pred; /* code for preceeding string */
Xunsigned INT foll; /* character which follows string */
X{
X struct entry *ep; /* pointer to current entry */
X
X /* calculate offset just once */
X
X ep = &string_tab[hash(pred,foll)];
X
X ep->used = TRUE; /* this spot is now in use */
X ep->next = 0; /* no duplicates after this yet */
X ep->predecessor = pred; /* note code of preceeding string */
X ep->follower = foll; /* note char after string */
X}
X
X/*
X * This algorithm encoded a file into twelve bit strings (three nybbles).
X * The gocode() routine is used to read these strings a byte (or two)
X * at a time.
X */
X
Xstatic INT gocode(fd) /* read in a twelve bit code */
XFILE *fd; /* file to get code from */
X{
X /* Changed unsigned INT to int -- is compared against EOF (-1) -- crs */
X int localbuf, returnval;
X
X if (inbuf==EMPTY) /* if on a code boundary */
X {
X if ((localbuf=getc_unp(fd))==EOF) /* get start of next code */
X return(EOF); /* pass back end of file status */
X localbuf &= 0xFF; /* mask down to true byte value */
X if ((inbuf=getc_unp(fd))==EOF) /* get end of code, start of next */
X return(EOF); /* this should never happen */
X inbuf &= 0xFF; /* mask down to true byte value */
X
X returnval = ((localbuf<<4)&0xFF0) + ((inbuf>>4)&0x00F);
X inbuf &= 0x000F; /* leave partial code pending */
X }
X else /* buffer contains first nybble */
X {
X if ((localbuf=getc_unp(fd))==EOF)
X return(EOF);
X localbuf &= 0xFF;
X
X returnval = localbuf + ((inbuf<<8)&0xF00);
X inbuf = EMPTY; /* note no hanging nybbles */
X }
X return(returnval); /* pass back assembled code */
X}
X
Xstatic INT push(c) /* push char onto stack */
XINT c; /* character to push */
X{
X stack[sp] = ((char) c); /* coerce integer into a char */
X
X if (++sp >= TABSIZE)
X abort("Stack overflow\n");
X}
X
Xstatic INT pop() /* pop character from stack */
X{
X if (sp>0)
X return(((INT) stack[--sp])); /* leave ptr at next empty slot */
X else
X return(EMPTY);
X}
X
X/***** LEMPEL-ZEV DECOMPRESSION *****/
X
Xstatic INT code_count; /* needed to detect table full */
Xstatic INT firstc; /* true only on first character */
X
XINT init_ucr(new) /* get set for uncrunching */
XINT new; /* true to use new hash function */
X{
X if (new) /* set proper hash function */
X h = newh;
X else
X h = oldh;
X
X sp = 0; /* clear out the stack */
X init_tab(); /* set up atomic code definitions */
X code_count = TABSIZE - 256; /* note space left in table */
X firstc = 1; /* true only on first code */
X}
X
XINT getc_ucr(f) /* get next uncrunched byte */
XFILE *f; /* file containing crunched data */
X{
X INT code, newcode;
X static INT oldcode, finchar;
X struct entry *ep; /* allows faster table handling */
X
X if (firstc) /* first code is always known */
X {
X firstc = FALSE; /* but next will not be first */
X oldcode = gocode(f);
X return(finchar = string_tab[oldcode].follower);
X }
X
X if (!sp) /* if stack is empty */
X {
X if ((code=newcode=gocode(f))==EOF)
X return(EOF);
X
X ep = &string_tab[code]; /* initialize pointer */
X
X if (!ep->used) /* if code isn't known */
X {
X code = oldcode;
X ep = &string_tab[code]; /* re-initialize pointer */
X push(finchar);
X }
X
X while (ep->predecessor!=NO_PRED)
X {
X push(ep->follower); /* decode string backwards */
X code = ep->predecessor;
X ep = &string_tab[code];
X }
X
X push(finchar=ep->follower); /* save first character also */
X
X /*
X * The above loop will terminate, one way or another,
X * with string_tab[code].follower equal to the first
X * character in the string.
X */
X
X if (code_count) /* if room left in string table */
X {
X upd_tab(oldcode,finchar);
X --code_count;
X }
X
X oldcode = newcode;
X }
X
X return(pop()); /* return saved character */
X}
SHAR_EOF
if test 25543 -ne "`wc -c < 'arclzw.c'`"
then
echo shar: "error transmitting 'arclzw.c'" '(should have been 25543 characters)'
fi
fi
echo shar: "extracting 'arcmisc.c'" '(5966 characters)'
if test -f 'arcmisc.c'
then
echo shar: "will not over-write existing file 'arcmisc.c'"
else
sed 's/^ X//' << \SHAR_EOF > 'arcmisc.c'
X/*
X * arcmisc.c 1.2
X *
X * Author: Thom Henderson
X * Original System V port: Mike Stump
X * Enhancements, Bug fixes, and cleanup: Chris Seaman
X * Date: Fri Mar 20 09:57:02 1987
X * Last Mod. 5/15/87
X *
X */
X
X/*
X * ARC - Archive utility - ARCMISC
X *
X * Description:
X * This file contains miscellaneous routines for string
X * management, file management, and program control.
X */
X
X#include "arc.h"
X#include <ctype.h>
X
XINT rempath(nargs,arg) /* remove paths from filenames */
XINT nargs; /* number of names */
Xchar *arg[]; /* pointers to names */
X{
X char *i, *strrchr(); /* string index, reverse indexer */
X INT n; /* index */
X
X for(n=0; n<nargs; n++) /* for each supplied name */
X {
X i=strrchr(arg[n],'/'); /* search for end of path */
X if(i) /* if path was found */
X arg[n] = i+1; /* then skip it */
X }
X}
X
X/* make a file name using a template */
Xmakefnam(rawfn,template,result)
Xchar *rawfn; /* the original file name */
Xchar *template; /* the template data */
Xchar *result; /* where to place the result */
X{
X char *arc_ext = ".arc"; /* possible existing extension */
X char *i, *strrchr(); /* string indexing stuff */
X
X setmem(result, STRLEN, '\0'); /* need to zero out result for strncpy */
X
X if (i = strrchr(rawfn,'.')) /* strip 'arc' extension from filename */
X if (strcmp(i,arc_ext) == 0) *i = '\0';
X
X strncpy(result,rawfn,10); /* rebuild it using supplied template */
X strcat(result,template);
X return;
X}
X
X/* convert a string to upper case */
Xupper(s)
Xchar *s;
X{
X while (*s)
X {
X *s = toupper(*s);
X ++s;
X }
X}
X
Xsetmem(dest,size,c)
Xchar *dest,c;
XINT size;
X{
X int i;
X
X for (i = 0; i < size; dest[i] = c, i++);
X}
X
Xabort(s,arg1,arg2,arg3) /* something went wrong...QUIT!! */
Xchar *s;
X{
X char tempname1[STRLEN], tempname2[STRLEN];
X
X sprintf(tempname1,"%s.crn",arctemp);
X sprintf(tempname2,"%s.cvt",arctemp);
X
X unlink(bakname); /* remove all possible temp files */
X unlink(newname);
X unlink(tempname1);
X unlink(tempname2);
X
X fprintf(stderr,"arc: "); /* explain things to the user */
X fprintf(stderr,s,arg1,arg2,arg3);
X fprintf(stderr,"\n");
X exit(1); /* quit */
X}
X
Xrename(o, n)
Xchar *o, *n;
X{
X return(link(o, n) || unlink(o));
X}
X
Xmakenames(rawfn)
Xchar *rawfn;
X{
X char pathtemp[STRLEN]; /* temporary path holder */
X char nametemp[STRLEN]; /* temporary arcname holder */
X char *buf; /* temporary pointer */
X char *i, *strrchr(); /* string indexing junk */
X long getpid(); /* process id function */
X
X strcpy(pathtemp,rawfn);
X if (i = strrchr(buf=rawfn,'/')) /* if names are part of paths */
X { /* lots to do */
X buf=i+1;
X pathtemp[strlen(rawfn)-strlen(buf)]='\0';
X
X makefnam(buf,".arc",nametemp); /* make 'arcname' */
X sprintf(arcname,"%s%s",pathtemp,nametemp);
X
X makefnam(buf,".bak",nametemp); /* make 'bakname' */
X sprintf(bakname,"%s%s",pathtemp,nametemp);
X
X sprintf(arctemp,"%s.Arc%ld",pathtemp,getpid());
X }
X else /* not so much to do */
X {
X makefnam(rawfn,".arc",arcname);
X makefnam(rawfn,".bak",bakname);
X
X sprintf(arctemp,".Arc%ld",getpid());
X }
X sprintf(newname,"%s.arc",arctemp);
X}
X
Xonintr() /* SIGNAL was caught */
X{
X abort("User Requested Abort");
X}
X
X/*
X * This function sorts the command line file arguments. Needed since
X * the add, update, etc., function does no sorting, and could result in
X * multiple archive entries for the same file name.
X */
Xsortarg(num,arg) /* sort argument list, remove dups */
Xint num;
Xchar *arg[];
X{
X char *temp; /* temporary pointer */
X INT top, seek; /* placeholders for sorting */
X INT dups = 0; /* how many duplicates are there */
X char *strrchr(), *i; /* string indexing stuff */
X char *buf1, *buf2; /* pointers for strcmp to use */
X
X /* sort the arguments, ignoring pathnames */
X
X for (top = 0;top < num-1;top++)
X for (seek = top+1;seek<num;seek++)
X {
X buf1 = arg[top];
X buf2 = arg[seek];
X if (i = strrchr(arg[top],'/')) buf1 = i + 1;
X if (i = strrchr(arg[seek],'/')) buf2 = i + 1;
X if (strcmp(buf1,buf2) > 0)
X {
X temp = arg[top];
X arg[top] = arg[seek];
X arg[seek] = temp;
X }
X }
X
X /* find any occurences of 'arcname', and remove them */
X
X for (top = 0;top < num;top++)
X while (strcmp(arg[top],arcname) == 0)
X {
X for (seek = top; seek < num;seek++)
X arg[seek] = arg[seek + 1];
X arg[--num] = '\0';
X dups++;
X }
X
X /* find any other duplicate arguments (ignoring pathnames), */
X /* and remove the second and subsequent occurences */
X
X for (top = 0;top < num-1;top++)
X {
X buf1 = arg[top];
X buf2 = arg[top + 1];
X if (i = strrchr(arg[top],'/')) buf1 = i + 1;
X if (i = strrchr(arg[top + 1],'/')) buf2 = i + 1;
X while (strcmp(buf1,buf2) == 0)
X {
X for (seek = top + 1;seek < num;seek++)
X arg[seek] = arg[seek + 1];
X arg[--num] = '\0';
X buf2 = arg[top + 1];
X if (i = strrchr(arg[top + 1],'/')) buf2 = i + 1;
X dups++;
X }
X }
X return(dups); /* tell main() how many we found */
X}
SHAR_EOF
if test 5966 -ne "`wc -c < 'arcmisc.c'`"
then
echo shar: "error transmitting 'arcmisc.c'" '(should have been 5966 characters)'
fi
fi
echo shar: "extracting 'arcpack.c'" '(9147 characters)'
if test -f 'arcpack.c'
then
echo shar: "will not over-write existing file 'arcpack.c'"
else
sed 's/^ X//' << \SHAR_EOF > 'arcpack.c'
X/*
X * arcpack.c 1.2
X *
X * Author: Thom Henderson
X * Original System V port: Mike Stump
X * Enhancements, Bug fixes, and cleanup: Chris Seaman
X * Date: Fri Mar 20 09:57:02 1987
X * Last Mod. 5/15/87
X *
X */
X
X/*
X * ARC - Archive utility - ARCPACK
X *
X * Version 3.37, created on 02/03/86 at 22:58:01
X *
X * (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
X *
X * Description:
X * This file contains the routines used to compress a file
X * when placing it in an archive.
X */
X
X#include "arc.h"
X
X/* stuff for non-repeat packing */
X
X#define DLE 0x90 /* repeat sequence marker */
X
Xstatic unsigned char state; /* current packing state */
X
X/* non-repeat packing states */
X
X#define NOHIST 0 /* don't consider previous input*/
X#define SENTCHAR 1 /* lastchar set, no lookahead yet */
X#define SENDNEWC 2 /* run over, send new char next */
X#define SENDCNT 3 /* newchar set, send count next */
X
X/* packing results */
X
Xstatic long stdlen; /* length for standard packing */
Xstatic INT crcval; /* CRC check value */
X
XINT pack(f,t,hdr) /* pack file into an archive */
XFILE *f, *t; /* source, destination */
Xstruct heads *hdr; /* pointer to header data */
X{
X INT c; /* one character of stream */
X long ncrlen; /* length after packing */
X long huflen; /* length after squeezing */
X long lzwlen; /* length after crunching */
X long pred_sq(), file_sq(); /* stuff for squeezing */
X long pred_cm(); /* dynamic crunching cleanup */
X char tnam[STRLEN]; /* temporary name buffer */
X FILE *crn = NULL; /* temporary crunch file */
X INT getch();
X INT getc_ncr();
X INT putc_pak();
X
X /* first pass - see which method is best */
X
X if (!nocomp) /* if storage kludge not active */
X {
X if (note)
X {
X printf(" analyzing, ");
X fflush(stdout);
X }
X
X sprintf(tnam,"%s.crn",arctemp);
X crn = fopen(tnam,"w+");
X state = NOHIST; /* initialize ncr packing */
X stdlen = ncrlen = 0; /* reset size counters */
X crcval = 0; /* initialize CRC check value */
X setcode(); /* initialize encryption */
X
X init_cm(f,crn); /* initialize for crunching */
X init_sq(); /* initialize for squeeze scan */
X
X while ((c=getc_ncr(f))!=EOF) /* for each byte of file */
X {
X ncrlen++; /* one more packed byte */
X scan_sq(c); /* see what squeezing can do */
X putc_cm(c,crn); /* see what crunching can do */
X }
X huflen = pred_sq(); /* finish up after squeezing */
X lzwlen = pred_cm(crn); /* finish up after crunching */
X }
X else /* else kludge the method */
X {
X stdlen = 0; /* make standard look best */
X ncrlen = huflen = lzwlen = 1;
X }
X
X /* standard set-ups common to all methods */
X
X fseek(f,0L,0); /* rewind input */
X hdr->crc = crcval; /* note CRC check value */
X hdr->length = stdlen; /* set actual file length */
X state = NOHIST; /* reinitialize ncr packing */
X setcode(); /* reinitialize encryption */
X
X /* choose and use the shortest method */
X
X if (stdlen<=ncrlen && stdlen<=huflen && stdlen<=lzwlen)
X {
X if (note) /* store w/out compression */
X {
X printf("storing, ");
X fflush(stdout);
X }
X hdrver = 2; /* note packing method */
X stdlen = crcval = 0; /* recalc these for kludge */
X while ((c=getch(f))!=EOF) /* store it straight */
X putc_pak(c,t);
X hdr->crc = crcval;
X hdr->length = hdr->size = stdlen;
X }
X else if (ncrlen<huflen && ncrlen<lzwlen)
X {
X if (note) /* pack w/repeat suppression */
X {
X printf("packing, ");
X fflush(stdout);
X }
X hdrver = 3; /* note packing method */
X hdr->size = ncrlen; /* set data length */
X while ((c=getc_ncr(f))!=EOF)
X putc_pak(c,t);
X }
X else if (huflen<lzwlen)
X {
X if (note)
X {
X printf("squeezing, ");
X fflush(stdout);
X }
X hdrver = 4; /* note packing method */
X hdr->size = file_sq(f,t); /* note final size */
X }
X else
X {
X if (note)
X {
X printf("crunching, ");
X fflush(stdout);
X }
X hdrver = 8;
X hdr->size = lzwlen; /* size should not change */
X if (crn) /* if temp was created */
X {
X fseek(crn,0L,0); /* then copy over crunched temp */
X while ((c=fgetc(crn))!=EOF)
X putc_tst(c,t);
X }
X else /* else re-crunch */
X {
X init_cm(f,t);
X while ((c=getc_ncr(f))!=EOF)
X putc_cm(c,t);
X pred_cm(t); /* finish up after crunching */
X }
X }
X
X /* standard cleanups common to all methods */
X
X if (crn) /* get rid of crunch temporary */
X {
X fclose(crn);
X if (unlink(tnam) && warn)
X {
X printf("Cannot delete temporary file %s\n",tnam);
X nerrs++;
X }
X }
X if (note)
X printf("done.\n");
X}
X
X/*
X * Non-repeat compression - text is passed through normally, except that
X * a run of more than two is encoded as:
X *
X * <char> <DLE> <count>
X *
X * Special case: a count of zero indicates that the DLE is really a DLE,
X * not a repeat marker.
X */
X
XINT getc_ncr(f) /* get bytes with collapsed runs */
XFILE *f; /* file to get from */
X{
X static INT lastc; /* value returned on last call */
X static INT repcnt; /* repetition counter */
X static INT c; /* latest value seen */
X
X switch(state) /* depends on our state */
X {
X case NOHIST: /* no relevant history */
X state = SENTCHAR;
X return(lastc = getch(f)); /* remember the value next time */
X case SENTCHAR: /* char was sent. look ahead */
X switch(lastc) /* action depends on char */
X {
X case DLE: /* if we sent a real DLE */
X state = NOHIST; /* then start over again */
X return(0); /* but note that the DLE was real */
X case EOF: /* EOF is always a special case */
X return(EOF);
X default: /* else test for a repeat */
X for (repcnt=1; (c=getch(f))==lastc && repcnt<255; repcnt++)
X ; /* find end of run */
X switch(repcnt) /* action depends on run size */
X {
X case 1: /* not a repeat */
X return(lastc = c); /* but remember value next time */
X case 2: /* a repeat, but too short */
X state = SENDNEWC; /* send the second one next time */
X return(lastc);
X default: /* a run - compress it */
X state = SENDCNT; /* send repeat count next time */
X return(DLE); /* send repeat marker this time */
X }
X }
X case SENDNEWC: /* send second char of short run */
X state = SENTCHAR;
X return(lastc = c);
X case SENDCNT: /* sent DLE, now send count */
X state = SENDNEWC;
X return(repcnt);
X default:
X abort("Bug - bad ncr state\n");
X }
X}
X
Xstatic INT getch(f) /* special get char for packing */
XFILE *f; /* file to get from */
X{
X INT c; /* a char from the file */
X
X if ((c=fgetc(f))!=EOF) /* if not the end of file */
X {
X crcval = addcrc(crcval,c); /* then update CRC check value */
X stdlen++; /* and bump length counter */
X }
X
X return(c);
X}
X
XINT putc_pak(c,f) /* put a packed byte into archive */
Xchar c; /* byte to put */
XFILE *f; /* archive to put it in */
X{
X putc_tst(code(c),f); /* put encoded byte, with checks */
X}
SHAR_EOF
if test 9147 -ne "`wc -c < 'arcpack.c'`"
then
echo shar: "error transmitting 'arcpack.c'" '(should have been 9147 characters)'
fi
fi
exit 0
# End of shell archive
--
Brandon S. Allbery {decvax,cbatt,cbosgd}!cwruecmp!ncoast!allbery
Tridelta Industries {ames,mit-eddie,talcott}!necntc!ncoast!allbery
7350 Corporate Blvd. necntc!ncoast!allbery@harvard.HARVARD.EDU
Mentor, OH 44060 +01 216 255 1080 (also eddie.MIT.EDU)