[comp.binaries.ibm.pc] ARC for SYS V: Part 5 of 5

boneill@hawk.ulowell.edu (SoftXc Coordinator) (03/26/88)
Article 27 of comp.sources.misc:
Path: brl-smoke!brl-adm!umd5!mimsy!seismo!rutgers!clyde!cbatt!cwruecmp!hal!ncoast!allbery
From: allbery@ncoast.UUCP (Brandon S. Allbery)
Newsgroups: comp.sources.misc
Subject: System V ARC bug fixes (2 of 2)
Message-ID: <265@cpsc6b.cpsc6a.ATT.COM>
Date: 16 May 87 06:06:51 GMT
Distribution: usa
Organization: AT&T (CPSC), Oakland, CA
Lines: 1342
Keywords: arc
Approved: allbery@ncoast.UUCP


[ Line eater?  Baloney!  There's no such animal (IS THERE?) ]

See part 1 for details...

Chris Seaman
..!ihnp4!cpsc6a!crs

---------------------------- CUT HERE ------------------------------
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create:
#	arclzw.c
#	arcmisc.c
#	arcpack.c
# This archive created: Fri May 15 22:57:28 1987
# By:	C. R. Seaman ()
export PATH; PATH=/bin:/usr/bin:$PATH
echo shar: "extracting 'arclzw.c'" '(25543 characters)'
if test -f 'arclzw.c'
then
	echo shar: "will not over-write existing file 'arclzw.c'"
else
sed 's/^	X//' << \SHAR_EOF > 'arclzw.c'
	X/*
	X *	arclzw.c	1.2
	X *
	X *	Author: Thom Henderson
	X *	Original System V port: Mike Stump
	X *	Enhancements, Bug fixes, and cleanup: Chris Seaman
	X *	Date: Fri Mar 20 09:57:02 1987
	X *	Last Mod.	5/15/87
	X *
	X */
	X
	X/*
	X * ARC - Archive utility - ARCLZW
	X * 
	X * Version 1.88, created on 01/20/86 at 16:47:04
	X * 
	X * (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
	X * 
	X *     Description:
	X *          This file contains the routines used to implement Lempel-Zev
	X *          data compression, which calls for building a coding table on
	X *          the fly.  This form of compression is especially good for encoding
	X *          files which contain repeated strings, and can often give dramatic
	X *          improvements over traditional Huffman SQueezing.
	X * 
	X *     Programming notes:
	X *          In this section I am drawing heavily on the COMPRESS program
	X *          from UNIX.  The basic method is taken from "A Technique for High
	X *          Performance Data Compression", Terry A. Welch, IEEE Computer
	X *          Vol 17, No 6 (June 1984), pp 8-19.  Also see "Knuth's Fundamental
	X *          Algorithms", Donald Knuth, Vol 3, Section 6.4.
	X * 
	X *          As best as I can tell, this method works by tracing down a hash
	X *          table of code strings where each entry has the property:
	X * 
	X *               if <string> <char> is in the table
	X *               then <string> is in the table.
	X */
	X
	X#include "arc.h"
	X
	X/* definitions for older style crunching */
	X
	X#define FALSE    0
	X#define TRUE     !FALSE
	X#define TABSIZE  4096
	X#define NO_PRED  0xFFFF
	X#define EMPTY    0xFFFF
	X#define NOT_FND  0xFFFF
	X
	X/* Changed unsigned INT to int -- is compared against EOF (-1) -- crs */
	Xstatic int inbuf;                      /* partial input code storage */
	Xstatic INT sp;                         /* current stack pointer */
	X
	Xstatic struct entry {                  /* string table entry format */
	X    char used;                         /* true when this entry is in use */
	X    unsigned INT next;                 /* ptr to next in collision list */
	X    unsigned INT predecessor;          /* code for preceeding string */
	X    unsigned char follower;            /* char following string */
	X}   string_tab[TABSIZE];               /* the code string table */
	X
	X
	X/* definitions for the new dynamic Lempel-Zev crunching */
	X
	X#define BITS   12                      /* maximum bits per code */
	X#define HSIZE  5003                    /* 80% occupancy */
	X#define INIT_BITS 9                    /* initial number of bits/code */
	X
	Xstatic INT n_bits;                     /* number of bits/code */
	Xstatic INT maxcode;                    /* maximum code, given n_bits */
	X#define MAXCODE(n)      ((1<<(n)) - 1) /* maximum code calculation */
	Xstatic INT maxcodemax =  1 << BITS;    /* largest possible code (+1) */
	X
	Xstatic unsigned char buf[BITS];        /* input/output buffer */
	X
	Xstatic unsigned char lmask[9] = {      /* left side masks */
	X    0xff, 0xfe, 0xfc,
	X    0xf8, 0xf0, 0xe0,
	X    0xc0, 0x80, 0x00
	X};
	Xstatic unsigned char rmask[9] = {      /* right side masks */
	X    0x00, 0x01, 0x03,
	X    0x07, 0x0f, 0x1f,
	X    0x3f, 0x7f, 0xff
	X};
	X
	Xstatic INT offset;                     /* byte offset for code output */
	Xstatic long in_count;                  /* length of input */
	Xstatic long bytes_out;                 /* length of compressed output */
	Xstatic unsigned INT ent;
	X
	X/*
	X * To save much memory (which we badly need at this point), we overlay
	X * the table used by the previous version of Lempel-Zev with those used
	X * by the new version.  Since no two of these routines will be used
	X * together, we can safely do this.  Note that the tables used for Huffman
	X * squeezing may NOT overlay these, since squeezing and crunching are done
	X * in parallel.
	X */
	X
	Xstatic long htab[HSIZE];               /* hash code table   (crunch) */
	Xstatic unsigned INT codetab[HSIZE];    /* string code table (crunch) */
	X
	Xstatic unsigned INT *prefix = codetab; /* prefix code table (uncrunch) */
	X
	Xstatic unsigned char suffix[HSIZE];    /* suffix table (uncrunch) */
	Xstatic INT free_ent;                   /* first unused entry */
	Xstatic INT firstcmp;                   /* true at start of compression */
	Xstatic unsigned char stack[HSIZE];     /* local push/pop stack */
	X
	X/*
	X * block compression parameters -- after all codes are used up,
	X * and compression rate changes, start over.
	X */
	X
	Xstatic INT clear_flg;
	Xstatic long ratio;
	X#define CHECK_GAP 10000                /* ratio check interval */
	Xstatic long checkpoint;
	X
	X/*
	X * the next two codes should not be changed lightly, as they must not
	X * lie within the contiguous general code space.
	X */
	X
	X#define FIRST   257                    /* first free entry */
	X#define CLEAR   256                    /* table clear output code */
	X
	Xstatic INT cl_block(t)                 /* table clear for block compress */
	XFILE *t;                               /* our output file */
	X{
	X    long rat;
	X    INT putcode();
	X
	X    checkpoint = in_count + CHECK_GAP;
	X
	X    if (in_count > 0x007fffff)         /* shift will overflow */
	X    {
	X        rat = bytes_out >> 8;
	X        if (rat == 0)                  /* Don't divide by zero */
	X            rat = 0x7fffffff;
	X        else
	X            rat = in_count / rat;
	X    }
	X    else
	X        rat = (in_count<<8)/bytes_out;/* 8 fractional bits */
	X
	X    if (rat > ratio)
	X        ratio = rat;
	X    else
	X    {
	X        ratio = 0;
	X        setmem(htab,HSIZE*sizeof(long),0xff);
	X        free_ent = FIRST;
	X        clear_flg = 1;
	X        putcode(CLEAR,t);
	X    }
	X}
	X
	X/*
	X * Output a given code.
	X * Inputs:
	X *      code:   A n_bits-bit integer.  If == -1, then EOF.  This assumes
	X *              that n_bits =< (long)wordsize - 1.
	X * Outputs:
	X *      Outputs code to the file.
	X * Assumptions:
	X *      Chars are 8 bits long.
	X * Algorithm:
	X *      Maintain a BITS character long buffer (so that 8 codes will
	X * fit in it exactly).  When the buffer fills up empty it and start over.
	X */
	X
	Xstatic INT putcode(code,t)             /* output a code */
	XINT code;                              /* code to output */
	XFILE *t;                               /* where to put it */
	X{
	X    INT r_off = offset;                /* right offset */
	X    INT bits = n_bits;                 /* bits to go */
	X    unsigned char *bp = buf;           /* buffer pointer */
	X    INT n;                             /* index */
	X
	X    if (code >= 0)                     /* if a real code */
	X    {
	X        bp += (r_off >> 3);            /* Get to the first byte. */
	X        r_off &= 7;
	X
	X        /*
	X         * Since code is always >= 8 bits, only need to mask the first
	X         * hunk on the left.
	X         */
	X
	X        *bp = (*bp&rmask[r_off]) | (code<<r_off) & lmask[r_off];
	X        bp++;
	X        bits -= (8 - r_off);
	X        code >>= (8 - r_off);
	X
	X        /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
	X
	X        if (bits >= 8)
	X        {
	X            *bp++ = code;
	X            code >>= 8;
	X            bits -= 8;
	X        }
	X
	X        /* Last bits. */
	X
	X        if (bits)
	X            *bp = code;
	X
	X        offset += n_bits;
	X
	X        if (offset == (n_bits << 3))
	X        {
	X            bp = buf;
	X            bits = n_bits;
	X            bytes_out += bits;
	X            do
	X                putc_pak(*bp++,t);
	X            while (--bits);
	X            offset = 0;
	X        }
	X
	X        /*
	X         * If the next entry is going to be too big for the code size,
	X         * then increase it, if possible.
	X         */
	X
	X        if (free_ent>maxcode || clear_flg>0)
	X        {
	X            /*
	X             * Write the whole buffer, because the input side won't
	X             * discover the size increase until after it has read it.
	X             */
	X
	X            if (offset > 0)
	X            {
	X                bp = buf;              /* reset pointer for writing */
	X                bytes_out += n = n_bits;
	X                while (n--)
	X                    putc_pak(*bp++,t);
	X            }
	X            offset = 0;
	X
	X            if (clear_flg)             /* reset if clearing */
	X            {
	X                maxcode = MAXCODE(n_bits = INIT_BITS);
	X                clear_flg = 0;
	X            }
	X            else                       /* else use more bits */
	X            {
	X                n_bits++;
	X                if (n_bits == BITS)
	X                    maxcode = maxcodemax;
	X                else
	X                    maxcode = MAXCODE(n_bits);
	X            }
	X        }
	X    }
	X    else                               /* dump the buffer on EOF */
	X    {
	X        bytes_out += n = (offset+7) / 8;
	X
	X        if (offset > 0)
	X            while (n--)
	X                putc_pak(*bp++,t);
	X        offset = 0;
	X    }
	X}
	X
	X/*
	X * Read one code from the standard input.  If EOF, return -1.
	X * Inputs:
	X *      cmpin
	X * Outputs:
	X *      code or -1 is returned.
	X */
	X
	Xstatic INT getcode(f)                  /* get a code */
	XFILE *f;                               /* file to get from */
	X{
	X    INT code;
	X    static INT offset = 0, size = 0;
	X    INT r_off, bits;
	X    unsigned char *bp = buf;
	X
	X    if (clear_flg > 0 || offset >= size || free_ent > maxcode)
	X    {
	X        /*
	X         * If the next entry will be too big for the current code
	X         * size, then we must increase the size.  This implies reading
	X         * a new buffer full, too.
	X         */
	X
	X        if (free_ent > maxcode)
	X        {
	X            n_bits++;
	X            if (n_bits == BITS)
	X                maxcode = maxcodemax;  /* won't get any bigger now */
	X            else
	X                maxcode = MAXCODE(n_bits);
	X        }
	X        if (clear_flg > 0)
	X        {
	X            maxcode = MAXCODE(n_bits = INIT_BITS);
	X            clear_flg = 0;
	X        }
	X
	X        for (size=0; size<n_bits; size++)
	X        {
	X            if ((code=getc_unp(f))==EOF)
	X                break;
	X            else
	X                buf[size] = code;
	X        }
	X        if (size <= 0)
	X            return(-1);                /* end of file */
	X
	X        offset = 0;
	X
	X        /* Round size down to integral number of codes */
	X
	X        size = (size << 3)-(n_bits - 1);
	X    }
	X    r_off = offset;
	X    bits = n_bits;
	X
	X    /* Get to the first byte. */
	X
	X    bp +=(r_off >> 3);
	X    r_off &= 7;
	X
	X    /* Get first part (low order bits) */
	X
	X    code = (*bp++ >> r_off);
	X    bits -= 8 - r_off;
	X    r_off = 8 - r_off;                 /* now, offset into code word */
	X
	X    /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
	X
	X    if (bits >= 8)
	X    {
	X        code |= *bp++ << r_off;
	X        r_off += 8;
	X        bits -= 8;
	X    }
	X
	X    /* high order bits. */
	X
	X    code |= (*bp & rmask[bits]) << r_off;
	X    offset += n_bits;
	X
	X    return(code);
	X}
	X
	X/*
	X * compress a file
	X *
	X * Algorithm:  use open addressing double hashing (no chaining) on the
	X * prefix code / next character combination.  We do a variant of Knuth's
	X * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
	X * secondary probe.  Here, the modular division first probe is gives way
	X * to a faster exclusive-or manipulation.  Also do block compression with
	X * an adaptive reset, where the code table is cleared when the compression
	X * ratio decreases, but after the table fills.  The variable-length output
	X * codes are re-sized at this point, and a special CLEAR code is generated
	X * for the decompressor.
	X */
	X
	XINT init_cm(f,t)                       /* initialize for compression */
	XFILE *f;                               /* file we will be compressing */
	XFILE *t;                               /* where we will put it */
	X{
	X    offset = 0;
	X    bytes_out = 1;
	X    clear_flg = 0;
	X    ratio = 0;
	X    in_count = 1;
	X    checkpoint = CHECK_GAP;
	X    maxcode = MAXCODE(n_bits = INIT_BITS);
	X    free_ent = FIRST;
	X    setmem(htab,HSIZE*sizeof(long),0xff);
	X    n_bits = INIT_BITS;                /* set starting code size */
	X
	X    putc_pak(BITS,t);                  /* note our max code length */
	X
	X    firstcmp = 1;                      /* next byte will be first */
	X}
	X
	XINT putc_cm(c,t)                       /* compress a character */
	Xunsigned char c;                       /* character to compress */
	XFILE *t;                               /* where to put it */
	X{
	X    static long fcode;
	X    static INT hshift;
	X    INT i;
	X    INT disp;
	X
	X    if (firstcmp)                      /* special case for first byte */
	X    {
	X        ent = c;                       /* remember first byte */
	X
	X        hshift = 0;
	X        for (fcode=(long)HSIZE;  fcode<65536L; fcode*=2L)
	X            hshift++;
	X        hshift = 8 - hshift;           /* set hash code range bund */
	X
	X        firstcmp = 0;                  /* no longer first */
	X        return;
	X    }
	X
	X    in_count++;
	X    fcode =(long)(((long)c << BITS)+ent);
	X    i = (c<<hshift)^ent;               /* xor hashing */
	X
	X    if (htab[i]==fcode)
	X    {
	X        ent = codetab[i];
	X        return;
	X    }
	X    else if (htab[i]<0)                /* empty slot */
	X        goto nomatch;
	X    disp = HSIZE - i;                  /* secondary hash (after G.Knott) */
	X    if (i == 0)
	X        disp = 1;
	X
	Xprobe:
	X    if ((i -= disp) < 0)
	X        i += HSIZE;
	X
	X    if (htab[i] == fcode)
	X    {
	X        ent = codetab[i];
	X        return;
	X    }
	X    if (htab[i] > 0)
	X        goto probe;
	X
	Xnomatch:
	X    putcode(ent,t);
	X    ent = c;
	X    if (free_ent < maxcodemax)
	X    {
	X        codetab[i] = free_ent++;       /* code -> hashtable */
	X        htab[i] = fcode;
	X    }
	X    else if ((long)in_count >= checkpoint)
	X        cl_block(t);
	X}
	X
	Xlong pred_cm(t)                        /* finish compressing a file */
	XFILE *t;                               /* where to put it */
	X{
	X    putcode(ent,t);                    /* put out the final code */
	X    putcode(-1,t);                     /* tell output we are done */
	X
	X    return(bytes_out);                 /* say how big it got */
	X}
	X
	X/*
	X * Decompress a file.  This routine adapts to the codes in the file
	X * building the string table on-the-fly; requiring no table to be stored
	X * in the compressed file.  The tables used herein are shared with those of
	X * the compress() routine.  See the definitions above.
	X */
	X
	XINT decomp(f,t)                        /* decompress a file */
	XFILE *f;                               /* file to read codes from */
	XFILE *t;                               /* file to write text to */
	X{
	X    unsigned char *stackp;
	X    INT finchar;
	X    INT code, oldcode, incode;
	X
	X    if ((code=getc_unp(f))!=BITS)
	X        abort("File packed with %d bits, I can only handle %d",code,BITS);
	X
	X    n_bits = INIT_BITS;                /* set starting code size */
	X    clear_flg = 0;
	X
	X    /* As above, initialize the first 256 entries in the table. */
	X
	X    maxcode = MAXCODE(n_bits=INIT_BITS);
	X    for (code = 255; code >= 0; code--)
	X    {
	X        prefix[code] = 0;
	X        suffix[code] = (unsigned char)code;
	X    }
	X    free_ent = FIRST;
	X
	X    finchar = oldcode = getcode(f);
	X    if (oldcode == -1)                 /* EOF already? */
	X        return;                        /* Get out of here */
	X    putc_ncr((char)finchar,t);         /* first code must be 8 bits=char */
	X    stackp = stack;
	X
	X    while ((code = getcode(f))> -1)
	X    {
	X        if (code==CLEAR)
	X        {
	X            for (code = 255; code >= 0; code--)
	X                prefix[code] = 0;
	X            clear_flg = 1;
	X            free_ent = FIRST - 1;
	X            if ((code=getcode(f))==-1) /* O, untimely death! */
	X                break;
	X        }
	X        incode = code;
	X
	X        /* Special case for KwKwK string. */
	X
	X        if (code >= free_ent)
	X        {
	X            *stackp++ = finchar;
	X            code = oldcode;
	X        }
	X
	X        /* Generate output characters in reverse order */
	X
	X        while (code >= 256)
	X        {
	X            *stackp++ = suffix[code];
	X            code = prefix[code];
	X        }
	X        *stackp++ = finchar = suffix[code];
	X
	X        /* And put them out in forward order */
	X
	X        do
	X            putc_ncr(*--stackp,t);
	X        while (stackp > stack);
	X
	X        /* Generate the new entry. */
	X
	X        if ((code=free_ent) < maxcodemax)
	X        {
	X            prefix[code] = (unsigned short)oldcode;
	X            suffix[code] = finchar;
	X            free_ent = code+1;
	X        }
	X
	X        /* Remember previous code. */
	X
	X        oldcode = incode;
	X    }
	X}
	X
	X
	X/*
	X * Please note how much trouble it can be to maintain upwards
	X * compatibility.  All that follows is for the sole purpose of unpacking
	X * files which were packed using an older method.
	X */
	X
	X
	X/*
	X *  The h() pointer points to the routine to use for calculating a hash
	X *  value.  It is set in the init routines to point to either of oldh()
	X *  or newh().
	X *
	X *  oldh() calculates a hash value by taking the middle twelve bits
	X *  of the square of the key.
	X *
	X *  newh() works somewhat differently, and was tried because it makes
	X *  ARC about 23% faster.  This approach was abandoned because dynamic
	X *  Lempel-Zev (above) works as well, and packs smaller also.  However,
	X *  inadvertent release of a developmental copy forces us to leave this in.
	X */
	X
	Xstatic unsigned INT (*h)();            /* pointer to hash function */
	X
	Xstatic unsigned INT oldh(pred,foll)    /* old hash function */
	Xunsigned INT pred;                     /* code for preceeding string */
	Xunsigned char foll;                    /* value of following char */
	X{
	X    long local;                        /* local hash value */
	X
	X    local = (pred + foll) | 0x0800;    /* create the hash key */
	X    local *= local;                    /* square it */
	X    return((local >> 6) & 0x0FFF);     /* return the middle 12 bits */
	X}
	X
	Xstatic unsigned INT newh(pred,foll)    /* new hash function */
	Xunsigned INT pred;                     /* code for preceeding string */
	Xunsigned char foll;                    /* value of following char */
	X{
	X    return(((pred+foll)*15073)&0xFFF); /* faster hash */
	X}
	X
	X/*
	X *  The eolist() function is used to trace down a list of entries with
	X *  duplicate keys until the last duplicate is found.
	X */
	X
	Xstatic unsigned INT eolist(index)      /* find last duplicate */
	Xunsigned INT index;
	X{
	X    INT temp;
	X
	X    while (temp=string_tab[index].next) /* while more duplicates */
	X        index = temp;
	X
	X    return(index);
	X}
	X
	X/*
	X *  The hash() routine is used to find a spot in the hash table for a new
	X *  entry.  It performs a "hash and linear probe" lookup, using h() to
	X *  calculate the starting hash value and eolist() to perform the linear
	X *  probe.  This routine DOES NOT detect a table full condition.  That
	X *  MUST be checked for elsewhere.
	X */
	X
	Xstatic unsigned INT hash(pred,foll)    /* find spot in the string table */
	Xunsigned INT pred;                     /* code for preceeding string */
	Xunsigned char foll;                    /* char following string */
	X{
	X    unsigned INT local, tempnext;      /* scratch storage */
	X    struct entry *ep;                  /* allows faster table handling */
	X
	X    local = (*h)(pred,foll);           /* get initial hash value */
	X
	X    if (!string_tab[local].used)       /* if that spot is free */
	X        return(local);                 /* then that's all we need */
	X    else                               /* else a collision has occured */
	X    {
	X        local = eolist(local);         /* move to last duplicate */
	X
	X        /*
	X         * We must find an empty spot. We start looking 101 places
	X         * down the table from the last duplicate.
	X         */
	X
	X        tempnext = (local+101) & 0x0FFF;
	X        ep = &string_tab[tempnext];    /* initialize pointer */
	X
	X        while (ep->used)               /* while empty spot not found */
	X        {
	X            if (++tempnext==TABSIZE)   /* if we are at the end */
	X            {
	X                tempnext = 0;          /* wrap to beginning of table*/
	X                ep = string_tab;
	X            }
	X            else
	X                ++ep;                  /* point to next element in table */
	X        }
	X
	X        /*
	X         * local still has the pointer to the last duplicate, while
	X         * tempnext has the pointer to the spot we found.  We use
	X         * this to maintain the chain of pointers to duplicates.
	X         */
	X
	X        string_tab[local].next = tempnext;
	X
	X        return(tempnext);
	X    }
	X}
	X
	X/*
	X * The init_tab() routine is used to initialize our hash table.
	X * You realize, of course, that "initialize" is a complete misnomer.
	X */
	X
	Xstatic INT init_tab()                  /* set ground state in hash table */
	X{
	X    unsigned INT i;                    /* table index */
	X    INT upd_tab();
	X
	X    setmem((char *)string_tab,sizeof(string_tab),0);
	X
	X    for (i=0; i<256; i++)              /* list all single byte strings */
	X        upd_tab(NO_PRED,i);
	X
	X    inbuf = EMPTY;                     /* nothing is in our buffer */
	X}
	X
	X/*
	X * The upd_tab routine is used to add a new entry to the string table.
	X * As previously stated, no checks are made to ensure that the table
	X * has any room.  This must be done elsewhere.
	X */
	X
	XINT upd_tab(pred,foll)                 /* add an entry to the table */
	Xunsigned INT pred;                     /* code for preceeding string */
	Xunsigned INT foll;                     /* character which follows string */
	X{
	X    struct entry *ep;                  /* pointer to current entry */
	X
	X    /* calculate offset just once */
	X
	X    ep = &string_tab[hash(pred,foll)];
	X
	X    ep->used = TRUE;                   /* this spot is now in use */
	X    ep->next = 0;                      /* no duplicates after this yet */
	X    ep->predecessor = pred;            /* note code of preceeding string */
	X    ep->follower = foll;               /* note char after string */
	X}
	X
	X/*
	X * This algorithm encoded a file into twelve bit strings (three nybbles).
	X * The gocode() routine is used to read these strings a byte (or two)
	X * at a time.
	X */
	X
	Xstatic INT gocode(fd)                  /* read in a twelve bit code */
	XFILE *fd;                              /* file to get code from */
	X{
	X    /* Changed unsigned INT to int -- is compared against EOF (-1) -- crs */
	X    int localbuf, returnval;
	X
	X    if (inbuf==EMPTY)                  /* if on a code boundary */
	X    {
	X        if ((localbuf=getc_unp(fd))==EOF) /* get start of next code */
	X            return(EOF);               /* pass back end of file status */
	X        localbuf &= 0xFF;              /* mask down to true byte value */
	X        if ((inbuf=getc_unp(fd))==EOF) /* get end of code, start of next */
	X            return(EOF);               /* this should never happen */
	X        inbuf &= 0xFF;                 /* mask down to true byte value */
	X
	X        returnval = ((localbuf<<4)&0xFF0) + ((inbuf>>4)&0x00F);
	X        inbuf &= 0x000F;               /* leave partial code pending */
	X    }
	X    else                               /* buffer contains first nybble */
	X    {
	X        if ((localbuf=getc_unp(fd))==EOF)
	X            return(EOF);
	X        localbuf &= 0xFF;
	X
	X        returnval = localbuf + ((inbuf<<8)&0xF00);
	X        inbuf = EMPTY;                 /* note no hanging nybbles */
	X    }
	X    return(returnval);                 /* pass back assembled code */
	X}
	X
	Xstatic INT push(c)                     /* push char onto stack */
	XINT c;                                 /* character to push */
	X{
	X    stack[sp] = ((char) c);            /* coerce integer into a char */
	X
	X    if (++sp >= TABSIZE)
	X        abort("Stack overflow\n");
	X}
	X
	Xstatic INT pop()                       /* pop character from stack */
	X{
	X    if (sp>0)
	X        return(((INT) stack[--sp]));   /* leave ptr at next empty slot */
	X    else
	X        return(EMPTY);
	X}
	X
	X/***** LEMPEL-ZEV DECOMPRESSION *****/
	X
	Xstatic INT code_count;                 /* needed to detect table full */
	Xstatic INT firstc;                     /* true only on first character */
	X
	XINT init_ucr(new)                      /* get set for uncrunching */
	XINT new;                               /* true to use new hash function */
	X{
	X    if (new)                           /* set proper hash function */
	X        h = newh;
	X    else
	X        h = oldh;
	X
	X    sp = 0;                            /* clear out the stack */
	X    init_tab();                        /* set up atomic code definitions */
	X    code_count = TABSIZE - 256;        /* note space left in table */
	X    firstc = 1;                        /* true only on first code */
	X}
	X
	XINT getc_ucr(f)                        /* get next uncrunched byte */
	XFILE *f;                               /* file containing crunched data */
	X{
	X    INT code, newcode;
	X    static INT oldcode, finchar;
	X    struct entry *ep;                  /* allows faster table handling */
	X
	X    if (firstc)                        /* first code is always known */
	X    {
	X        firstc = FALSE;                /* but next will not be first */
	X        oldcode = gocode(f);
	X        return(finchar = string_tab[oldcode].follower);
	X    }
	X
	X    if (!sp)                           /* if stack is empty */
	X    {
	X        if ((code=newcode=gocode(f))==EOF)
	X            return(EOF);
	X
	X        ep = &string_tab[code];        /* initialize pointer */
	X
	X        if (!ep->used)                 /* if code isn't known */
	X        {
	X            code = oldcode;
	X            ep = &string_tab[code];    /* re-initialize pointer */
	X            push(finchar);
	X        }
	X
	X        while (ep->predecessor!=NO_PRED)
	X        {
	X            push(ep->follower);        /* decode string backwards */
	X            code = ep->predecessor;
	X            ep = &string_tab[code];
	X        }
	X
	X        push(finchar=ep->follower);    /* save first character also */
	X
	X        /*
	X         * The above loop will terminate, one way or another,
	X         * with string_tab[code].follower equal to the first
	X         * character in the string.
	X         */
	X
	X        if (code_count)                /* if room left in string table */
	X        {
	X            upd_tab(oldcode,finchar);
	X            --code_count;
	X        }
	X
	X        oldcode = newcode;
	X    }
	X
	X    return(pop());                     /* return saved character */
	X}
SHAR_EOF
if test 25543 -ne "`wc -c < 'arclzw.c'`"
then
	echo shar: "error transmitting 'arclzw.c'" '(should have been 25543 characters)'
fi
fi
echo shar: "extracting 'arcmisc.c'" '(5966 characters)'
if test -f 'arcmisc.c'
then
	echo shar: "will not over-write existing file 'arcmisc.c'"
else
sed 's/^	X//' << \SHAR_EOF > 'arcmisc.c'
	X/*
	X *	arcmisc.c	1.2
	X *
	X *	Author: Thom Henderson
	X *	Original System V port: Mike Stump
	X *	Enhancements, Bug fixes, and cleanup: Chris Seaman
	X *	Date: Fri Mar 20 09:57:02 1987
	X *	Last Mod.	5/15/87
	X *
	X */
	X
	X/*
	X * ARC - Archive utility - ARCMISC
	X * 
	X * Description:
	X *      This file contains miscellaneous routines for string
	X *      management, file management, and program control.
	X */
	X
	X#include "arc.h"
	X#include <ctype.h>
	X
	XINT rempath(nargs,arg)               /* remove paths from filenames */
	XINT nargs;                           /* number of names */
	Xchar *arg[];                         /* pointers to names */
	X{
	X    char *i, *strrchr();             /* string index, reverse indexer */
	X    INT n;                           /* index */
	X
	X    for(n=0; n<nargs; n++)           /* for each supplied name */
	X    {
	X        i=strrchr(arg[n],'/');       /* search for end of path */
	X        if(i)                        /* if path was found */
	X            arg[n] = i+1;            /* then skip it */
	X    }
	X}
	X
	X/* make a file name using a template */
	Xmakefnam(rawfn,template,result)
	Xchar *rawfn;                         /* the original file name */
	Xchar *template;                      /* the template data */
	Xchar *result;                        /* where to place the result */
	X{
	X    char *arc_ext = ".arc";          /* possible existing extension */
	X    char *i, *strrchr();             /* string indexing stuff */
	X
	X    setmem(result, STRLEN, '\0');    /* need to zero out result for strncpy */
	X
	X    if (i = strrchr(rawfn,'.'))      /* strip 'arc' extension from filename */
	X        if (strcmp(i,arc_ext) == 0) *i = '\0';
	X
	X    strncpy(result,rawfn,10);        /* rebuild it using supplied template */
	X    strcat(result,template);
	X    return;
	X}
	X
	X/*  convert a string to upper case  */
	Xupper(s)
	Xchar *s;
	X{
	X    while (*s)
	X    {
	X        *s = toupper(*s);
	X        ++s;
	X    }
	X}
	X
	Xsetmem(dest,size,c)
	Xchar *dest,c;
	XINT size;
	X{
	X    int i;
	X
	X    for (i = 0; i < size; dest[i] = c, i++);
	X}
	X
	Xabort(s,arg1,arg2,arg3)                /* something went wrong...QUIT!! */
	Xchar *s;
	X{
	X    char tempname1[STRLEN], tempname2[STRLEN];
	X
	X    sprintf(tempname1,"%s.crn",arctemp);
	X    sprintf(tempname2,"%s.cvt",arctemp);
	X
	X    unlink(bakname);                   /* remove all possible temp files */
	X    unlink(newname);
	X    unlink(tempname1);
	X    unlink(tempname2);
	X
	X    fprintf(stderr,"arc: ");           /* explain things to the user */
	X    fprintf(stderr,s,arg1,arg2,arg3);
	X    fprintf(stderr,"\n");
	X    exit(1);                           /* quit */
	X}
	X
	Xrename(o, n)
	Xchar *o, *n;
	X{
	X    return(link(o, n) || unlink(o));
	X}
	X
	Xmakenames(rawfn)
	Xchar *rawfn;
	X{
	X    char pathtemp[STRLEN];             /* temporary path holder */
	X    char nametemp[STRLEN];             /* temporary arcname holder */
	X    char *buf;                         /* temporary pointer */
	X    char *i, *strrchr();               /* string indexing junk */
	X    long getpid();                     /* process id function */
	X
	X    strcpy(pathtemp,rawfn);
	X    if (i = strrchr(buf=rawfn,'/'))    /* if names are part of paths */
	X    {                                  /* lots to do */
	X        buf=i+1;
	X        pathtemp[strlen(rawfn)-strlen(buf)]='\0';
	X
	X        makefnam(buf,".arc",nametemp); /* make 'arcname' */
	X        sprintf(arcname,"%s%s",pathtemp,nametemp);
	X
	X        makefnam(buf,".bak",nametemp); /* make 'bakname' */
	X        sprintf(bakname,"%s%s",pathtemp,nametemp);
	X
	X        sprintf(arctemp,"%s.Arc%ld",pathtemp,getpid());
	X    }
	X    else                               /* not so much to do */
	X    {
	X        makefnam(rawfn,".arc",arcname);
	X        makefnam(rawfn,".bak",bakname);
	X
	X        sprintf(arctemp,".Arc%ld",getpid());
	X    }
	X    sprintf(newname,"%s.arc",arctemp);
	X}
	X
	Xonintr()                               /* SIGNAL was caught */
	X{
	X    abort("User Requested Abort");
	X}
	X
	X/*
	X * This function sorts the command line file arguments.  Needed since
	X * the add, update, etc., function does no sorting, and could result in
	X * multiple archive entries for the same file name.
	X */
	Xsortarg(num,arg)                       /* sort argument list, remove dups */
	Xint num;
	Xchar *arg[];
	X{
	X    char *temp;                        /* temporary pointer */
	X    INT top, seek;                     /* placeholders for sorting */
	X    INT dups = 0;                      /* how many duplicates are there */
	X    char *strrchr(), *i;               /* string indexing stuff */
	X    char *buf1, *buf2;                 /* pointers for strcmp to use */
	X
	X    /* sort the arguments, ignoring pathnames */
	X
	X    for (top = 0;top < num-1;top++)
	X        for (seek = top+1;seek<num;seek++)
	X        {
	X            buf1 = arg[top];
	X            buf2 = arg[seek];
	X            if (i = strrchr(arg[top],'/')) buf1 = i + 1;
	X            if (i = strrchr(arg[seek],'/')) buf2 = i + 1;
	X            if (strcmp(buf1,buf2) > 0)
	X            {
	X                temp = arg[top];
	X                arg[top] = arg[seek];
	X                arg[seek] = temp;
	X            }
	X        }
	X
	X    /* find any occurences of 'arcname', and remove them */
	X
	X    for (top = 0;top < num;top++)
	X        while (strcmp(arg[top],arcname) == 0)
	X        {
	X            for (seek = top; seek < num;seek++)
	X                arg[seek] = arg[seek + 1];
	X            arg[--num] = '\0';
	X            dups++;
	X        }
	X
	X    /* find any other duplicate arguments (ignoring pathnames), */
	X    /* and remove the second and subsequent occurences */
	X
	X    for (top = 0;top < num-1;top++)
	X    {
	X        buf1 = arg[top];
	X        buf2 = arg[top + 1];
	X        if (i = strrchr(arg[top],'/')) buf1 = i + 1;
	X        if (i = strrchr(arg[top + 1],'/')) buf2 = i + 1;
	X        while (strcmp(buf1,buf2) == 0)
	X        {
	X            for (seek = top + 1;seek < num;seek++)
	X                arg[seek] = arg[seek + 1];
	X            arg[--num] = '\0';
	X            buf2 = arg[top + 1];
	X            if (i = strrchr(arg[top + 1],'/')) buf2 = i + 1;
	X            dups++;
	X        }
	X    }
	X    return(dups);              /* tell main() how many we found */
	X}
SHAR_EOF
if test 5966 -ne "`wc -c < 'arcmisc.c'`"
then
	echo shar: "error transmitting 'arcmisc.c'" '(should have been 5966 characters)'
fi
fi
echo shar: "extracting 'arcpack.c'" '(9147 characters)'
if test -f 'arcpack.c'
then
	echo shar: "will not over-write existing file 'arcpack.c'"
else
sed 's/^	X//' << \SHAR_EOF > 'arcpack.c'
	X/*
	X *	arcpack.c	1.2
	X *
	X *	Author: Thom Henderson
	X *	Original System V port: Mike Stump
	X *	Enhancements, Bug fixes, and cleanup: Chris Seaman
	X *	Date: Fri Mar 20 09:57:02 1987
	X *	Last Mod.	5/15/87
	X *
	X */
	X
	X/*
	X * ARC - Archive utility - ARCPACK
	X * 
	X * Version 3.37, created on 02/03/86 at 22:58:01
	X * 
	X * (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
	X * 
	X *     Description:
	X *          This file contains the routines used to compress a file
	X *          when placing it in an archive.
	X */
	X
	X#include "arc.h"
	X
	X/* stuff for non-repeat packing */
	X
	X#define DLE 0x90                       /* repeat sequence marker */
	X
	Xstatic unsigned char state;            /* current packing state */
	X
	X/* non-repeat packing states */
	X
	X#define NOHIST  0                      /* don't consider previous input*/
	X#define SENTCHAR 1                     /* lastchar set, no lookahead yet */
	X#define SENDNEWC 2                     /* run over, send new char next */
	X#define SENDCNT 3                      /* newchar set, send count next */
	X
	X/* packing results */
	X
	Xstatic long stdlen;                    /* length for standard packing */
	Xstatic INT crcval;                     /* CRC check value */
	X
	XINT pack(f,t,hdr)                      /* pack file into an archive */
	XFILE *f, *t;                           /* source, destination */
	Xstruct heads *hdr;                     /* pointer to header data */
	X{
	X    INT c;                             /* one character of stream */
	X    long ncrlen;                       /* length after packing */
	X    long huflen;                       /* length after squeezing */
	X    long lzwlen;                       /* length after crunching */
	X    long pred_sq(), file_sq();         /* stuff for squeezing */
	X    long pred_cm();                    /* dynamic crunching cleanup */
	X    char tnam[STRLEN];                 /* temporary name buffer */
	X    FILE *crn = NULL;                  /* temporary crunch file */
	X    INT getch();
	X    INT getc_ncr();
	X    INT putc_pak();
	X
	X    /* first pass - see which method is best */
	X
	X    if (!nocomp)                       /* if storage kludge not active */
	X    {
	X        if (note)
	X        {
	X            printf(" analyzing, ");
	X            fflush(stdout);
	X        }
	X
	X        sprintf(tnam,"%s.crn",arctemp);
	X        crn = fopen(tnam,"w+");
	X        state = NOHIST;                /* initialize ncr packing */
	X        stdlen =  ncrlen = 0;          /* reset size counters */
	X        crcval = 0;                    /* initialize CRC check value */
	X        setcode();                     /* initialize encryption */
	X
	X        init_cm(f,crn);                /* initialize for crunching */
	X        init_sq();                     /* initialize for squeeze scan */
	X
	X        while ((c=getc_ncr(f))!=EOF)   /* for each byte of file */
	X        {
	X            ncrlen++;                  /* one more packed byte */
	X            scan_sq(c);                /* see what squeezing can do */
	X            putc_cm(c,crn);            /* see what crunching can do */
	X        }
	X        huflen = pred_sq();            /* finish up after squeezing */
	X        lzwlen = pred_cm(crn);         /* finish up after crunching */
	X    }
	X    else                               /* else kludge the method */
	X    {
	X        stdlen = 0;                    /* make standard look best */
	X        ncrlen = huflen = lzwlen = 1;
	X    }
	X
	X    /* standard set-ups common to all methods */
	X
	X    fseek(f,0L,0);                     /* rewind input */
	X    hdr->crc = crcval;                 /* note CRC check value */
	X    hdr->length = stdlen;              /* set actual file length */
	X    state = NOHIST;                    /* reinitialize ncr packing */
	X    setcode();                         /* reinitialize encryption */
	X
	X    /* choose and use the shortest method */
	X
	X    if (stdlen<=ncrlen && stdlen<=huflen && stdlen<=lzwlen)
	X    {
	X        if (note)                      /* store w/out compression */
	X        {
	X            printf("storing, ");
	X            fflush(stdout);
	X        }
	X        hdrver = 2;                    /* note packing method */
	X        stdlen = crcval = 0;           /* recalc these for kludge */
	X        while ((c=getch(f))!=EOF)      /* store it straight */
	X            putc_pak(c,t);
	X        hdr->crc = crcval;
	X        hdr->length = hdr->size = stdlen;
	X    }
	X    else if (ncrlen<huflen && ncrlen<lzwlen)
	X    {
	X        if (note)                      /* pack w/repeat suppression */
	X        {
	X            printf("packing, ");
	X            fflush(stdout);
	X        }
	X        hdrver = 3;                    /* note packing method */
	X        hdr->size = ncrlen;            /* set data length */
	X        while ((c=getc_ncr(f))!=EOF)
	X            putc_pak(c,t);
	X    }
	X    else if (huflen<lzwlen)
	X    {
	X        if (note)
	X        {
	X            printf("squeezing, ");
	X            fflush(stdout);
	X        }
	X        hdrver = 4;                    /* note packing method */
	X        hdr->size = file_sq(f,t);      /* note final size */
	X    }
	X    else
	X    {
	X        if (note)
	X        {
	X            printf("crunching, ");
	X            fflush(stdout);
	X        }
	X        hdrver = 8;
	X        hdr->size = lzwlen;            /* size should not change */
	X        if (crn)                       /* if temp was created */
	X        {
	X            fseek(crn,0L,0);           /* then copy over crunched temp */
	X            while ((c=fgetc(crn))!=EOF)
	X                putc_tst(c,t);
	X        }
	X        else                           /* else re-crunch */
	X        {
	X            init_cm(f,t);
	X            while ((c=getc_ncr(f))!=EOF)
	X                putc_cm(c,t);
	X            pred_cm(t);                /* finish up after crunching */
	X        }
	X    }
	X
	X    /* standard cleanups common to all methods */
	X
	X    if (crn)                           /* get rid of crunch temporary */
	X    {
	X        fclose(crn);
	X        if (unlink(tnam) && warn)
	X        {
	X            printf("Cannot delete temporary file %s\n",tnam);
	X            nerrs++;
	X        }
	X    }
	X    if (note)
	X        printf("done.\n");
	X}
	X
	X/*
	X *  Non-repeat compression - text is passed through normally, except that
	X *  a run of more than two is encoded as:
	X *
	X *       <char> <DLE> <count>
	X *
	X *  Special case: a count of zero indicates that the DLE is really a DLE,
	X *  not a repeat marker.
	X */
	X
	XINT getc_ncr(f)                        /* get bytes with collapsed runs */
	XFILE *f;                               /* file to get from */
	X{
	X    static INT lastc;                  /* value returned on last call */
	X    static INT repcnt;                 /* repetition counter */
	X    static INT c;                      /* latest value seen */
	X
	X    switch(state)                      /* depends on our state */
	X    {
	X    case NOHIST:                       /* no relevant history */
	X        state = SENTCHAR;
	X        return(lastc = getch(f));      /* remember the value next time */
	X    case SENTCHAR:                     /* char was sent. look ahead */
	X        switch(lastc)                  /* action depends on char */
	X        {
	X        case DLE:                      /* if we sent a real DLE */
	X            state = NOHIST;            /* then start over again */
	X            return(0);                 /* but note that the DLE was real */
	X        case EOF:                      /* EOF is always a special case */
	X            return(EOF);
	X        default:                       /* else test for a repeat */
	X            for (repcnt=1; (c=getch(f))==lastc && repcnt<255; repcnt++)
	X                ;                      /* find end of run */
	X            switch(repcnt)             /* action depends on run size */
	X            {
	X            case 1:                    /* not a repeat */
	X                return(lastc = c);     /* but remember value next time */
	X            case 2:                    /* a repeat, but too short */
	X                state = SENDNEWC;      /* send the second one next time */
	X                return(lastc);
	X            default:                   /* a run - compress it */
	X                state = SENDCNT;       /* send repeat count next time */
	X                return(DLE);           /* send repeat marker this time */
	X            }
	X        }
	X    case SENDNEWC:                     /* send second char of short run */
	X        state = SENTCHAR;
	X        return(lastc = c);
	X    case SENDCNT:                      /* sent DLE, now send count */
	X        state = SENDNEWC;
	X        return(repcnt);
	X    default:
	X        abort("Bug - bad ncr state\n");
	X    }
	X}
	X
	Xstatic INT getch(f)                    /* special get char for packing */
	XFILE *f;                               /* file to get from */
	X{
	X    INT c;                             /* a char from the file */
	X
	X    if ((c=fgetc(f))!=EOF)             /* if not the end of file */
	X    {
	X        crcval = addcrc(crcval,c);     /* then update CRC check value */
	X        stdlen++;                      /* and bump length counter */
	X    }
	X
	X    return(c);
	X}
	X
	XINT putc_pak(c,f)                      /* put a packed byte into archive */
	Xchar c;                                /* byte to put */
	XFILE *f;                               /* archive to put it in */
	X{
	X    putc_tst(code(c),f);               /* put encoded byte, with checks */
	X}
SHAR_EOF
if test 9147 -ne "`wc -c < 'arcpack.c'`"
then
	echo shar: "error transmitting 'arcpack.c'" '(should have been 9147 characters)'
fi
fi
exit 0
#	End of shell archive
-- 
Brandon S. Allbery	{decvax,cbatt,cbosgd}!cwruecmp!ncoast!allbery
Tridelta Industries	{ames,mit-eddie,talcott}!necntc!ncoast!allbery
7350 Corporate Blvd.	necntc!ncoast!allbery@harvard.HARVARD.EDU
Mentor, OH 44060	+01 216 255 1080	(also eddie.MIT.EDU)