[comp.os.vms] ARC_C.SHAR19_OF_19

ewilts%Ins.MRC.AdhocNet.CA%Stasis.MRC.AdhocNet.CA%UNCAEDU.@CORNELLC.CCS.CORNELL.EDU.BITNET (Ed Wilts) (06/24/88)

$Part19:
$ File_is="SQUASH.C"
$ Check_Sum_is=240194511
$ Copy SYS$Input VMS_SHAR_DUMMY.DUMMY
X/*  ARC - Archive utility - SQUASH
X
X(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
X
X`009This is a quick hack to ARCLZW to make it handle squashed archives.
X`009Dan Lanciani (ddl@harvard.*) July 87
X
X*/
X#include <stdio.h>
X#include "arc.h"
X
X/* definitions for the new dynamic Lempel-Zev crunching */
X
X#define BITS   13                      /* maximum bits per code */
X#define HSIZE  10007                    /* 80% occupancy */
X#define INIT_BITS 9                    /* initial number of bits/code */
X
Xstatic INT n_bits;                     /* number of bits/code */
Xstatic INT maxcode;                    /* maximum code, given n_bits */
X#define MAXCODE(n)      ((1<<(n)) - 1) /* maximum code calculation */
Xstatic INT maxcodemax =  1 << BITS;    /* largest possible code (+1) */
X
Xstatic unsigned char buf[BITS];        /* input/output buffer */
X
Xstatic unsigned char lmask[9] =        /* left side masks */
X{   0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00 };
Xstatic unsigned char rmask[9] =        /* right side masks */
X{   0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
X
Xstatic INT offset;                     /* byte offset for code output */
Xstatic long in_count;                  /* length of input */
Xstatic long bytes_out;                 /* length of compressed output */
Xstatic unsigned INT ent;
X
Xstatic long htab[HSIZE];               /* hash code table   (crunch) */
Xstatic unsigned INT codetab[HSIZE];    /* string code table (crunch) */
X
Xstatic unsigned INT *prefix = codetab; /* prefix code table (uncrunch) */
X
Xstatic unsigned char suffix[HSIZE];    /* suffix table (uncrunch) */
Xstatic INT free_ent;                   /* first unused entry */
Xstatic INT firstcmp;                   /* true at start of compression */
Xstatic unsigned char stack[HSIZE];     /* local push/pop stack */
X
X/*
X * block compression parameters -- after all codes are used up,
X * and compression rate changes, start over.
X */
X
Xstatic INT clear_flg;
Xstatic long ratio;
X#define CHECK_GAP 10000                /* ratio check interval */
Xstatic long checkpoint;
X
X/*
X * the next two codes should not be changed lightly, as they must not
X * lie within the contiguous general code space.
X */
X#define FIRST   257                    /* first free entry */
X#define CLEAR   256                    /* table clear output code */
X
Xstatic INT cl_block(t)                     /* table clear for block compress */
XFILE *t;                               /* our output file */
X{
X    long rat;
X    INT putcode();
X
X    checkpoint = in_count + CHECK_GAP;
X
X    if(in_count > 0x007fffff)          /* shift will overflow */
X    {    rat = bytes_out >> 8;
X         if(rat == 0)                  /* Don't divide by zero */
X              rat = 0x7fffffff;
X         else rat = in_count / rat;
X    }
X    else rat = (in_count<<8)/bytes_out;/* 8 fractional bits */
X
X    if(rat > ratio)
X         ratio = rat;
X    else
X    {    ratio = 0;
X         setmem`009(htab,HSIZE*sizeof(long),0xff);
X         free_ent = FIRST;
X         clear_flg = 1;
X         putcode(CLEAR,t);
X    }
X}
X
X/*****************************************************************
X *
X * Output a given code.
X * Inputs:
X *      code:   A n_bits-bit integer.  If == -1, then EOF.  This assumes
X *              that n_bits =< (long)wordsize - 1.
X * Outputs:
X *      Outputs code to the file.
X * Assumptions:
X *      Chars are 8 bits long.
X * Algorithm:
X *      Maintain a BITS character long buffer (so that 8 codes will
X * fit in it exactly).  When the buffer fills up empty it and start over.
X */
X
Xstatic INT putcode(code,t)                 /* output a code */
XINT code;                              /* code to output */
XFILE *t;                               /* where to put it */
X{
X INT r_off = offset;                /* right offset */
X INT bits = n_bits;                 /* bits to go */
X unsigned char *bp = buf;           /* buffer pointer */
X INT n;                             /* index */
X
X    if(code >= 0)                      /* if a real code */
X    {    /*
X          * Get to the first byte.
X          */
X         bp += (r_off >> 3);
X         r_off &= 7;
X
X         /*
X          * Since code is always >= 8 bits, only need to mask the first
X          * hunk on the left.
X          */
X         *bp = (*bp&rmask[r_off]) | (code<<r_off) & lmask[r_off];
X         bp++;
X         bits -= (8 - r_off);
X         code >>= (8 - r_off);
X
X         /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
X         if(bits >= 8)
X         {    *bp++ = code;
X              code >>= 8;
X              bits -= 8;
X         }
X
X         /* Last bits. */
X         if(bits)
X              *bp = code;
X
X         offset += n_bits;
X
X         if(offset == (n_bits << 3))
X         {    bp = buf;
X              bits = n_bits;
X              bytes_out += bits;
X              do
X                   putc_pak(*bp++,t);
X              while(--bits);
X              offset = 0;
X         }
X
X         /*
X          * If the next entry is going to be too big for the code size,
X          * then increase it, if possible.
X          */
X         if(free_ent>maxcode || clear_flg>0)
X         {    /*
X               * Write the whole buffer, because the input side won't
X               * discover the size increase until after it has read it.
X               */
X              if(offset > 0)
X              {    bp = buf;           /* reset pointer for writing */
X                   bytes_out += n = n_bits;
X                   while(n--)
X                        putc_pak(*bp++,t);
X              }
X              offset = 0;
X
X              if(clear_flg)            /* reset if clearing */
X              {    maxcode = MAXCODE(n_bits = INIT_BITS);
X                   clear_flg = 0;
X              }
X              else                     /* else use more bits */
X              {    n_bits++;
X                   if(n_bits == BITS)
X                        maxcode = maxcodemax;
X                   else
X                        maxcode = MAXCODE(n_bits);
X              }
X         }
X    }
X
X    else                               /* dump the buffer on EOF */
X    {    bytes_out += n = (offset+7) / 8;
X
X         if(offset > 0)
X              while(n--)
X                   putc_pak(*bp++,t);
X         offset = 0;
X    }
X}
X
X/*****************************************************************
X *
X * Read one code from the standard input.  If EOF, return -1.
X * Inputs:
X *      cmpin
X * Outputs:
X *      code or -1 is returned.
X */
X
Xstatic INT getcode(f)                  /* get a code */
XFILE *f;                               /* file to get from */
X{
X INT code;
X    static INT offset = 0, size = 0;
X INT r_off, bits;
X    unsigned char *bp = buf;
X
X    if(clear_flg > 0 || offset >= size || free_ent > maxcode)
X    {    /*
X          * If the next entry will be too big for the current code
X          * size, then we must increase the size.  This implies reading
X          * a new buffer full, too.
X          */
X         if(free_ent > maxcode)
X         {    n_bits++;
X              if(n_bits == BITS)
X                   maxcode = maxcodemax;    /* won't get any bigger now */
X              else maxcode = MAXCODE(n_bits);
X         }
X         if(clear_flg > 0)
X         {    maxcode = MAXCODE(n_bits = INIT_BITS);
X              clear_flg = 0;
X         }
X
X         for(size=0; size<n_bits; size++)
X         {    if((code=getc_unp(f))==EOF)
X                   break;
X              else buf[size] = code;
X         }
X         if(size <= 0)
X              return -1;               /* end of file */
X
X         offset = 0;
X         /* Round size down to integral number of codes */
X         size = (size << 3)-(n_bits - 1);
X    }
X    r_off = offset;
X    bits = n_bits;
X
X    /*
X     * Get to the first byte.
X     */
X    bp +=(r_off >> 3);
X    r_off &= 7;
X
X    /* Get first part (low order bits) */
X    code = (*bp++ >> r_off);
X    bits -= 8 - r_off;
X    r_off = 8 - r_off;                 /* now, offset into code word */
X
X    /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
X    if(bits >= 8)
X    {    code |= *bp++ << r_off;
X         r_off += 8;
X         bits -= 8;
X    }
X    /* high order bits. */
X    code |= (*bp & rmask[bits]) << r_off;
X    offset += n_bits;
X
X    return code;
X}
X
X/*
X * compress a file
X *
X * Algorithm:  use open addressing double hashing (no chaining) on the
X * prefix code / next character combination.  We do a variant of Knuth's
X * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
X * secondary probe.  Here, the modular division first probe is gives way
X * to a faster exclusive-or manipulation.  Also do block compression with
X * an adaptive reset, where the code table is cleared when the compression
X * ratio decreases, but after the table fills.  The variable-length output
X * codes are re-sized at this point, and a special CLEAR code is generated
X * for the decompressor.
X */
X
XINT sqinit_cm(f,t)                           /* initialize for compression */
XFILE *f;                               /* file we will be compressing */
XFILE *t;                               /* where we will put it */
X{
X    offset = 0;
X    bytes_out = 0;
X    clear_flg = 0;
X    ratio = 0;
X    in_count = 1;
X    checkpoint = CHECK_GAP;
X    maxcode = MAXCODE(n_bits = INIT_BITS);
X    free_ent = FIRST;
X    setmem(htab,HSIZE*sizeof(long),0xff);
X    n_bits = INIT_BITS;                /* set starting code size */
X
X    firstcmp = 1;                      /* next byte will be first */
X}
X
XINT sqputc_cm(c,t)                           /* compress a character */
Xunsigned char c;                       /* character to compress */
XFILE *t;                               /* where to put it */
X{
X    static long fcode;
X    static INT hshift;
X#ifdef VAXC
X register INT i;
X#else
X INT i;
X#endif VAXC
X INT disp;
X
X    if(firstcmp)                       /* special case for first byte */
X    {    ent = c;                      /* remember first byte */
X
X         hshift = 0;
X         for(fcode=(long)HSIZE;  fcode<65536L; fcode*=2L)
X              hshift++;
X         hshift = 8 - hshift;          /* set hash code range bund */
X
X         firstcmp = 0;                 /* no longer first */
X         return;
X    }
X
X    in_count++;
X    fcode =(long)(((long)c << BITS)+ent);
X    i = (c<<hshift)^ent;               /* xor hashing */
X
X    if(htab[i]==fcode)
X    {    ent = codetab[i];
X         return;
X    }
X    else if(htab[i]<0)                 /* empty slot */
X         goto nomatch;
X    disp = HSIZE - i;                  /* secondary hash (after G.Knott) */
X    if(i == 0)
X         disp = 1;
X
Xprobe:
X    if((i -= disp) < 0)
X         i += HSIZE;
X
X#ifdef VAXC
X    if (i > HSIZE) printf("%SQUASH-W-BOUND, i > HSIZE\n");
X#endif VAXC
X
X    if(htab[i] == fcode)
X    {    ent = codetab[i];
X         return;
X    }
X    if(htab[i] > 0)
X         goto probe;
X
Xnomatch:
X    putcode(ent,t);
X    ent = c;
X    if(free_ent < maxcodemax)
X    {    codetab[i] = free_ent++;      /* code -> hashtable */
X         htab[i] = fcode;
X    }
X    else if((long)in_count >= checkpoint)
X         cl_block(t);
X}
X
Xlong sqpred_cm(t)                        /* finish compressing a file */
XFILE *t;                               /* where to put it */
X{
X    putcode(ent,t);                    /* put out the final code */
X    putcode(-1,t);                     /* tell output we are done */
X
X    return bytes_out;                  /* say how big it got */
X}
X
X/*
X * Decompress a file.  This routine adapts to the codes in the file
X * building the string table on-the-fly; requiring no table to be stored
X * in the compressed file.  The tables used herein are shared with those of
X * the compress() routine.  See the definitions above.
X */
X
XINT sqdecomp(f,t)                            /* decompress a file */
XFILE *f;                               /* file to read codes from */
XFILE *t;                               /* file to write text to */
X{
X    unsigned char *stackp;
X INT finchar;
X INT code, oldcode, incode;
X
X    n_bits = INIT_BITS;                /* set starting code size */
X    clear_flg = 0;
X
X    /*
X     * As above, initialize the first 256 entries in the table.
X     */
X    maxcode = MAXCODE(n_bits=INIT_BITS);
X    for(code = 255; code >= 0; code--)
X    {    prefix[code] = 0;
X         suffix[code] = (unsigned char)code;
X    }
X    free_ent = FIRST;
X
X    finchar = oldcode = getcode(f);
X    if(oldcode == -1)                  /* EOF already? */
X         return;                       /* Get out of here */
X    putc_unp((char)finchar,t);         /* first code must be 8 bits=char */
X    stackp = stack;
X
X    while((code = getcode(f))> -1)
X    {    if(code==CLEAR)
X         {    for(code = 255; code >= 0; code--)
X                   prefix[code] = 0;
X              clear_flg = 1;
X              free_ent = FIRST - 1;
X              if((code=getcode(f))==-1)/* O, untimely death! */
X                   break;
X         }
X         incode = code;
X         /*
X          * Special case for KwKwK string.
X          */
X         if(code >= free_ent)
X         {    *stackp++ = finchar;
X              code = oldcode;
X         }
X
X         /*
X          * Generate output characters in reverse order
X          */
X         while(code >= 256)
X         {    *stackp++ = suffix[code];
X              code = prefix[code];
X         }
X         *stackp++ = finchar = suffix[code];
X
X         /*
X          * And put them out in forward order
X          */
X         do
X              putc_unp(*--stackp,t);
X         while(stackp > stack);
X
X         /*
X          * Generate the new entry.
X          */
X         if((code=free_ent) < maxcodemax)
X         {    prefix[code] = (unsigned short)oldcode;
X              suffix[code] = finchar;
X              free_ent = code+1;
X         }
X         /*
X          * Remember previous code.
X          */
X         oldcode = incode;
X    }
X}
X
$ GoSub Convert_File
$ Exit