ewilts%Ins.MRC.AdhocNet.CA%Stasis.MRC.AdhocNet.CA%UNCAEDU.@CORNELLC.CCS.CORNELL.EDU.BITNET (Ed Wilts) (06/24/88)
$Part19: $ File_is="SQUASH.C" $ Check_Sum_is=240194511 $ Copy SYS$Input VMS_SHAR_DUMMY.DUMMY X/* ARC - Archive utility - SQUASH X X(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED X X`009This is a quick hack to ARCLZW to make it handle squashed archives. X`009Dan Lanciani (ddl@harvard.*) July 87 X X*/ X#include <stdio.h> X#include "arc.h" X X/* definitions for the new dynamic Lempel-Zev crunching */ X X#define BITS 13 /* maximum bits per code */ X#define HSIZE 10007 /* 80% occupancy */ X#define INIT_BITS 9 /* initial number of bits/code */ X Xstatic INT n_bits; /* number of bits/code */ Xstatic INT maxcode; /* maximum code, given n_bits */ X#define MAXCODE(n) ((1<<(n)) - 1) /* maximum code calculation */ Xstatic INT maxcodemax = 1 << BITS; /* largest possible code (+1) */ X Xstatic unsigned char buf[BITS]; /* input/output buffer */ X Xstatic unsigned char lmask[9] = /* left side masks */ X{ 0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00 }; Xstatic unsigned char rmask[9] = /* right side masks */ X{ 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; X Xstatic INT offset; /* byte offset for code output */ Xstatic long in_count; /* length of input */ Xstatic long bytes_out; /* length of compressed output */ Xstatic unsigned INT ent; X Xstatic long htab[HSIZE]; /* hash code table (crunch) */ Xstatic unsigned INT codetab[HSIZE]; /* string code table (crunch) */ X Xstatic unsigned INT *prefix = codetab; /* prefix code table (uncrunch) */ X Xstatic unsigned char suffix[HSIZE]; /* suffix table (uncrunch) */ Xstatic INT free_ent; /* first unused entry */ Xstatic INT firstcmp; /* true at start of compression */ Xstatic unsigned char stack[HSIZE]; /* local push/pop stack */ X X/* X * block compression parameters -- after all codes are used up, X * and compression rate changes, start over. X */ X Xstatic INT clear_flg; Xstatic long ratio; X#define CHECK_GAP 10000 /* ratio check interval */ Xstatic long checkpoint; X X/* X * the next two codes should not be changed lightly, as they must not X * lie within the contiguous general code space. X */ X#define FIRST 257 /* first free entry */ X#define CLEAR 256 /* table clear output code */ X Xstatic INT cl_block(t) /* table clear for block compress */ XFILE *t; /* our output file */ X{ X long rat; X INT putcode(); X X checkpoint = in_count + CHECK_GAP; X X if(in_count > 0x007fffff) /* shift will overflow */ X { rat = bytes_out >> 8; X if(rat == 0) /* Don't divide by zero */ X rat = 0x7fffffff; X else rat = in_count / rat; X } X else rat = (in_count<<8)/bytes_out;/* 8 fractional bits */ X X if(rat > ratio) X ratio = rat; X else X { ratio = 0; X setmem`009(htab,HSIZE*sizeof(long),0xff); X free_ent = FIRST; X clear_flg = 1; X putcode(CLEAR,t); X } X} X X/***************************************************************** X * X * Output a given code. X * Inputs: X * code: A n_bits-bit integer. If == -1, then EOF. This assumes X * that n_bits =< (long)wordsize - 1. X * Outputs: X * Outputs code to the file. X * Assumptions: X * Chars are 8 bits long. X * Algorithm: X * Maintain a BITS character long buffer (so that 8 codes will X * fit in it exactly). When the buffer fills up empty it and start over. X */ X Xstatic INT putcode(code,t) /* output a code */ XINT code; /* code to output */ XFILE *t; /* where to put it */ X{ X INT r_off = offset; /* right offset */ X INT bits = n_bits; /* bits to go */ X unsigned char *bp = buf; /* buffer pointer */ X INT n; /* index */ X X if(code >= 0) /* if a real code */ X { /* X * Get to the first byte. X */ X bp += (r_off >> 3); X r_off &= 7; X X /* X * Since code is always >= 8 bits, only need to mask the first X * hunk on the left. X */ X *bp = (*bp&rmask[r_off]) | (code<<r_off) & lmask[r_off]; X bp++; X bits -= (8 - r_off); X code >>= (8 - r_off); X X /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ X if(bits >= 8) X { *bp++ = code; X code >>= 8; X bits -= 8; X } X X /* Last bits. */ X if(bits) X *bp = code; X X offset += n_bits; X X if(offset == (n_bits << 3)) X { bp = buf; X bits = n_bits; X bytes_out += bits; X do X putc_pak(*bp++,t); X while(--bits); X offset = 0; X } X X /* X * If the next entry is going to be too big for the code size, X * then increase it, if possible. X */ X if(free_ent>maxcode || clear_flg>0) X { /* X * Write the whole buffer, because the input side won't X * discover the size increase until after it has read it. X */ X if(offset > 0) X { bp = buf; /* reset pointer for writing */ X bytes_out += n = n_bits; X while(n--) X putc_pak(*bp++,t); X } X offset = 0; X X if(clear_flg) /* reset if clearing */ X { maxcode = MAXCODE(n_bits = INIT_BITS); X clear_flg = 0; X } X else /* else use more bits */ X { n_bits++; X if(n_bits == BITS) X maxcode = maxcodemax; X else X maxcode = MAXCODE(n_bits); X } X } X } X X else /* dump the buffer on EOF */ X { bytes_out += n = (offset+7) / 8; X X if(offset > 0) X while(n--) X putc_pak(*bp++,t); X offset = 0; X } X} X X/***************************************************************** X * X * Read one code from the standard input. If EOF, return -1. X * Inputs: X * cmpin X * Outputs: X * code or -1 is returned. X */ X Xstatic INT getcode(f) /* get a code */ XFILE *f; /* file to get from */ X{ X INT code; X static INT offset = 0, size = 0; X INT r_off, bits; X unsigned char *bp = buf; X X if(clear_flg > 0 || offset >= size || free_ent > maxcode) X { /* X * If the next entry will be too big for the current code X * size, then we must increase the size. This implies reading X * a new buffer full, too. X */ X if(free_ent > maxcode) X { n_bits++; X if(n_bits == BITS) X maxcode = maxcodemax; /* won't get any bigger now */ X else maxcode = MAXCODE(n_bits); X } X if(clear_flg > 0) X { maxcode = MAXCODE(n_bits = INIT_BITS); X clear_flg = 0; X } X X for(size=0; size<n_bits; size++) X { if((code=getc_unp(f))==EOF) X break; X else buf[size] = code; X } X if(size <= 0) X return -1; /* end of file */ X X offset = 0; X /* Round size down to integral number of codes */ X size = (size << 3)-(n_bits - 1); X } X r_off = offset; X bits = n_bits; X X /* X * Get to the first byte. X */ X bp +=(r_off >> 3); X r_off &= 7; X X /* Get first part (low order bits) */ X code = (*bp++ >> r_off); X bits -= 8 - r_off; X r_off = 8 - r_off; /* now, offset into code word */ X X /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ X if(bits >= 8) X { code |= *bp++ << r_off; X r_off += 8; X bits -= 8; X } X /* high order bits. */ X code |= (*bp & rmask[bits]) << r_off; X offset += n_bits; X X return code; X} X X/* X * compress a file X * X * Algorithm: use open addressing double hashing (no chaining) on the X * prefix code / next character combination. We do a variant of Knuth's X * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime X * secondary probe. Here, the modular division first probe is gives way X * to a faster exclusive-or manipulation. Also do block compression with X * an adaptive reset, where the code table is cleared when the compression X * ratio decreases, but after the table fills. The variable-length output X * codes are re-sized at this point, and a special CLEAR code is generated X * for the decompressor. X */ X XINT sqinit_cm(f,t) /* initialize for compression */ XFILE *f; /* file we will be compressing */ XFILE *t; /* where we will put it */ X{ X offset = 0; X bytes_out = 0; X clear_flg = 0; X ratio = 0; X in_count = 1; X checkpoint = CHECK_GAP; X maxcode = MAXCODE(n_bits = INIT_BITS); X free_ent = FIRST; X setmem(htab,HSIZE*sizeof(long),0xff); X n_bits = INIT_BITS; /* set starting code size */ X X firstcmp = 1; /* next byte will be first */ X} X XINT sqputc_cm(c,t) /* compress a character */ Xunsigned char c; /* character to compress */ XFILE *t; /* where to put it */ X{ X static long fcode; X static INT hshift; X#ifdef VAXC X register INT i; X#else X INT i; X#endif VAXC X INT disp; X X if(firstcmp) /* special case for first byte */ X { ent = c; /* remember first byte */ X X hshift = 0; X for(fcode=(long)HSIZE; fcode<65536L; fcode*=2L) X hshift++; X hshift = 8 - hshift; /* set hash code range bund */ X X firstcmp = 0; /* no longer first */ X return; X } X X in_count++; X fcode =(long)(((long)c << BITS)+ent); X i = (c<<hshift)^ent; /* xor hashing */ X X if(htab[i]==fcode) X { ent = codetab[i]; X return; X } X else if(htab[i]<0) /* empty slot */ X goto nomatch; X disp = HSIZE - i; /* secondary hash (after G.Knott) */ X if(i == 0) X disp = 1; X Xprobe: X if((i -= disp) < 0) X i += HSIZE; X X#ifdef VAXC X if (i > HSIZE) printf("%SQUASH-W-BOUND, i > HSIZE\n"); X#endif VAXC X X if(htab[i] == fcode) X { ent = codetab[i]; X return; X } X if(htab[i] > 0) X goto probe; X Xnomatch: X putcode(ent,t); X ent = c; X if(free_ent < maxcodemax) X { codetab[i] = free_ent++; /* code -> hashtable */ X htab[i] = fcode; X } X else if((long)in_count >= checkpoint) X cl_block(t); X} X Xlong sqpred_cm(t) /* finish compressing a file */ XFILE *t; /* where to put it */ X{ X putcode(ent,t); /* put out the final code */ X putcode(-1,t); /* tell output we are done */ X X return bytes_out; /* say how big it got */ X} X X/* X * Decompress a file. This routine adapts to the codes in the file X * building the string table on-the-fly; requiring no table to be stored X * in the compressed file. The tables used herein are shared with those of X * the compress() routine. See the definitions above. X */ X XINT sqdecomp(f,t) /* decompress a file */ XFILE *f; /* file to read codes from */ XFILE *t; /* file to write text to */ X{ X unsigned char *stackp; X INT finchar; X INT code, oldcode, incode; X X n_bits = INIT_BITS; /* set starting code size */ X clear_flg = 0; X X /* X * As above, initialize the first 256 entries in the table. X */ X maxcode = MAXCODE(n_bits=INIT_BITS); X for(code = 255; code >= 0; code--) X { prefix[code] = 0; X suffix[code] = (unsigned char)code; X } X free_ent = FIRST; X X finchar = oldcode = getcode(f); X if(oldcode == -1) /* EOF already? */ X return; /* Get out of here */ X putc_unp((char)finchar,t); /* first code must be 8 bits=char */ X stackp = stack; X X while((code = getcode(f))> -1) X { if(code==CLEAR) X { for(code = 255; code >= 0; code--) X prefix[code] = 0; X clear_flg = 1; X free_ent = FIRST - 1; X if((code=getcode(f))==-1)/* O, untimely death! */ X break; X } X incode = code; X /* X * Special case for KwKwK string. X */ X if(code >= free_ent) X { *stackp++ = finchar; X code = oldcode; X } X X /* X * Generate output characters in reverse order X */ X while(code >= 256) X { *stackp++ = suffix[code]; X code = prefix[code]; X } X *stackp++ = finchar = suffix[code]; X X /* X * And put them out in forward order X */ X do X putc_unp(*--stackp,t); X while(stackp > stack); X X /* X * Generate the new entry. X */ X if((code=free_ent) < maxcodemax) X { prefix[code] = (unsigned short)oldcode; X suffix[code] = finchar; X free_ent = code+1; X } X /* X * Remember previous code. X */ X oldcode = incode; X } X} X $ GoSub Convert_File $ Exit