ewilts%Ins.MRC.AdhocNet.CA%Stasis.MRC.AdhocNet.CA%UNCAEDU.@CORNELLC.CCS.CORNELL.EDU.BITNET (Ed Wilts) (06/24/88)
X if(htab[i] > 0)
X goto probe;
X
Xnomatch:
X putcode(ent,t);
X ent = c;
X if(free_ent < maxcodemax)
X { codetab[i] = free_ent++; /* code -> hashtable */
X htab[i] = fcode;
X }
X else if((long)in_count >= checkpoint)
X cl_block(t);
X}
X
Xlong pred_cm(t) /* finish compressing a file */
XFILE *t; /* where to put it */
X{
X putcode(ent,t); /* put out the final code */
X putcode(-1,t); /* tell output we are done */
X
X return bytes_out; /* say how big it got */
X}
X
X/*
X * Decompress a file. This routine adapts to the codes in the file
X * building the string table on-the-fly; requiring no table to be stored
X * in the compressed file. The tables used herein are shared with those of
X * the compress() routine. See the definitions above.
X */
X
XINT decomp(f,t) /* decompress a file */
XFILE *f; /* file to read codes from */
XFILE *t; /* file to write text to */
X{
X unsigned char *stackp;
X INT finchar;
X INT code, oldcode, incode;
X
X if((code=getc_unp(f))!=BITS)
X abort("File packed with %d bits, I can only handle %d",code,BITS);
X
X n_bits = INIT_BITS; /* set starting code size */
X clear_flg = 0;
X
X /*
X * As above, initialize the first 256 entries in the table.
X */
X maxcode = MAXCODE(n_bits=INIT_BITS);
X for(code = 255; code >= 0; code--)
X { prefix[code] = 0;
X suffix[code] = (unsigned char)code;
X }
X free_ent = FIRST;
X
X finchar = oldcode = getcode(f);
X if(oldcode == -1) /* EOF already? */
X return; /* Get out of here */
X putc_ncr((char)finchar,t); /* first code must be 8 bits=char */
X stackp = stack;
X
X while((code = getcode(f))> -1)
X { if(code==CLEAR)
X { for(code = 255; code >= 0; code--)
X prefix[code] = 0;
X clear_flg = 1;
X free_ent = FIRST - 1;
X if((code=getcode(f))==-1)/* O, untimely death! */
X break;
X }
X incode = code;
X /*
X * Special case for KwKwK string.
X */
X if(code >= free_ent)
X { *stackp++ = finchar;
X code = oldcode;
X }
X
X /*
X * Generate output characters in reverse order
X */
X while(code >= 256)
X { *stackp++ = suffix[code];
X code = prefix[code];
X }
X *stackp++ = finchar = suffix[code];
X
X /*
X * And put them out in forward order
X */
X do
X putc_ncr(*--stackp,t);
X while(stackp > stack);
X
X /*
X * Generate the new entry.
X */
X if((code=free_ent) < maxcodemax)
X { prefix[code] = (unsigned short)oldcode;
X suffix[code] = finchar;
X free_ent = code+1;
X }
X /*
X * Remember previous code.
X */
X oldcode = incode;
X }
X}
X
X
X/*************************************************************************
X * Please note how much trouble it can be to maintain upwards *
X * compatibility. All that follows is for the sole purpose of unpacking *
X * files which were packed using an older method. *
X *************************************************************************/
X
X
X/* The h() pointer points to the routine to use for calculating a hash
X value. It is set in the init routines to point to either of oldh()
X or newh().
X
X oldh() calculates a hash value by taking the middle twelve bits
X of the square of the key.
X
X newh() works somewhat differently, and was tried because it makes
X ARC about 23% faster. This approach was abandoned because dynamic
X Lempel-Zev (above) works as well, and packs smaller also. However,
X inadvertent release of a developmental copy forces us to leave this in.
X*/
X
Xstatic unsigned INT (*h)(); /* pointer to hash function */
X
Xstatic unsigned INT oldh(pred,foll) /* old hash function */
Xunsigned INT pred; /* code for preceeding string */
Xunsigned char foll; /* value of following char */
X{
X long local; /* local hash value */
X
X local = ((pred + foll) | 0x0800) & 0xffff; /* create the hash key */
X /*local = (pred + foll) | 0x0800; /* create the hash key */
X local *= local; /* square it */
X return (local >> 6) & 0x0FFF; /* return the middle 12 bits */
X}
X
Xstatic unsigned INT newh(pred,foll) /* new hash function */
Xunsigned INT pred; /* code for preceeding string */
Xunsigned char foll; /* value of following char */
X{
X return (((pred+foll)&0xffff)*15073)&0xFFF; /* faster hash */
X /*return ((pred+foll)*15073)&0xFFF; /* faster hash */
X}
X
X/* The eolist() function is used to trace down a list of entries with
X duplicate keys until the last duplicate is found.
X*/
X
Xstatic unsigned INT eolist(index) /* find last duplicate */
Xunsigned INT index;
X{
X INT temp;
X
X while(temp=string_tab[index].next) /* while more duplicates */
X index = temp;
X
X return index;
X}
X
X/* The hash() routine is used to find a spot in the hash table for a new
X entry. It performs a "hash and linear probe" lookup, using h() to
X calculate the starting hash value and eolist() to perform the linear
X probe. This routine DOES NOT detect a table full condition. That
X MUST be checked for elsewhere.
X*/
X
Xstatic unsigned INT hash(pred,foll) /* find spot in the string table */
Xunsigned INT pred; /* code for preceeding string */
Xunsigned char foll; /* char following string */
X{
X unsigned INT local, tempnext; /* scratch storage */
X struct entry *ep; /* allows faster table handling */
X
X local = (*h)(pred,foll); /* get initial hash value */
X
X if(!string_tab[local].used) /* if that spot is free */
X return local; /* then that's all we need */
X
X else /* else a collision has occured */
X { local = eolist(local); /* move to last duplicate */
X
X /* We must find an empty spot. We start looking 101 places
X down the table from the last duplicate.
X */
X
X tempnext = (local+101) & 0x0FFF;
X ep = &string_tab[tempnext]; /* initialize pointer */
X
X while(ep->used) /* while empty spot not found */
X { if(++tempnext==TABSIZE) /* if we are at the end */
X { tempnext = 0; /* wrap to beginning of table*/
X ep = string_tab;
X }
X else ++ep; /* point to next element in table */
X }
X
X /* local still has the pointer to the last duplicate, while
X tempnext has the pointer to the spot we found. We use
X this to maintain the chain of pointers to duplicates.
X */
X
X string_tab[local].next = tempnext;
X
X return tempnext;
X }
X}
X
X/* The unhash() function is used to search the hash table for a given key.
X Like hash(), it performs a hash and linear probe search. It returns
X either the number of the entry (if found) or NOT_FND (if not found).
X*/
X
Xstatic unsigned INT unhash(pred,foll) /* search string table for a key */
Xunsigned INT pred; /* code of preceeding string */
Xunsigned char foll; /* character following string */
X{
X unsigned INT local, offset; /* scratch storage */
X struct entry *ep; /* this speeds up access */
X
X local = (*h)(pred,foll); /* initial hash */
X
X while(1)
X { ep = &string_tab[local]; /* speed up table access */
X
X if((ep->predecessor==pred) && (ep->follower==foll))
X return local; /* we have a match */
X
X if(!ep->next) /* if no more duplicates */
X return NOT_FND; /* then key is not listed */
X
X local = ep->next; /* move on to next duplicate */
X }
X}
X
X/* The init_tab() routine is used to initialize our hash table.
X You realize, of course, that "initialize" is a complete misnomer.
X*/
X
Xstatic INT init_tab() /* set ground state in hash table */
X{
X unsigned INT i; /* table index */
X INT upd_tab();
X
X setmem((char *)string_tab,sizeof(string_tab),0);
X
X for(i=0; i<256; i++) /* list all single byte strings */
X upd_tab(NO_PRED,i);
X
X inbuf = EMPTY; /* nothing is in our buffer */
X}
X
X/* The upd_tab routine is used to add a new entry to the string table.
X As previously stated, no checks are made to ensure that the table
X has any room. This must be done elsewhere.
X*/
X
XINT upd_tab(pred,foll) /* add an entry to the table */
Xunsigned INT pred; /* code for preceeding string */
Xunsigned INT foll; /* character which follows string */
X{
X struct entry *ep; /* pointer to current entry */
X
X /* calculate offset just once */
X
X ep = &string_tab[hash(pred,foll)];
X
X ep->used = TRUE; /* this spot is now in use */
X ep->next = 0; /* no duplicates after this yet */
X ep->predecessor = pred; /* note code of preceeding string */
X ep->follower = foll; /* note char after string */
X}
X
X/* This algorithm encoded a file into twelve bit strings (three nybbles).
X The gocode() routine is used to read these strings a byte (or two)
X at a time.
X*/
X
Xstatic INT gocode(fd) /* read in a twelve bit code */
XFILE *fd; /* file to get code from */
X{
X unsigned INT localbuf, returnval;
X
X if(inbuf==EMPTY) /* if on a code boundary */
X { if((localbuf=getc_unp(fd))==EOF) /* get start of next code */
X return EOF; /* pass back end of file status */
X localbuf &= 0xFF; /* mask down to true byte value */
X if((inbuf=getc_unp(fd))==EOF) /* get end of code, start of next */
X return EOF; /* this should never happen */
X inbuf &= 0xFF; /* mask down to true byte value */
X
X returnval = ((localbuf<<4)&0xFF0) + ((inbuf>>4)&0x00F);
X inbuf &= 0x000F; /* leave partial code pending */
X }
X
X else /* buffer contains first nybble */
X { if((localbuf=getc_unp(fd))==EOF)
X return EOF;
X localbuf &= 0xFF;
X
X returnval = localbuf + ((inbuf<<8)&0xF00);
X inbuf = EMPTY; /* note no hanging nybbles */
X }
X return returnval; /* pass back assembled code */
X}
X
Xstatic INT push(c) /* push char onto stack */
XINT c; /* character to push */
X{
X stack[sp] = ((char) c); /* coerce integer into a char */
X
X if(++sp >= TABSIZE)
X abort("Stack overflow\n");
X}
X
Xstatic INT pop() /* pop character from stack */
X{
X if(sp>0)
X return ((INT) stack[--sp]); /* leave ptr at next empty slot */
X
X else return EMPTY;
X}
X
X/***** LEMPEL-ZEV DECOMPRESSION *****/
X
Xstatic INT code_count; /* needed to detect table full */
Xstatic unsigned INT code; /* where we are so far */
Xstatic INT firstc; /* true only on first character */
X
XINT init_ucr(new) /* get set for uncrunching */
XINT new; /* true to use new hash function */
X{
X if(new) /* set proper hash function */
X h = newh;
X else h = oldh;
X
X sp = 0; /* clear out the stack */
X init_tab(); /* set up atomic code definitions */
X code_count = TABSIZE - 256; /* note space left in table */
X firstc = 1; /* true only on first code */
X}
X
XINT getc_ucr(f) /* get next uncrunched byte */
XFILE *f; /* file containing crunched data */
X{
X unsigned INT c; /* a character of input */
X INT code, newcode;
X static INT oldcode, finchar;
X struct entry *ep; /* allows faster table handling */
X
X if(firstc) /* first code is always known */
X { firstc = FALSE; /* but next will not be first */
X oldcode = gocode(f);
X return finchar = string_tab[oldcode].follower;
X }
X
X if(!sp) /* if stack is empty */
X { if((code=newcode=gocode(f))==EOF)
X return EOF;
X
X ep = &string_tab[code]; /* initialize pointer */
X
X if(!ep->used) /* if code isn't known */
X { code = oldcode;
X ep = &string_tab[code]; /* re-initialize pointer */
X push(finchar);
X }
X
X while(ep->predecessor!=NO_PRED)
X { push(ep->follower); /* decode string backwards */
X code = ep->predecessor;
X ep = &string_tab[code];
X }
X
X push(finchar=ep->follower); /* save first character also */
X
X /* The above loop will terminate, one way or another,
X with string_tab[code].follower equal to the first
X character in the string.
X */
X
X if(code_count) /* if room left in string table */
X { upd_tab(oldcode,finchar);
X --code_count;
X }
X
X oldcode = newcode;
X }
X
X return pop(); /* return saved character */
X}
X
$ GoSub Convert_File
$ Goto Part09