ewilts%Ins.MRC.AdhocNet.CA%Stasis.MRC.AdhocNet.CA%UNCAEDU.@CORNELLC.CCS.CORNELL.EDU.BITNET (Ed Wilts) (06/24/88)
X if(htab[i] > 0) X goto probe; X Xnomatch: X putcode(ent,t); X ent = c; X if(free_ent < maxcodemax) X { codetab[i] = free_ent++; /* code -> hashtable */ X htab[i] = fcode; X } X else if((long)in_count >= checkpoint) X cl_block(t); X} X Xlong pred_cm(t) /* finish compressing a file */ XFILE *t; /* where to put it */ X{ X putcode(ent,t); /* put out the final code */ X putcode(-1,t); /* tell output we are done */ X X return bytes_out; /* say how big it got */ X} X X/* X * Decompress a file. This routine adapts to the codes in the file X * building the string table on-the-fly; requiring no table to be stored X * in the compressed file. The tables used herein are shared with those of X * the compress() routine. See the definitions above. X */ X XINT decomp(f,t) /* decompress a file */ XFILE *f; /* file to read codes from */ XFILE *t; /* file to write text to */ X{ X unsigned char *stackp; X INT finchar; X INT code, oldcode, incode; X X if((code=getc_unp(f))!=BITS) X abort("File packed with %d bits, I can only handle %d",code,BITS); X X n_bits = INIT_BITS; /* set starting code size */ X clear_flg = 0; X X /* X * As above, initialize the first 256 entries in the table. X */ X maxcode = MAXCODE(n_bits=INIT_BITS); X for(code = 255; code >= 0; code--) X { prefix[code] = 0; X suffix[code] = (unsigned char)code; X } X free_ent = FIRST; X X finchar = oldcode = getcode(f); X if(oldcode == -1) /* EOF already? */ X return; /* Get out of here */ X putc_ncr((char)finchar,t); /* first code must be 8 bits=char */ X stackp = stack; X X while((code = getcode(f))> -1) X { if(code==CLEAR) X { for(code = 255; code >= 0; code--) X prefix[code] = 0; X clear_flg = 1; X free_ent = FIRST - 1; X if((code=getcode(f))==-1)/* O, untimely death! */ X break; X } X incode = code; X /* X * Special case for KwKwK string. X */ X if(code >= free_ent) X { *stackp++ = finchar; X code = oldcode; X } X X /* X * Generate output characters in reverse order X */ X while(code >= 256) X { *stackp++ = suffix[code]; X code = prefix[code]; X } X *stackp++ = finchar = suffix[code]; X X /* X * And put them out in forward order X */ X do X putc_ncr(*--stackp,t); X while(stackp > stack); X X /* X * Generate the new entry. X */ X if((code=free_ent) < maxcodemax) X { prefix[code] = (unsigned short)oldcode; X suffix[code] = finchar; X free_ent = code+1; X } X /* X * Remember previous code. X */ X oldcode = incode; X } X} X X X/************************************************************************* X * Please note how much trouble it can be to maintain upwards * X * compatibility. All that follows is for the sole purpose of unpacking * X * files which were packed using an older method. * X *************************************************************************/ X X X/* The h() pointer points to the routine to use for calculating a hash X value. It is set in the init routines to point to either of oldh() X or newh(). X X oldh() calculates a hash value by taking the middle twelve bits X of the square of the key. X X newh() works somewhat differently, and was tried because it makes X ARC about 23% faster. This approach was abandoned because dynamic X Lempel-Zev (above) works as well, and packs smaller also. However, X inadvertent release of a developmental copy forces us to leave this in. X*/ X Xstatic unsigned INT (*h)(); /* pointer to hash function */ X Xstatic unsigned INT oldh(pred,foll) /* old hash function */ Xunsigned INT pred; /* code for preceeding string */ Xunsigned char foll; /* value of following char */ X{ X long local; /* local hash value */ X X local = ((pred + foll) | 0x0800) & 0xffff; /* create the hash key */ X /*local = (pred + foll) | 0x0800; /* create the hash key */ X local *= local; /* square it */ X return (local >> 6) & 0x0FFF; /* return the middle 12 bits */ X} X Xstatic unsigned INT newh(pred,foll) /* new hash function */ Xunsigned INT pred; /* code for preceeding string */ Xunsigned char foll; /* value of following char */ X{ X return (((pred+foll)&0xffff)*15073)&0xFFF; /* faster hash */ X /*return ((pred+foll)*15073)&0xFFF; /* faster hash */ X} X X/* The eolist() function is used to trace down a list of entries with X duplicate keys until the last duplicate is found. X*/ X Xstatic unsigned INT eolist(index) /* find last duplicate */ Xunsigned INT index; X{ X INT temp; X X while(temp=string_tab[index].next) /* while more duplicates */ X index = temp; X X return index; X} X X/* The hash() routine is used to find a spot in the hash table for a new X entry. It performs a "hash and linear probe" lookup, using h() to X calculate the starting hash value and eolist() to perform the linear X probe. This routine DOES NOT detect a table full condition. That X MUST be checked for elsewhere. X*/ X Xstatic unsigned INT hash(pred,foll) /* find spot in the string table */ Xunsigned INT pred; /* code for preceeding string */ Xunsigned char foll; /* char following string */ X{ X unsigned INT local, tempnext; /* scratch storage */ X struct entry *ep; /* allows faster table handling */ X X local = (*h)(pred,foll); /* get initial hash value */ X X if(!string_tab[local].used) /* if that spot is free */ X return local; /* then that's all we need */ X X else /* else a collision has occured */ X { local = eolist(local); /* move to last duplicate */ X X /* We must find an empty spot. We start looking 101 places X down the table from the last duplicate. X */ X X tempnext = (local+101) & 0x0FFF; X ep = &string_tab[tempnext]; /* initialize pointer */ X X while(ep->used) /* while empty spot not found */ X { if(++tempnext==TABSIZE) /* if we are at the end */ X { tempnext = 0; /* wrap to beginning of table*/ X ep = string_tab; X } X else ++ep; /* point to next element in table */ X } X X /* local still has the pointer to the last duplicate, while X tempnext has the pointer to the spot we found. We use X this to maintain the chain of pointers to duplicates. X */ X X string_tab[local].next = tempnext; X X return tempnext; X } X} X X/* The unhash() function is used to search the hash table for a given key. X Like hash(), it performs a hash and linear probe search. It returns X either the number of the entry (if found) or NOT_FND (if not found). X*/ X Xstatic unsigned INT unhash(pred,foll) /* search string table for a key */ Xunsigned INT pred; /* code of preceeding string */ Xunsigned char foll; /* character following string */ X{ X unsigned INT local, offset; /* scratch storage */ X struct entry *ep; /* this speeds up access */ X X local = (*h)(pred,foll); /* initial hash */ X X while(1) X { ep = &string_tab[local]; /* speed up table access */ X X if((ep->predecessor==pred) && (ep->follower==foll)) X return local; /* we have a match */ X X if(!ep->next) /* if no more duplicates */ X return NOT_FND; /* then key is not listed */ X X local = ep->next; /* move on to next duplicate */ X } X} X X/* The init_tab() routine is used to initialize our hash table. X You realize, of course, that "initialize" is a complete misnomer. X*/ X Xstatic INT init_tab() /* set ground state in hash table */ X{ X unsigned INT i; /* table index */ X INT upd_tab(); X X setmem((char *)string_tab,sizeof(string_tab),0); X X for(i=0; i<256; i++) /* list all single byte strings */ X upd_tab(NO_PRED,i); X X inbuf = EMPTY; /* nothing is in our buffer */ X} X X/* The upd_tab routine is used to add a new entry to the string table. X As previously stated, no checks are made to ensure that the table X has any room. This must be done elsewhere. X*/ X XINT upd_tab(pred,foll) /* add an entry to the table */ Xunsigned INT pred; /* code for preceeding string */ Xunsigned INT foll; /* character which follows string */ X{ X struct entry *ep; /* pointer to current entry */ X X /* calculate offset just once */ X X ep = &string_tab[hash(pred,foll)]; X X ep->used = TRUE; /* this spot is now in use */ X ep->next = 0; /* no duplicates after this yet */ X ep->predecessor = pred; /* note code of preceeding string */ X ep->follower = foll; /* note char after string */ X} X X/* This algorithm encoded a file into twelve bit strings (three nybbles). X The gocode() routine is used to read these strings a byte (or two) X at a time. X*/ X Xstatic INT gocode(fd) /* read in a twelve bit code */ XFILE *fd; /* file to get code from */ X{ X unsigned INT localbuf, returnval; X X if(inbuf==EMPTY) /* if on a code boundary */ X { if((localbuf=getc_unp(fd))==EOF) /* get start of next code */ X return EOF; /* pass back end of file status */ X localbuf &= 0xFF; /* mask down to true byte value */ X if((inbuf=getc_unp(fd))==EOF) /* get end of code, start of next */ X return EOF; /* this should never happen */ X inbuf &= 0xFF; /* mask down to true byte value */ X X returnval = ((localbuf<<4)&0xFF0) + ((inbuf>>4)&0x00F); X inbuf &= 0x000F; /* leave partial code pending */ X } X X else /* buffer contains first nybble */ X { if((localbuf=getc_unp(fd))==EOF) X return EOF; X localbuf &= 0xFF; X X returnval = localbuf + ((inbuf<<8)&0xF00); X inbuf = EMPTY; /* note no hanging nybbles */ X } X return returnval; /* pass back assembled code */ X} X Xstatic INT push(c) /* push char onto stack */ XINT c; /* character to push */ X{ X stack[sp] = ((char) c); /* coerce integer into a char */ X X if(++sp >= TABSIZE) X abort("Stack overflow\n"); X} X Xstatic INT pop() /* pop character from stack */ X{ X if(sp>0) X return ((INT) stack[--sp]); /* leave ptr at next empty slot */ X X else return EMPTY; X} X X/***** LEMPEL-ZEV DECOMPRESSION *****/ X Xstatic INT code_count; /* needed to detect table full */ Xstatic unsigned INT code; /* where we are so far */ Xstatic INT firstc; /* true only on first character */ X XINT init_ucr(new) /* get set for uncrunching */ XINT new; /* true to use new hash function */ X{ X if(new) /* set proper hash function */ X h = newh; X else h = oldh; X X sp = 0; /* clear out the stack */ X init_tab(); /* set up atomic code definitions */ X code_count = TABSIZE - 256; /* note space left in table */ X firstc = 1; /* true only on first code */ X} X XINT getc_ucr(f) /* get next uncrunched byte */ XFILE *f; /* file containing crunched data */ X{ X unsigned INT c; /* a character of input */ X INT code, newcode; X static INT oldcode, finchar; X struct entry *ep; /* allows faster table handling */ X X if(firstc) /* first code is always known */ X { firstc = FALSE; /* but next will not be first */ X oldcode = gocode(f); X return finchar = string_tab[oldcode].follower; X } X X if(!sp) /* if stack is empty */ X { if((code=newcode=gocode(f))==EOF) X return EOF; X X ep = &string_tab[code]; /* initialize pointer */ X X if(!ep->used) /* if code isn't known */ X { code = oldcode; X ep = &string_tab[code]; /* re-initialize pointer */ X push(finchar); X } X X while(ep->predecessor!=NO_PRED) X { push(ep->follower); /* decode string backwards */ X code = ep->predecessor; X ep = &string_tab[code]; X } X X push(finchar=ep->follower); /* save first character also */ X X /* The above loop will terminate, one way or another, X with string_tab[code].follower equal to the first X character in the string. X */ X X if(code_count) /* if room left in string table */ X { upd_tab(oldcode,finchar); X --code_count; X } X X oldcode = newcode; X } X X return pop(); /* return saved character */ X} X $ GoSub Convert_File $ Goto Part09