[comp.os.vms] ARC_C.SHAR08_OF_19

ewilts%Ins.MRC.AdhocNet.CA%Stasis.MRC.AdhocNet.CA%UNCAEDU.@CORNELLC.CCS.CORNELL.EDU.BITNET (Ed Wilts) (06/24/88)
X    if(htab[i] > 0)
X         goto probe;
X
Xnomatch:
X    putcode(ent,t);
X    ent = c;
X    if(free_ent < maxcodemax)
X    {    codetab[i] = free_ent++;      /* code -> hashtable */
X         htab[i] = fcode;
X    }
X    else if((long)in_count >= checkpoint)
X         cl_block(t);
X}
X
Xlong pred_cm(t)                        /* finish compressing a file */
XFILE *t;                               /* where to put it */
X{
X    putcode(ent,t);                    /* put out the final code */
X    putcode(-1,t);                     /* tell output we are done */
X
X    return bytes_out;                  /* say how big it got */
X}
X
X/*
X * Decompress a file.  This routine adapts to the codes in the file
X * building the string table on-the-fly; requiring no table to be stored
X * in the compressed file.  The tables used herein are shared with those of
X * the compress() routine.  See the definitions above.
X */
X
XINT decomp(f,t)                            /* decompress a file */
XFILE *f;                               /* file to read codes from */
XFILE *t;                               /* file to write text to */
X{
X    unsigned char *stackp;
X INT finchar;
X INT code, oldcode, incode;
X
X    if((code=getc_unp(f))!=BITS)
X         abort("File packed with %d bits, I can only handle %d",code,BITS);
X
X    n_bits = INIT_BITS;                /* set starting code size */
X    clear_flg = 0;
X
X    /*
X     * As above, initialize the first 256 entries in the table.
X     */
X    maxcode = MAXCODE(n_bits=INIT_BITS);
X    for(code = 255; code >= 0; code--)
X    {    prefix[code] = 0;
X         suffix[code] = (unsigned char)code;
X    }
X    free_ent = FIRST;
X
X    finchar = oldcode = getcode(f);
X    if(oldcode == -1)                  /* EOF already? */
X         return;                       /* Get out of here */
X    putc_ncr((char)finchar,t);         /* first code must be 8 bits=char */
X    stackp = stack;
X
X    while((code = getcode(f))> -1)
X    {    if(code==CLEAR)
X         {    for(code = 255; code >= 0; code--)
X                   prefix[code] = 0;
X              clear_flg = 1;
X              free_ent = FIRST - 1;
X              if((code=getcode(f))==-1)/* O, untimely death! */
X                   break;
X         }
X         incode = code;
X         /*
X          * Special case for KwKwK string.
X          */
X         if(code >= free_ent)
X         {    *stackp++ = finchar;
X              code = oldcode;
X         }
X
X         /*
X          * Generate output characters in reverse order
X          */
X         while(code >= 256)
X         {    *stackp++ = suffix[code];
X              code = prefix[code];
X         }
X         *stackp++ = finchar = suffix[code];
X
X         /*
X          * And put them out in forward order
X          */
X         do
X              putc_ncr(*--stackp,t);
X         while(stackp > stack);
X
X         /*
X          * Generate the new entry.
X          */
X         if((code=free_ent) < maxcodemax)
X         {    prefix[code] = (unsigned short)oldcode;
X              suffix[code] = finchar;
X              free_ent = code+1;
X         }
X         /*
X          * Remember previous code.
X          */
X         oldcode = incode;
X    }
X}
X
X
X/*************************************************************************
X * Please note how much trouble it can be to maintain upwards            *
X * compatibility.  All that follows is for the sole purpose of unpacking *
X * files which were packed using an older method.                        *
X *************************************************************************/
X
X
X/*  The h() pointer points to the routine to use for calculating a hash
X    value.  It is set in the init routines to point to either of oldh()
X    or newh().
X
X    oldh() calculates a hash value by taking the middle twelve bits
X    of the square of the key.
X
X    newh() works somewhat differently, and was tried because it makes
X    ARC about 23% faster.  This approach was abandoned because dynamic
X    Lempel-Zev (above) works as well, and packs smaller also.  However,
X    inadvertent release of a developmental copy forces us to leave this in.
X*/
X
Xstatic unsigned INT (*h)();                /* pointer to hash function */
X
Xstatic unsigned INT oldh(pred,foll)        /* old hash function */
Xunsigned INT pred;                     /* code for preceeding string */
Xunsigned char foll;                    /* value of following char */
X{
X    long local;                        /* local hash value */
X
X    local = ((pred + foll) | 0x0800) & 0xffff;    /* create the hash key */
X    /*local = (pred + foll) | 0x0800;    /* create the hash key */
X    local *= local;                    /* square it */
X    return (local >> 6) & 0x0FFF;      /* return the middle 12 bits */
X}
X
Xstatic unsigned INT newh(pred,foll)        /* new hash function */
Xunsigned INT pred;                     /* code for preceeding string */
Xunsigned char foll;                    /* value of following char */
X{
X    return (((pred+foll)&0xffff)*15073)&0xFFF;  /* faster hash */
X    /*return ((pred+foll)*15073)&0xFFF;  /* faster hash */
X}
X
X/*  The eolist() function is used to trace down a list of entries with
X    duplicate keys until the last duplicate is found.
X*/
X
Xstatic unsigned INT eolist(index)          /* find last duplicate */
Xunsigned INT index;
X{
X INT temp;
X
X    while(temp=string_tab[index].next) /* while more duplicates */
X         index = temp;
X
X    return index;
X}
X
X/*  The hash() routine is used to find a spot in the hash table for a new
X    entry.  It performs a "hash and linear probe" lookup, using h() to
X    calculate the starting hash value and eolist() to perform the linear
X    probe.  This routine DOES NOT detect a table full condition.  That
X    MUST be checked for elsewhere.
X*/
X
Xstatic unsigned INT hash(pred,foll)        /* find spot in the string table */
Xunsigned INT pred;                     /* code for preceeding string */
Xunsigned char foll;                    /* char following string */
X{
X    unsigned INT local, tempnext;      /* scratch storage */
X    struct entry *ep;                  /* allows faster table handling */
X
X    local = (*h)(pred,foll);           /* get initial hash value */
X
X    if(!string_tab[local].used)        /* if that spot is free */
X         return local;                 /* then that's all we need */
X
X    else                               /* else a collision has occured */
X    {    local = eolist(local);        /* move to last duplicate */
X
X         /*   We must find an empty spot. We start looking 101 places
X              down the table from the last duplicate.
X         */
X
X         tempnext = (local+101) & 0x0FFF;
X         ep = &string_tab[tempnext];   /* initialize pointer */
X
X         while(ep->used)               /* while empty spot not found */
X         {    if(++tempnext==TABSIZE)  /* if we are at the end */
X              {    tempnext = 0;       /* wrap to beginning of table*/
X                   ep = string_tab;
X              }
X              else ++ep;               /* point to next element in table */
X         }
X
X         /*   local still has the pointer to the last duplicate, while
X              tempnext has the pointer to the spot we found.  We use
X              this to maintain the chain of pointers to duplicates.
X         */
X
X         string_tab[local].next = tempnext;
X
X         return tempnext;
X    }
X}
X
X/*  The unhash() function is used to search the hash table for a given key.
X    Like hash(), it performs a hash and linear probe search.  It returns
X    either the number of the entry (if found) or NOT_FND (if not found).
X*/
X
Xstatic unsigned INT unhash(pred,foll)      /* search string table for a key */
Xunsigned INT pred;                     /* code of preceeding string */
Xunsigned char foll;                    /* character following string */
X{
X    unsigned INT local, offset;        /* scratch storage */
X    struct entry *ep;                  /* this speeds up access */
X
X    local = (*h)(pred,foll);           /* initial hash */
X
X    while(1)
X    {    ep = &string_tab[local];      /* speed up table access */
X
X         if((ep->predecessor==pred) && (ep->follower==foll))
X              return local;            /* we have a match */
X
X         if(!ep->next)                 /* if no more duplicates */
X              return NOT_FND;          /* then key is not listed */
X
X         local = ep->next;             /* move on to next duplicate */
X    }
X}
X
X/*  The init_tab() routine is used to initialize our hash table.
X    You realize, of course, that "initialize" is a complete misnomer.
X*/
X
Xstatic INT init_tab()                      /* set ground state in hash table */
X{
X    unsigned INT i;                    /* table index */
X    INT upd_tab();
X
X    setmem((char *)string_tab,sizeof(string_tab),0);
X
X    for(i=0; i<256; i++)               /* list all single byte strings */
X         upd_tab(NO_PRED,i);
X
X    inbuf = EMPTY;                     /* nothing is in our buffer */
X}
X
X/*  The upd_tab routine is used to add a new entry to the string table.
X    As previously stated, no checks are made to ensure that the table
X    has any room.  This must be done elsewhere.
X*/
X
XINT upd_tab(pred,foll)                     /* add an entry to the table */
Xunsigned INT pred;                     /* code for preceeding string */
Xunsigned INT foll;                     /* character which follows string */
X{
X    struct entry *ep;                  /* pointer to current entry */
X
X    /* calculate offset just once */
X
X    ep = &string_tab[hash(pred,foll)];
X
X    ep->used = TRUE;                   /* this spot is now in use */
X    ep->next = 0;                      /* no duplicates after this yet */
X    ep->predecessor = pred;            /* note code of preceeding string */
X    ep->follower = foll;               /* note char after string */
X}
X
X/*  This algorithm encoded a file into twelve bit strings (three nybbles).
X    The gocode() routine is used to read these strings a byte (or two)
X    at a time.
X*/
X
Xstatic INT gocode(fd)                      /* read in a twelve bit code */
XFILE *fd;                              /* file to get code from */
X{
X    unsigned INT localbuf, returnval;
X
X    if(inbuf==EMPTY)                   /* if on a code boundary */
X    {    if((localbuf=getc_unp(fd))==EOF)   /* get start of next code */
X              return EOF;              /* pass back end of file status */
X         localbuf &= 0xFF;             /* mask down to true byte value */
X         if((inbuf=getc_unp(fd))==EOF) /* get end of code, start of next */
X              return EOF;              /* this should never happen */
X         inbuf &= 0xFF;                /* mask down to true byte value */
X
X         returnval = ((localbuf<<4)&0xFF0) + ((inbuf>>4)&0x00F);
X         inbuf &= 0x000F;              /* leave partial code pending */
X    }
X
X    else                               /* buffer contains first nybble */
X    {    if((localbuf=getc_unp(fd))==EOF)
X              return EOF;
X         localbuf &= 0xFF;
X
X         returnval = localbuf + ((inbuf<<8)&0xF00);
X         inbuf = EMPTY;                /* note no hanging nybbles */
X    }
X    return returnval;                  /* pass back assembled code */
X}
X
Xstatic INT push(c)                         /* push char onto stack */
XINT c;                                 /* character to push */
X{
X    stack[sp] = ((char) c);            /* coerce integer into a char */
X
X    if(++sp >= TABSIZE)
X         abort("Stack overflow\n");
X}
X
Xstatic INT pop()                       /* pop character from stack */
X{
X    if(sp>0)
X         return ((INT) stack[--sp]);   /* leave ptr at next empty slot */
X
X    else return EMPTY;
X}
X
X/***** LEMPEL-ZEV DECOMPRESSION *****/
X
Xstatic INT code_count;                 /* needed to detect table full */
Xstatic unsigned INT code;                  /* where we are so far */
Xstatic INT firstc;                     /* true only on first character */
X
XINT init_ucr(new)                          /* get set for uncrunching */
XINT new;                               /* true to use new hash function */
X{
X    if(new)                            /* set proper hash function */
X         h = newh;
X    else h = oldh;
X
X    sp = 0;                            /* clear out the stack */
X    init_tab();                        /* set up atomic code definitions */
X    code_count = TABSIZE - 256;        /* note space left in table */
X    firstc = 1;                        /* true only on first code */
X}
X
XINT getc_ucr(f)                        /* get next uncrunched byte */
XFILE *f;                               /* file containing crunched data */
X{
X    unsigned INT c;                    /* a character of input */
X INT code, newcode;
X    static INT oldcode, finchar;
X    struct entry *ep;                  /* allows faster table handling */
X
X    if(firstc)                         /* first code is always known */
X    {    firstc = FALSE;               /* but next will not be first */
X         oldcode = gocode(f);
X         return finchar = string_tab[oldcode].follower;
X    }
X
X    if(!sp)                            /* if stack is empty */
X    {    if((code=newcode=gocode(f))==EOF)
X              return EOF;
X
X         ep = &string_tab[code];       /* initialize pointer */
X
X         if(!ep->used)                 /* if code isn't known */
X         {    code = oldcode;
X              ep = &string_tab[code];  /* re-initialize pointer */
X              push(finchar);
X         }
X
X         while(ep->predecessor!=NO_PRED)
X         {    push(ep->follower);      /* decode string backwards */
X              code = ep->predecessor;
X              ep = &string_tab[code];
X         }
X
X         push(finchar=ep->follower);   /* save first character also */
X
X         /*   The above loop will terminate, one way or another,
X              with string_tab[code].follower equal to the first
X              character in the string.
X         */
X
X         if(code_count)                /* if room left in string table */
X         {    upd_tab(oldcode,finchar);
X              --code_count;
X         }
X
X         oldcode = newcode;
X    }
X
X    return pop();                      /* return saved character */
X}
X
$ GoSub Convert_File
$ Goto Part09