[comp.sources.misc] arc that UnSquashes 2/2

ddl@husc6.UUCP (Dan Lanciani) (07/18/87)

-----------continue here------------

static unsigned INT eolist(index)          /* find last duplicate */
unsigned INT index;
{
 INT temp;

    while(temp=string_tab[index].next) /* while more duplicates */
         index = temp;

    return index;
}

/*  The hash() routine is used to find a spot in the hash table for a new
    entry.  It performs a "hash and linear probe" lookup, using h() to
    calculate the starting hash value and eolist() to perform the linear
    probe.  This routine DOES NOT detect a table full condition.  That
    MUST be checked for elsewhere.
*/

static unsigned INT hash(pred,foll)        /* find spot in the string table */
unsigned INT pred;                     /* code for preceeding string */
unsigned char foll;                    /* char following string */
{
    unsigned INT local, tempnext;      /* scratch storage */
    struct entry *ep;                  /* allows faster table handling */

    local = (*h)(pred,foll);           /* get initial hash value */

    if(!string_tab[local].used)        /* if that spot is free */
         return local;                 /* then that's all we need */

    else                               /* else a collision has occured */
    {    local = eolist(local);        /* move to last duplicate */

         /*   We must find an empty spot. We start looking 101 places
              down the table from the last duplicate.
         */

         tempnext = (local+101) & 0x0FFF;
         ep = &string_tab[tempnext];   /* initialize pointer */

         while(ep->used)               /* while empty spot not found */
         {    if(++tempnext==TABSIZE)  /* if we are at the end */
              {    tempnext = 0;       /* wrap to beginning of table*/
                   ep = string_tab;
              }
              else ++ep;               /* point to next element in table */
         }

         /*   local still has the pointer to the last duplicate, while
              tempnext has the pointer to the spot we found.  We use
              this to maintain the chain of pointers to duplicates.
         */

         string_tab[local].next = tempnext;

         return tempnext;
    }
}

/*  The unhash() function is used to search the hash table for a given key.
    Like hash(), it performs a hash and linear probe search.  It returns
    either the number of the entry (if found) or NOT_FND (if not found).
*/

static unsigned INT unhash(pred,foll)      /* search string table for a key */
unsigned INT pred;                     /* code of preceeding string */
unsigned char foll;                    /* character following string */
{
    unsigned INT local, offset;        /* scratch storage */
    struct entry *ep;                  /* this speeds up access */

    local = (*h)(pred,foll);           /* initial hash */

    while(1)
    {    ep = &string_tab[local];      /* speed up table access */

         if((ep->predecessor==pred) && (ep->follower==foll))
              return local;            /* we have a match */

         if(!ep->next)                 /* if no more duplicates */
              return NOT_FND;          /* then key is not listed */

         local = ep->next;             /* move on to next duplicate */
    }
}

/*  The init_tab() routine is used to initialize our hash table.
    You realize, of course, that "initialize" is a complete misnomer.
*/

static INT init_tab()                      /* set ground state in hash table */
{
    unsigned INT i;                    /* table index */
    INT upd_tab();

    setmem((char *)string_tab,sizeof(string_tab),0);

    for(i=0; i<256; i++)               /* list all single byte strings */
         upd_tab(NO_PRED,i);

    inbuf = EMPTY;                     /* nothing is in our buffer */
}

/*  The upd_tab routine is used to add a new entry to the string table.
    As previously stated, no checks are made to ensure that the table
    has any room.  This must be done elsewhere.
*/

INT upd_tab(pred,foll)                     /* add an entry to the table */
unsigned INT pred;                     /* code for preceeding string */
unsigned INT foll;                     /* character which follows string */
{
    struct entry *ep;                  /* pointer to current entry */

    /* calculate offset just once */

    ep = &string_tab[hash(pred,foll)];

    ep->used = TRUE;                   /* this spot is now in use */
    ep->next = 0;                      /* no duplicates after this yet */
    ep->predecessor = pred;            /* note code of preceeding string */
    ep->follower = foll;               /* note char after string */
}

/*  This algorithm encoded a file into twelve bit strings (three nybbles).
    The gocode() routine is used to read these strings a byte (or two)
    at a time.
*/

static INT gocode(fd)                      /* read in a twelve bit code */
FILE *fd;                              /* file to get code from */
{
    unsigned INT localbuf, returnval;

    if(inbuf==EMPTY)                   /* if on a code boundary */
    {    if((localbuf=getc_unp(fd))==EOF)   /* get start of next code */
              return EOF;              /* pass back end of file status */
         localbuf &= 0xFF;             /* mask down to true byte value */
         if((inbuf=getc_unp(fd))==EOF) /* get end of code, start of next */
              return EOF;              /* this should never happen */
         inbuf &= 0xFF;                /* mask down to true byte value */

         returnval = ((localbuf<<4)&0xFF0) + ((inbuf>>4)&0x00F);
         inbuf &= 0x000F;              /* leave partial code pending */
    }

    else                               /* buffer contains first nybble */
    {    if((localbuf=getc_unp(fd))==EOF)
              return EOF;
         localbuf &= 0xFF;

         returnval = localbuf + ((inbuf<<8)&0xF00);
         inbuf = EMPTY;                /* note no hanging nybbles */
    }
    return returnval;                  /* pass back assembled code */
}

static INT push(c)                         /* push char onto stack */
INT c;                                 /* character to push */
{
    stack[sp] = ((char) c);            /* coerce integer into a char */

    if(++sp >= TABSIZE)
         abort("Stack overflow\n");
}

static INT pop()                       /* pop character from stack */
{
    if(sp>0)
         return ((INT) stack[--sp]);   /* leave ptr at next empty slot */

    else return EMPTY;
}

/***** LEMPEL-ZEV DECOMPRESSION *****/

static INT code_count;                 /* needed to detect table full */
static unsigned INT code;                  /* where we are so far */
static INT firstc;                     /* true only on first character */

INT init_ucr(new)                          /* get set for uncrunching */
INT new;                               /* true to use new hash function */
{
    if(new)                            /* set proper hash function */
         h = newh;
    else h = oldh;

    sp = 0;                            /* clear out the stack */
    init_tab();                        /* set up atomic code definitions */
    code_count = TABSIZE - 256;        /* note space left in table */
    firstc = 1;                        /* true only on first code */
}

INT getc_ucr(f)                        /* get next uncrunched byte */
FILE *f;                               /* file containing crunched data */
{
    unsigned INT c;                    /* a character of input */
 INT code, newcode;
    static INT oldcode, finchar;
    struct entry *ep;                  /* allows faster table handling */

    if(firstc)                         /* first code is always known */
    {    firstc = FALSE;               /* but next will not be first */
         oldcode = gocode(f);
         return finchar = string_tab[oldcode].follower;
    }

    if(!sp)                            /* if stack is empty */
    {    if((code=newcode=gocode(f))==EOF)
              return EOF;

         ep = &string_tab[code];       /* initialize pointer */

         if(!ep->used)                 /* if code isn't known */
         {    code = oldcode;
              ep = &string_tab[code];  /* re-initialize pointer */
              push(finchar);
         }

         while(ep->predecessor!=NO_PRED)
         {    push(ep->follower);      /* decode string backwards */
              code = ep->predecessor;
              ep = &string_tab[code];
         }

         push(finchar=ep->follower);   /* save first character also */

         /*   The above loop will terminate, one way or another,
              with string_tab[code].follower equal to the first
              character in the string.
         */

         if(code_count)                /* if room left in string table */
         {    upd_tab(oldcode,finchar);
              --code_count;
         }

         oldcode = newcode;
    }

    return pop();                      /* return saved character */
}
SHAR_EOF
fi # end of overwriting check
if test -f 'arcm.h'
then
	echo shar: will not over-write existing file "'arcm.h'"
else
cat << \SHAR_EOF > 'arcm.h'
/*
 * $Header: arcm.h,v 1.2 86/07/15 07:53:40 turner Exp $
 */

/*
 * $Log:	arcm.h,v $
 * Revision 1.2  86/07/15  07:53:40  turner
 * 
 * 
 * Revision 1.1  86/06/26  15:01:25  turner
 * initial version
 * 
 * 
 */

/*
 *
 * ARC archive utility - standard MACRO insert file.
 *
 * parameters:
 *
 */

#define ARCMARK 26 /*                   special archive marker        */
#define ARCVER 9   /*                   archive header version code   */
#define STRLEN 100 /*                   system standard string length */
#define FNLEN 13   /*                   file name length              */
#define MAXARG 25  /*                   maximum number of arguments   */

#define ARC 1
#define XARC 0
SHAR_EOF
fi # end of overwriting check
if test -f 'arcmatch.c'
then
	echo shar: will not over-write existing file "'arcmatch.c'"
else
cat << \SHAR_EOF > 'arcmatch.c'
static char *RCSid = "$Header: arcmatch.c,v 1.2 86/07/15 07:53:42 turner Exp $";

/*
 * $Log:	arcmatch.c,v $
 * Hack-attack 1.3  86/12/20  01:23:45  wilhite@usceast.uucp
 * 	Bludgeoned into submission for VAX 11/780 BSD4.2
 *	(ugly code, but fewer core dumps)
 *
 * Revision 1.2  86/07/15  07:53:42  turner
 * 
 * 
 * Revision 1.1  86/06/26  15:00:34  turner
 * initial version
 * 
 * 
 */

/*  ARC - Archive utility - ARCMATCH

$define(tag,$$segment(@1,$$index(@1,=)+1))#
$define(version,Version $tag(
TED_VERSION DB =2.17), created on $tag(
TED_DATE DB =12/17/85) at $tag(
TED_TIME DB =20:32:18))#
$undefine(tag)#
    $version

(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED

    By:  Thom Henderson

    Description:
         This file contains service routines needed to maintain an archive.

    Language:
         Computer Innovations Optimizing C86
*/
#include <stdio.h>
#include "arc.h"

INT match(n,t)                         /* test name against template */
char *n;                               /* name to test */
char *t;                               /* template to test against */
{

#if MSDOS
    upper(n); upper(t);                /* avoid case problems */
#endif

    /* first match name part */

    while((*n && *n!='.') || (*t && *t!='.'))
    {    if(*n!=*t && *t!='?')         /* match fail? */
         {    if(*t!='*')              /* wildcard fail? */
                   return 0;           /* then no match */
              else                     /* else jump over wildcard */
              {    while(*n && *n!='.')
                        n++;
                   while(*t && *t!='.')
                        t++;
                   break;              /* name part matches wildcard */
              }
         }
         else                          /* match good for this char */
         {    n++;                     /* advance to next char */
              t++;
         }
    }

    if(*n && *n=='.') n++;             /* skip extension delimiters */
    if(*t && *t=='.') t++;

    /* now match name part */

    while(*n || *t)
    {    if(*n!=*t && *t!='?')         /* match fail? */
         {    if(*t!='*')              /* wildcard fail? */
                   return 0;           /* then no match */
              else return 1;           /* else good enough */
         }
         else                          /* match good for this char */
         {    n++;                     /* advance to next char */
              t++;
         }
    }

    return 1;                          /* match worked */
}

INT rempath(nargs,arg)                     /* remove paths from filenames */
INT nargs;                             /* number of names */
char *arg[];                           /* pointers to names */
{
    char *i, *rindex();                /* string index, reverse indexer */
 INT n;                             /* index */

    for(n=0; n<nargs; n++)             /* for each supplied name */
    {    if(!(i=rindex(arg[n],'\\')))  /* search for end of path */
              if(!(i=rindex(arg[n],'/')))
                   i=rindex(arg[n],':');
         if(i)                         /* if path was found */
              arg[n] = i+1;            /* then skip it */
    }
}
SHAR_EOF
fi # end of overwriting check
if test -f 'arcmisc.c'
then
	echo shar: will not over-write existing file "'arcmisc.c'"
else
cat << \SHAR_EOF > 'arcmisc.c'
#include <stdio.h>
#include "arc.h"

/*	split up a file name (subroutine for makefnam)
 *
 * Hack-attack 1.3  86/12/20  01:23:45  wilhite@usceast.uucp
 * 	Bludgeoned into submission for VAX 11/780 BSD4.2
 *	(ugly code, but fewer core dumps)
*/

static INT _makefn(source,dest)
unsigned char *source;
unsigned char *dest;
{
 INT j;

    setmem (dest, 17, 0);	/* clear result field */
    if (strlen (source) > 1 && source[1] == ':')
	for (j = 0; j < 2;)
	    dest[j++] = *source++;
    for (j = 3; *source && *source != '.'; ++source)
	if (j < 11)
	    dest[j++] = *source;
    for (j = 12; *source; ++source)
	if (j < 16)
	    dest[j++] = *source;
}
/*	make a file name using a template
*/

char *makefnam(rawfn,template,result)
unsigned char *rawfn;			/* the original file name */
unsigned char *template;		/* the template data */
unsigned char *result;			/* where to place the result */
{
  unsigned char et[17],er[17];

  _makefn(template,et);
  _makefn(rawfn,er);
  *result=0;			/* assure no data */
  strcat(result,er[0]?er:et);
  strcat(result,er[3]?er+3:et+3);
  strcat(result,er[12]?er+12:et+12);
  return ((char *)&result[0]);
}

INT freedir(dirs)
register struct direct **dirs;
{
	register INT ii;

	if(dirs == (struct direct **)0)
		return(-1);
	for(ii = 0; dirs[ii] != (struct direct *)0; ii++)
		free(dirs[ii]);
	free(dirs);
	return(0);
}

#if MSDOS
#include <dir.h>

INT alphasort(dirptr1, dirptr2)
struct direct **dirptr1, **dirptr2;
{
	return(strcmp((*dirptr1)->d_name, (*dirptr2)->d_name));
}

#endif
SHAR_EOF
fi # end of overwriting check
if test -f 'arcpack.c'
then
	echo shar: will not over-write existing file "'arcpack.c'"
else
cat << \SHAR_EOF > 'arcpack.c'
static char *RCSid = "$Header: arcpack.c,v 1.2 86/07/15 07:53:48 turner Exp $";

/*
 * $Log:	arcpack.c,v $
 * Hack-attack 1.3  86/12/20  01:23:45  wilhite@usceast.uucp
 * 	Bludgeoned into submission for VAX 11/780 BSD4.2
 *	(ugly code, but fewer core dumps)
 *
 * Revision 1.2  86/07/15  07:53:48  turner
 * 
 * 
 * Revision 1.1  86/06/26  15:00:37  turner
 * initial version
 * 
 * 
 */

/*  ARC - Archive utility - ARCPACK

$define(tag,$$segment(@1,$$index(@1,=)+1))#
$define(version,Version $tag(
TED_VERSION DB =3.37), created on $tag(
TED_DATE DB =02/03/86) at $tag(
TED_TIME DB =22:58:01))#
$undefine(tag)#
    $version

(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED

    By:  Thom Henderson

    Description:
         This file contains the routines used to compress a file
         when placing it in an archive.

    Language:
         Computer Innovations Optimizing C86
*/
#include <stdio.h>
#include "arc.h"

/* stuff for non-repeat packing */

#define DLE 0x90                       /* repeat sequence marker */

static unsigned char state;            /* current packing state */

/* non-repeat packing states */

#define NOHIST  0                      /* don't consider previous input*/
#define SENTCHAR 1                     /* lastchar set, no lookahead yet */
#define SENDNEWC 2                     /* run over, send new char next */
#define SENDCNT 3                      /* newchar set, send count next */

/* packing results */

static long stdlen;                    /* length for standard packing */
static INT crcval;                     /* CRC check value */

INT pack(f,t,hdr)                          /* pack file into an archive */
FILE *f, *t;                           /* source, destination */
struct heads *hdr;                     /* pointer to header data */
{
 INT c;                             /* one character of stream */
    long ncrlen;                       /* length after packing */
    long huflen;                       /* length after squeezing */
    long lzwlen;                       /* length after crunching */
    long pred_sq(), file_sq();         /* stuff for squeezing */
    long pred_cm();                    /* dynamic crunching cleanup */
    char tnam[STRLEN];                /* temporary name buffer */
    char *makefnam();                  /* filename fixer upper */
    FILE *crn = NULL;                  /* temporary crunch file */
    INT getch();
    INT getc_ncr();
    INT putc_pak();

    /* first pass - see which method is best */

    if(!nocomp)                        /* if storage kludge not active */
    {    if(note)
            { printf(" analyzing, "); fflush(stdout);}

         if(arctemp)                   /* use temp area if specified */
              sprintf(tnam,"%s.CRN",arctemp);
         else makefnam("$ARCTEMP.CRN",arcname,tnam);
#if MSDOS
         crn = fopen(tnam,"wrb");
#endif
#if BSD | ST
         crn = fopen(tnam,"w+");
#endif
         state = NOHIST;               /* initialize ncr packing */
         stdlen =  ncrlen = 0;         /* reset size counters */
         crcval = 0;                   /* initialize CRC check value */
         setcode();                    /* initialize encryption */

         init_cm(f,crn);               /* initialize for crunching */
         init_sq();                    /* initialize for squeeze scan */

         while((c=getc_ncr(f))!=EOF)   /* for each byte of file */
         {    ncrlen++;                /* one more packed byte */
              scan_sq(c);              /* see what squeezing can do */
              putc_cm(c,crn);          /* see what crunching can do */
         }
         huflen = pred_sq();           /* finish up after squeezing */
         lzwlen = pred_cm(crn);        /* finish up after crunching */
    }
    else                               /* else kludge the method */
    {    stdlen = 0;                   /* make standard look best */
         ncrlen = huflen = lzwlen = 1;
    }

    /* standard set-ups common to all methods */

    fseek(f,0L,0);                     /* rewind input */
    hdr->crc = crcval;                 /* note CRC check value */
    hdr->length = stdlen;              /* set actual file length */
    state = NOHIST;                    /* reinitialize ncr packing */
    setcode();                         /* reinitialize encryption */

    /* choose and use the shortest method */

    if(stdlen<=ncrlen && stdlen<=huflen && stdlen<=lzwlen)
    {    if(kludge)                    /*DEBUG*/
              printf("(%ld) ",lzwlen-stdlen);
         if(note)
          { printf("storing, "); fflush(stdout);}/* store w/out compression */
         hdrver = 2;                   /* note packing method */
         stdlen = crcval = 0;          /* recalc these for kludge */
         while((c=getch(f))!=EOF)      /* store it straight */
              putc_pak(c,t);
         hdr->crc = crcval;
         hdr->length = hdr->size = stdlen;
    }

    else if(ncrlen<huflen && ncrlen<lzwlen)
    {    if(kludge)                    /*DEBUG*/
              printf("(%ld) ",lzwlen-ncrlen);
         if(note)
              { printf("packing, ");
              fflush(stdout);} /* pack w/repeat suppression */
         hdrver = 3;                   /* note packing method */
         hdr->size = ncrlen;           /* set data length */
         while((c=getc_ncr(f))!=EOF)
              putc_pak(c,t);
    }

    else if(huflen<lzwlen)
    {    if(kludge)                    /*DEBUG*/
              printf("(%ld) ",lzwlen-huflen);
         if(note)
            { printf("squeezing, "); fflush(stdout);}
         hdrver = 4;                   /* note packing method */
         hdr->size = file_sq(f,t);     /* note final size */
    }

    else
    {    if(kludge)                    /*DEBUG*/
              printf("(%ld) ",huflen-lzwlen);
         if(note)
            { printf("crunching, "); fflush(stdout);}
         hdrver = 8;
         hdr->size = lzwlen;           /* size should not change */
         if(crn)                       /* if temp was created */
         {    fseek(crn,0L,0);         /* then copy over crunched temp */
              while((c=fgetc(crn))!=EOF)
                   putc_tst(c,t);
         }
         else                          /* else re-crunch */
         {    init_cm(f,t);
              while((c=getc_ncr(f))!=EOF)
                   putc_cm(c,t);
              pred_cm(t);              /* finish up after crunching */
         }
    }

    /* standard cleanups common to all methods */

    if(crn)                            /* get rid of crunch temporary */
    {    fclose(crn);
         if(unlink(tnam) && warn)
         {    printf("Cannot delete temporary file %s\n",tnam);
              nerrs++;
         }
    }
    if(note)
         printf("done.\n");
}

/*  Non-repeat compression - text is passed through normally, except that
    a run of more than two is encoded as:

         <char> <DLE> <count>

    Special case: a count of zero indicates that the DLE is really a DLE,
    not a repeat marker.
*/

INT getc_ncr(f)                        /* get bytes with collapsed runs */
FILE *f;                               /* file to get from */
{
    static INT lastc;                  /* value returned on last call */
    static INT repcnt;                 /* repetition counter */
    static INT c;                      /* latest value seen */

    switch(state)                      /* depends on our state */
    {
    case NOHIST:                       /* no relevant history */
         state = S