[alt.sources] arc that UnSquashes 2/2

ddl@husc6.UUCP (07/16/87)

-----------continue here------------

static unsigned INT eolist(index)          /* find last duplicate */
unsigned INT index;
{
 INT temp;

    while(temp=string_tab[index].next) /* while more duplicates */
         index = temp;

    return index;
}

/*  The hash() routine is used to find a spot in the hash table for a new
    entry.  It performs a "hash and linear probe" lookup, using h() to
    calculate the starting hash value and eolist() to perform the linear
    probe.  This routine DOES NOT detect a table full condition.  That
    MUST be checked for elsewhere.
*/

static unsigned INT hash(pred,foll)        /* find spot in the string table */
unsigned INT pred;                     /* code for preceeding string */
unsigned char foll;                    /* char following string */
{
    unsigned INT local, tempnext;      /* scratch storage */
    struct entry *ep;                  /* allows faster table handling */

    local = (*h)(pred,foll);           /* get initial hash value */

    if(!string_tab[local].used)        /* if that spot is free */
         return local;                 /* then that's all we need */

    else                               /* else a collision has occured */
    {    local = eolist(local);        /* move to last duplicate */

         /*   We must find an empty spot. We start looking 101 places
              down the table from the last duplicate.
         */

         tempnext = (local+101) & 0x0FFF;
         ep = &string_tab[tempnext];   /* initialize pointer */

         while(ep->used)               /* while empty spot not found */
         {    if(++tempnext==TABSIZE)  /* if we are at the end */
              {    tempnext = 0;       /* wrap to beginning of table*/
                   ep = string_tab;
              }
              else ++ep;               /* point to next element in table */
         }

         /*   local still has the pointer to the last duplicate, while
              tempnext has the pointer to the spot we found.  We use
              this to maintain the chain of pointers to duplicates.
         */

         string_tab[local].next = tempnext;

         return tempnext;
    }
}

/*  The unhash() function is used to search the hash table for a given key.
    Like hash(), it performs a hash and linear probe search.  It returns
    either the number of the entry (if found) or NOT_FND (if not found).
*/

static unsigned INT unhash(pred,foll)      /* search string table for a key */
unsigned INT pred;                     /* code of preceeding string */
unsigned char foll;                    /* character following string */
{
    unsigned INT local, offset;        /* scratch storage */
    struct entry *ep;                  /* this speeds up access */

    local = (*h)(pred,foll);           /* initial hash */

    while(1)
    {    ep = &string_tab[local];      /* speed up table access */

         if((ep->predecessor==pred) && (ep->follower==foll))
              return local;            /* we have a match */

         if(!ep->next)                 /* if no more duplicates */
              return NOT_FND;          /* then key is not listed */

         local = ep->next;             /* move on to next duplicate */
    }
}

/*  The init_tab() routine is used to initialize our hash table.
    You realize, of course, that "initialize" is a complete misnomer.
*/

static INT init_tab()                      /* set ground state in hash table */
{
    unsigned INT i;                    /* table index */
    INT upd_tab();

    setmem((char *)string_tab,sizeof(string_tab),0);

    for(i=0; i<256; i++)               /* list all single byte strings */
         upd_tab(NO_PRED,i);

    inbuf = EMPTY;                     /* nothing is in our buffer */
}

/*  The upd_tab routine is used to add a new entry to the string table.
    As previously stated, no checks are made to ensure that the table
    has any room.  This must be done elsewhere.
*/

INT upd_tab(pred,foll)                     /* add an entry to the table */
unsigned INT pred;                     /* code for preceeding string */
unsigned INT foll;                     /* character which follows string */
{
    struct entry *ep;                  /* pointer to current entry */

    /* calculate offset just once */

    ep = &string_tab[hash(pred,foll)];

    ep->used = TRUE;                   /* this spot is now in use */
    ep->next = 0;                      /* no duplicates after this yet */
    ep->predecessor = pred;            /* note code of preceeding string */
    ep->follower = foll;               /* note char after string */
}

/*  This algorithm encoded a file into twelve bit strings (three nybbles).
    The gocode() routine is used to read these strings a byte (or two)
    at a time.
*/

static INT gocode(fd)                      /* read in a twelve bit code */
FILE *fd;                              /* file to get code from */
{
    unsigned INT localbuf, returnval;

    if(inbuf==EMPTY)                   /* if on a code boundary */
    {    if((localbuf=getc_unp(fd))==EOF)   /* get start of next code */
              return EOF;              /* pass back end of file status */
         localbuf &= 0xFF;             /* mask down to true byte value */
         if((inbuf=getc_unp(fd))==EOF) /* get end of code, start of next */
              return EOF;              /* this should never happen */
         inbuf &= 0xFF;                /* mask down to true byte value */

         returnval = ((localbuf<<4)&0xFF0) + ((inbuf>>4)&0x00F);
         inbuf &= 0x000F;              /* leave partial code pending */
    }

    else                               /* buffer contains first nybble */
    {    if((localbuf=getc_unp(fd))==EOF)
              return EOF;
         localbuf &= 0xFF;

         returnval = localbuf + ((inbuf<<8)&0xF00);
         inbuf = EMPTY;                /* note no hanging nybbles */
    }
    return returnval;                  /* pass back assembled code */
}

static INT push(c)                         /* push char onto stack */
INT c;                                 /* character to push */
{
    stack[sp] = ((char) c);            /* coerce integer into a char */

    if(++sp >= TABSIZE)
         abort("Stack overflow\n");
}

static INT pop()                       /* pop character from stack */
{
    if(sp>0)
         return ((INT) stack[--sp]);   /* leave ptr at next empty slot */

    else return EMPTY;
}

/***** LEMPEL-ZEV DECOMPRESSION *****/

static INT code_count;                 /* needed to detect table full */
static unsigned INT code;                  /* where we are so far */
static INT firstc;                     /* true only on first character */

INT init_ucr(new)                          /* get set for uncrunching */
INT new;                               /* true to use new hash function */
{
    if(new)                            /* set proper hash function */
         h = newh;
    else h = oldh;

    sp = 0;                            /* clear out the stack */
    init_tab();                        /* set up atomic code definitions */
    code_count = TABSIZE - 256;        /* note space left in table */
    firstc = 1;                        /* true only on first code */
}

INT getc_ucr(f)                        /* get next uncrunched byte */
FILE *f;                               /* file containing crunched data */
{
    unsigned INT c;                    /* a character of input */
 INT code, newcode;
    static INT oldcode, finchar;
    struct entry *ep;                  /* allows faster table handling */

    if(firstc)                         /* first code is always known */
    {    firstc = FALSE;               /* but next will not be first */
         oldcode = gocode(f);
         return finchar = string_tab[oldcode].follower;
    }

    if(!sp)                            /* if stack is empty */
    {    if((code=newcode=gocode(f))==EOF)
              return EOF;

         ep = &string_tab[code];       /* initialize pointer */

         if(!ep->used)                 /* if code isn't known */
         {    code = oldcode;
              ep = &string_tab[code];  /* re-initialize pointer */
              push(finchar);
         }

         while(ep->predecessor!=NO_PRED)
         {    push(ep->follower);      /* decode string backwards */
              code = ep->predecessor;
              ep = &string_tab[code];
         }

         push(finchar=ep->follower);   /* save first character also */

         /*   The above loop will terminate, one way or another,
              with string_tab[code].follower equal to the first
              character in the string.
         */

         if(code_count)                /* if room left in string table */
         {    upd_tab(oldcode,finchar);
              --code_count;
         }

         oldcode = newcode;
    }

    return pop();                      /* return saved character */
}
SHAR_EOF
fi # end of overwriting check
if test -f 'arcm.h'
then
	echo shar: will not over-write existing file "'arcm.h'"
else
cat << \SHAR_EOF > 'arcm.h'
/*
 * $Header: arcm.h,v 1.2 86/07/15 07:53:40 turner Exp $
 */

/*
 * $Log:	arcm.h,v $
 * Revision 1.2  86/07/15  07:53:40  turner
 * 
 * 
 * Revision 1.1  86/06/26  15:01:25  turner
 * initial version
 * 
 * 
 */

/*
 *
 * ARC archive utility - standard MACRO insert file.
 *
 * parameters:
 *
 */

#define ARCMARK 26 /*                   special archive marker        */
#define ARCVER 9   /*                   archive header version code   */
#define STRLEN 100 /*                   system standard string length */
#define FNLEN 13   /*                   file name length              */
#define MAXARG 25  /*                   maximum number of arguments   */

#define ARC 1
#define XARC 0
SHAR_EOF
fi # end of overwriting check
if test -f 'arcmatch.c'
then
	echo shar: will not over-write existing file "'arcmatch.c'"
else
cat << \SHAR_EOF > 'arcmatch.c'
static char *RCSid = "$Header: arcmatch.c,v 1.2 86/07/15 07:53:42 turner Exp $";

/*
 * $Log:	arcmatch.c,v $
 * Hack-attack 1.3  86/12/20  01:23:45  wilhite@usceast.uucp
 * 	Bludgeoned into submission for VAX 11/780 BSD4.2
 *	(ugly code, but fewer core dumps)
 *
 * Revision 1.2  86/07/15  07:53:42  turner
 * 
 * 
 * Revision 1.1  86/06/26  15:00:34  turner
 * initial version
 * 
 * 
 */

/*  ARC - Archive utility - ARCMATCH

$define(tag,$$segment(@1,$$index(@1,=)+1))#
$define(version,Version $tag(
TED_VERSION DB =2.17), created on $tag(
TED_DATE DB =12/17/85) at $tag(
TED_TIME DB =20:32:18))#
$undefine(tag)#
    $version

(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED

    By:  Thom Henderson

    Description:
         This file contains service routines needed to maintain an archive.

    Language:
         Computer Innovations Optimizing C86
*/
#include <stdio.h>
#include "arc.h"

INT match(n,t)                         /* test name against template */
char *n;                               /* name to test */
char *t;                               /* template to test against */
{

#if MSDOS
    upper(n); upper(t);                /* avoid case problems */
#endif

    /* first match name part */

    while((*n && *n!='.') || (*t && *t!='.'))
    {    if(*n!=*t && *t!='?')         /* match fail? */
         {    if(*t!='*')              /* wildcard fail? */
                   return 0;           /* then no match */
              else                     /* else jump over wildcard */
              {    while(*n && *n!='.')
                        n++;
                   while(*t && *t!='.')
                        t++;
                   break;              /* name part matches wildcard */
              }
         }
         else                          /* match good for this char */
         {    n++;                     /* advance to next char */
              t++;
         }
    }

    if(*n && *n=='.') n++;             /* skip extension delimiters */
    if(*t && *t=='.') t++;

    /* now match name part */

    while(*n || *t)
    {    if(*n!=*t && *t!='?')         /* match fail? */
         {    if(*t!='*')              /* wildcard fail? */
                   return 0;           /* then no match */
              else return 1;           /* else good enough */
         }
         else                          /* match good for this char */
         {    n++;                     /* advance to next char */
              t++;
         }
    }

    return 1;                          /* match worked */
}

INT rempath(nargs,arg)                     /* remove paths from filenames */
INT nargs;                             /* number of names */
char *arg[];                           /* pointers to names */
{
    char *i, *rindex();                /* string index, reverse indexer */
 INT n;                             /* index */

    for(n=0; n<nargs; n++)             /* for each supplied name */
    {    if(!(i=rindex(arg[n],'\\')))  /* search for end of path */
              if(!(i=rindex(arg[n],'/')))
                   i=rindex(arg[n],':');
         if(i)                         /* if path was found */
              arg[n] = i+1;            /* then skip it */
    }
}
SHAR_EOF
fi # end of overwriting check
if test -f 'arcmisc.c'
then
	echo shar: will not over-write existing file "'arcmisc.c'"
else
cat << \SHAR_EOF > 'arcmisc.c'
#include <stdio.h>
#include "arc.h"

/*	split up a file name (subroutine for makefnam)
 *
 * Hack-attack 1.3  86/12/20  01:23:45  wilhite@usceast.uucp
 * 	Bludgeoned into submission for VAX 11/780 BSD4.2
 *	(ugly code, but fewer core dumps)
*/

static INT _makefn(source,dest)
unsigned char *source;
unsigned char *dest;
{
 INT j;

    setmem (dest, 17, 0);	/* clear result field */
    if (strlen (source) > 1 && source[1] == ':')
	for (j = 0; j < 2;)
	    dest[j++] = *source++;
    for (j = 3; *source && *source != '.'; ++source)
	if (j < 11)
	    dest[j++] = *source;
    for (j = 12; *source; ++source)
	if (j < 16)
	    dest[j++] = *source;
}
/*	make a file name using a template
*/

char *makefnam(rawfn,template,result)
unsigned char *rawfn;			/* the original file name */
unsigned char *template;		/* the template data */
unsigned char *result;			/* where to place the result */
{
  unsigned char et[17],er[17];

  _makefn(template,et);
  _makefn(rawfn,er);
  *result=0;			/* assure no data */
  strcat(result,er[0]?er:et);
  strcat(result,er[3]?er+3:et+3);
  strcat(result,er[12]?er+12:et+12);
  return ((char *)&result[0]);
}

INT freedir(dirs)
register struct direct **dirs;
{
	register INT ii;

	if(dirs == (struct direct **)0)
		return(-1);
	for(ii = 0; dirs[ii] != (struct direct *)0; ii++)
		free(dirs[ii]);
	free(dirs);
	return(0);
}

#if MSDOS
#include <dir.h>

INT alphasort(dirptr1, dirptr2)
struct direct **dirptr1, **dirptr2;
{
	return(strcmp((*dirptr1)->d_name, (*dirptr2)->d_name));
}

#endif
SHAR_EOF
fi # end of overwriting check
if test -f 'arcpack.c'
then
	echo shar: will not over-write existing file "'arcpack.c'"
else
cat << \SHAR_EOF > 'arcpack.c'
static char *RCSid = "$Header: arcpack.c,v 1.2 86/07/15 07:53:48 turner Exp $";

/*
 * $Log:	arcpack.c,v $
 * Hack-attack 1.3  86/12/20  01:23:45  wilhite@usceast.uucp
 * 	Bludgeoned into submission for VAX 11/780 BSD4.2
 *	(ugly code, but fewer core dumps)
 *
 * Revision 1.2  86/07/15  07:53:48  turner
 * 
 * 
 * Revision 1.1  86/06/26  15:00:37  turner
 * initial version
 * 
 * 
 */

/*  ARC - Archive utility - ARCPACK

$define(tag,$$segment(@1,$$index(@1,=)+1))#
$define(version,Version $tag(
TED_VERSION DB =3.37), created on $tag(
TED_DATE DB =02/03/86) at $tag(
TED_TIME DB =22:58:01))#
$undefine(tag)#
    $version

(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED

    By:  Thom Henderson

    Description:
         This file contains the routines used to compress a file
         when placing it in an archive.

    Language:
         Computer Innovations Optimizing C86
*/
#include <stdio.h>
#include "arc.h"

/* stuff for non-repeat packing */

#define DLE 0x90                       /* repeat sequence marker */

static unsigned char state;            /* current packing state */

/* non-repeat packing states */

#define NOHIST  0                      /* don't consider previous input*/
#define SENTCHAR 1                     /* lastchar set, no lookahead yet */
#define SENDNEWC 2                     /* run over, send new char next */
#define SENDCNT 3                      /* newchar set, send count next */

/* packing results */

static long stdlen;                    /* length for standard packing */
static INT crcval;                     /* CRC check value */

INT pack(f,t,hdr)                          /* pack file into an archive */
FILE *f, *t;                           /* source, destination */
struct heads *hdr;                     /* pointer to header data */
{
 INT c;                             /* one character of stream */
    long ncrlen;                       /* length after packing */
    long huflen;                       /* length after squeezing */
    long lzwlen;                       /* length after crunching */
    long pred_sq(), file_sq();         /* stuff for squeezing */
    long pred_cm();                    /* dynamic crunching cleanup */
    char tnam[STRLEN];                /* temporary name buffer */
    char *makefnam();                  /* filename fixer upper */
    FILE *crn = NULL;                  /* temporary crunch file */
    INT getch();
    INT getc_ncr();
    INT putc_pak();

    /* first pass - see which method is best */

    if(!nocomp)                        /* if storage kludge not active */
    {    if(note)
            { printf(" analyzing, "); fflush(stdout);}

         if(arctemp)                   /* use temp area if specified */
              sprintf(tnam,"%s.CRN",arctemp);
         else makefnam("$ARCTEMP.CRN",arcname,tnam);
#if MSDOS
         crn = fopen(tnam,"wrb");
#endif
#if BSD | ST
         crn = fopen(tnam,"w+");
#endif
         state = NOHIST;               /* initialize ncr packing */
         stdlen =  ncrlen = 0;         /* reset size counters */
         crcval = 0;                   /* initialize CRC check value */
         setcode();                    /* initialize encryption */

         init_cm(f,crn);               /* initialize for crunching */
         init_sq();                    /* initialize for squeeze scan */

         while((c=getc_ncr(f))!=EOF)   /* for each byte of file */
         {    ncrlen++;                /* one more packed byte */
              scan_sq(c);              /* see what squeezing can do */
              putc_cm(c,crn);          /* see what crunching can do */
         }
         huflen = pred_sq();           /* finish up after squeezing */
         lzwlen = pred_cm(crn);        /* finish up after crunching */
    }
    else                               /* else kludge the method */
    {    stdlen = 0;                   /* make standard look best */
         ncrlen = huflen = lzwlen = 1;
    }

    /* standard set-ups common to all methods */

    fseek(f,0L,0);                     /* rewind input */
    hdr->crc = crcval;                 /* note CRC check value */
    hdr->length = stdlen;              /* set actual file length */
    state = NOHIST;                    /* reinitialize ncr packing */
    setcode();                         /* reinitialize encryption */

    /* choose and use the shortest method */

    if(stdlen<=ncrlen && stdlen<=huflen && stdlen<=lzwlen)
    {    if(kludge)                    /*DEBUG*/
              printf("(%ld) ",lzwlen-stdlen);
         if(note)
          { printf("storing, "); fflush(stdout);}/* store w/out compression */
         hdrver = 2;                   /* note packing method */
         stdlen = crcval = 0;          /* recalc these for kludge */
         while((c=getch(f))!=EOF)      /* store it straight */
              putc_pak(c,t);
         hdr->crc = crcval;
         hdr->length = hdr->size = stdlen;
    }

    else if(ncrlen<huflen && ncrlen<lzwlen)
    {    if(kludge)                    /*DEBUG*/
              printf("(%ld) ",lzwlen-ncrlen);
         if(note)
              { printf("packing, ");
              fflush(stdout);} /* pack w/repeat suppression */
         hdrver = 3;                   /* note packing method */
         hdr->size = ncrlen;           /* set data length */
         while((c=getc_ncr(f))!=EOF)
              putc_pak(c,t);
    }

    else if(huflen<lzwlen)
    {    if(kludge)                    /*DEBUG*/
              printf("(%ld) ",lzwlen-huflen);
         if(note)
            { printf("squeezing, "); fflush(stdout);}
         hdrver = 4;                   /* note packing method */
         hdr->size = file_sq(f,t);     /* note final size */
    }

    else
    {    if(kludge)                    /*DEBUG*/
              printf("(%ld) ",huflen-lzwlen);
         if(note)
            { printf("crunching, "); fflush(stdout);}
         hdrver = 8;
         hdr->size = lzwlen;           /* size should not change */
         if(crn)                       /* if temp was created */
         {    fseek(crn,0L,0);         /* then copy over crunched temp */
              while((c=fgetc(crn))!=EOF)
                   putc_tst(c,t);
         }
         else                          /* else re-crunch */
         {    init_cm(f,t);
              while((c=getc_ncr(f))!=EOF)
                   putc_cm(c,t);
              pred_cm(t);              /* finish up after crunching */
         }
    }

    /* standard cleanups common to all methods */

    if(crn)                            /* get rid of crunch temporary */
    {    fclose(crn);
         if(unlink(tnam) && warn)
         {    printf("Cannot delete temporary file %s\n",tnam);
              nerrs++;
         }
    }
    if(note)
         printf("done.\n");
}

/*  Non-repeat compression - text is passed through normally, except that
    a run of more than two is encoded as:

         <char> <DLE> <count>

    Special case: a count of zero indicates that the DLE is really a DLE,
    not a repeat marker.
*/

INT getc_ncr(f)                        /* get bytes with collapsed runs */
FILE *f;                               /* file to get from */
{
    static INT lastc;                  /* value returned on last call */
    static INT repcnt;                 /* repetition counter */
    static INT c;                      /* latest value seen */

    switch(state)                      /* depends on our state */
    {
    case NOHIST:                       /* no relevant history */
         state = S

wfp@dasys1.UUCP (07/22/87)

The two postings comprising this program were truncated when they arrived
here.  (Decimated, actually: only about 10% of each file arrived!)  They
have been reposted to comp.sources.misc, and those copies also are about
the same here.  Is some node doing this, or what?  I'd really like to have
the program, as I am a dialup user of this system and it would be nice to 
be able to arc stuff before downloading it (like long postings, for example);
also, the adminstrators here take a dim view of users cluttering up their
disk space with files.  Anybody know what the problem is?  (I noticed a few
other complaints on the ibm.pc group as well - various problems with these
postings.)
 


-- 
William Phillips                 {allegra,philabs,cmcl2}!phri\
Big Electric Cat Public Unix           {bellcore,cmcl2}!cucard!dasys1!wfp
New York, NY, USA                (-: Just say "NO" to OS/2! :-)