[comp.os.vms] ARC_C.SHAR14_OF_19

ewilts%Ins.MRC.AdhocNet.CA%Stasis.MRC.AdhocNet.CA%UNCAEDU.@CORNELLC.CCS.CORNELL.EDU.BITNET (Ed Wilts) (06/24/88)

$Part15:
$ File_is="ARCSQ.C"
$ Check_Sum_is=290919933
$ Copy SYS$Input VMS_SHAR_DUMMY.DUMMY
Xstatic char *RCSid = "$Header: arcsq.c,v 1.2 86/07/15 07:54:05 turner Exp $";
X
X/*
X * $Log:`009arcsq.c,v $
X * Hack-attack 1.3  86/12/20  01:23:45  wilhite@usceast.uucp
X * `009Bludgeoned into submission for VAX 11/780 BSD4.2
X *`009(ugly code, but fewer core dumps)
X *
X * Revision 1.2  86/07/15  07:54:05  turner
X *
X *
X * Revision 1.1  86/06/26  15:00:48  turner
X * initial version
X *
X *
X */
X
X/*  ARC - Archive utility - ARCSQ
X
X$define(tag,$$segment(@1,$$index(@1,=)+1))#
X$define(version,Version $tag(
XTED_VERSION DB =3.10), created on $tag(
XTED_DATE DB =01/30/86) at $tag(
XTED_TIME DB =20:10:46))#
X$undefine(tag)#
X    $version
X
X(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
X
X    By:  Thom Henderson
X
X    Description:
X         This file contains the routines used to squeeze a file
X         when placing it in an archive.
X
X    Language:
X         Computer Innovations Optimizing C86
X
X    Programming notes:
X         Most of the routines used for the Huffman squeezing algorithm
X         were lifted from the SQ program by Dick Greenlaw, as adapted
X         to CI-C86 by Robert J. Beilstein.
X*/
X#include <stdio.h>
X#include "arc.h"
X
X/* stuff for Huffman squeezing */
X
X#define TRUE 1
X#define FALSE 0
X#define ERROR (-1)
X#define SPEOF 256                      /* special endfile token */
X#define NOCHILD (-1)                   /* marks end of path through tree */
X#define NUMVALS 257                    /* 256 data values plus SPEOF*/
X#define NUMNODES (NUMVALS+NUMVALS-1)   /* number of nodes */
X#define MAXCOUNT (unsigned INT) 65535      /* biggest unsigned integer */
X
X/* The following array of structures are the nodes of the
X   binary trees. The first NUMVALS nodes become the leaves of the
X   final tree and represent the values of the data bytes being
X   encoded and the special endfile, SPEOF.
X   The remaining nodes become the internal nodes of the final tree.
X*/
X
Xstruct nd                              /* shared by unsqueezer */
X{   unsigned INT weight;                   /* number of appearances */
X INT tdepth;                        /* length on longest path in tree */
X INT lchild, rchild;                /* indices to next level */
X}   node[NUMNODES];                    /* use large buffer */
X
Xstatic INT dctreehd;                   /* index to head of final tree */
X
X/* This is the encoding table:
X   The bit strings have first bit in low bit.
X   Note that counts were scaled so code fits unsigned integer.
X*/
X
Xstatic INT codelen[NUMVALS];           /* number of bits in code */
Xstatic unsigned INT code[NUMVALS];         /* code itself, right adjusted */
Xstatic unsigned INT tcode;                 /* temporary code value */
Xstatic long valcount[NUMVALS];         /* actual count of times seen */
X
X/* Variables used by encoding process */
X
Xstatic INT curin;                      /* value currently being encoded */
Xstatic INT cbitsrem;                   /* # of code string bits left */
Xstatic unsigned INT ccode;                 /* current code right justified */
X
XINT init_sq()                              /* prepare for scanning pass */
X{
X INT i;                             /* node index */
X
X    /* Initialize all nodes to single element binary trees
X       with zero weight and depth.
X    */
X
X    for(i=0; i<NUMNODES; ++i)
X    {    node[i].weight = 0;
X         node[i].tdepth = 0;
X         node[i].lchild = NOCHILD;
X         node[i].rchild = NOCHILD;
X    }
X
X    for(i=0; i<NUMVALS; i++)
X         valcount[i] = 0;
X}
X
XINT scan_sq(c)                             /* add a byte to the tables */
XINT c;                                 /* byte to add */
X{
X    unsigned INT *wp;                      /* speeds up weight counting */
X
X    /* Build frequency info in tree */
X
X    if(c == EOF)                       /* it's traditional */
X         c = SPEOF;                    /* dumb, but traditional */
X
X    if(*(wp = &node[c].weight) !=  MAXCOUNT)
X         ++(*wp);                      /* bump weight counter */
X
X    valcount[c]++;                     /* bump byte counter */
X}
X
Xlong pred_sq()                         /* predict size of squeezed file */
X{
X INT i;
X INT btlist[NUMVALS];               /* list of intermediate b-trees */
X INT listlen;                       /* length of btlist */
X    unsigned INT ceiling;                  /* limit for scaling */
X    long size = 0;                     /* predicted size */
X INT numnodes;                      /* # of nodes in simplified tree */
X    INT scale();
X    INT heap();
X    INT bld_tree();
X    INT buildenc();
X    INT init_enc();
X
X    scan_sq(EOF);                      /* signal end of input */
X
X    ceiling = MAXCOUNT;
X
X    /* Keep trying to scale and encode */
X
X    do
X    {    scale(ceiling);
X         ceiling /= 2;                 /* in case we rescale */
X
X         /* Build list of single node binary trees having
X            leaves for the input values with non-zero counts
X         */
X
X         for(i=listlen=0; i<NUMVALS; ++i)
X         {    if(node[i].weight != 0)
X              {    node[i].tdepth = 0;
X                   btlist[listlen++] = i;
X              }
X         }
X
X         /* Arrange list of trees into a heap with the entry
X            indexing the node with the least weight at the top.
X         */
X
X         heap(btlist,listlen);
X
X         /* Convert the list of trees to a single decoding tree */
X
X         bld_tree(btlist,listlen);
X
X         /* Initialize the encoding table */
X
X         init_enc();
X
X         /* Try to build encoding table.
X            Fail if any code is > 16 bits long.
X         */
X    }    while(buildenc(0,dctreehd) == ERROR);
X
X    /* Initialize encoding variables */
X
X    cbitsrem = 0;                      /* force initial read */
X    curin = 0;                         /* anything but endfile */
X
X    for(i=0; i<NUMVALS; i++)           /* add bits for each code */
X         size += valcount[i] * codelen[i];
X
X    size = (size+7)/8;                 /* reduce to number of bytes */
X
X    numnodes = dctreehd<NUMVALS ? 0 : dctreehd-(NUMVALS-1);
X
X    size += sizeof(INT) + 2*numnodes*sizeof(INT);
X
X    return size;
X}
X
X/* The count of number of occurrances of each input value
X   have already been prevented from exceeding MAXCOUNT.
X   Now we must scale them so that their sum doesn't exceed
X   ceiling and yet no non-zero count can become zero.
X   This scaling prevents errors in the weights of the
X   interior nodes of the Huffman tree and also ensures that
X   the codes will fit in an unsigned integer. Rescaling is
X   used if necessary to limit the code length.
X*/
X
Xstatic INT scale(ceil)
Xunsigned INT ceil;                         /* upper limit on total weight */
X{
X    register INT i,c;
X INT ovflw, divisor;
X    unsigned INT w, sum;
X    unsigned char increased;           /* flag */
X
X    do
X    {    for(i=sum=ovflw=0; i<NUMVALS; ++i)
X         {    if(node[i].weight > (ceil-sum))
X                   ++ovflw;
X              sum += node[i].weight;
X         }
X
X         divisor = ovflw + 1;
X
X         /* Ensure no non-zero values are lost */
X
X         increased = FALSE;
X         for(i=0; i<NUMVALS; ++i)
X         {    w = node[i].weight;
X              if(w<divisor && w!=0)
X              {    /* Don't fail to provide a code if it's used at all */
X
X                   node[i].weight = divisor;
X                   increased = TRUE;
X              }
X         }
X    }    while(increased);
X
X    /* Scaling factor choosen, now scale */
X
X    if(divisor>1)
X         for(i=0; i<NUMVALS; ++i)
X              node[i].weight /= divisor;
X}
X
X/* heap() and adjust() maintain a list of binary trees as a
X   heap with the top indexing the binary tree on the list
X   which has the least weight or, in case of equal weights,
X   least depth in its longest path. The depth part is not
X   strictly necessary, but tends to avoid long codes which
X   might provoke rescaling.
X*/
X
Xstatic INT heap(list,length)
XINT list[], length;
X{
X    register INT i;
X    INT adjust();
X
X    for(i=(length-2)/2; i>=0; --i)
X         adjust(list,i,length-1);
X}
X
X/* Make a heap from a heap with a new top */
X
Xstatic INT adjust(list,top,bottom)
XINT list[], top, bottom;
X{
X    register INT k, temp;
X    INT cmptrees();
X
X    k = 2 * top + 1;                   /* left child of top */
X    temp = list[top];                  /* remember root node of top tree */
X
X    if(k<=bottom)
X    {    if(k<bottom && cmptrees(list[k],list[k+1]))
X              ++k;
X