ewilts%Ins.MRC.AdhocNet.CA%Stasis.MRC.AdhocNet.CA%UNCAEDU.@CORNELLC.CCS.CORNELL.EDU.BITNET (Ed Wilts) (06/24/88)
X /* k indexes "smaller" child (in heap of trees) of top */ X /* now make top index "smaller" of old top and smallest child */ X X if(cmptrees(temp,list[k])) X { list[top] = list[k]; X list[k] = temp; X X /* Make the changed list a heap */ X X adjust(list,k,bottom); /* recursive */ X } X } X} X X/* Compare two trees, if a > b return true, else return false. X Note comparison rules in previous comments. X*/ X Xstatic INT cmptrees(a,b) XINT a, b; /* root nodes of trees */ X{ X if(node[a].weight > node[b].weight) X return TRUE; X if(node[a].weight == node[b].weight) X if(node[a].tdepth > node[b].tdepth) X return TRUE; X return FALSE; X} X X/* HUFFMAN ALGORITHM: develops the single element trees X into a single binary tree by forming subtrees rooted in X interior nodes having weights equal to the sum of weights of all X their descendents and having depth counts indicating the X depth of their longest paths. X X When all trees have been formed into a single tree satisfying X the heap property (on weight, with depth as a tie breaker) X then the binary code assigned to a leaf (value to be encoded) X is then the series of left (0) and right (1) X paths leading from the root to the leaf. X Note that trees are removed from the heaped list by X moving the last element over the top element and X reheaping the shorter list. X*/ X Xstatic INT bld_tree(list,len) XINT list[]; XINT len; X{ X register INT freenode; /* next free node in tree */ X register struct nd *frnp; /* free node pointer */ X INT lch, rch; /* temps for left, right children */ X INT i; X INT maxchar(); X X /* Initialize index to next available (non-leaf) node. X Lower numbered nodes correspond to leaves (data values). X */ X X freenode = NUMVALS; X X while(len>1) X { /* Take from list two btrees with least weight X and build an interior node pointing to them. X This forms a new tree. X */ X X lch = list[0]; /* This one will be left child */ X X /* delete top (least) tree from the list of trees */ X X list[0] = list[--len]; X adjust(list,0,len-1); X X /* Take new top (least) tree. Reuse list slot later */ X X rch = list[0]; /* This one will be right child */ X X /* Form new tree from the two least trees using X a free node as root. Put the new tree in the list. X */ X X frnp = &node[freenode]; /* address of next free node */ X list[0] = freenode++; /* put at top for now */ X frnp->lchild = lch; X frnp->rchild = rch; X frnp->weight = node[lch].weight + node[rch].weight; X frnp->tdepth = 1 + maxchar(node[lch].tdepth, node[rch].tdepth); X X /* reheap list to get least tree at top */ X X adjust(list,0,len-1); X } X dctreehd = list[0]; /* head of final tree */ X} X Xstatic INT maxchar(a,b) X{ X return a>b ? a : b; X} X Xstatic INT init_enc() X{ X register INT i; X X /* Initialize encoding table */ X X for(i=0; i<NUMVALS; ++i) X codelen[i] = 0; X} X X/* Recursive routine to walk the indicated subtree and level X and maintain the current path code in bstree. When a leaf X is found the entire code string and length are put into X the encoding table entry for the leaf's data value . X X Returns ERROR if codes are too long. X*/ X Xstatic INT buildenc(level,root) XINT level; /* level of tree being examined, from zero */ XINT root; /* root of subtree is also data value if leaf */ X{ X register INT l, r; X X l = node[root].lchild; X r = node[root].rchild; X X if(l==NOCHILD && r==NOCHILD) X { /* Leaf. Previous path determines bit string X code of length level (bits 0 to level - 1). X Ensures unused code bits are zero. X */ X X codelen[root] = level; X code[root] = tcode & (((unsigned INT)~0) >> (16-level)); X return (level>16) ? ERROR : NULL; X } X X else X { if(l!=NOCHILD) X { /* Clear path bit and continue deeper */ X X tcode &= ~(1 << level); X if(buildenc(level+1,l)==ERROR) X return ERROR; /* pass back bad statuses */ X } X if(r!=NOCHILD) X { /* Set path bit and continue deeper */ X X tcode |= 1 << level; X if(buildenc(level+1,r)==ERROR) X return ERROR; /* pass back bad statuses */ X } X } X return NULL; /* it worked if we reach here */ X} X Xstatic INT put_int(n,f) /* output an integer */ XINT n; /* integer to output */ XFILE *f; /* file to put it to */ X{ X putc_pak(n&0xff,f); /* first the low byte */ X putc_pak(n>>8,f); /* then the high byte */ X} X X/* Write out the header of the compressed file */ X Xstatic long wrt_head(ob) XFILE *ob; X{ X register INT l,r; X INT i, k; X INT numnodes; /* # of nodes in simplified tree */ X X /* Write out a simplified decoding tree. Only the interior X nodes are written. When a child is a leaf index X (representing a data value) it is recoded as X -(index + 1) to distinguish it from interior indexes X which are recoded as positive indexes in the new tree. X X Note that this tree will be empty for an empty file. X */ X X numnodes = dctreehd<NUMVALS ? 0 : dctreehd-(NUMVALS-1); X put_int(numnodes,ob); X X for(k=0, i=dctreehd; k<numnodes; ++k, --i) X { l = node[i].lchild; X r = node[i].rchild; X l = l<NUMVALS ? -(l+1) : dctreehd-l; X r = r<NUMVALS ? -(r+1) : dctreehd-r; X put_int(l,ob); X put_int(r,ob); X } X X return sizeof(INT) + numnodes*2*sizeof(INT); X} X X/* Get an encoded byte or EOF. Reads from specified stream AS NEEDED. X X There are two unsynchronized bit-byte relationships here. X The input stream bytes are converted to bit strings of X various lengths via the static variables named c... X These bit strings are concatenated without padding to X become the stream of encoded result bytes, which this X function returns one at a time. The EOF (end of file) is X converted to SPEOF for convenience and encoded like any X other input value. True EOF is returned after that. X*/ X Xstatic INT gethuff(ib) /* Returns bytes except for EOF */ XFILE *ib; X{ X INT rbyte; /* Result byte value */ X INT need, take; /* numbers of bits */ X X rbyte = 0; X need = 8; /* build one byte per call */ X X /* Loop to build a byte of encoded data. X Initialization forces read the first time. X */ X Xloop: X if(cbitsrem>=need) /* if current code is big enough */ X { if(need==0) X return rbyte; X X rbyte |= ccode << (8-need); /* take what we need */ X ccode >>= need; /* and leave the rest */ X cbitsrem -= need; X return rbyte & 0xff; X } X X /* We need more than current code */ X X if(cbitsrem>0) X { rbyte |= ccode << (8-need); /* take what there is */ X need -= cbitsrem; X } X X /* No more bits in current code string */ X X if(curin==SPEOF) X { /* The end of file token has been encoded. If X result byte has data return it and do EOF next time. X */ X X cbitsrem = 0; X return (need==8) ? EOF : rbyte + 0; X } X X /* Get an input byte */ X X if((curin=getc_ncr(ib)) == EOF) X curin = SPEOF; /* convenient for encoding */ X X ccode = code[curin]; /* get the new byte's code */ X cbitsrem = codelen[curin]; X X goto loop; X} X X/* This routine is used to perform the actual squeeze operation. It can X only be called after the file has been scanned. It returns the true X length of the squeezed entry. X*/ X Xlong file_sq(f,t) /* squeeze a file into an archive */ XFILE *f; /* file to squeeze */ XFILE *t; /* archive to receive file */ X{ X INT c; /* one byte of squeezed data */ X long size; /* size after squeezing */ X X size = wrt_head(t); /* write out the decode tree */ X X while((c=gethuff(f))!=EOF) X { putc_pak(c,t); X size++; X } X X return size; /* report true size */ X} $ GoSub Convert_File $ Goto Part15