[alt.sources] dynhuff.c - C source of dynamic huffman alg

gtoal@tharr.UUCP (Graham Toal) (09/03/90)

Archive-name: dynhuff.c

Well, after posting that last article, I felt a bit antisocial leaving
the conversion to C to some luckless soul, so I had a quick hack at it
myself.  This C version wraps up the previous code into an actual
usable program, complete with error checking and magic numbers.

----- cut here -----
/*
  ALGORITHM 673,

  ACM Transactions on Mathematical Software, Vol 15, No 2, Pages 158-167. 

  Jeffrey Scott Vitter
  Brown University

  This file coded up from the paper above by Graham Toal <gtoal@ed.ac.uk>

  This is a one-pass dynamic Huffman code generator.  I supply a trivial
  interface for testing.  Real-world use would require that you firstly
  translate this into C (please post here when you do) and secondly
  modify the IO to write binary files. (This writes a text file of
  0's and 1's for demonstration purposes)

  Also it needs some logic for what happens on } of file.

*/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define HUFF_MAGIC 240859L

FILE *input_file;
FILE *output_file;

#ifndef TRUE
#define TRUE (0==0)
#define FALSE (0!=0)
#endif

#define  n  256
#define  bb  (n*2 + 1)

int alpha[n+1];
int rep[n+1];
int block[bb+1];
int weight[bb+1];
int parent[bb+1];
int parity[bb+1];
int rtChild[bb+1];
int first[bb+1];
int last[bb+1];
int prevBlock[bb+1];
int nextBlock[bb+1];
int availBlock;
int stack[n+1];
int a;
int M, E, R, Z;

void Initialize(void)
{
int i;

  M = 0; E = 0; R = -1; Z = 2*n - 1;
  for (i = 1; i <=n; i++) {
    M = M+1; R = R+1;
    if (R*2 == M) { E = E+1; R = 0; }
    alpha[i] = i; rep[i] = i;
  }
  /* Initialize node n as the 0-node */
  block[n] = 1; prevBlock[1] = 1; nextBlock[1] = 1; weight[1] = 0;
  first[1] = n; last[1] = n; parity[1] = 0;
  /* Initialize available block list */
  availBlock = 2;
  for (i = availBlock; i <= Z-1; i++) nextBlock[i] = i+1;
  nextBlock[Z] = 0;
}

static int nextbitpos = 0;
void Transmit(int i)
{
static int byte = 0;

  byte = (byte << 1) | (i&1); nextbitpos ++;
  
  if (nextbitpos == 8) {
    if (fputc(byte, output_file) == EOF) {
      fprintf(stderr, "Error writing output file - disk full?\n");
      exit(0);
    }
    byte = 0; nextbitpos = 0;
  }
}

int Receive(void)
{
static int byte;
static int bitsleft = 0;
int bit;

  if (bitsleft == 0) {
    for (;;) {
      byte = fgetc(input_file);
      if (byte == EOF) {
        return(0);
      }
      break;
    }
    bitsleft = 8;
  }
  bit = (byte >> 7) & 1;
  byte = (byte << 1);
  bitsleft -= 1;
  return(bit);
}

void EncodeAndTransmit(int j)
{
int i, ii, q, t, root;

  q = rep[j]; i = 0;
  if (q <= M) { /* Encode letter of zero weight */
    q = q - 1;
    if (q < R*2) {
      t = E+1;
    } else {
      q = q - R; t = E;
    }
    for (ii = 1; ii <= t; ii++) {
      i = i + 1; stack[i] = q % 2;
      q = q / 2;
    }
    q = M;
  }
  if (M == n) root = n; else root = Z;
  while (q != root) { /* Traverse up the tree */
    i = i + 1; stack[i] = (first[block[q]] - q + parity[block[q]]) % 2;
    q = parent[block[q]] - (first[block[q]] - q + 1 - parity[block[q]]) / 2;
  }
  for (ii = i; ii >= 1; --ii) {
    Transmit(stack[ii]);
  }
}

int FindChild(int j, int parity)
{
int delta, right, gap;

  delta = 2*(first[block[j]]-j) + 1 - parity;
  right = rtChild[block[j]]; gap = right - last[block[right]];
  if (delta <= gap) return(right-delta);
  else {
    delta = delta - gap - 1;
    right = first[prevBlock[block[right]]]; gap = right - last[block[right]];
    if (delta <= gap) return(right-delta);
    else return(first[prevBlock[block[right]]] - delta + gap + 1);
  }
}

int ReceiveAndDecode(void)
{
int i, q;

  if (M == n) q = n; else q = Z; /* Set q to the root node */
  while (q > n) /* Traverse down the tree */
    q = FindChild(q, Receive());
  if (q == M) { /* Decode 0-node */
    q = 0;
    for (i = 1; i <= E; i++) q = q*2 + Receive();
    if (q < R) q = q*2 + Receive(); else q = q + R;
    q = q + 1;
  }
  return(alpha[q]);
}

void InterchangeLeaves(int e1, int e2)
{
int temp;

  rep[alpha[e1]] = e2; rep[alpha[e2]] = e1;
  temp = alpha[e1]; alpha[e1] = alpha[e2]; alpha[e2] = temp;
}

/* These should be local to Update, but visible to FindNode & SlideAndInc */
int q, leafToIncrement, bq, b, oldParent, oldParity, nbq, par, bpar;
int slide;

void FindNode(int k)
{
  q = rep[k]; leafToIncrement = 0;
  if (q <= M) { /* A zero weight becomes positive */
    InterchangeLeaves(q, M);
    if (R == 0) { R = M / 2; if (R > 0) E = E-1; }
    M = M-1; R = R-1; q = M+1; bq = block[q];
    if (M > 0) {
      /* Split the 0-node into an internal node with two children.  The new
        0-node is node M; the old 0-node is node M+1; the new parent of
        nodes M and M+1 is node M+n */
      block[M] = bq; last[bq] = M; oldParent = parent[bq];
      parent[bq] = M+n; parity[bq] = 1;
      /* Create a new internal block of zero weight for node M+n */
      b = availBlock; availBlock = nextBlock[availBlock];
      prevBlock[b] = bq; nextBlock[b] = nextBlock[bq];
      prevBlock[nextBlock[bq]] = b; nextBlock[bq] = b;
      parent[b] = oldParent; parity[b] = 0; rtChild[b] = q;
      block[M+n] = b; weight[b] = 0;
      first[b] = M+n; last[b] = M+n;
      leafToIncrement = q; q = M+n;
    }
  } else { /* Interchange q with the first node in q's block */
    InterchangeLeaves(q, first[block[q]]);
    q = first[block[q]];
    if ((q == M+1) && (M > 0)) {
      leafToIncrement = q; q = parent[block[q]];
    }
  }
}

void SlideAndIncrement(void)
{ /* q is currently the first node in its block */
  bq = block[q]; nbq = nextBlock[bq];
  par = parent[bq]; oldParent = par; oldParity = parity[bq];
  if ((
       (q <= n) && (first[nbq] > n) && (weight[nbq] == weight[bq]))
     ||
       ((q > n) && (first[nbq] <= n) && (weight[nbq] == weight[bq]+1)
     )) {     /* Slide q over the next block */
    slide = TRUE;
    oldParent = parent[nbq]; oldParity = parity[nbq];
    /* Adjust child pointers for next higher level in tree */
    if (par > 0) {
      bpar = block[par];
      if (rtChild[bpar] == q) {
        rtChild[bpar] = last[nbq];
      } else if (rtChild[bpar] == first[nbq]) {
        rtChild[bpar] = q;
      } else rtChild[bpar] = rtChild[bpar]+1;
      if (par != Z) {
        if (block[par+1] != bpar) {
          if (rtChild[block[par+1]] == first[nbq])
            rtChild[block[par+1]] = q;
          else if (block[rtChild[block[par+1]]] == nbq) {
            rtChild[block[par+1]] = rtChild[block[par+1]]+1;
          }
        }
      }
    }
    /* Adjust parent pointers for block nbq */
    parent[nbq] = parent[nbq] - 1 + parity[nbq]; parity[nbq] = 1-parity[nbq];
    nbq = nextBlock[nbq];
  } else slide = FALSE;
  if ((
      ((q <= n) && (first[nbq] <= n))
      ||
      ((q > n) && (first[nbq] > n))
     )
     && (weight[nbq] == weight[bq]+1)) {
    /* Merge q into the block of weight one higher */
    block[q] = nbq; last[nbq] = q;
    if (last[bq] == q) { /* q's old block disappears */
      nextBlock[prevBlock[bq]] = nextBlock[bq];
      prevBlock[nextBlock[bq]] = prevBlock[bq];
      nextBlock[bq] = availBlock; availBlock = bq;
    } else {
      if (q > n) rtChild[bq] = FindChild(q-1, 1);
      if (parity[bq] == 0) parent[bq] = parent[bq] - 1;
      parity[bq] = 1-parity[bq];
      first[bq] = q-1;
    }
  } else if (last[bq] == q) {
    if (slide) { /* q's block is slid forward in the block list */
      prevBlock[nextBlock[bq]] = prevBlock[bq];
      nextBlock[prevBlock[bq]] = nextBlock[bq];
      prevBlock[bq] = prevBlock[nbq]; nextBlock[bq] = nbq;
      prevBlock[nbq] = bq; nextBlock[prevBlock[bq]] = bq;
      parent[bq] = oldParent; parity[bq] = oldParity;
    }
    weight[bq] = weight[bq]+1;
  } else {
    /* A new block is created for q */
    b = availBlock; availBlock = nextBlock[availBlock];
    block[q] = b; first[b] = q; last[b] = q;
    if (q > n) {
      rtChild[b] = rtChild[bq];
      rtChild[bq] = FindChild(q-1, 1);
      if (rtChild[b] == q-1) parent[bq] = q;
      else if (parity[bq] == 0) parent[bq] = parent[bq]-1;
    } else if (parity[bq] == 0) parent[bq] = parent[bq]-1;
    first[bq] = q-1; parity[bq] = 1-parity[bq];
    /* Insert q's in block in its proper place in the block list */
    prevBlock[b] = prevBlock[nbq]; nextBlock[b] = nbq;
    prevBlock[nbq] = b; nextBlock[prevBlock[b]] = b;
    weight[b] = weight[bq]+1;
    parent[b] = oldParent; parity[b] = oldParity;
  }
  /* Move q one level higher in the tree */
  if (q <= n) q = oldParent; else q = par;
}

void Update(int k)
{
  /* Set q to the node whose weight should increase */
  FindNode(k);
  while (q > 0) {
    /* At this point, q is the first node in its block.  Increment q's weight
      by 1 and slide q if necessary over the next block to maintain the
      invariant.  Then set q to the node one level higher that needs
      incrementing next */
    SlideAndIncrement();
  }
  /* Finish up some special cases involving the 0-node */
  if (leafToIncrement != 0) {
    q = leafToIncrement;
    SlideAndIncrement();
  }
}

void hfputw(long w, FILE *f)
{
  fputc((int)((w>>24L)&255L), f);
  fputc((int)((w>>16L)&255L), f);
  fputc((int)((w>> 8L)&255L), f);
  fputc((int)((w     )&255L), f);
}

long hfgetw(FILE *f)
{
long w;
  w = fgetc(f); w = w<<8L;
  w |= fgetc(f); w = w<<8L;
  w |= fgetc(f); w = w<<8L;
  w |= fgetc(f);
  return(w);
}

int main(int argc, char **argv)
{
long fsize;

  if (argc != 4 || *argv[1] != '-') {
    fprintf(stderr, "syntax: %s {-e,-d} input output\n", argv[0]);
    exit(0);
  }

  Initialize();

  input_file = fopen(argv[2], "rb");
  if (input_file == NULL) {
    fprintf(stderr, "%s: cannot open input file %s\n", argv[0], argv[2]);
    exit(0);
  }

  if (strcmp(argv[1], "-e") == 0) {
    output_file = fopen(argv[3], "wb");
    if (output_file == NULL) {
      fprintf(stderr, "%s: cannot open output file %s\n", argv[0], argv[3]);
      fclose(input_file);
      exit(0);
    }
    fseek(input_file, 0L, SEEK_END);
    fsize = ftell(input_file);
    fclose(input_file);
    input_file = fopen(argv[2], "rb");
    hfputw(HUFF_MAGIC, output_file);
    hfputw(fsize, output_file);
    for (;;) {
      a = fgetc(input_file);
      if (a == EOF) break;
      EncodeAndTransmit(a);
      Update(a);
    }
    while (nextbitpos != 0) Transmit(1);
    fclose(input_file);
    fclose(output_file);
  } else if (strcmp(argv[1], "-d") == 0) {
    long check_magic;

    check_magic = hfgetw(input_file);
    if (check_magic != HUFF_MAGIC) {
      fprintf(stderr, "%s: input file %s not huffman encoded (%ld vs %ld)\n",
        argv[0], argv[2], check_magic, HUFF_MAGIC);
      exit(0);
    }
    output_file = fopen(argv[3], "wb");
    fsize = hfgetw(input_file);
    for (;;) {
      a = ReceiveAndDecode();
      if (fputc(a, output_file) == EOF) {
        fprintf(stderr, "Error writing output file - disk full?\n");
        exit(0);
      }
      if (--fsize == 0L) break;
      Update(a);
    }
    fclose(input_file);
    fclose(output_file);
  } else {
    fprintf(stderr, "syntax: %s {-e,-d} input output\n", argv[0]);
  }
  exit(0);
}