[alt.sources] run length encoding

gtoal@tharr.UUCP (Graham Toal) (09/06/90)

Archive-name: runlen.c

/*

(   by the way, this and the previous huffman prog aren't meant to
    be utilities for using - they're just for experimenting with;
    I *don't* want yet another non-standard compression prog floating
    around.  I'd never live it down ;-)   )

   More experimentation with compression: a simple run-length filter.

   This is the first idea that came into my head - these codings
   are chosen so that all but a <dle> on its own stay the same
   size or get smaller.  However maybe a better idea would be a
   coding which moved 3-byte sequences into 2-bytes; on data which
   is arrays of machine words, 2, 3, and 4 byte sequences are
   disproportionately common. (24 or 32-bit-wide picture files?)

   Anyway, this took a 1028K scan (b/w, 300dpi) of a line drawing
   (Doonesbury cartoon) and shrunk it to 395K; then the huffman
   code I posted last week compressed that to 206K.

   This compares middling well to 188K from 'compress'.


   A textual postscript file went from 310K, rle -> 249K,  huff -> 137K.


        <dle>                            <dle><0>
        <dle><dle>                       <dle><1>
        <dle><dle> ... n ... <dle><dle>  <dle><2><n-1>
        a                                a
        aa                               aa
        aaa                              aaa
        aaaa                             a<dle><3>
         :
        aaaaaaa ... n ... aa             a<dle><n-1>
         :
        aaa ... 256 ... aaaaa            a<dle><255>
        

    Maybe folks could hack up other simple filters, and experiment
    with putting them together in various orders.  By the way, I tried
    delta compression (processing the difference between the last byte
    and this byte, rather than the byte itself).  Didn't help. Got worse
    in fact :-(

 */



#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define RLE_MAGIC 290690L

FILE *input_file;
FILE *output_file;

#ifndef TRUE
#define TRUE (0==0)
#define FALSE (0!=0)
#endif

int DLE = 234;

void output_run(FILE *f, int ch, int count) {
  if (ch == DLE) {
    fputc(DLE, f);
    if (count > 2) fputc(2, f);
    fputc(count-1, f);
  } else {
    fputc(ch, f);
    if (count == 1) return;
    if (count > 3) fputc(DLE, f); else fputc(ch, f);
    if (count == 3) fputc(ch, f);
    if (count > 3) fputc(count-1, f);
  }
}

void hfputw(long w, FILE *f)
{
  fputc((int)((w>>24L)&255L), f);
  fputc((int)((w>>16L)&255L), f);
  fputc((int)((w>> 8L)&255L), f);
  fputc((int)((w     )&255L), f);
}

long hfgetw(FILE *f)
{
long w;
  w = fgetc(f); w = w<<8L;
  w |= fgetc(f); w = w<<8L;
  w |= fgetc(f); w = w<<8L;
  w |= fgetc(f);
  return(w);
}

int main(int argc, char **argv)
{
long fsize;
int curr, last;
int count;
int a;


  if (argc != 4 || *argv[1] != '-') {
    fprintf(stderr, "syntax: %s {-e,-d} input output\n", argv[0]);
    exit(0);
  }

  input_file = fopen(argv[2], "rb");
  if (input_file == NULL) {
    fprintf(stderr, "%s: cannot open input file %s\n", argv[0], argv[2]);
    exit(0);
  }

  if (strcmp(argv[1], "-e") == 0) {
    output_file = fopen(argv[3], "wb");
    if (output_file == NULL) {
      fprintf(stderr, "%s: cannot open output file %s\n", argv[0], argv[3]);
      fclose(input_file);
      exit(0);
    }
    fseek(input_file, 0L, SEEK_END);
    fsize = ftell(input_file);
    fclose(input_file);
    input_file = fopen(argv[2], "rb");
    hfputw(RLE_MAGIC, output_file);
    hfputw(fsize, output_file);

    curr = fgetc(input_file); count = 1;
    for (;;) {
      for (;;) {
        a = fgetc(input_file);
        if (a == EOF) break;
        if (a != curr) break;
        count++;
        if (count == 257) break;
      }
      if (a == EOF) break;
      output_run(output_file, curr, count);
      curr = a; count = 1;
    }
    output_run(output_file, curr, count);

    fclose(input_file);
    fclose(output_file);
  } else if (strcmp(argv[1], "-d") == 0) {
    long check_magic;

    check_magic = hfgetw(input_file);
    if (check_magic != RLE_MAGIC) {
      fprintf(stderr, "%s: input file %s not huffman encoded (%ld vs %ld)\n",
        argv[0], argv[2], check_magic, RLE_MAGIC);
      exit(0);
    }
    output_file = fopen(argv[3], "wb");
    fsize = hfgetw(input_file);

    last = -1;
    for (;;) {
      int i;

      a = fgetc(input_file);
      if (a == EOF) break;
      if (a == DLE) {
        count = fgetc(input_file);
        if (count <= 2 && last != -1) fputc(last, output_file), last = -1;
        if (count == 0) {
          fputc(DLE, output_file);
        } else if (count == 1) {
          fputc(DLE, output_file);
          fputc(DLE, output_file);
        } else if (count == 2) {
          count = fgetc(input_file);
          for (i = 0; i < count+1; i++) fputc(DLE, output_file);
        } else {
          if (last == -1) fprintf(stderr, "Error #1\n");
          for (i = 0; i < count+1; i++) fputc(last, output_file);
        }
        last = -1;
      } else {
        if (last != -1) fputc(last, output_file);
        last = a;
      }
    }
    if (last != -1) fputc(last, output_file);

    fclose(input_file);
    fclose(output_file);
  } else {
    fprintf(stderr, "syntax: %s {-e,-d} input output\n", argv[0]);
  }
  exit(0);
}