gtoal@tharr.UUCP (Graham Toal) (09/06/90)
Archive-name: runlen.c /* ( by the way, this and the previous huffman prog aren't meant to be utilities for using - they're just for experimenting with; I *don't* want yet another non-standard compression prog floating around. I'd never live it down ;-) ) More experimentation with compression: a simple run-length filter. This is the first idea that came into my head - these codings are chosen so that all but a <dle> on its own stay the same size or get smaller. However maybe a better idea would be a coding which moved 3-byte sequences into 2-bytes; on data which is arrays of machine words, 2, 3, and 4 byte sequences are disproportionately common. (24 or 32-bit-wide picture files?) Anyway, this took a 1028K scan (b/w, 300dpi) of a line drawing (Doonesbury cartoon) and shrunk it to 395K; then the huffman code I posted last week compressed that to 206K. This compares middling well to 188K from 'compress'. A textual postscript file went from 310K, rle -> 249K, huff -> 137K. <dle> <dle><0> <dle><dle> <dle><1> <dle><dle> ... n ... <dle><dle> <dle><2><n-1> a a aa aa aaa aaa aaaa a<dle><3> : aaaaaaa ... n ... aa a<dle><n-1> : aaa ... 256 ... aaaaa a<dle><255> Maybe folks could hack up other simple filters, and experiment with putting them together in various orders. By the way, I tried delta compression (processing the difference between the last byte and this byte, rather than the byte itself). Didn't help. Got worse in fact :-( */ #include <stdio.h> #include <string.h> #include <stdlib.h> #define RLE_MAGIC 290690L FILE *input_file; FILE *output_file; #ifndef TRUE #define TRUE (0==0) #define FALSE (0!=0) #endif int DLE = 234; void output_run(FILE *f, int ch, int count) { if (ch == DLE) { fputc(DLE, f); if (count > 2) fputc(2, f); fputc(count-1, f); } else { fputc(ch, f); if (count == 1) return; if (count > 3) fputc(DLE, f); else fputc(ch, f); if (count == 3) fputc(ch, f); if (count > 3) fputc(count-1, f); } } void hfputw(long w, FILE *f) { fputc((int)((w>>24L)&255L), f); fputc((int)((w>>16L)&255L), f); fputc((int)((w>> 8L)&255L), f); fputc((int)((w )&255L), f); } long hfgetw(FILE *f) { long w; w = fgetc(f); w = w<<8L; w |= fgetc(f); w = w<<8L; w |= fgetc(f); w = w<<8L; w |= fgetc(f); return(w); } int main(int argc, char **argv) { long fsize; int curr, last; int count; int a; if (argc != 4 || *argv[1] != '-') { fprintf(stderr, "syntax: %s {-e,-d} input output\n", argv[0]); exit(0); } input_file = fopen(argv[2], "rb"); if (input_file == NULL) { fprintf(stderr, "%s: cannot open input file %s\n", argv[0], argv[2]); exit(0); } if (strcmp(argv[1], "-e") == 0) { output_file = fopen(argv[3], "wb"); if (output_file == NULL) { fprintf(stderr, "%s: cannot open output file %s\n", argv[0], argv[3]); fclose(input_file); exit(0); } fseek(input_file, 0L, SEEK_END); fsize = ftell(input_file); fclose(input_file); input_file = fopen(argv[2], "rb"); hfputw(RLE_MAGIC, output_file); hfputw(fsize, output_file); curr = fgetc(input_file); count = 1; for (;;) { for (;;) { a = fgetc(input_file); if (a == EOF) break; if (a != curr) break; count++; if (count == 257) break; } if (a == EOF) break; output_run(output_file, curr, count); curr = a; count = 1; } output_run(output_file, curr, count); fclose(input_file); fclose(output_file); } else if (strcmp(argv[1], "-d") == 0) { long check_magic; check_magic = hfgetw(input_file); if (check_magic != RLE_MAGIC) { fprintf(stderr, "%s: input file %s not huffman encoded (%ld vs %ld)\n", argv[0], argv[2], check_magic, RLE_MAGIC); exit(0); } output_file = fopen(argv[3], "wb"); fsize = hfgetw(input_file); last = -1; for (;;) { int i; a = fgetc(input_file); if (a == EOF) break; if (a == DLE) { count = fgetc(input_file); if (count <= 2 && last != -1) fputc(last, output_file), last = -1; if (count == 0) { fputc(DLE, output_file); } else if (count == 1) { fputc(DLE, output_file); fputc(DLE, output_file); } else if (count == 2) { count = fgetc(input_file); for (i = 0; i < count+1; i++) fputc(DLE, output_file); } else { if (last == -1) fprintf(stderr, "Error #1\n"); for (i = 0; i < count+1; i++) fputc(last, output_file); } last = -1; } else { if (last != -1) fputc(last, output_file); last = a; } } if (last != -1) fputc(last, output_file); fclose(input_file); fclose(output_file); } else { fprintf(stderr, "syntax: %s {-e,-d} input output\n", argv[0]); } exit(0); }