cur022%cluster@ukc.ac.uk (Bob Eager) (01/14/91)
I was asked by a few people to post this, so here it is. It is a small C program for compressing messages. It takes a list of messages and message numbers as input, and spits out a C function and some arrays. The function takes a message number as an argument and returns the message. It is most efficient if the message numbers are a low-numbered, dense set. I have included a sample message file and a program for testing the function that is generated. My thanks to Peter Stephens of Edinburgh University (now Edinburgh Portable Compilers) for writing the code on which this C version is based. There are only three files, and this is not UNIX specific, so the three files are just concatenated below. ----------------------------------------------------------------- /* * File: compmess.c * * Program to compress a file of error messages, generating suitable * arrays to hold the compressed form, and also some code to expand * them again * * Bob Eager January 1991 * * Thanks to Peter Stephens of Edinburgh University for the original idea. * */ /* * Values for exit status: * 0 - Success * 1 - Argument error * 2 - Failed to open file * 3 - Workspace overflow * */ /* * Compile time options. * Define exactly one target system name to be 1; all the rest should be 0.
* */ #define IBMPC 1 /* Normal IBM PC (8086,80186,80286) */ #include <ctype.h> #include <stdio.h> #include <string.h> #include <stdlib.h> #define MAXLETT 1000 /* Max length of 'lett' array */ #define MAXLINE 80 /* Max input line length */ #define MAXWORD 2000 /* Max length of 'word' array */ #if IBMPC typedef long INT; #endif /* Forward references */ static void do_listing(FILE *); static void do_output(FILE *); static int getword(char **,char *); static void lit(int *,char *,FILE *); static void mess(char *,int); static FILE *openio(char *,char *); static int readn(FILE *); static void squash(char *); static void usage(void); /* Local data */ static INT lett[MAXLETT]; /* Encoded words */ static int next = 0; /* Index of next free slot in 'lett' array */ static int nmax = 0; /* Highest message number seen */ static int num = 0; /* Next free slot in 'word' array */ static int nummax = 0; /* Pointer to last message in 'word' */ static char *progname; /* Program name */ static int word[MAXWORD]; /* Message and word numbers */ const char intt[128] = { /* Array to map characters to 6-bit code */ 63,63,63,63,63,63,63,63, 63,63,63,63,63,63,63,63, 63,63,63,63,63,63,63,63, 63,63,63,63,63,63,63,63, 63,63,63,61,63,60,27,30, 31,32,63,63,63,28,59,29, 63,63,63,63,63,63,63,63, 63,63,63,63,63,63,63,63, 63, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, 16,17,18,19,20,21,22,23, 24,25,26,63,63,63,63,63, 63,33,34,35,36,37,38,39, 40,41,42,43,44,45,46,47, 48,49,50,51,52,53,54,55, 56,57,58,63,63,63,63,63 }; const char outtt[64] = { /* Array to map 6-bit codes to characters */ '?','A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O', 'P','Q','R','S','T','U','V','W', 'X','Y','Z','&','-','/','\'','(', ')','a','b','c','d','e','f','g', 'h','i','j','k','l','m','n','o', 'p','q','r','s','t','u','v','w', 'x','y','z','.','%','#','?','?' 
}; void main(int argc,char *argv[]) { int n; /* Current message number */ int ch,i; char *ptr; /* Pointer to next input word */ FILE *infp,*outfp,*listfp; /* I/O file pointers */ char input[MAXLINE]; /* Input line buffer */ char wk1[MAXLINE]; /* String work area */ progname = argv[0]; ptr = strrchr(progname,'.'); if(ptr != (char *) NULL) *ptr = '\0'; ptr = strrchr(progname,'\\'); if(ptr != (char *) NULL) progname = ++ptr; for(i = 0; progname[i] != '\0'; i++) progname[i] = tolower(progname[i]); if(argc != 4) usage(); infp = openio(argv[1],"r"); outfp = openio(argv[2],"w"); listfp = openio(argv[3],"w"); for(i = 0; i < MAXWORD; i++) word[i] = 0; lett[0] = (INT) 0; /* Initialise terminator */ /* Main loop - once for each message */ while((n = readn(infp)) != 0) { if(n > nmax) nmax = n; /* Update high water mark */ if(num >= MAXWORD) { fputs("Overflow of 'word' array\n",stderr); exit(3); } word[num] = n; /* Store message number at start */ nummax = num; /* Store number of last message so far */ for(ch = fgetc(infp); ch != '\n'; ch = fgetc(infp)) { if(ch != '"') continue; /* Scan for start of message */ fscanf(infp,"%[^\"]",&input[0]); /* Read message within quotes */ while(ch != '\n') ch = fgetc(infp); /* Skip trailing junk on line */ break; } squash(&input[0]); /* Squash multiple spaces */ if((strlen(input) == 0) || (strcmp(input," ") == 0)) continue; /* Ignore empty lines and messages */ num++; /* Point beyond message number */ fprintf(listfp,"\n%3d",n); /* Output message number to listing */ ptr = &input[0]; while(getword(&ptr,&wk1[0])) { /* Get next word in message */ if(strlen(wk1) != 0) { lit(&i,&wk1[0],listfp); /* Get word index to 'i' */ if(num >= MAXWORD) { fputs("Overflow of 'word' array\n",stderr); exit(3); } word[num++] = i | 0x8000; /* Store with continuation bit */ } } } if(num >= MAXWORD) { fputs("Overflow of 'word' array\n",stderr); exit(3); } word[num] = 0; /* Store word list terminator */ fputc('\n',listfp); fclose(infp); /* Generate listing of 
messages */ do_listing(listfp); fclose(listfp); /* Generate actual output file */ do_output(outfp); fclose(outfp); fprintf(stderr,"%s: function 'message' generated\n",progname); exit(0); } /* * Routine to output the listing file * */ static void do_listing(FILE *listfp) { char temp[MAXLINE]; int i; fputs("\n\n",listfp); for(i = 1; i <= nmax; i++) { mess(&temp[0],i); /* Get message 'i' to 'temp' */ if(strlen(temp) != 0) { fprintf(listfp,"%3d %s\n",i,temp); } } } /* * Routine to generate the main output file * * C version * */ static void do_output(FILE *outfp) { int i,j,k; char m[MAXLINE]; fputs("/*\n * File: message.c\n *\n",outfp); fprintf(outfp," * This file is generated automatically by the '%s' program\n",progname); fputs(" *\n * It should never be edited; rather, alter the message file then\n",outfp); fprintf(outfp," * rerun '%s'\n *\n */\n\n",progname); fprintf(outfp,"#include <stdio.h>\n\n/",progname); for(i = 1; i <= 70; i++) fputc('*',outfp); fputs("\n * Outputs an error message stored in a compressed format *\n",outfp); fputs(" *",outfp); for(i = 1; i <= 68; i++) fputc(' ',outfp); fputc('*',outfp); fputc('\n',outfp); for(i = 1; i <= nmax; i++) { mess(&m[0],i); /* Get message 'i' to 'm' */ k = strlen(m); if(k != 0) { /* If message exists */ fprintf(outfp," * %3d %s",i,m); for(j = 1; j <= 58 - k; j++) fputc(' ',outfp); fputs("*\n",outfp); } } fputs(" *",outfp); for(i = 1; i <= 68; i++) fputc(' ',outfp); fputs("*\n *",outfp); for(i = 1; i <= 69; i++) fputc('*',outfp); fputs("/\n\n",outfp); next--; /* Point to last used slot in 'lett' */ fputs("typedef\tlong\tINT;\n\n",outfp); fprintf(outfp,"#define\tMWORDMAX\t%d\n",num+1); fprintf(outfp,"#define\tDEFAULT\t\t%d\n\n",nummax+1); fputs("const char outtt[64] = {\n",outfp); fputs("\t'?','A','B','C','D','E','F','G',\n",outfp); fputs("\t 'H','I','J','K','L','M','N',\n",outfp); fputs("\t 'O','P','Q','R','S','T','U',\n",outfp); fputs("\t 'V','W','X','Y','Z','&','-',\n",outfp); fputs("\t '/','\\'','(',')',\n",outfp); 
fputs("\t 'a','b','c','d','e','f','g',\n",outfp); fputs("\t 'h','i','j','k','l','m','n',\n",outfp); fputs("\t 'o','p','q','r','s','t','u',\n",outfp); fputs("\t 'v','w','x','y','z','.','%',\n",outfp); fputs("\t '#','?','?'\n};\n\n",outfp); fputs("const int mword[MWORDMAX+1] = {\n",outfp); fputs("\t0,\n\t",outfp); for(i = 0; i <= num; i++) { fprintf(outfp,"0x%04x",word[i]); if(i != num) fputc(',',outfp); else fputc('\n',outfp); if(((i + 1) % 8) == 0) { fputc('\n',outfp); if(i != num) fputc('\t',outfp); } } fputs("};\n",outfp); fprintf(outfp,"\nconst INT mlett[%d] = {\n\t0,\n\t",next+2); for(i = 0; i <= next; i++) { #if IBMPC fprintf(outfp,"0x%08lx",lett[i]); #else fprintf(outfp,"0x%08x",lett[i]); #endif if(i != next) fputc(',',outfp); if(((i + 1) % 4) == 0) { fputc('\n',outfp); if(i != next) fputc('\t',outfp); } } if((next+1)%4 != 0) fputc('\n',outfp); fputs("};\n\n",outfp); fprintf(outfp,"void message(char *mes,int n)\n"); fputs("{\tint i,j,k,q;\n",outfp); fputs("\tINT m,sh;\n\n",outfp); fputs("\t*mes++ = \' \';\n",outfp); fputs("\t*mes = \'\\0\';\n",outfp); fputs("\tj = 0;\n",outfp); fputs("\tfor(i = 0; i < MWORDMAX+1; i++) {\n",outfp); fputs("\t\tif(n == mword[i]) {\n",outfp); fputs("\t\t\tj = 1;\n",outfp); fputs("\t\t\tbreak;\n",outfp); fputs("\t\t}\n",outfp); fputs("\t}\n\n",outfp); fputs("\tif(j == 0) {\n",outfp); fputs("\t\ti = DEFAULT;\n",outfp); fputs("\t\tj = 1;\n",outfp); fputs("\t}\n\n",outfp); fputs("\twhile(1) {\n",outfp); fputs("\t\tk = mword[i+j];\n",outfp); fputs("\t\tif((k & 0x8000) == 0) break;\n",outfp); fputs("\t\tk &= 0x7fff;\n",outfp); fputs("\t\tif(j != 1) *mes++ = ' ';\n",outfp); fputs("\t\tdo {\n",outfp); fputs("\t\t\tm = mlett[k+1];\n",outfp); fputs("\t\t\tsh = 25;\n",outfp); fputs("\t\t\tdo {\n",outfp); fputs("\t\t\t\tq = (int) ((m >> sh) & 0x3f);\n",outfp); fputs("\t\t\t\tif(q != 0) *mes++ = outtt[q];\n",outfp); fputs("\t\t\t\tsh -= 6;\n",outfp); fputs("\t\t\t} while(sh >= 0);\n",outfp); fputs("\t\t\tk++;\n",outfp); fputs("\t\t} while((m 
& 1) != 0);\n",outfp); fputs("\t\tj++;\n",outfp); fputs("\t}\n",outfp); fputs("\t*mes = '\\0';\t\t\t/* Terminate string */\n",outfp); fputs("}\n",outfp); fputs("\n/*\n * End of file: message.c\n *\n*/\n",outfp); } /* * Function to extract the next word from the string 's' and place * it in 'word'. Returns zero if no words left, otherwise returns 1. * */ static int getword(char **s,char *word) { char ch; char *p = *s; /* Get working copy of pointer */ if(*p == '\0') return(0); /* String empty */ while(1) { /* Lose leading spaces */ ch = *p++; if(ch != ' ') break; } while((ch != ' ') && (ch != '\0')) { *word++ = ch; ch = *p++; } if(ch == '\0') p--; /* Back off to null terminator */ *word = '\0'; /* Add terminator */ /* 'p' now points beyond the space. */ *s = p; /* Copy back pointer */ return(1); /* A word has been read */ } /* * This routine searches for the word 'txt' in the current word list. * If found, it returns the word index in 'p'. * If not found, it adds the word to the word list and again returns * the word index in 'p'. * */ static void lit(int *p,char *txt,FILE *listfp) { int ch = 0; int i,j; int l = 0; int txtlen = strlen(txt); INT w = 0; INT sh = 25; while(ch < txtlen) { i = txt[ch++]; /* Get next character in word */ i = intt[i]; /* Convert to 6-bit code */ w = w | (((INT) i) << sh); sh -= 6; if(sh >= 0) continue; if(ch < txtlen) w |= 1; if(next+l >= MAXLETT) { fprintf(stderr,"Overflow of 'lett' array\n"); exit(3); } lett[next+l] = w; w = 0; sh = 25; l++; } if(sh != 25) { if(next+l >= MAXLETT) { fprintf(stderr,"Overflow of 'lett' array\n"); exit(3); } lett[next+l] = w; l++; } /* Store any remainder */ for(i = 0; i < next; i++) { for(j = 0; j < l; j++) { if(lett[i+j] != lett[next+j]) goto fail; } goto found; fail:; } *p = next; next += l; fprintf(listfp," word entered"); return; found: *p = i; fprintf(listfp," word found "); } /* * Routine to return a string corresponding to message 'n'. * The string is stored in the character array 'mes'. 
* */ static void mess(char *mes,int n) { int i,j,k,q; INT m,sh; j = 0; for(i = 0; i < num; i++) { if(n == word[i]) { j = 1; break; } } if(j == 0) { *mes = '\0'; return; } while(1) { k = word[i+j]; if((k & 0x8000) == 0) break; k &= 0x7fff; if(j != 1) *mes++ = ' '; do { m = lett[k]; sh = 25; do { q = (int) ((m >> sh) & 0x3f); if(q != 0) *mes++ = outtt[q]; sh -= 6; } while(sh >= 0); k++; } while((m & 1) != 0); j++; } *mes = '\0'; /* Terminate string */ } /* * Function to open a file in a specified mode * Does not return if there is a failure, but outputs an error * message and exits * */ static FILE *openio(char *file,char *mode) { FILE *fp; fp = fopen(file,mode); if(fp == (FILE *) NULL) { fprintf(stderr,"%s: cannot open '%s'\n",progname,file); exit(2); } } /* * Function to read a number from input 'fp' and return its value. * */ static int readn(FILE *fp) { int res; int ch = '0'; for(res = 0; isdigit(ch); ch = fgetc(fp)) { res = res*10 + (ch - '0'); } return(res); } /* * Routine to convert multiple spaces to single spaces in the string 's'. * */ static void squash(char *s) { char ch; char lastch = 'x'; int i; int j = 0; for(i = 0; s[i] != '\0'; i++) { ch = s[i]; if(!((ch == ' ') && (lastch == ' '))) { s[j++] = ch; lastch = ch; } } s[j] = '\0'; } /* * Routine to output brief usage information, then exit. 
* */ static void usage() { fprintf(stderr,"Usage: %s input output listing\n",progname); exit(1); } /* * End of file: compmess.c * */ ----------------------------------------------------------------- 1 "REPEAT is not required" 2 "Label & has already been set in this block" 4 "& is not a Switch name at current textual level" 5 "Switch name & in expression or assignment" 6 "Switch label &(#) set a second time" 7 "Name & has already been declared" 0 ----------------------------------------------------------------- /* * File: testmess.c * * Test program for message compression system * * Link with message.c for testing * */ #include <stdio.h> #include <stdlib.h> /* External references */ extern void message(char *,int); void main(argc,argv) int argc; char *argv[]; { int n; char mes[80]; while(1) { fscanf(stdin,"%d",&n); /* Read a message number */ if(n == 0) break; message(&mes[0],n); fprintf(stdout,"Message %d => \\%s\\\n",n,mes); } exit(0); } /* * End of file: testmess.c * */ -------------------------+------------------------------------------------- Bob Eager | University of Kent at Canterbury | +44 227 764000 ext 7589 -------------------------+-------------------------------------------------