[alt.sources] Message compression utility

cur022%cluster@ukc.ac.uk (Bob Eager) (01/14/91)

I was asked by a few people to post this, so here it is. It is a small
C program for compressing messages. It takes a list of messages and message
numbers as input, and spits out a C function and some arrays. The function
takes a message number as argument and returns the message. It is most
efficient if the message numbers are a low numbered dense set.

I have included a sample message file and a program for testing the function
that is generated.

My thanks to Peter Stephens of Edinburgh University (now Edinburgh Portable
Compilers) for writing the code on which this C version is based.

There are only three files, and this is not UNIX specific, so the three files
are just concatenated below.
-----------------------------------------------------------------
/*
 * File: compmess.c
 *
 * Program to compress a file of error messages, generating suitable
 * arrays to hold the compressed form, and also some code to expand
 * them again
 *
 * Bob Eager   January 1991
 *
 * Thanks to Peter Stephens of Edinburgh University for the original idea.
 *
 */

/*
 * Values for exit status:
 *    0 - Success
 *    1 - Argument error
 *    2 - Failed to open file
 *    3 - Workspace overflow 
 *
 */

/*
 * Compile time options.
 * Define exactly one target system name to be 1; all the rest should be 0.
 *
 */

#define	IBMPC	1			/* Normal IBM PC (8086,80186,80286) */

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Workspace limits. Exceeding MAXLETT or MAXWORD makes the program exit
 * with status 3. MAXLINE sizes the message and word buffers.
 */
#define	MAXLETT		1000		/* Max length of 'lett' array */
#define	MAXLINE		80		/* Max input line length */
#define	MAXWORD		2000		/* Max length of 'word' array */

/*
 * INT is the type of one packed element of 'lett'. The encoder shifts
 * 6-bit codes left by up to 25 bits and uses bit 0 as a flag, so INT
 * needs at least 31 value bits.
 */
#if	IBMPC
typedef	long	INT;
#endif

/* Forward references */

static	void	do_listing(FILE *);
static	void	do_output(FILE *);
static	int	getword(char **,char *);
static	void	lit(int *,char *,FILE *);
static	void	mess(char *,int);
static	FILE	*openio(char *,char *);
static	int	readn(FILE *);
static	void	squash(char *);
static	void	usage(void);

/* Local data */

/*
 * 'lett' holds the dictionary of distinct words. Each element packs up to
 * five 6-bit character codes (at shifts 25, 19, 13, 7 and 1); bit 0 is set
 * when the word continues in the following element.
 *
 * 'word' holds the messages: each message is its message number followed
 * by one entry per word, that entry being the word's index in 'lett' with
 * bit 0x8000 set. A zero entry terminates the list.
 */
static	INT	lett[MAXLETT];	/* Encoded words */
static	int	next = 0;	/* Index of next free slot in 'lett' array */
static	int	nmax = 0;	/* Highest message number seen */
static	int	num = 0;	/* Next free slot in 'word' array */
static	int	nummax = 0;	/* Pointer to last message in 'word' */
static	char	*progname;	/* Program name */
static	int	word[MAXWORD];	/* Message and word numbers */

/*
 * Character code tables.
 *
 * 'intt' is indexed by a 7-bit character value. The supported characters
 * map to: 'A'-'Z' => 1-26, '&' => 27, '-' => 28, '/' => 29, '\'' => 30,
 * '(' => 31, ')' => 32, 'a'-'z' => 33-58, '.' => 59, '%' => 60, '#' => 61.
 * Every other character (including digits and space) maps to 63.
 *
 * 'outtt' is the reverse mapping. Codes 0, 62 and 63 all yield '?'; the
 * decoder never emits code 0, which is the padding value in a partly
 * filled element of 'lett'.
 */
const	char	intt[128] = {	/* Array to map characters to 6-bit code */
	63,63,63,63,63,63,63,63,
	63,63,63,63,63,63,63,63,
	63,63,63,63,63,63,63,63,
	63,63,63,63,63,63,63,63,
	63,63,63,61,63,60,27,30,
	31,32,63,63,63,28,59,29,
	63,63,63,63,63,63,63,63,
	63,63,63,63,63,63,63,63,
	63, 1, 2, 3, 4, 5, 6, 7,
	 8, 9,10,11,12,13,14,15,
	16,17,18,19,20,21,22,23,
	24,25,26,63,63,63,63,63,
	63,33,34,35,36,37,38,39,
	40,41,42,43,44,45,46,47,
	48,49,50,51,52,53,54,55,
	56,57,58,63,63,63,63,63
};
const char outtt[64] = {	/* Array to map 6-bit codes to characters */
	'?','A','B','C','D','E','F','G',
	'H','I','J','K','L','M','N','O',
	'P','Q','R','S','T','U','V','W',
	'X','Y','Z','&','-','/','\'','(',
	')','a','b','c','d','e','f','g',
	'h','i','j','k','l','m','n','o',
	'p','q','r','s','t','u','v','w',
	'x','y','z','.','%','#','?','?'
};


/*
 * Helper for the main loop: read the rest of the current input line from
 * 'fp' and place the text found between the first pair of double quotes
 * in 'buf'.
 *
 * 'buf' always receives a terminated string of at most 'size'-1
 * characters; excess characters are discarded. If the line holds no
 * opening quote, or the quotes are empty, 'buf' is set to the empty string.
 * As before, the text runs up to the closing quote; anything after it, up
 * to and including the newline, is skipped. End of file stops every scan,
 * so a final line with no newline cannot cause an endless loop.
 *
 */

static void read_message(FILE *fp,char *buf,int size)
{	int ch;
	int len = 0;

	buf[0] = '\0';			/* Never leave stale text behind */

	do {				/* Scan for start of message */
		ch = fgetc(fp);
		if((ch == EOF) || (ch == '\n')) return;
	} while(ch != '"');

	while(((ch = fgetc(fp)) != EOF) && (ch != '"')) {
					/* Read message within quotes */
		if(len < size-1) buf[len++] = (char) ch;
	}
	buf[len] = '\0';

	while((ch != EOF) && (ch != '\n')) ch = fgetc(fp);
					/* Skip trailing junk on line */
}


/*
 * Main program.
 *
 * Arguments are the input message file, the output C file and the
 * listing file, in that order. Each input line consists of a message
 * number followed by the message text in double quotes; a message number
 * of zero (or end of file) ends the input.
 *
 * Exits with status 0 on success, 1 for an argument error, 2 if a file
 * cannot be opened and 3 if the workspace overflows.
 *
 */

int main(int argc,char *argv[])
{	int n;				/* Current message number */
	int i;
	char *ptr;			/* Pointer to next input word */
	FILE *infp,*outfp,*listfp;	/* I/O file pointers */
	char input[MAXLINE];		/* Input line buffer */
	char wk1[MAXLINE];		/* String work area */
	static char defname[] = "compmess";
					/* Used if argv[0] is unavailable */

	/*
	 * Derive the program name. Directories are removed before the
	 * extension so that a '.' in a directory name (for example
	 * "./compmess") is not mistaken for the start of an extension.
	 */

	progname = ((argc > 0) && (argv[0] != (char *) NULL)) ?
			argv[0] : &defname[0];
	ptr = strrchr(progname,'\\');
	if(ptr != (char *) NULL) progname = ptr + 1;
	ptr = strrchr(progname,'/');
	if(ptr != (char *) NULL) progname = ptr + 1;
	ptr = strrchr(progname,'.');
	if((ptr != (char *) NULL) && (ptr != progname)) *ptr = '\0';
	for(i = 0; progname[i] != '\0'; i++)
		progname[i] = (char) tolower((unsigned char) progname[i]);

	if(argc != 4) usage();

	infp = openio(argv[1],"r");
	outfp = openio(argv[2],"w");
	listfp = openio(argv[3],"w");

	for(i = 0; i < MAXWORD; i++) word[i] = 0;
	lett[0] = (INT) 0;		/* Initialise terminator */

	/* Main loop - once for each message */

	while((n = readn(infp)) != 0) {
		if(n > nmax) nmax = n;	/* Update high water mark */

		read_message(infp,&input[0],MAXLINE);
					/* Get quoted text, skip rest of line */

		squash(&input[0]);	/* Squash multiple spaces */
		if((strlen(input) == 0) || (strcmp(input," ") == 0))
			continue;	/* Ignore empty lines and messages */

		if(num >= MAXWORD) {
			fputs("Overflow of 'word' array\n",stderr);
			exit(3);
		}
		word[num] = n;		/* Store message number at start */
		nummax = num;		/* Only real messages become the default */
		num++;			/* Point beyond message number */
		fprintf(listfp,"\n%3d",n);
					/* Output message number to listing */

		ptr = &input[0];
		while(getword(&ptr,&wk1[0])) {
					/* Get next word in message */
			if(strlen(wk1) != 0) {
				lit(&i,&wk1[0],listfp);
					/* Get word index to 'i' */
				if(num >= MAXWORD) {
					fputs("Overflow of 'word' array\n",stderr);
					exit(3);
				}
				word[num++] = i | 0x8000;
					/* Store with continuation bit */
			}
		}
	}
	if(num >= MAXWORD) {
		fputs("Overflow of 'word' array\n",stderr);
		exit(3);
	}
	word[num] = 0;			/* Store word list terminator */
	fputc('\n',listfp);
	fclose(infp);

	/* Generate listing of messages */

	do_listing(listfp);
	fclose(listfp);

	/* Generate actual output file */

	do_output(outfp);
	fclose(outfp);

	fprintf(stderr,"%s: function 'message' generated\n",progname);

	return(0);
}


/*
 * Append the table of expanded messages to the listing file.
 *
 * After two blank lines, every message number from 1 up to the highest
 * one seen is expanded; those that yield a non-empty string are written
 * one per line as the number followed by the text.
 *
 */

static void do_listing(FILE *listfp)
{	char text[MAXLINE];		/* Expanded message */
	int msgno = 1;

	fputs("\n\n",listfp);
	while(msgno <= nmax) {
		mess(text,msgno);
		if(text[0] != '\0')
			fprintf(listfp,"%3d  %s\n",msgno,text);
		msgno++;
	}
}


/*
 * Routine to generate the main output file
 *
 * C version
 *
 * The generated file contains, in order:
 *    - a header comment listing every message in expanded form;
 *    - the 'outtt' code-to-character table;
 *    - 'mword', a copy of 'word' preceded by one extra zero element;
 *    - 'mlett', a copy of the used part of 'lett' preceded by one extra
 *      zero element;
 *    - the function 'message', which expands message 'n' into 'mes'.
 *
 * Because of the extra leading elements, the generated code indexes
 * 'mlett' with k+1, and DEFAULT (the message used for an unknown number)
 * is the 'word' index of the last message plus one.
 *
 * The global state is only read, never modified.
 *
 */

static void do_output(FILE *outfp)
{	int i,j,k;
	int last = next - 1;		/* Last used slot in 'lett' */
	char m[MAXLINE];

	fputs("/*\n * File: message.c\n *\n",outfp);
	fprintf(outfp," * This file is generated automatically by the '%s' program\n",progname);
	fputs(" *\n * It should never be edited; rather, alter the message file then\n",outfp);
	fprintf(outfp," * rerun '%s'\n *\n */\n\n",progname);
	fputs("#include <stdio.h>\n\n/",outfp);
	for(i = 1; i <= 70; i++) fputc('*',outfp);
	fputs("\n *       Outputs an error message stored in a compressed format       *\n",outfp);
	fputs(" *",outfp);
	for(i = 1; i <= 68; i++) fputc(' ',outfp);
	fputc('*',outfp);
	fputc('\n',outfp);
	for(i = 1; i <= nmax; i++) {
		mess(&m[0],i);		/* Get message 'i' to 'm' */
		k = strlen(m);
		if(k != 0) {		/* If message exists */
			fprintf(outfp," *     %3d  %s",i,m);
			for(j = 1; j <= 58 - k; j++) fputc(' ',outfp);
					/* Pad to right hand edge of box */
			fputs("*\n",outfp);
		}
	}
	fputs(" *",outfp);
	for(i = 1; i <= 68; i++) fputc(' ',outfp);
	fputs("*\n *",outfp);
	for(i = 1; i <= 69; i++) fputc('*',outfp);
	fputs("/\n\n",outfp);
	fputs("typedef\tlong\tINT;\n\n",outfp);
	fprintf(outfp,"#define\tMWORDMAX\t%d\n",num+1);
	fprintf(outfp,"#define\tDEFAULT\t\t%d\n\n",nummax+1);
	fputs("const char outtt[64] = {\n",outfp);
	fputs("\t'?','A','B','C','D','E','F','G',\n",outfp);
	fputs("\t    'H','I','J','K','L','M','N',\n",outfp);
	fputs("\t    'O','P','Q','R','S','T','U',\n",outfp);
	fputs("\t    'V','W','X','Y','Z','&','-',\n",outfp);
	fputs("\t    '/','\\'','(',')',\n",outfp);
	fputs("\t    'a','b','c','d','e','f','g',\n",outfp);
	fputs("\t    'h','i','j','k','l','m','n',\n",outfp);
	fputs("\t    'o','p','q','r','s','t','u',\n",outfp);
	fputs("\t    'v','w','x','y','z','.','%',\n",outfp);
	fputs("\t    '#','?','?'\n};\n\n",outfp);
	fputs("const int mword[MWORDMAX+1] = {\n",outfp);
	fputs("\t0,\n\t",outfp);

	/* Message table, eight entries per line, terminator included */

	for(i = 0; i <= num; i++) {
		fprintf(outfp,"0x%04x",(unsigned int) word[i]);
		if(i != num) fputc(',',outfp);
		if(((i + 1) % 8) == 0) {
			fputc('\n',outfp);
			if(i != num)
				fputc('\t',outfp);
		}
	}
	if((num+1)%8 != 0) fputc('\n',outfp);
					/* Finish a partly filled last line */
	fputs("};\n",outfp);

	fprintf(outfp,"\nconst INT mlett[%d] = {\n\t0,\n\t",last+2);

	/* Word dictionary, four entries per line */

	for(i = 0; i <= last; i++) {
		fprintf(outfp,"0x%08lx",(unsigned long) lett[i]);
		if(i != last) fputc(',',outfp);
		if(((i + 1) % 4) == 0) {
			fputc('\n',outfp);
			if(i != last)
				fputc('\t',outfp);
		}
	}
	if((last+1)%4 != 0) fputc('\n',outfp);
	fputs("};\n\n",outfp);

	/* Expansion function; this mirrors 'mess' in this file */

	fputs("void message(char *mes,int n)\n",outfp);
	fputs("{\tint i,j,k,q;\n",outfp);
	fputs("\tINT m,sh;\n\n",outfp);
	fputs("\t*mes++ = \' \';\n",outfp);
	fputs("\t*mes = \'\\0\';\n",outfp);
	fputs("\tj = 0;\n",outfp);
	fputs("\tfor(i = 0; i < MWORDMAX+1; i++) {\n",outfp);
	fputs("\t\tif(n == mword[i]) {\n",outfp);
	fputs("\t\t\tj = 1;\n",outfp);
	fputs("\t\t\tbreak;\n",outfp);
	fputs("\t\t}\n",outfp);
	fputs("\t}\n\n",outfp);
	fputs("\tif(j == 0) {\n",outfp);
	fputs("\t\ti = DEFAULT;\n",outfp);
	fputs("\t\tj = 1;\n",outfp);
	fputs("\t}\n\n",outfp);
	fputs("\twhile(1) {\n",outfp);
	fputs("\t\tk = mword[i+j];\n",outfp);
	fputs("\t\tif((k & 0x8000) == 0) break;\n",outfp);
	fputs("\t\tk &= 0x7fff;\n",outfp);
	fputs("\t\tif(j != 1) *mes++ = ' ';\n",outfp);
	fputs("\t\tdo {\n",outfp);
	fputs("\t\t\tm = mlett[k+1];\n",outfp);
	fputs("\t\t\tsh = 25;\n",outfp);
	fputs("\t\t\tdo {\n",outfp);
	fputs("\t\t\t\tq = (int) ((m >> sh) & 0x3f);\n",outfp);
	fputs("\t\t\t\tif(q != 0) *mes++ = outtt[q];\n",outfp);
	fputs("\t\t\t\tsh -= 6;\n",outfp);
	fputs("\t\t\t} while(sh >= 0);\n",outfp);
	fputs("\t\t\tk++;\n",outfp);
	fputs("\t\t} while((m & 1) != 0);\n",outfp);
	fputs("\t\tj++;\n",outfp);
	fputs("\t}\n",outfp);
	fputs("\t*mes = '\\0';\t\t\t/* Terminate string */\n",outfp);
	fputs("}\n",outfp);
	fputs("\n/*\n * End of file: message.c\n *\n*/\n",outfp);
}


/*
 * Extract the next space-delimited word from the string at '*s'.
 *
 * The word is copied to 'word' and terminated. '*s' is advanced past the
 * word and past the single space that follows it, or left pointing at the
 * string terminator if the word was the last one.
 *
 * Returns 0, leaving '*s' and 'word' untouched, if '*s' already points at
 * the end of the string; otherwise returns 1. The word returned may be
 * empty if only spaces remained.
 *
 */

static int getword(char **s,char *word)
{	char *src = *s;			/* Read position */
	char *dst = word;		/* Write position */

	if(*src == '\0')
		return(0);		/* Nothing left */

	while(*src == ' ')		/* Lose leading spaces */
		src++;

	while((*src != ' ') && (*src != '\0'))
		*dst++ = *src++;	/* Copy the word itself */
	*dst = '\0';

	if(*src == ' ')			/* Step over the delimiter */
		src++;

	*s = src;			/* Copy back pointer */
	return(1);
}


/*
 * This routine searches for the word 'txt' in the current word list.
 * If found, it returns the word index in 'p'.
 * If not found, it adds the word to the word list and again returns
 * the word index in 'p'.
 *
 * The word is first encoded into the free space at the end of 'lett':
 * five 6-bit codes per element at shifts 25, 19, 13, 7 and 1, with bit 0
 * set on every element except the last. That encoding is then compared
 * against the existing entries, and only kept if no match is found.
 *
 * A note (" word found  " or " word entered") is written to 'listfp'.
 * Exits with status 3 if 'lett' would overflow.
 *
 * Characters outside the range of the 'intt' table are encoded as 63
 * (shown as '?'), like any other unsupported character, rather than
 * being used as an out-of-range table index.
 *
 */

static void lit(int *p,char *txt,FILE *listfp)
{	int pos = 0;			/* Index of next character in 'txt' */
	int i,j;
	int l = 0;			/* Elements of 'lett' used by this word */
	int txtlen = strlen(txt);
	unsigned char c;
	INT w = 0;			/* Element being assembled */
	INT sh = 25;			/* Shift for the next code in 'w' */

	while(pos < txtlen) {
		c = (unsigned char) txt[pos++];
					/* Get next character in word */
		i = (c < sizeof(intt)) ? intt[c] : 63;
					/* Convert to 6-bit code */
		w = w | (((INT) i) << sh);
		sh -= 6;
		if(sh >= 0) continue;	/* Element not yet full */
		if(pos < txtlen) w |= 1;
					/* More to come: set continuation bit */
		if(next+l >= MAXLETT) {
			fprintf(stderr,"Overflow of 'lett' array\n");
			exit(3);
		}
		lett[next+l] = w;
		w = 0;
		sh = 25;
		l++;
	}

	if(sh != 25) {			/* Store any remainder */
		if(next+l >= MAXLETT) {
			fprintf(stderr,"Overflow of 'lett' array\n");
			exit(3);
		}
		lett[next+l] = w;
		l++;
	}

	/* Look for the same sequence of elements among existing entries */

	for(i = 0; i < next; i++) {
		for(j = 0; j < l; j++) {
			if(lett[i+j] != lett[next+j]) break;
		}
		if(j == l) {		/* All elements matched */
			*p = i;
			fprintf(listfp," word found  ");
			return;
		}
	}

	*p = next;			/* New word: keep the encoding */
	next += l;
	fprintf(listfp," word entered");
}


/*
 * Expand message number 'n' into the character array 'mes'.
 *
 * The first entry of 'word' below 'num' that equals 'n' is taken as the
 * start of the message; the entries that follow it, for as long as they
 * carry the 0x8000 flag, are indices of its words in 'lett'. Words are
 * separated by single spaces. If 'n' is not found, 'mes' is set to the
 * empty string.
 *
 */

static void mess(char *mes,int n)
{	int slot;			/* Position of message number in 'word' */
	int pos;			/* Position of current word entry */
	int idx;			/* Current index into 'lett' */
	int code;			/* Current 6-bit character code */
	INT packed,shift;

	for(slot = 0; slot < num; slot++)
		if(word[slot] == n) break;

	if(slot == num) {		/* No such message */
		*mes = '\0';
		return;
	}

	for(pos = slot + 1; (word[pos] & 0x8000) != 0; pos++) {
		idx = word[pos] & 0x7fff;
		if(pos != slot + 1) *mes++ = ' ';
					/* Separate from previous word */
		do {
			packed = lett[idx++];
			for(shift = 25; shift >= 0; shift -= 6) {
				code = (int) ((packed >> shift) & 0x3f);
				if(code != 0) *mes++ = outtt[code];
					/* Code 0 is padding */
			}
		} while((packed & 1) != 0);
					/* Bit 0 set means word continues */
	}
	*mes = '\0';			/* Terminate string */
}


/*
 * Function to open a file in a specified mode
 * Does not return if there is a failure, but outputs an error
 * message and exits
 *
 * 'file' is the name of the file and 'mode' is the mode string passed
 * to 'fopen'. Returns the open stream. On failure the message is written
 * to stderr and the program exits with status 2.
 *
 */

static FILE *openio(char *file,char *mode)
{	FILE *fp;

	fp = fopen(file,mode);
	if(fp == (FILE *) NULL) {
		fprintf(stderr,"%s: cannot open '%s'\n",progname,file);
		exit(2);
	}

	return(fp);			/* Hand the open stream to the caller */
}


/*
 * Read an unsigned decimal number from 'fp' and return its value.
 *
 * Digits are accumulated until a non-digit character or end of file is
 * met; that terminating character is consumed. If the first character
 * read is not a digit the result is 0.
 *
 */

static int readn(FILE *fp)
{	int value = 0;
	int ch = '0';			/* Dummy leading zero starts the loop */

	while(isdigit(ch)) {
		value = value*10 + (ch - '0');
		ch = fgetc(fp);
	}

	return(value);
}


/*
 * Collapse each run of spaces in the string 's' to a single space.
 * The string is modified in place; a single leading or trailing space
 * is retained.
 *
 */

static void squash(char *s)
{	char *in;			/* Read position */
	char *out = s;			/* Write position */
	int prev_space = 0;		/* Was last character kept a space? */
	int is_space;

	for(in = s; *in != '\0'; in++) {
		is_space = (*in == ' ');
		if(is_space && prev_space)
			continue;	/* Drop repeated space */
		*out++ = *in;
		prev_space = is_space;
	}
	*out = '\0';
}


/*
 * Print a one-line summary of the command syntax on stderr, then
 * exit with status 1. Does not return.
 *
 */

static void usage(void)
{
	fprintf(stderr,"Usage: %s input output listing\n",progname);

	exit(1);
}

/*
 * End of file: compmess.c
 *
 */
-----------------------------------------------------------------
1 "REPEAT is not required"
2 "Label & has already been set in this block"
4 "& is not a Switch name at current textual level"
5 "Switch name & in expression or assignment"
6 "Switch label &(#) set a second time"
7 "Name & has already been declared"
0
-----------------------------------------------------------------
/*
 * File: testmess.c
 *
 * Test program for message compression system
 *
 * Link with message.c for testing
 *
 */

#include <stdio.h>
#include <stdlib.h>

/* External references */

extern	void	message(char *,int);


/*
 * Read message numbers from standard input and print the expansion of
 * each one. Stops at a message number of zero, at end of file, or at
 * input that is not a number.
 *
 */

int main(void)
{	int n;
	char mes[160];			/* Room for leading space + message */

	while(fscanf(stdin,"%d",&n) == 1) {
					/* Read a message number */
		if(n == 0) break;

		message(&mes[0],n);
		fprintf(stdout,"Message %d => \\%s\\\n",n,mes);
	}

	return(0);
}

/*
 * End of file: testmess.c
 *
 */

-------------------------+-------------------------------------------------
Bob Eager                | University of Kent at Canterbury
                         | +44 227 764000 ext 7589
-------------------------+-------------------------------------------------