[net.sources] printfck.c - Check printf format strings

aeb@mcvax.UUCP (Andries Brouwer) (03/31/85)

In order to enable lint to check the correspondence between
the format specifications of printf-like functions (like %d, %ld, %s)
and the number and type of actual arguments, the program given below
copies stdin to stdout, but adds a first line
    #include "procent.h"
and replaces each call to a printf-like function (printf, sprintf, fprintf,
and others you may care to add to its source) such as
    sprintf(buf, "A %20s%*d", s, m, n);
by
    sprintf(buf, "A %20s%*d", procent_s(s), procent_d(m), procent_d(n));
Having processed all one's sources with printfck, one creates files
procent.h and procent.c, where procent.h is something like
    extern int procent_d();
    extern long procent_D();
    extern unsigned long procent_U();
    extern char procent_c();
    extern char *procent_s();
and procent.c is something like
    int procent_d(d) int d; { return(d); }
    long procent_D(D) long D; { return(D); }
    unsigned long procent_U(U) unsigned long U; { return(U); }
    char procent_c(c) char c; { return(c); }
    char *procent_s(s) char *s; { return(s); }
Now lint can do the checking with
    lint -options procent.c other_sources ...

I just wrote this as a quick hack to check the Hack & Quest sources.
If someone wants to make this into something generally useful,
the first thing to do is to make printfck read the list of functions
to check from a file or from its arguments.
There are some minor problems with the interaction with preprocessor
directives (e.g., parts of a printf statement between #ifdef's
or #define's containing quotes).
Perhaps this should have been done with lex.

-------------- cut here ------------------------
/* printfck.c - check all uses of %d, %ld, %s, %u etc. - 850325 aeb@mcvax*/
#include	<stdarg.h>
#include	<stdio.h>

/* Feed with a list of routine names and descriptions:
 *	printf("",...)
 *	sprintf(s,"",...)
 *	fprintf(f,"",...)
 * and with a source file; produce output in which occurrences of e.g.
 *	sprintf(buf, "%s%ld", s, l)
 * are replaced by
 *	sprintf(buf, "%s%ld", procent_s(s), procent_D(l))
 * Now let lint do the checking.
 * Bugs:
 *	Cases where the format string is not explicitly given (e.g., is the
 *	result of some other routine, or looks like  bool ? "s1" : "s2")
 *	are not handled.
 *	Cases where the preprocessor produces quotes or comment delimiters
 *	or concatenates partial identifiers are not handled.
 *	We do not distinguish two sets of identifiers.
 *	Only the parts lint sees get checked - not parts between (false)
 *	#ifdef's. If the call to printf is outside #ifdef's, but some
 *	args are inside, printfck may get confused. However, this is easy
 *	to avoid:
 *
 *	THIS FAILS			THIS WORKS
 *	----------			----------
 *		printf("%s%d",			printf("%s%d", (
 *	#ifdef debug			#ifdef debug
 *			"foo"				"foo"
 *	#else				#else
 *			"bar"				"bar"
 *	#endif debug			#endif debug
 *			, num);				), num);
 *
 */

/*
 * Description of one printf-like routine whose calls are to be rewritten.
 */
struct ir {
	char *rname;	/* name of the routine */
	int pn;		/* number of args preceding format string */
};

/*
 * The routines that are checked.  The table ends with an entry whose
 * rname is a null pointer.
 */
struct ir irs[] = {	/* should be read in - for now explicit */
	{ "printf",	0 },
	{ "fprintf",	1 },
	{ "sprintf",	1 },
	{ "Sprintf",	1 },
	{ "impossible",	0 },
	{ "panic",	0 },
	{ "pline",	0 },
	{ "warning",	0 },
	{ "error",	0 },
	{ (char *) 0,	0 }
};

/* Name put in front of every diagnostic; assigned in main(). */
char *progname;

/*
 * State shared by the input routines.  For peekc and lastc the value 0
 * means "nothing stored".
 */
int	eof = 0,	/* set once getchar() has returned EOF */
	peekc = '\n',	/* pushed-back character; the initial newline makes
			   a # on the very first line recognizable */
	lastc = 0,	/* result of last getchar(), not yet copied to stdout */
	linenr = 1;	/* current input line, reported in diagnostics */

/*
 * getcx - lowest level input routine.
 *
 * Returns the pushed-back character in peekc if there is one, EOF once
 * end of input has been seen, and otherwise the next character of stdin.
 * A character read from stdin is not copied to stdout at once: it is
 * kept in lastc and written only when the following character is
 * fetched, so a caller can still print something in front of the
 * character it has just received.  Newlines read are counted in linenr.
 */
getcx()
{
	register int ch;

	if(peekc) {
		ch = peekc;
		peekc = 0;
		return(ch);
	}
	if(eof)
		return(EOF);

	/* flush the character that was held back */
	if(lastc) {
		putchar(lastc);
		lastc = 0;
	}

	ch = getchar();
	if(ch == EOF) {
		eof++;
		return(ch);
	}
	lastc = ch;
	if(ch == '\n')
		linenr++;
	return(ch);
}

/*
 * getcy - as getcx(), but skip comments.
 *
 * Note: we do not want to eliminate comments; perhaps they contain
 * lint directives.  A comment is therefore only skipped as far as the
 * caller is concerned; it is read through getcx() like everything else.
 * A '/' that does not start a comment is returned as such and the
 * character after it is pushed back.  End of input inside a comment is
 * a fatal error.
 */
getcy()
{
	register int ch = getcx();

	if(ch != '/')
		return(ch);

	ch = getcx();
	if(ch != '*') {
		/* just a slash */
		peekc = ch;
		return('/');
	}

	/* inside a comment: look for a '*' directly followed by '/' */
	for(;;) {
		ch = getcx();
		if(ch == EOF)
			error("unfinished comment");
		while(ch == '*') {
			ch = getcx();
			if(ch == '/')
				return(getcy());
		}
	}
}

/*
 * getcz - as getcy(), but skip preprocessor directives.
 *
 * When a newline is followed by '#', everything up to the next newline
 * is skipped; a backslash hides the character after it, so that
 * continuation lines belong to the directive.  Several directives in a
 * row are skipped together.  The newline itself is returned and the
 * first character of the next ordinary line is pushed back.  End of
 * input inside a directive is a fatal error.
 */
getcz()
{
	register int ch = getcy();

	if(ch != '\n')
		return(ch);

	for(;;) {
		/* we are at the start of a line */
		ch = getcx();
		if(ch != '#') {
			peekc = ch;
			return('\n');
		}
		do {
			ch = getcx();
			if(ch == EOF)
				error("incomplete line");
			while(ch == '\\') {
				(void) getcx();
				ch = getcx();
			}
		} while(ch != '\n');
	}
}

/*
 * getcq - as getcz() but skip strings.
 *
 * Anything between a pair of single or double quotes is passed over,
 * and the first character that is not part of such a quoted item is
 * returned.  Inside the quotes a backslash hides the character that
 * follows it.  End of input before the closing quote is a fatal error.
 */
getcq()
{
	register int ch = getcz();
	register int quote;

	if(ch != '\'' && ch != '"')
		return(ch);

	quote = ch;
	for(;;) {
		ch = getcx();
		if(ch == EOF)
			error("Unfinished string; delim = %c.", quote);
		if(ch == '\\')
			(void) getcx();		/* escaped character */
		else if(ch == quote)
			return(getcq());
	}
}

/*
 * main - copy stdin to stdout, preceded by an #include of procent.h,
 * and hand every identifier found outside comments, preprocessor
 * directives and strings to rd_id().  Returns 0.
 */
main(argc,argv)
int argc;
char **argv;
{
	register int c;

	if(argc)
		progname = argv[0];
	else
		progname = "printfck";

	fputs("#include \"procent.h\"\n", stdout);

	for(;;) {
		c = getcq();
		if(c == EOF)
			break;
		/* check for (interesting) identifiers */
		if(letter(c))
			rd_id(c);
	}
	return(0);
}


/*
 * rd_id - read the rest of an identifier whose first character is
 * `first' and pass the complete name to handle().  Characters that do
 * not fit in the local buffer are read but dropped.  The character that
 * ends the identifier is pushed back.
 */
rd_id(first)
register int first;
{
	char name[256];
	register int len = 0;
	register int c;

	name[len++] = first;
	for(;;) {
		c = getcx();
		if(!letdig(c))
			break;
		if(len < (int)(sizeof(name)-1))
			name[len++] = c;
	}
	peekc = c;
	name[len] = 0;
	handle(name);
}

/*VARARGS1*/
error(s,x)
char *s;
{
	fprintf(stderr, "\n%s: Error (line %d): ", progname, linenr);
	fprintf(stderr, s, x);
	fprintf(stderr, "\n\n");
	exit(1);
}

/*
 * warning - report a non-fatal problem on stderr.
 * The message starts with the program name and the current input line
 * number; s is a printf format and the remaining arguments are the
 * values it refers to.  No newline is added: the format must supply it.
 *
 * The arguments are taken with <stdarg.h>, so that values that are not
 * of type int (such as the char * belonging to a %s) arrive intact.
 * Always returns 0.
 */
/*VARARGS1*/
int warning(const char *s, ...)
{
	va_list ap;

	fprintf(stderr, "%s: Warning (line %d): ", progname, linenr);
	va_start(ap, s);
	vfprintf(stderr, s, ap);
	va_end(ap);
	return(0);
}

/*
 * letter - return 1 if c can start an identifier (a-z, A-Z or
 * underscore), 0 otherwise.
 */
letter(c)
register int c;
{
	if(c == '_')
		return(1);
	if(c >= 'A' && c <= 'Z')
		return(1);
	return(c >= 'a' && c <= 'z');
}

/*
 * digit - return 1 if c is one of the characters 0-9, 0 otherwise.
 */
digit(c)
register int c;
{
	if(c < '0')
		return(0);
	return(c <= '9');
}

/*
 * letdig - return 1 if c can occur inside an identifier (a letter,
 * an underscore or a digit), 0 otherwise.
 */
letdig(c)
register int c;
{
	if(letter(c))
		return(1);
	return(digit(c) != 0);
}

handle(idf)
register char *idf;
{
	register struct ir *irp = irs;

	while(irp->rname) {
		if(!strcmp(idf, irp->rname)) {
			doit(irp);
			return;
		}
		irp++;
	}
}

/*
 * skipspaces - read past blanks, tabs and newlines (and, because
 * getcz() is used, past comments and preprocessor directives too).
 * The first other character is pushed back.
 */
skipspaces()
{
	register int ch;

	do {
		ch = getcz();
	} while(ch == ' ' || ch == '\t' || ch == '\n');
	peekc = ch;
}

/*
 * doit - the name of the routine described by irp has just been read;
 * examine what follows it.
 *
 * The name must be followed by '(' and by irp->pn arguments, each
 * followed by a comma; if not, a warning is given and the occurrence is
 * left alone.  When the next argument starts with a double quote it is
 * taken to be the format string and domore() deals with the rest of the
 * call; otherwise nothing is done.
 */
doit(irp)
register struct ir *irp;
{
	register int c, cnt = irp->pn;

	skipspaces();
	if((c = getcz()) != '(') {
		peekc = c;
		warning("%s not followed by '(' but '%c'.\n",
			irp->rname, c);
		return;
	}

	/* pass over the arguments in front of the format string */
	while(cnt--) {
		c = skiparg();
		if(c != ',') {
			peekc = c;
			/* warning() does not add a newline itself */
			warning("arg of %s not followed by comma.\n",
				irp->rname);
			return;
		}
	}
	skipspaces();

	/* now parse format string (if present) */
	/* (here we also avoid defining occurrences) */
	if((c = getcx()) != '"') {
		peekc = c;
		return;
	}
	domore(irp);
}

/*
 * domore - handle the rest of a call, the opening quote of its format
 * string having been read already.
 *
 * First the format string is scanned and one character per expected
 * argument is collected in fmt[]:
 *	- 'd' for every '*' used as field width or precision, since the
 *	  corresponding argument is an int;
 *	- the conversion character itself (%s gives 's'), or its upper
 *	  case form when it is preceded by 'l' (%ld gives 'D').
 * Then every argument that follows the format string is copied to the
 * output wrapped in procent_X( ), where X is the character collected
 * for it.  More arguments than conversions, or fewer, is a fatal error.
 *
 * irp describes the routine that is called; it is not used here.
 */
domore(irp)
register struct ir *irp;
{
	char fmt[256];
	register char *fp = fmt;
	register int c;

	while(1) {
		c = getcx();
		if(c == EOF)
			error("premature end of format string");
		if(c == '"')
			break;
		if(c == '\\') {
			/* an escaped quote does not end the string */
			c = getcx();
			if(c == '"')
				continue;
		}
		if(c != '%')
			continue;
		c = getcx();
		if(c == '%')
			continue;

		/* flags, any number and in any order */
		while(c == '-' || c == '+' || c == ' ' || c == '#' || c == '0')
			c = getcx();

		/* field width */
		if(c == '*') {
			if(fp-fmt < sizeof(fmt)-1)
				*fp++ = 'd';
			c = getcx();
		} else while(digit(c))
			c = getcx();

		/* precision */
		if(c == '.')
			c = getcx();
		if(c == '*') {
			if(fp-fmt < sizeof(fmt)-1)
				*fp++ = 'd';
			c = getcx();
		} else while(digit(c))
			c = getcx();

		/* a '#' is also accepted in this position */
		if(c == '#')
			c = getcx();

		/* a % at the very end of the string converts nothing */
		if(c == '"')
			break;

		if(c == 'l') {
			c = getcx();
			if('a' <= c && c <= 'z')
				c -= 040;
			else
				error("%%l not followed by lowercase");
		}
		if(fp-fmt < sizeof(fmt)-1)
			*fp++ = c;
		else
			warning("ridiculously long format.\n");
	}
	*fp = 0;

	/* wrap the arguments, one for each character in fmt[] */
	fp = fmt;
	skipspaces();
	while((c = getcz()) == ',') {
		if(!*fp)
			error("too many arguments");
		skipspaces();
		/*
		 * The first character of the argument has been read but
		 * not yet copied to the output, so this text ends up in
		 * front of it; likewise the ")" below ends up in front
		 * of the comma or parenthesis that ends the argument.
		 */
		printf("procent_%c(", *fp++);
		c = skiparg();
		printf(")");
		if(c == ')' && *fp)
			error("too few arguments");
		if(c == ')')
			return;
		peekc = c;
	}

	/* the call ends right after the format string */
	if(c == ')' && *fp)
		error("too few arguments");
}

/*
 * skiparg - read to the end of the current argument.
 *
 * Returns the ',' or ')' that ends the argument, i.e. the first one
 * that is not inside parentheses opened within the argument itself.
 * Strings, comments and preprocessor directives are passed over by
 * getcq().  End of input is a fatal error.
 */
skiparg()
{
	register int depth = 0;		/* nesting level of parentheses */
	register int ch;

	for(;;) {
		ch = getcq();
		if(ch == EOF)
			error("eof in arg list");
		switch(ch) {
		case '(':
			depth++;
			break;
		case ')':
			if(!depth)
				return(ch);
			depth--;
			break;
		case ',':
			if(!depth)
				return(ch);
			break;
		}
	}
}