draper (12/24/82)
This is a follow-up to Tom Anderson's cnest that checked for nested comments. First comes the C source, then the manual entry -- separated by a line of --- ------------------------------------------------------------------------------ /* Copyright: The Regents of the University of California Title: cchk Purpose: To find and report all badly matched openers and closers plus assignment/equality confusions in a c source file. Author: Steve Draper, expanding cnest by Tom Anderson Usage: cchk [-q] [-v] <filename1> <filename2> ... History: December 3, 1982 creation date -- Tom Anderson at microsof!fluke December 9, 1982 Jeffrey Mogul at Stanford - checks for unclosed comment at end of file. December 20, 1982 Converted to cchk -- Steve Draper at UCSD */ #include <stdio.h> #include <ctype.h> #define TRUE 1 #define FALSE 0 #define SP 32 #define TAB 9 #define LF 10 #define BRACE 1 #define SQBRAK 2 #define PAREN 3 #define IF 4 #define IFCOND 5 #define WHLCOND 6 #define THEN 7 #define ELSE 8 #define STACKSIZ 20 struct brak { int type, b_indent, b_ln; } stack[STACKSIZ]; #define rmbrak(N) (top -= N, stackc -= N) #define myungetc(C) ungetc(((C) == LF ? 
SP : (C)), infile) int mygetchar(), pr(); void checkelse(), newbrak(), checkcloser(), prtype(); FILE *infile; int ln, indent, commindent, stackc, commln; int singlequoterr, oddsinglequote, bracecnt, parencnt, sqbrakcnt; int errstatus = 0, wstatus = 0; int errnmb, wnmb; int verbose = 0; char *filename; struct brak *top; main(argc,argv) unsigned int argc ; char *argv[] ; { register int c ; int i; int doubleqflag = 0; unsigned int file_index; struct brak *ptr; file_index = 1; while (argc > 1 && argv[file_index][0] == '-') { if (strcmp(argv[file_index], "-v") == 0) verbose++; if (strcmp(argv[file_index], "-q") == 0) wnmb = -2; if (strcmp(argv[file_index], "-s") == 0) wnmb = -2; file_index++; argc--; } do { /* INIT for each file */ ln = 1; indent = 0; commindent = 0; singlequoterr = oddsinglequote = parencnt = sqbrakcnt = bracecnt = 0; errnmb = 0; if (wnmb > -2) wnmb = 0; newbrak(0); if (argc == 1) { infile = stdin; filename = NULL; } else { if ((infile = fopen(argv[file_index],"r")) == (FILE *) NULL) { fprintf (stdout,"%s: Can't access %s\n",argv[0], argv[file_index]); continue; } filename = argv[file_index]; } while ( ( c = mygetchar()) != EOF ) { if (verbose == 2) { int i; for (i = stackc; i>0; i--) { printf("%c %d: type ", c, i); prtype(stack[i].type); printf(", indent %d, line %d.\n", stack[i].b_indent, stack[i].b_ln); } } switch (c) { case ';': myungetc(SP); while (top->type == ELSE) rmbrak(1); if (top->type == THEN) { rmbrak(1); checkelse(); } break; case '!': case '>': case '<': /* swallow legit. 
'=' chars */ c = mygetchar(); if (c != '=') myungetc(c); break; case '=': if ((top-1)->type == IFCOND || (top-1)->type == WHLCOND) { c = mygetchar(); if (c != '=') { myungetc(c); if (pr(1)) printf("Assignment instead of equals in conditional, line %d.\n", ln); } } break; case LF: case SP: c = mygetchar(); switch (c) { case 'i': /* if */ c = mygetchar(); if (c == 'f' && !isalpha(c = fgetc(infile)) && !isdigit(c)) { ungetc(c, infile); newbrak(IF); while ((c = mygetchar()) == SP || c == LF); if (c != '(') { if (pr(1)) printf("Bad if (no condition) line %d.\n", ln); rmbrak(1); } else newbrak(IFCOND); myungetc(c); } else myungetc(c); break; case 'w': /* while */ if ((c = mygetchar()) == 'h' && (c = mygetchar()) == 'i' && (c = mygetchar()) == 'l' && (c = mygetchar()) == 'e' && !isalpha(c = fgetc(infile)) && !isdigit(c)) { ungetc(c, infile); while ((c = mygetchar()) == SP || c == LF); if (c != '(') { if (pr(1)) printf("Bad while (no condition) line %d.\n", ln); } else newbrak(WHLCOND); myungetc(c); } else myungetc(c); break; case 'e': /* else */ myungetc(c); checkelse(); break; default: myungetc(c); break; } break; case '*': /* close comment ? 
*/ c = mygetchar(); if (c != '/') { myungetc(c); break; } if (pr(1)) printf ("Line %d: Comment close without open, indent %d\n", ln, indent); break; case '\'': if ((c = fgetc(infile)) != '\\') { if (c == '\'' || (c = fgetc(infile)) != '\'') { if (pr(1)) printf("Bad character constant line %d\n", ln); singlequoterr = 1; } } else if (!isdigit(c = fgetc(infile))) { if ((c = fgetc(infile)) != '\'') { if (pr(1)) printf("Bad character constant with \\ line %d\n", ln); } } else { if (isdigit(c = fgetc(infile))) if (isdigit(c = fgetc(infile))) c = fgetc(infile); if (c != '\'') if (pr(1)) printf("Bad character constant with \\0 line %d\n", ln); } if (c != '\'') { ungetc(c, infile); oddsinglequote = !oddsinglequote; singlequoterr = 1; } break; case '\"': do { c = fgetc(infile); if (c == EOF) { if (pr(2)) printf("Error: '\"' quoted string not ended by end of file .\n"); break; } else if (c == LF) { if (doubleqflag == 0) if (pr(0)) printf("Warning: '\"' quoted string not ended by end of line %d.\n", ln); doubleqflag = 1; ln++; } else if (c == '\\') { c = SP; fgetc(infile); } } while (c != '\"' ) ; doubleqflag = 0; break; case '{': if (stackc && indent < top->b_indent) if (pr(0)) printf("Indent jumps backwards line %d.\n", ln); newbrak(BRACE); break; case '}': checkcloser(BRACE); while (top->type == ELSE) rmbrak(1); if (top->type == THEN) { rmbrak(1); checkelse(); } break; case '(': if (stackc && indent < top->b_indent) if (pr(0)) printf("Indent jumps backwards line %d.\n", ln); newbrak(PAREN); break; case ')': checkcloser(PAREN); if (top->type == IFCOND) { rmbrak(1); newbrak(THEN); } else if (top->type == WHLCOND) rmbrak(1); break; case '[': if (stackc && indent < top->b_indent) if (pr(0)) printf("Indent jumps backwards line %d.\n", ln); newbrak(SQBRAK); break; case ']': checkcloser(SQBRAK); break; default: break; } } eof: fclose(infile); while (stackc > 0) { pr(2); fputs("Unclosed brak at EOF: ", stdout); prtype(top->type); printf(" opened on line %d.\n", top->b_ln); switch 
(top->type) { case BRACE: { bracecnt++; break; } case SQBRAK: { sqbrakcnt++; break; } case PAREN: { parencnt++; break; } default: break; } rmbrak(1); } if (errstatus || (oddsinglequote || bracecnt || sqbrakcnt || parencnt)) { pr(2); puts("Summary: "); } else { if (filename != NULL) { fputs(filename, stdout); fputs(": ", stdout); } puts(" O.K."); } if (oddsinglequote) puts("\tOdd number of single quotes. "); if (bracecnt) { printf("\t%d too few %s braces.\n", abs(bracecnt), (bracecnt>0 ? "closing" : "opening")); } if (sqbrakcnt) { printf("\t%d too few %s square brackets.\n", abs(sqbrakcnt), (sqbrakcnt>0 ? "closing" : "opening")); } if (parencnt) { printf("\t%d too few %s parentheses.\n", abs(parencnt), (parencnt>0 ? "closing" : "opening")); } putchar(LF); } while (++file_index < argc); exit(errstatus ? 2 : wstatus); } int mygetchar() { register int c; static int firsttime = 1; c = fgetc(infile); /* if (c == ';') { ungetc(SP, infile); return(';'); } */ if (c == '/') /* open comment ? */ { c = fgetc(infile); if (c != '*') { ungetc(c, infile); return('/'); } commln = ln; commindent = indent; while (1) { c = fgetc(infile); if (c == EOF) /* last comment never ended */ { if (pr(2)) printf ("Comment opened line %d unclosed by end of file.\n", commln); } else if (c == '/') /* nested comment ? */ { if ((c = fgetc(infile)) == '*') { if (pr(0)) fprintf(stdout, "Nested comment: line %d, indent %d. First open: line %d, indent %d\n", ln, indent, commln, commindent); } else ungetc(c, infile); } else if (c == '*') /* end comment ? 
*/ { if ((c = fgetc(infile)) == '/') { if (indent != commindent && indent-1 != commindent) if (pr(0)) printf( "Indent of comment close doesn't match open: lines %d, %d, indents %d, %d\n", commln, ln, commindent, indent); break; /* only exit from loop */ } else ungetc(c, infile); } else if (c == LF) { do { if (c == SP) indent++; else if (c == TAB) indent = ((indent+8)/8)*8; else if (c == LF) { ln++; indent = 0; } } while (isspace(c = fgetc(infile))); ungetc(c, infile); } } return(SP); } if (c == LF || firsttime == 1) { firsttime = 0; lf: while (1) { if (c == SP) indent++; else if (c == TAB) indent = ((indent+8)/8)*8; else if (c == LF) { ln++; indent = 0; singlequoterr = 0; } else { ungetc(c, infile); return(LF); break; } c = fgetc(infile); } } if (c == SP || c == TAB) { do c = fgetc(infile); while (c == SP || c == TAB); if (c != LF) { ungetc(c, infile); return(SP); } else goto lf; } return(c); } /* administer count of error msgs. and suppress if too many administer the status var.s prepend file name to msg. 
flag error msg.s (not warnings) with '*' */ int pr(error) int error; { if (singlequoterr) return(0); if (verbose) { int i; for (i = stackc; i>0; i--) { printf("%d: type ", i); prtype(stack[i].type); printf(", indent %d, line %d.\n", stack[i].b_indent, stack[i].b_ln); } } if (error == 2) { errnmb = 0; errstatus = 1; } else if (error) { errstatus = 1; if (errnmb < 0) return(0); else if (errnmb >= 9) { errnmb = -1; puts("Other error messages being suppressed.\n"); return(0); } } else { wstatus = 1; if (wnmb < 0) return(0); else if (errnmb + wnmb >= 9) { wnmb = -1; puts("Further warning messages being suppressed.\n"); return(0); } } if (filename != NULL) { fputs(filename, stdout); fputs(": ", stdout); } if (error) putchar('*'); if (error) errnmb++; else wnmb++; return(1); } void newbrak(newtype) int newtype; { if (newtype == 0) { top = stack; stackc = 0; } else { top++; stackc++; } if (stackc >= STACKSIZ) { if (pr(2)) printf("***stack overflow, line %d.\n", ln); exit(); } top->type = newtype; top->b_indent = indent; top->b_ln = ln; } void prtype(type) int type; { switch(type) { case BRACE: putchar('}'); break; case PAREN: putchar(')'); break; case SQBRAK: putchar(']'); break; case IF: fputs("if", stdout); break; case IFCOND: fputs("if-condition", stdout); break; case THEN: fputs("then", stdout); break; case ELSE: fputs("else", stdout); break; case WHLCOND: fputs("while-condition", stdout); break; default: fputs("'NULL'", stdout); break; } } void checkcloser(type) int (type); { int i = 0, found = 0; if (type == top->type && top->b_indent == indent) { rmbrak(1); return; } while(!found && ++i < stackc && indent <= (top-i)->b_indent) if (type == (top-i)->type && (top-i)->b_indent == indent) found = 1; if (found) { if (pr(1)) printf("Missing closer%c detected line %d:\n", (i>1?'s':'\0'), ln); while(i--) { if (pr(1)) { fputs("\tMissing closing ", stdout); prtype(top->type); printf(" opened line %d.\n", top->b_ln); } switch (top->type) { case BRACE: { bracecnt++; break; } 
case SQBRAK: { sqbrakcnt++; break; } case PAREN: { parencnt++; break; } default: break; } rmbrak(1); } rmbrak(1); /* the matching brak */ } else if (type == top->type) { if (indent != top->b_indent) { if (pr(0)) { fputs("Mismatched indent on closing ", stdout); prtype(type); printf (" lines %d, %d; indents %d, %d.\n", top->b_ln, ln, top->b_indent, indent); } } rmbrak(1); } else { switch (type) { case BRACE: { bracecnt--; break; } case SQBRAK: { sqbrakcnt--; break; } case PAREN: { parencnt--; break; } default: break; } if (pr(1)) { fputs("Muddle detected at unmatched closing ", stdout); prtype(type); printf(" line %d.\n", ln); } } } /* removes IF from stack checks else's indent */ void checkelse() { int c; while ((c = mygetchar()) == SP || c == LF); if (c == 'e' && (c = mygetchar()) == 'l' && (c = mygetchar()) == 's' && (c = mygetchar()) == 'e' && !isalpha(c = fgetc(infile)) && !isdigit(c)) { ungetc(c, infile); if (top->type == THEN) rmbrak(1); if (top->type != IF) { if (pr(1)) printf("Else with no if line %d.\n", ln); } else if (indent+2 < top->b_indent) { if (pr(1)) printf("Dangling else -- bound to wrong if? \"if\" line %d, \"else\" line %d.\n", top->b_ln, ln); } else if (indent != top->b_indent) { if (pr(0)) { fputs("Wrong indent for else", stdout); if (indent-2 > top->b_indent) fputs(" (missing if?)", stdout); printf(". \"if\" line %d, \"else\" line %d.\n", top->b_ln, ln); } } if (top->type == IF) rmbrak(1); newbrak(ELSE); } else { myungetc(c); myungetc(SP); /* no else so terminate the IF */ if (top->type == IF) { rmbrak(1); while (top->type == ELSE) rmbrak(1); if (top->type == THEN) { rmbrak(1); checkelse(); } } } } ---------------------------------------------------------------------------- .TH cchk 1CSL "Dec 19, 1982" .SH NAME cchk \- C program checker .SH SYNOPSIS .I cchk [-q] [-v] [files ...] .SH EXAMPLES .nf cchk foo.c .br This is the basic useage. .sp 2 cchk $1.c if ($status == 0) cc -O $1.c -o $1 .fi .br These lines might appear in a shell script. 
They run cchk on the source of the program named as argument, and then run the compiler on it only if cchk detected no errors.
.SH DESCRIPTION
.I cchk
checks C programs for correctly matching brackets of all kinds, including quotes and comment brackets, checks that the indentation of matching brackets also matches, and checks for symptoms of 3 kinds of errors that the C compiler allows without warning: "dangling else" errors where an else is bound to the wrong preceding if, nested comments, where the first close-comment bracket prematurely ends the outer comment, and the use of assignment ('=') where equality-test ('==') was meant. It is meant to be run as a pre-check on C programs before calling the compiler, just as you might run lint as a post-check. Its virtues are that it is about 5 times as fast as the compiler, so that it allows you to weed out some trivial syntactic errors faster; for the errors it detects it produces better error messages than the compiler; and it detects the errors mentioned above that the compiler ignores.
.PP
The indentation rules it applies are that the indentation of the first non-white character on the line holding an opener should match that on the line holding the matching closer. These rules are fairly weak (e.g. they are compatible with but do not enforce the Ingres format standard), though they may still conflict with your own habits. The
.I -q
(quiet) option suppresses messages classed as warnings, which includes those about mismatched indentations. The
.I -v
(verbose) option prints more information -- it shows what is on its internal stack at the time an error is detected. It is probably only of real use for debugging
.I cchk
itself. The program returns status 1 if warnings were issued, status 2 if errors were detected, and 0 if neither.
.PP
The distinction between warnings and errors is somewhat arbitrary.
Because C allows certain errors it would be inappropriate here to make the distinction between compilable and non-compilable programs. Basically only indentation mismatches are warnings, and the symptoms of dangling elses or using assignment ('=') instead of equality ('==') are treated as errors. The program will always print some message if you have an error involving mismatched brackets of some kind, and will pass any legal program if its indentation is also correct, unless '=' is used in the top level of a condition expression. For cases in between it tries hard but may make mistakes, though if you are aiming for a properly indented program you can be sure that an error means that something is wrong.
.PP
When it detects signs of a bracket mismatch it makes a decision on the spot about the most likely underlying cause. It does not wait for more evidence to disambiguate this, so on the occasions it is wrong, not only are the messages inappropriate to some degree, but several messages may be produced concerning what is really a single (unrecognized) error. The most common example of this is if you have the wrong indent on a closing brace such that it matches an earlier opening brace, cchk assumes first that there is a missing closing brace, and then when it finds the second closing brace that this has no matching opening brace (this having been already wrongly accounted for). The summary it gives at the end tells you whether there was really a net imbalance of brackets, which may help sort out these cases.
.PP
.I cchk
was written as a result of the following observations. 1) In Unix, modularity suggests that it is appropriate to have different programs with different special expertise where other systems would cram them all into one program. Thus lint incorporates special knowledge about type-checking and portability considerations that would be inappropriate in a compiler.
.I cchk
like lint takes advantage of the fact that since it is not the compiler it can be wrong some of the time without preventing anyone from doing anything. 2) C has, in my opinion, some bad choices in its syntax that cause frequent errors by users. It turns out, though, that these can largely be checked for cheaply, which alleviates the original poor design choice. These are: a) Not supporting nested comments (nor warning about them in the compiler). b) Not having an "endif" (or "fi") closer to terminate if statements, thus leaving users open to the dangling else problem. (This is the problem that if you have nested if statements the following else will get bound to the nearest preceding one, which is not always the intuitively reasonable one.) This is especially troublesome, as it means among other things that if you modify a program by adding an else clause to an existing if statement, you may have to modify (by adding braces) not the if statement to which you are attaching the else, but a nested if statement acting as its "then" clause. c) The use of '=' for assignment, following Fortran's bad usage. It seems to be the case that both '=' and '==' get seen and mentally read as "equals" so that it is hard to spot if you write '=' for '==' in conditionals, an error that may happen either because of the language-promoted confusion itself, or because of a typing slip (which is then hard to spot). 3) The C compiler produces outstandingly unhelpful error messages as a rule, from the point of view of a user who wants to make corrections as fast as possible. Once past the beginner stage however, a user can usually do all right by ignoring the text of the error message, which almost never tells her/him what to correct, and attending to the line-number: generally when your attention is directed to only a line or two you can tell what is wrong. This breaks down when the compiler fails to generate anything like the helpful line number.
This is usually however in cases of failure to match brackets of some sort -- something which is easy for another program to check. Furthermore attending to the user's indentation usually allows accurate diagnoses and helpful messages to be generated in just these cases.
.PP
.I cchk,
then, attempts to address these points largely by checking bracket matches and using indentation to guess what the real problem was -- whether a missing opener, a missing closer, wrong indentation, or some other mistake such as a spurious character. Like the compiler, it has only a fair chance of recovering after an error and commenting intelligently on the remaining code. However its relatively fast running time means that correcting only the first error in each cycle is not too time consuming.
.SH SEE\ ALSO
lint(1), cc(1)
.SH AUTHOR
Steve Draper
.SH BUGS
It inflicts its own idea of good indentation, which neither matches a recognized standard exactly nor your own practices. It can generate several error descriptions where there is only one error -- one that it does not describe.
.PP
It does not deal with the preprocessor intelligently. There are two kinds of case to note: 1) defines may themselves not be good C e.g. #define ctrl(letter) ('letter' & 077)
.br
will work ok in the program but will draw "bad character constant" from
.I cchk.
Similarly, though more questionable, you might define your own opener and closer e.g. #define then { #define endif } 2) Some uses of #ifdef will confuse
.I cchk,
for instance if alternative if-condition lines are given, controlled by #ifdef ... #else
.I cchk
will see them both. Similarly using "#ifdef\ comment" to comment out parts of the text in order to overcome the lack of nested comments in C will draw fire if the commented out section is not legal C.
.PP
This could be overcome by piping the program through the preprocessor before
.I cchk
sees it i.e. by cc -E foo.c | cchk
.br
but then the line numbers
.I cchk
generates will be wrong.
.SH DIAGNOSTICS
The program returns status 1 if warnings were issued, status 2 if errors were detected, and 0 if neither.