hugh@hcrvx1.UUCP (Hugh Redelmeier) (01/11/85)
There has been much discussion recently about how the C standard may allow conforming compilers to ignore all but six characters of an identifier, and to ignore case. Also, many Unices (is that a trademark?) care about all of a long identifier (System V release 2 and 4.2BSD) while many others don't. Other net discussions claim it is sinful to distinguish identifiers by case. So I am distributing "clash", a program soon to celebrate its tenth birthday. I wrote it under 5th edition Unix, when that was current. I had hoped clash would become obsolete in short order by improvements in cc/as/ld. Then when lint was first described, I had thought surely it would take over the job. Oh well. # This is a shell archive. Remove anything before this line, then # unpack it by saving it in a file and typing "sh file". (Files # unpacked will be owned by you and have default permissions.) # # This archive contains: # clash.1 clash.c echo x - clash.1 cat > "clash.1" << '//E*O*F clash.1//' .TH CLASH 1 Local .SH NAME clash \- find indistinguishable or long identifiers .SH SYNOPSIS .B clash [ \fB\-actdlsm\fIn\fR ] [ file ] ... .SH DESCRIPTION .IR Clash finds identifiers that are not distinct in the first .I numSigChars characters, or finds identifiers that are longer than .I numSigChars characters. It lexically analyzes its input, ignoring comments. It does not parse, so it does not understand scoping. Some restrictions that .I clash might help detect: .IP - Most Unix file systems consider file names (components of pathnames) identical if their first 14 characters are identical. .IP - Many Unix assemblers and loaders consider only the first eight characters of an identifier. .IP - Many C compilers treat identifiers as identical if their first seven characters are the same (eight for identifiers that are not external). In fact, the ANSI C standard will probably make it legal for conforming compilers to ignore all but the first six characters and to ignore case distinctions. 
.IP - Yacc terminals become C preprocessor symbols, and should therefore differ within the first eight characters. .PP The argument list is a sequence of input file names and flags. If no input file name is given, the standard input is processed. .PP A flag operand starts with ``\-'' and continues with any number of option names. Flags d, l, s, and m toggle a corresponding switch. .TP .B \-a the input is a PDP-11 assembler program .TP .B \-c the input is a C program (default) .TP .B \-t the input is some other language (``text'') .TP .B \-d dump on error .TP .B \-l print long identifiers .TP .B \-s separate: process each file independently .TP .B \-m monocase: case distinctions don't count .TP .BI \- n sets .I numSigChars (default is 7) .SH "LOCAL INFO" Written at the University of Toronto by D. Hugh Redelmeier. .SH BUGS Understands neither libraries nor #include commands: all relevant files must be explicitly scanned. .br The maximum number of symbols and the maximum number of characters in them are fixed. //E*O*F clash.1// echo x - clash.c cat > "clash.c" << '//E*O*F clash.c//' /* Clash: find indistinct or long identifiers * * Synopsis: * clash [-actdlsm<number>] file ... * * Description: * * This program finds identifiers that are longer than 'numSigChars', * and those that are not distinct in the first 'numSigChars' chars. * The user can specify whether case distinctions are significant. * * -a means Unix PDP-11 Assembler input * -c means input is a C program (default) * -t means text (scan comments and strings) * -d means dump on error * -l means print long identifiers * -s means separate: process each file independently * -m means monocase: case distinctions don't count * -<number> sets 'numSigChars' (default is 7) * * Flags d, l, s, and m toggle a corresponding switch. * * Bugs: * Does not understand libraries and include files: all * relevant files must be explicitly scanned. 
* The maximum number of symbols (sizeSymTab) and * the maximum number of characters in them (sizeStrTab) * are fixed at compile time. * * D. Hugh Redelmeier 75.06.11 * * 1979?: Modified for Version 6 "typesetting tape" C. * 1984 Nov 29: Modified for System V C. * 1984 Dec 4: Added -m flag. */ #define UNINIT #include <stdio.h> #define TRUE 1 #define FALSE 0 int asmSw = FALSE; /* if on, scan assembler */ int dumpOnError = FALSE; /* controls method of exit on error */ int iLong = FALSE; /* if on, any long idents are reported */ int separateSw = FALSE; /* if on, all files are treated as separate */ int textSw = FALSE; /* if on, scan comments and strings */ int caseMask = 0177; /* if monocase, mask out case bit */ int numSigChars = 7; /* see introduction */ #define sizeSymTab 1000 struct symCell { char *cIdPtr; int cIdLen; struct symCell *cClashPtr; struct symCell *cNext; }; struct symCell symTab[sizeSymTab] UNINIT; int eLong UNINIT; int eClash UNINIT; struct symCell *freeSymCell UNINIT; #define sizeHashTab 373 struct symCell *hashTab[sizeHashTab] UNINIT; #define sizeStrTab 10000 char strTab[sizeStrTab] UNINIT; char *freeChar UNINIT; char *idPtr UNINIT; unsigned idHash UNINIT; int idLen UNINIT; char lastInChar UNINIT; int lineNo UNINIT; char **argCursor UNINIT; char *mIn UNINIT, *mFile UNINIT; /* message about input file */ assert(p,s) register int p; register char *s; { if (!p) { printf("\nclash(%s)",*argCursor); if (lineNo) printf(" in line %d",lineNo); printf(": %s.\n",s); if (dumpOnError) abort(); /*else*/ exit(1); } } init() { register struct symCell **p; eClash = eLong = FALSE; freeChar = strTab; freeSymCell = symTab; for (p=hashTab; p!=hashTab+sizeHashTab;) *p++ = NULL; lastInChar='\n'; } printAndInit() { if (freeChar!=strTab) { if (iLong && eLong) printLong(); if (eClash) printClash(); init(); } } int strSimilar(p1,p2) register char *p1, *p2; { register int len; len = idLen<numSigChars? 
idLen : numSigChars; do ; while (--len>=0 && ((*p1++^*p2++)&caseMask)==0); return len<0; } printSym(p) register struct symCell *p; { register int l; register char *c; c=p->cIdPtr; for (l=p->cIdLen; l!=0; l--) putchar(*c++); } int idHead(c) register char c; { return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || c=='_' || asmSw && c=='.'; } int getId() { register int c; register char delim; c=lastInChar; if (textSw) while (!idHead(c)) { if (c == '\n') { lineNo++; c=getchar(); if (c ==EOF) return FALSE; } else { assert(c!=EOF,"unexpected EOF"); c=getchar(); } } else if (asmSw) while (!idHead(c)) { switch (c) { case '\n': lineNo++; c=getchar(); if (c==EOF) return FALSE; continue; case '/': do c=getchar(); while (c!='\n' && c!=EOF); continue; case '\"': c=getchar(); if (c=='\\') c=getchar(); if (c==EOF || c=='\n') break; /* fall through */ case '\'': c=getchar(); if (c=='\\') c=getchar(); break; case '<': do { if (c=='\\') c=getchar(); assert((c!='\n')&(c!=EOF), "bad string"); c=getchar(); } while (c!='>'); break; case '\\': c=getchar(); } assert(c!='\n',"unexpected newline"); assert(c!=EOF,"unexpected EOF"); c=getchar(); } else /* must be c */ while (!idHead(c)) { switch (c) { case '\n': lineNo++; c=getchar(); if (c==EOF) return FALSE; continue; case '/': c=getchar(); if (c=='/') { do c=getchar(); while (c!='\n' && c!=EOF); continue; } else if (c=='*') { c=getchar(); do { while (c!='*') { if (c=='\n') lineNo++; else assert(c!=EOF,"unending comment"); c=getchar(); } c=getchar(); } while (c!='/'); break; } else continue; case '\"': case '\'': delim=c; c=getchar(); while (c!=delim) { if (c=='\\') c=getchar(); assert((c!='\n') && (c!=EOF), "bad string"); c=getchar(); } } assert(c!=EOF,"unexpected EOF"); c=getchar(); } idPtr=freeChar; idHash=0; idLen=0; do { assert(freeChar<&strTab[sizeStrTab], "char space"); *freeChar++ = c; if (++idLen <= numSigChars) idHash += (c&caseMask) * idLen; c=getchar(); } while ('0'<=c && c<='9' || idHead(c)); idHash %= sizeHashTab; if 
(idLen>numSigChars) eLong=TRUE; lastInChar=c; return TRUE; } buildTables() { register struct symCell *s; register struct symCell *cp; while (getId()) if (idLen<numSigChars && (caseMask&('a'^'A'))!=0) freeChar=idPtr; else { cp=NULL; for (s=hashTab[idHash]; ; s=s->cNext) { if (s == NULL) { /* identifier not found: add it */ assert(freeSymCell<&symTab[sizeSymTab], "sym space"); s=freeSymCell++; s->cIdPtr = idPtr; s->cIdLen = idLen; s->cClashPtr = cp; s->cNext = hashTab[idHash]; hashTab[idHash] = s; break; } if ((s->cIdLen == idLen) && strncmp(s->cIdPtr,idPtr,idLen)==0) { /* identifier found: done */ freeChar=idPtr; break; } if ((cp == NULL) && strSimilar(s->cIdPtr,idPtr)) { /* similar id found: remember */ cp=s; eClash=TRUE; } } } } printLong() { register struct symCell *s; printf("Symbols longer than %d chars%s%s:\n",numSigChars,mIn,mFile); for (s = &symTab[0]; s<freeSymCell; s++) if (s->cIdLen > numSigChars) { printSym(s); putchar('\n'); } } printClash() { register struct symCell *hp; register struct symCell *s; register struct symCell *t; printf("Identifiers not distinct in %d%s chars%s%s:\n", numSigChars, (caseMask&('a'^'A'))==0?" 
monocase" : "", mIn, mFile); for (hp = freeSymCell; hp > &symTab[0]; ) { hp--; if (hp->cClashPtr!=NULL) { for (s=hp; ; s=t) { printSym(s); t=s->cClashPtr; if (t==NULL) break; s->cClashPtr=NULL; putchar('/'); } putchar('\n'); } } } int main(argc,argv) int argc; char *argv[]; { register char *p; register int worked; init(); worked = FALSE; for (argCursor=argv; --argc>0 || !worked;) { p = *++argCursor; if (argc > 0 && *p=='-') { while (*++p!='\0') switch (*p) { case 't': textSw=TRUE; break; case 'a': textSw=FALSE; asmSw=TRUE; break; case 'c': asmSw=textSw=FALSE; break; case 'd': dumpOnError=!dumpOnError; break; case 'l': printAndInit(); iLong=!iLong; break; case 's': printAndInit(); separateSw=TRUE; break; case 'm': printAndInit(); caseMask ^= 'a'^'A'; break; default: assert(('0' <= *p) && (*p <= '9'),"funny option"); printAndInit(); for (numSigChars=0; '0'<=*p && *p<='9'; p++) numSigChars = numSigChars*10+(*p-'0'); p--; assert(numSigChars>0,"silly number"); } } else { mIn=""; mFile=""; if (argc>0) { if (separateSw) { mIn=" in "; mFile = p; } assert(freopen(p,"r",stdin)!=NULL, "no such file"); } if (separateSw) init(); buildTables(); if (separateSw || argc<=1) printAndInit(); fclose(stdin); worked = TRUE; } } return 0; } //E*O*F clash.c// exit 0