guy@sun.uucp (Guy Harris) (04/18/86)
The S5 "fgrep" has a bug in it. If you have a file with shxp hers his xk in it, and you feed that as a pattern file to the S5 "fgrep", it will NOT match on "shxk"! Here's a fix. It also includes Doug Gwyn's recent fixes, and some other changes: Checks for I/O errors reading files. Better error messages - uses "perror". Runs much faster - doesn't call a subroutine to do character matching (I posted this fix a while ago). Disallows multiple "-f" flags. (Should also disallow multiple "-e" flags as well - I just noticed this, so it's not in this "diff" listing.) Allows "-e" flag when reading standard input. *** fgrep.c.orig Thu Apr 17 21:27:30 1986 --- fgrep.c.fixed Thu Apr 17 21:32:27 1986 *************** *** 9,14 */ #include <stdio.h> #define MAXSIZ 6000 #define QSIZE 400 --- 9,15 ----- */ #include <stdio.h> + #include <ctype.h> #define MAXSIZ 6000 #define QSIZE 400 *************** *** 30,35 int nsucc; long tln; FILE *wordf; char *argptr; extern char *optarg; extern int optind; --- 31,37 ----- int nsucc; long tln; FILE *wordf; + char *wordfname; char *argptr; extern char *optarg; extern int optind; *************** *** 34,39 extern char *optarg; extern int optind; main(argc, argv) char **argv; { --- 36,45 ----- extern char *optarg; extern int optind; + /* The following macro was inserted to allow for the "-i" option */ + + #define same(a, b) ((a) == (b) || iflag && ((a) ^ (b)) == ' ' && letter(a) == letter(b)) + main(argc, argv) char **argv; { *************** *** 63,68 continue; case 'f': fflag++; wordf = fopen(optarg, "r"); if (wordf==NULL) { --- 69,78 ----- continue; case 'f': + if (fflag) { + fprintf(stderr, "fgrep: Only one \"-f\" option allowed\n"); + exit(2); + } fflag++; wordfname = optarg; wordf = fopen(wordfname, "r"); *************** *** 64,70 case 'f': fflag++; ! wordf = fopen(optarg, "r"); if (wordf==NULL) { fprintf(stderr, "fgrep: can't open %s\n", optarg); exit(2); --- 74,81 ----- exit(2); } fflag++; ! wordfname = optarg; ! wordf = fopen(wordfname, "r"); if (wordf==NULL) { fprintf(stderr, "fgrep: "); perror(wordfname); *************** *** 66,72 fflag++; wordf = fopen(optarg, "r"); if (wordf==NULL) { ! fprintf(stderr, "fgrep: can't open %s\n", optarg); exit(2); } continue; --- 77,84 ----- wordfname = optarg; wordf = fopen(wordfname, "r"); if (wordf==NULL) { ! fprintf(stderr, "fgrep: "); ! perror(wordfname); exit(2); } continue; *************** *** 92,98 } argc -= optind; ! if (errflg || ((argc <= 0) && !fflag)) { printf("usage: fgrep %s\n",usage); exit(2); } --- 104,110 ----- } argc -= optind; ! if (errflg || ((argc <= 0) && !fflag && !eflag)) { printf("usage: fgrep %s\n",usage); exit(2); } *************** *** 128,134 char *nlp; if (file) { if ((fptr = fopen(file, "r")) == NULL) { ! fprintf(stderr, "fgrep: can't open %s\n", file); retcode = 2; return; } --- 140,147 ----- char *nlp; if (file) { if ((fptr = fopen(file, "r")) == NULL) { ! fprintf(stderr, "fgrep: "); ! perror(file); retcode = 2; return; } *************** *** 187,192 blkno += ccount; } } if ( (vflag && (failed == 0 || xflag == 0)) || (vflag == 0 && xflag && failed) ) goto nomatch; succeed: nsucc = 1; --- 200,207 ----- blkno += ccount; } } + if (ccount <= 0 && ferror(fptr)) + goto done; if ( (vflag && (failed == 0 || xflag == 0)) || (vflag == 0 && xflag && failed) ) goto nomatch; succeed: nsucc = 1; *************** *** 198,204 } else { if (nfile > 1) printf("%s:", file); ! if (bflag) printf("%d:", (blkno-ccount-1)/BUFSIZ); if (nflag) printf("%ld:", lnum); if (p <= nlp) { while (nlp < &buf[2*BUFSIZ]) putchar(*nlp++); --- 213,219 ----- } else { if (nfile > 1) printf("%s:", file); ! if (bflag) printf("%ld:", (blkno-ccount-1)/BUFSIZ); if (nflag) printf("%ld:", lnum); if (p <= nlp) { while (nlp < &buf[2*BUFSIZ]) putchar(*nlp++); *************** *** 221,226 failed = 0; } } fclose(fptr); if (cflag) { if (nfile > 1) --- 236,249 ----- failed = 0; } } + done: + if (ccount <= 0 && ferror(fptr)) { + fprintf(stderr, "fgrep: Read error on "); + perror(file ? file : "standard input"); + retcode = 2; + fclose(fptr); + return; + } fclose(fptr); if (cflag) { if (nfile > 1) *************** *** 232,241 getargc() { register c; ! if (wordf) ! return(getc(wordf)); ! if ((c = *argptr++) == '\0') ! return(EOF); return(c); } --- 255,274 ----- getargc() { register c; ! if (wordf) { ! c = getc(wordf); ! if (c == EOF) { ! if (ferror(wordf)) { ! fprintf(stderr, "fgrep: Read error on "); ! perror(wordfname); ! exit(1); ! } ! fclose(wordf); ! } ! } else { ! if ((c = *argptr++) == '\0') ! return(EOF); ! } return(c); } *************** *** 303,309 } overflo() { ! fprintf(stderr, "wordlist too large\n"); exit(2); } cfail() { --- 336,342 ----- } overflo() { ! fprintf(stderr, "fgrep: wordlist too large\n"); exit(2); } cfail() { *************** *** 310,315 struct words *queue[QSIZE]; struct words **front, **rear; struct words *state; register char c; register struct words *s; s = w; --- 343,349 ----- struct words *queue[QSIZE]; struct words **front, **rear; struct words *state; + int bstart; register char c; register struct words *s; s = w; *************** *** 328,333 front = queue; else front++; cloop: if ((c = s->inp) != 0) { *rear = (q = s->nst); if (front < rear) if (rear >= &queue[QSIZE-1]) --- 362,368 ----- front = queue; else front++; cloop: if ((c = s->inp) != 0) { + bstart = 0; *rear = (q = s->nst); if (front < rear) if (rear >= &queue[QSIZE-1]) *************** *** 337,343 else if (++rear == front) overflo(); state = s->fail; ! floop: if (state == 0) state = w; if (state->inp == c) { qloop: q->fail = state->nst; if ((state->nst)->out == 1) q->out = 1; --- 372,381 ----- else if (++rear == front) overflo(); state = s->fail; ! floop: if (state == 0) { ! state = w; ! bstart = 1; ! } if (state->inp == c) { qloop: q->fail = state->nst; if ((state->nst)->out == 1) q->out = 1; *************** *** 345,350 } else if ((state = state->link) != 0) goto floop; } if ((s = s->link) != 0) goto cloop; --- 383,392 ----- } else if ((state = state->link) != 0) goto floop; + else if(bstart == 0){ + state = 0; + goto floop; + } } if ((s = s->link) != 0) goto cloop; *************** *** 351,357 } } ! /* The following functions were inserted to allow for the "-i" option */ same(a, b) register int a, b; --- 393,399 ----- } } ! /* The following function was inserted to allow for the "-i" option */ letter(c) register int c; *************** *** 353,364 /* The following functions were inserted to allow for the "-i" option */ - same(a, b) - register int a, b; - { - return (a == b || iflag && (a ^ b) == ' ' && letter(a) == letter(b)); - } - letter(c) register int c; { --- 395,400 ----- /* The following function was inserted to allow for the "-i" option */ letter(c) register int c; { *************** *** 362,370 letter(c) register int c; { ! if (c >= 'a' && c <= 'z') ! return (c); ! if (c >= 'A' && c <= 'z') ! return (c + 'a' - 'A'); return(0); } --- 398,406 ----- letter(c) register int c; { ! if (islower(c)) ! return(c); ! if (isupper(c)) ! return(_tolower(c)); return(0); } The use of a macro for character comparison, as well as this bug fix, came from the 4.2BSD version of "fgrep". I think this bug is also in the V7 "fgrep"; the section of code in the S5 "fgrep" where this bug lies is similar to the V7 one and lacks the changes from 4.2. Here's an edited "diff" listing of the interesting differences between the V7 and 4.2 "fgrep", in case you're curious: 24a12 > #include <ctype.h> 37c25 < int bflag, cflag, fflag, lflag, nflag, vflag, xflag; --- > int bflag, cflag, fflag, lflag, nflag, vflag, xflag, yflag; 93a83,86 > case 'i': /* Berkeley */ > case 'y': /* Btl */ > yflag++; > continue; 95c88 < fprintf(stderr, "egrep: unknown flag\n"); --- > fprintf(stderr, "fgrep: unknown flag\n"); 104c97 < fprintf(stderr, "egrep: can't open %s\n", *argv); --- > fprintf(stderr, "fgrep: can't open %s\n", *argv); 125a119,120 > # define ccomp(a,b) (yflag ? lca(a)==lca(b) : a==b) > # define lca(x) (isupper(x) ? tolower(x) : x) 161c158 < if (c->inp == *p) { --- > if (ccomp(c->inp, *p)) { 174c171 < if (c->inp == *p) { --- > if (ccomp(c->inp , *p)) { 319a317 > int bstart; 337a336 > bstart = 0; 347c346,349 < floop: if (state == 0) state = w; --- > floop: if (state == 0) { > state = w; > bstart = 1; > } 349c351 < q->fail = state->nst; --- > qloop: q->fail = state->nst; 351c353 < continue; --- > if ((q = q->link) != 0) goto qloop; 354a357,360 > else if(bstart == 0){ > state = 0; > goto floop; > } The interesting things to note are: 1) The "ignore case" flags - "-i" and "-y" - are not in the V7 version, even though the Berkeley code claims "-y" is a BTL flag. The S5 version does have such a flag - but, despite what the comments might make you expect, it's "-i", not "-y"! 2) The error messages in the V7 "fgrep" call it "egrep", not "fgrep". 3) The bug in question is fixed by the stuff at the end that involves the variable "bstart". (I *think* the problem is that the "fgrep" implementation of the Aho/Corasick algorithms - "Efficient String Matching: An Aid to Bibliographic Search", Alfred V. Aho and Margaret J. Corasick, *Communications of the ACM, June 1975, Volume 18, Number 6, pp. 333 - 340 - was lacking the part of Algorithm 3 reading while g(state, a) = fail do state := f(state) Since "fgrep" doesn't care what particular pattern was matched, and doesn't bother doing any further string matching once it's found one match on a line, I think that this part isn't completely necessary; however, the stuff added by the bug fix is necessary.) 4) Either this fix, or the one involving "qloop", or both, were in some update to V7 which I remember seeing. This update also had "fsck", some fixes to bugs mentioned by Dennis Ritchie in a talk which was commented on in some issue of ";login" (one such bug, I remember, was that some error bit for some DEC tape driver was incorrectly #defined in the driver), new versions of "f77" and "awk" and some other programs (the new versions ended up in 4BSD, but didn't completely make it into System N until S5R2 or so). This bit of UNIX trivia not brought to you by Abbott Horn (or is it Abbott and Horn?), makers of Trivial Pursuit. Another note, on a standard I/O "feature"/bug: While testing this, I noticed that if you ran "fgrep" and just typed "shkx" followed by control-D, it waited for another control-D before exiting. I built it with the 4.2BSD standard I/O library, and it exited as soon as you typed ^D. There was a lot of flaming about the so-called "sticky EOF" feature of the 4.2BSD standard I/O a while ago. The 4.2BSD standard I/O returns an EOF on all attempts to read from a stream which has already gotten an EOF (i.e., any stream where the _IOEOF bit is set, so that "feof(stream)" returns TRUE). This change caused programs which read something from standard input until EOF, and then start reading more data from standard input without doing a "clearerr(stdin)", to break. These programs were assuming that standard input was a terminal (unless they did an "fseek" after the first EOF, there is no way that subsequent reads from a regular file wouldn't get EOF, assuming somebody wasn't writing to the file they were reading from). I believe the change was made in 4.2BSD to fix "fread"s which saw an EOF before they got all the data requested. This change also fixes the problem described above, as well as making the behavior of "feof" consistent with the behavior of reads from the file; other versions of standard I/O can have "feof(stream)" be true, but have reads from "stream" not return an end-of-file indication! -- Guy Harris {ihnp4, decvax, seismo, decwrl, ...}!sun!guy guy@sun.arpa
guy@sun.UUCP (04/23/86)
> 4) Either this fix, or the one involving "qloop", or both, were > in some update to V7 which I remember seeing. This update > also had "fsck", some fixes to bugs mentioned by Dennis Ritchie > in a talk which was commented on in some issue of ";login" > (one such bug, I remember, was that some error bit for some > DEC tape driver was incorrectly #defined in the driver), new > versions of "f77" and "awk" and some other programs (the > new versions ended up in 4BSD, but didn't completely make it > into System N until S5R2 or so). It sure was. From the README file from that addendum tape: Addenda to UNIX 7th edition distribution tape, 12/2/80. fgrep.c: new source for fgrep(1) corrects certain troubles with keys with common prefixes The "V7 addendum" and 4.2BSD versions of "fgrep" are almost identical; the only significant differences are that the 4.2BSD version accepts "-i" as well as "-y" for the "case-independent matching" flag (which is interesting, considering the S5 version accepts "-i" but not "-y") and the 4.2BSD version uses a block size of BUFSIZ (1024 bytes on 4.xBSD) rather than 512. This, of course, raises the question "why didn't this fix make it into System V, and why did System V's "-i" flag use a subroutine for character comparison rather than a macro?" I suspect the answer is the same as for the question "why didn't the new 'f77' or 'awk' make it in until S5R2 or so", which is "nobody in the USDL knew about it." One suspects the lines of communication between BTL research and AT&T-IS need some Roto-Rooter(TM) work... > This bit of UNIX trivia not brought to you by Abbott Horn (or is it Abbott > and Horn?), makers of Trivial Pursuit. It's Horn Abbott, you nitwit. -- Guy Harris {ihnp4, decvax, seismo, decwrl, ...}!sun!guy guy@sun.arpa