[comp.bugs.sys5] S5R2 "grep" performance improvements aren't

guy@gorodish.UUCP (01/29/87)

In S5R2, "fread" and "fwrite" were changed not to just loop doing
"getc" and "putc".  "fgets" and "fputs" were changed to do the same
thing.  "grep" was changed to use a slightly-different version of
"fgets", called "fgetl", that returned the length of the line, and 0
on EOF or error, rather than a pointer to the line and NULL on EOF or
error.

The theory was, I guess, that this would speed it up.  Unfortunately,
the reality is that it slows it down!  Some tests on a 3B2/400 and a
Sun-3/180 indicate that the old V7 code was faster.

This also casts some doubt on the merits of the changes to "fgets"
and "fputs"; it's worth noting that the 4.3BSD "fread" and "fwrite"
use similar techniques to the S5R2 ones, but the 4.3BSD "fgets" and
"fputs" don't.

Also, the "grep -i" code is dumb; it should use register variables
and pointers, and not call the "tolower" subroutine.  This stuff is
done once per character, guys....

Here's a fix, along with some other improvements (proper casting of
null pointers, reasonable error checking and error messages).

*** /archbeluga/s5r2/usr/src/cmd/grep.c	Mon Jul 25 14:24:55 1983
--- grep.c	Wed Jan 28 17:59:51 1987
***************
*** 121,127 ****
  	compile(*argv, expbuf, &expbuf[ESIZE], '\0');
  
  	if (--argc == 0)
! 		execute(NULL);
  	else
  		while (argc-- > 0)
  			execute(*++argv);
--- 121,127 ----
  	compile(*argv, expbuf, &expbuf[ESIZE], '\0');
  
  	if (--argc == 0)
! 		execute((char *)NULL);
  	else
  		while (argc-- > 0)
  			execute(*++argv);
***************
*** 130,145 ****
  }
  
  execute(file)
! register char *file;
  {
  	register char *lbuf;
! 	register i;
  
  	if (file == NULL)
  		temp = stdin;
  	else if ( (temp = fopen(file, "r")) == NULL) {
! 		if (!sflag)
! 			errmsg("grep: can't open %s\n", file);
  		nsucc = 2;
  		return;
  	}
--- 130,149 ----
  }
  
  execute(file)
! char *file;
  {
+ 	register FILE *temp;
  	register char *lbuf;
! 	register char *p1;
! 	register int c;
  
  	if (file == NULL)
  		temp = stdin;
  	else if ( (temp = fopen(file, "r")) == NULL) {
! 		if (!sflag) {
! 			fprintf(stderr, "grep: ");
! 			perror(file);
! 		}
  		nsucc = 2;
  		return;
  	}
***************
*** 147,175 ****
  	lnum = 0;
  	tln = 0;
  
! 	while((nchars = fgetl(prntbuf, BUFSIZ, temp)) != 0) {
! 		if(nchars == BUFSIZ - 1  &&  prntbuf[nchars-1] != '\n')
! 			continue;
  
- 		if(prntbuf[nchars-1] == '\n') {
- 			nlflag = 1;
- 			prntbuf[nchars-1] = '\0';
- 		} else
- 			nlflag = 0;
- 
  		lnum++;
  
  		if (iflag) {
! 			for(i=0, lbuf=linebuf; i < nchars; i++, lbuf++)
! 				*lbuf = (char)tolower((int)prntbuf[i]);
  			*lbuf = '\0';
  			lbuf = linebuf;
  		} else
  			lbuf = prntbuf;
  
! 		if((step(lbuf, expbuf) ^ vflag) && succeed(file) == 1)
! 			break;	/* lflag only once */
  	}
  	fclose(temp);
  
  	if (cflag) {
--- 151,222 ----
  	lnum = 0;
  	tln = 0;
  
! 	/*
! 	 * This flag will be cleared on the last line if it doesn't contain
! 	 * a newline.
! 	 */
! 	nlflag = 1;
! 	for (;;) {
! 		p1 = prntbuf;
! 		while ((c = getc(temp)) != '\n') {
! 			if (c == EOF) {
! 				if (ferror(temp)) {
! 					fprintf(stderr, "grep: Read error on ");
! 					perror(file ? file : "standard input");
! 					nsucc = 2;
! 					return;
! 				}
! 				if (p1 == prntbuf)
! 					goto out;
! 				nlflag = 0;
! 				break;
! 			}
! 			*p1++ = c;
! 			if (p1 >= &prntbuf[BUFSIZ-1])
! 				break;
! 		}
! 		*p1 = '\0';
! 		nchars = p1 - prntbuf;
  
  		lnum++;
  
  		if (iflag) {
! 			lbuf = linebuf;
! 			p1 = prntbuf;
! 			while ((c = *p1++) != '\0') {
! 				if (isupper(c))
! 					*lbuf++ = _tolower(c);
! 				else
! 					*lbuf++ = c;
! 			}
  			*lbuf = '\0';
  			lbuf = linebuf;
  		} else
  			lbuf = prntbuf;
  
! 		if(step(lbuf, expbuf))
! 			goto found;
! 		if(vflag) {
! 			if(succeed(file, temp))
! 				break;
! 		}
! 		continue;
! 
! 	found:
! 		if(!vflag) {
! 			if(succeed(file, temp))
! 				break;
! 		}
  	}
+ 	if (ferror(temp)) {
+ 		fprintf(stderr, "grep: Read error on ");
+ 		perror(file ? file : "standard input");
+ 		nsucc = 2;
+ 		fclose(temp);
+ 		return;
+ 	}
+ 
+ out:
  	fclose(temp);
  
  	if (cflag) {
***************
*** 180,187 ****
  	return;
  }
  
! succeed(f)
  register char *f;
  {
  	nsucc = (nsucc == 2) ? 2 : 1;
  	if (cflag) {
--- 227,235 ----
  	return;
  }
  
! succeed(f, iop)
  register char *f;
+ FILE *iop;
  {
  	nsucc = (nsucc == 2) ? 2 : 1;
  	if (cflag) {
***************
*** 197,211 ****
  		fprintf(stdout, "%s:", f);
  
  	if (bflag)	/* print block number */
! 		fprintf(stdout, "%ld:", (ftell(temp)-1)/BLKSIZE);
  
  	if (nflag)	/* print line number */
  		fprintf(stdout, "%ld:", lnum);
  
- 	if (nlflag)
- 		prntbuf[nchars-1] = '\n';
- 
  	fwrite(prntbuf, 1, nchars, stdout);
  	return(0);
  }
  
--- 245,258 ----
  		fprintf(stdout, "%s:", f);
  
  	if (bflag)	/* print block number */
! 		fprintf(stdout, "%ld:", (ftell(iop)-1)/BLKSIZE);
  
  	if (nflag)	/* print line number */
  		fprintf(stdout, "%ld:", lnum);
  
  	fwrite(prntbuf, 1, nchars, stdout);
+ 	if (nlflag)
+ 		putchar('\n');
  	return(0);
  }
  
***************
*** 257,306 ****
  
  	errmsg("%s\n", errstr[err]);
  	exit(2);
- }
- 
- /*
-  * The following code is a modified version of the fgets() stdio
-  * routine.  The reason why it is used instead of fgets() is that
-  * we need to know how many characters we read into the buffer.
-  * Thus that value is returned here instead of the value of s1.
-  */
- #define MIN(x, y)	(x < y ? x : y)
- #define _BUFSYNC(iop)	if (_bufend(iop) - iop->_ptr <   \
- 				( iop->_cnt < 0 ? 0 : iop->_cnt ) )  \
- 					_bufsync(iop)
- 
- extern int _filbuf();
- extern char *memccpy();
- 
- fgetl(ptr, size, iop)
- char *ptr;
- register int size;
- register FILE *iop;
- {
- 	char *p, *ptr0 = ptr;
- 	register int n;
- 
- 	for (size--; size > 0; size -= n) {
- 		if (iop->_cnt <= 0) { /* empty buffer */
- 			if (_filbuf(iop) == EOF) {
- 				if (ptr0 == ptr)
- 					return (NULL);
- 				break; /* no more data */
- 			}
- 			iop->_ptr--;
- 			iop->_cnt++;
- 		}
- 		n = MIN(size, iop->_cnt);
- 		if ((p = memccpy(ptr, (char *) iop->_ptr, '\n', n)) != NULL)
- 			n = p - ptr;
- 		ptr += n;
- 		iop->_cnt -= n;
- 		iop->_ptr += n;
- 		_BUFSYNC(iop);
- 		if (p != NULL)
- 			break; /* found '\n' in buffer */
- 	}
- 	*ptr = '\0';
- 	return (ptr-ptr0);
  }
--- 304,307 ----