[net.bugs.4bsd] "xstr" speedup

steveh@hammer.UUCP (Stephen Hemminger) (09/28/85)

The following change speeds up xstr substantially (it can be a big CPU hog
if you are building a big program).

Gprof says most of the time was spent in istail, which was doing lots
of strlen's.  By finding the length of the string once at the start of
the loop, and storing the length of the hash element in the table,
things go a lot faster (it becomes I/O bound).

It also incorporates Guy's fixes (in a different form).
	
-------------------------
*** xstr.c.orig	Sat Sep 28 09:46:46 1985
--- xstr.c	Sat Sep 28 10:15:17 1985
***************
*** 11,17
   * November, 1978
   */
  
! #define	ignore(a)	Ignore((char *) a)
  
  char	*calloc();
  off_t	tellpt;

--- 11,17 -----
   * November, 1978
   */
  
! #define	ignore(a)	((void) a)
  
  /*NOSTRICT*/
  char *
***************
*** 13,19
  
  #define	ignore(a)	Ignore((char *) a)
  
! char	*calloc();
  off_t	tellpt;
  off_t	hashit();
  char	*mktemp();

--- 13,33 -----
  
  #define	ignore(a)	((void) a)
  
! /*NOSTRICT*/
! char *
! emalloc(size)
! 	unsigned size;	
! {
! 	register char *p;
! 	extern char *malloc();
! 
! 	if( (p = malloc(size)) == NULL) {
! 		fprintf(stderr, "xstr: out of memory\n");
! 		exit(8);
! 	}
! 	return p;
! }
! 
  off_t	tellpt;
  off_t	hashit();
  char	*mktemp();
***************
*** 18,24
  off_t	hashit();
  char	*mktemp();
  int	onintr();
- char	*savestr();
  char	*strcat();
  char	*strcpy();
  off_t	yankstr();

--- 32,37 -----
  off_t	hashit();
  char	*mktemp();
  int	onintr();
  char	*strcat();
  char	*strcpy();
  off_t	yankstr();
***************
*** 59,65
  		} while (*cp);
  	}
  	if (signal(SIGINT, SIG_IGN) == SIG_DFL)
! 		signal(SIGINT, onintr);
  	if (cflg || argc == 0 && !readstd)
  		inithash();
  	else

--- 72,78 -----
  		} while (*cp);
  	}
  	if (signal(SIGINT, SIG_IGN) == SIG_DFL)
! 		ignore(signal(SIGINT, onintr));
  	if (cflg || argc == 0 && !readstd)
  		inithash();
  	else
***************
*** 63,69
  	if (cflg || argc == 0 && !readstd)
  		inithash();
  	else
! 		strings = mktemp(savestr("/tmp/xstrXXXXXX"));
  	while (readstd || argc > 0) {
  		if (freopen("x.c", "w", stdout) == NULL)
  			perror("x.c"), exit(1);

--- 76,82 -----
  	if (cflg || argc == 0 && !readstd)
  		inithash();
  	else
! 		strings = mktemp("/tmp/xstrXXXXXX");
  	while (readstd || argc > 0) {
  		if (freopen("x.c", "w", stdout) == NULL)
  			perror("x.c"), exit(1);
***************
*** 91,97
  	register int c;
  	register int incomm = 0;
  
! 	printf("char\txstr[];\n");
  	for (;;) {
  		if (fgets(linebuf, sizeof linebuf, stdin) == NULL) {
  			if (ferror(stdin)) {

--- 104,110 -----
  	register int c;
  	register int incomm = 0;
  
! 	printf("extern char\txstr[];\n");
  	for (;;) {
  		if (fgets(linebuf, sizeof linebuf, stdin) == NULL) {
  			if (ferror(stdin)) {
***************
*** 248,255
  #define	BUCKETS	128
  
  struct	hash {
- 	off_t	hpt;
- 	char	*hstr;
  	struct	hash *hnext;
  	short	hnew;
  } bucket[BUCKETS];

--- 261,266 -----
  #define	BUCKETS	128
  
  struct	hash {
  	struct	hash *hnext;
  	char	*hstr;
  	int	hlen;
***************
*** 251,256
  	off_t	hpt;
  	char	*hstr;
  	struct	hash *hnext;
  	short	hnew;
  } bucket[BUCKETS];
  

--- 262,270 -----
  
  struct	hash {
  	struct	hash *hnext;
+ 	char	*hstr;
+ 	int	hlen;
+ 	off_t	hpt;
  	short	hnew;
  } bucket[BUCKETS];
  
***************
*** 259,265
  	char *str;
  	int new;
  {
- 	int i;
  	register struct hash *hp, *hp0;
  
  	hp = hp0 = &bucket[lastchr(str) & 0177];

--- 273,278 -----
  	char *str;
  	int new;
  {
  	register struct hash *hp, *hp0;
  	register int i, len;
  
***************
*** 261,266
  {
  	int i;
  	register struct hash *hp, *hp0;
  
  	hp = hp0 = &bucket[lastchr(str) & 0177];
  	while (hp->hnext) {

--- 274,280 -----
  	int new;
  {
  	register struct hash *hp, *hp0;
+ 	register int i, len;
  
  	hp0 = &bucket[lastchr(str) & 0177];
  	len = strlen(str);
***************
*** 262,272
  	int i;
  	register struct hash *hp, *hp0;
  
! 	hp = hp0 = &bucket[lastchr(str) & 0177];
! 	while (hp->hnext) {
! 		hp = hp->hnext;
! 		i = istail(str, hp->hstr);
! 		if (i >= 0)
  			return (hp->hpt + i);
  	}
  	hp = (struct hash *) calloc(1, sizeof (*hp));

--- 276,286 -----
  	register struct hash *hp, *hp0;
  	register int i, len;
  
! 	hp0 = &bucket[lastchr(str) & 0177];
! 	len = strlen(str);
! 	for (hp = hp0; hp; hp = hp->hnext) {
! 		i = hp->hlen - len;
! 		if(i >= 0 && strcmp(&hp->hstr[i], str) == 0)
  			return (hp->hpt + i);
  	}
  	hp = (struct hash *) emalloc(sizeof (*hp));
***************
*** 269,275
  		if (i >= 0)
  			return (hp->hpt + i);
  	}
! 	hp = (struct hash *) calloc(1, sizeof (*hp));
  	hp->hpt = mesgpt;
  	hp->hstr = savestr(str);
  	mesgpt += strlen(hp->hstr) + 1;

--- 283,289 -----
  		if(i >= 0 && strcmp(&hp->hstr[i], str) == 0)
  			return (hp->hpt + i);
  	}
! 	hp = (struct hash *) emalloc(sizeof (*hp));
  	hp->hpt = mesgpt;
  	hp->hlen = len;
  	hp->hstr = strcpy(emalloc(len+1), str);
***************
*** 271,278
  	}
  	hp = (struct hash *) calloc(1, sizeof (*hp));
  	hp->hpt = mesgpt;
! 	hp->hstr = savestr(str);
! 	mesgpt += strlen(hp->hstr) + 1;
  	hp->hnext = hp0->hnext;
  	hp->hnew = new;
  	hp0->hnext = hp;

--- 285,293 -----
  	}
  	hp = (struct hash *) emalloc(sizeof (*hp));
  	hp->hpt = mesgpt;
! 	hp->hlen = len;
! 	hp->hstr = strcpy(emalloc(len+1), str);
! 	mesgpt += len + 1;
  	hp->hnext = hp0->hnext;
  	hp->hnew = new;
  	hp0->hnext = hp;
***************
*** 301,307
  		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) {
  			found(hp->hnew, hp->hpt, hp->hstr);
  			if (hp->hnew) {
! 				fseek(mesgwrit, hp->hpt, 0);
  				ignore(fwrite(hp->hstr, strlen(hp->hstr) + 1, 1, mesgwrit));
  				if (ferror(mesgwrit))
  					perror(strings), exit(4);

--- 316,322 -----
  		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) {
  			found(hp->hnew, hp->hpt, hp->hstr);
  			if (hp->hnew) {
! 				ignore(fseek(mesgwrit, hp->hpt, 0));
  				ignore(fwrite(hp->hstr, strlen(hp->hstr) + 1, 1, mesgwrit));
  				if (ferror(mesgwrit))
  					perror(strings), exit(4);
***************
*** 307,313
  					perror(strings), exit(4);
  			}
  		}
! 	ignore(fclose(mesgwrit));
  }
  
  found(new, off, str)

--- 322,329 -----
  					perror(strings), exit(4);
  			}
  		}
! 	if(fclose(mesgwrit) == EOF)
! 		perror(strings), exit(4);
  }
  
  found(new, off, str)
***************
*** 315,322
  	off_t off;
  	char *str;
  {
- 	register char *cp;
- 
  	if (vflg == 0)
  		return;
  	if (!new)

--- 331,336 -----
  	off_t off;
  	char *str;
  {
  	if (vflg == 0)
  		return;
  	if (!new)
***************
*** 377,405
  	ignore(fclose(strf));
  }
  
- char *
- savestr(cp)
- 	register char *cp;
- {
- 	register char *dp = (char *) calloc(1, strlen(cp) + 1);
- 
- 	return (strcpy(dp, cp));
- }
- 
- Ignore(a)
- 	char *a;
- {
- 
- 	a = a;
- }
- 
- ignorf(a)
- 	int (*a)();
- {
- 
- 	a = a;
- }
- 
  lastchr(cp)
  	register char *cp;
  {

--- 391,396 -----
  	ignore(fclose(strf));
  }
  
  lastchr(cp)
  	register char *cp;
  {
***************
*** 409,424
  	return (*cp);
  }
  
- istail(str, of)
- 	register char *str, *of;
- {
- 	register int d = strlen(of) - strlen(str);
- 
- 	if (d < 0 || strcmp(&of[d], str) != 0)
- 		return (-1);
- 	return (d);
- }
- 
  onintr()
  {
  

--- 400,405 -----
  	return (*cp);
  }
  
  onintr()
  {
  
***************
*** 422,428
  onintr()
  {
  
! 	ignorf(signal(SIGINT, SIG_IGN));
  	if (strings[0] == '/')
  		ignore(unlink(strings));
  	ignore(unlink("x.c"));

--- 403,409 -----
  onintr()
  {
  
! 	ignore(signal(SIGINT, SIG_IGN));
  	if (strings[0] == '/')
  		ignore(unlink(strings));
  	ignore(unlink("x.c"));