[net.news.b] Speedup kludge for vnews 'p' command

per@erix.UUCP (Per Hedeland) (05/29/84)

As followups so frequently appear before the articles they follow up to,
I find the vnews 'p' (parent) command very useful. However, if you have
a big history file, it is painfully slow. (Our history file is six weeks
long; on the other hand, we only receive some 25% of the total news flow.)

Using the dbm files (provided your system has them, of course) is a 
rather obvious speedup. However, the information content of these is not
kept up to date as the readable file is compacted. But since expire is the
only (?) program that compacts the file, letting expire rebuild the dbm
files every time takes care of this.

The changes below assume that you have a version of expire with the -r 
(rebuild) option. The win in the 'p' command is, on our system, typically a
reduction from ~7 seconds to ~1. The same effect is (of course) achieved for 
the "article id" command in vnews and readnews. The penalty is some 10% increased
execution time for expire (who cares?). If, by chance, the entry in the dbm
files is incorrect, the programs revert to the "old" method of linear search.

Per Hedeland
per@erix.UUCP  or  ...{decvax,philabs}!mcvax!enea!erix!per


*** funcs.old.c	Sun May 27 00:09:59 1984
--- funcs.c	Mon May 28 16:50:51 1984
***************
*** 855,860
  	char oidbuf[BUFSIZ];
  	FILE *hfp;
  	char *p;
  
  	/* Try to understand old artid's as well.  Assume .UUCP domain. */
  	if (artid[0] != '<') {

--- 855,868 -----
  	char oidbuf[BUFSIZ];
  	FILE *hfp;
  	char *p;
+ #ifdef DBM
+ 	typedef struct {
+ 		char *dptr;
+ 		int dsize;
+ 	} datum;
+ 	datum lhs, rhs;
+ 	datum fetch();
+ #endif DBM
  
  	/* Try to understand old artid's as well.  Assume .UUCP domain. */
  	if (artid[0] != '<') {
***************
*** 867,872
  	} else
  		strcpy(oidbuf, artid);
  	hfp = xfopen(ARTFILE, "r");
  	while (fgets(lbuf, BUFLEN, hfp) != NULL) {
  		p = index(lbuf, '\t');
  		if (p == NULL)

--- 875,906 -----
  	} else
  		strcpy(oidbuf, artid);
  	hfp = xfopen(ARTFILE, "r");
+ #ifdef DBM
+ 	/* Use the dbm files (hoping expire is keeping them up to date) */
+ 	dbminit(ARTFILE);
+ 	lhs.dptr = oidbuf;
+ 	lhs.dsize = strlen(oidbuf) + 1;
+ 	rhs = fetch(lhs);
+ 	if (rhs.dptr == NULL) {
+ 		fclose(hfp);
+ 		return(NULL);
+ 	}
+ 	fseek(hfp, * (long *) rhs.dptr, 0);
+ 	if (fgets(lbuf, BUFLEN, hfp) != NULL) {
+ 		p = index(lbuf, '\t');
+ 		if (p == NULL)
+ 			p = index(lbuf, '\n');
+ 		*p = 0;
+ 		if (strcmp(lbuf, artid) == 0 || strcmp(lbuf, oidbuf) == 0) {
+ 			fclose(hfp);
+ 			*p = '\t';
+ 			*(lbuf + strlen(lbuf) - 1) = 0;	/* zap the \n */
+ 			return(lbuf);
+ 		}
+ 	}
+ 	/* The dbm files weren't up to date; revert to linear search */
+ 	fseek(hfp, 0L, 0);
+ #endif DBM
  	while (fgets(lbuf, BUFLEN, hfp) != NULL) {
  		p = index(lbuf, '\t');
  		if (p == NULL)


*** expire.old.c	Thu May 24 14:02:11 1984
--- expire.c	Thu May 24 14:54:57 1984
***************
*** 416,422
  		link(NARTFILE, ARTFILE);
  		unlink(NARTFILE);
  #ifdef DBM
- 		if (rebuild)
  			reblddbm ( );
  #endif
  	}

--- 416,421 -----
  		link(NARTFILE, ARTFILE);
  		unlink(NARTFILE);
  #ifdef DBM
  			reblddbm ( );
  #endif
  	}

per@erix.UUCP (Per Hedeland) (08/06/84)

I'm afraid there was a bug in the fix I posted (quite a while back), in that
it didn't make sure that dbminit() was only done once. It should also be noted
that the fix doesn't agree well with the NETPATHS stuff, since you can't handle
more than one set of dbm files at a time. Anyway, here are the diffs again,
hopefully more correct this time.

Per Hedeland
per@erix.UUCP  or  ...{decvax,philabs}!mcvax!enea!erix!per


*** funcs.old.c	Sun May 27 00:09:59 1984
--- funcs.new.c	Mon Aug  6 11:29:26 1984
***************
*** 855,860
  	char oidbuf[BUFSIZ];
  	FILE *hfp;
  	char *p;
  
  	/* Try to understand old artid's as well.  Assume .UUCP domain. */
  	if (artid[0] != '<') {

--- 855,869 -----
  	char oidbuf[BUFSIZ];
  	FILE *hfp;
  	char *p;
+ #ifdef DBM
+ 	static int dbmopen = 0;
+ 	typedef struct {
+ 		char *dptr;
+ 		int dsize;
+ 	} datum;
+ 	datum lhs, rhs;
+ 	datum fetch();
+ #endif DBM
  
  	/* Try to understand old artid's as well.  Assume .UUCP domain. */
  	if (artid[0] != '<') {
***************
*** 867,872
  	} else
  		strcpy(oidbuf, artid);
  	hfp = xfopen(ARTFILE, "r");
  	while (fgets(lbuf, BUFLEN, hfp) != NULL) {
  		p = index(lbuf, '\t');
  		if (p == NULL)

--- 876,912 -----
  	} else
  		strcpy(oidbuf, artid);
  	hfp = xfopen(ARTFILE, "r");
+ #ifdef DBM
+ 	/* Use the dbm files (hoping expire is keeping them up to date) */
+ 	if (dbmopen == 0) {
+ 		if (dbminit(ARTFILE) == 0) dbmopen = 1;
+ 		else dbmopen = -1;
+ 	}
+ 	if (dbmopen == 1) {
+ 		lhs.dptr = oidbuf;
+ 		lhs.dsize = strlen(oidbuf) + 1;
+ 		rhs = fetch(lhs);
+ 		if (rhs.dptr == NULL) {
+ 			fclose(hfp);
+ 			return(NULL);
+ 		}
+ 		fseek(hfp, * (long *) rhs.dptr, 0);
+ 		if (fgets(lbuf, BUFLEN, hfp) != NULL) {
+ 			p = index(lbuf, '\t');
+ 			if (p == NULL)
+ 				p = index(lbuf, '\n');
+ 			*p = 0;
+ 			if (strcmp(lbuf, artid) == 0 || strcmp(lbuf, oidbuf) == 0) {
+ 				fclose(hfp);
+ 				*p = '\t';
+ 				*(lbuf + strlen(lbuf) - 1) = 0;	/* zap the \n */
+ 				return(lbuf);
+ 			}
+ 		}
+ 		/* The dbm files weren't up to date; revert to linear search */
+ 		fseek(hfp, 0L, 0);
+ 	}
+ #endif DBM
  	while (fgets(lbuf, BUFLEN, hfp) != NULL) {
  		p = index(lbuf, '\t');
  		if (p == NULL)


*** expire.old.c	Thu May 24 14:02:11 1984
--- expire.c	Thu May 24 14:54:57 1984
***************
*** 416,422
  		link(NARTFILE, ARTFILE);
  		unlink(NARTFILE);
  #ifdef DBM
- 		if (rebuild)
  			reblddbm ( );
  #endif
  	}

--- 416,421 -----
  		link(NARTFILE, ARTFILE);
  		unlink(NARTFILE);
  #ifdef DBM
  			reblddbm ( );
  #endif
  	}