jim@aob.aob.mn.org (Jim Anderson) (05/24/89)
Watching the recent flamage about inpaths not getting rid of cross-posted articles and the perl scripts and flames about perl availability, etc., I decided to modify inpaths.c to minimize the effect of cross-posted articles by not counting articles which have already been seen. In the process of making these modifications, a problem with malloc on machines where sizeof(char *) != sizeof(int) showed up. The fix for that problem, along with the cross-posting modification, is included. Also, while I was drifting through the article, I noticed that if the open was unsuccessful, it still attempted to read the header from the nonexistent article. I made some modifications so that if an article is unreadable, it skips it and tries the next article. The general idea behind the modification is to keep track of a number of inode numbers of articles; a cross-posted article is a single file with several hard links, so every copy of it has the same inode number. Currently, it keeps track of 4000 inode numbers (though the patch as posted below defines MAXINODETRACK as 8000). It counts the number of cross-postings of each tracked article (taken from the file's link count), and once all the occurrences of an article are accounted for, it removes that inode from the list, providing space for another cross-posted article. Granted, this technique does not get rid of all cross-posted articles, but it does at least get rid of many of them, and the higher the tracking limit is set, the more it gets rid of. Running on an 80286 as I am, 4000 is a practical limit for this tracking. 
Anyway, justification behind us, these are the changes I made to accomplish this: *** inpaths.old Tue May 23 19:03:30 1989 --- inpaths.c Tue May 23 20:42:18 1989 *************** *** 36,41 #include <fcntl.h> #include <ctype.h> #include <sys/types.h> #define SURVEYPERIOD 21 /* Maximum number of days in survey period */ #define INTERVAL SURVEYPERIOD*60*60*24 --- 36,42 ----- #include <fcntl.h> #include <ctype.h> #include <sys/types.h> + #include <sys/stat.h> #define SURVEYPERIOD 21 /* Maximum number of days in survey period */ #define INTERVAL SURVEYPERIOD*60*60*24 *************** *** 40,45 #define SURVEYPERIOD 21 /* Maximum number of days in survey period */ #define INTERVAL SURVEYPERIOD*60*60*24 #define HEADBYTES 1024 main (argc,argv) int argc; --- 41,47 ----- #define SURVEYPERIOD 21 /* Maximum number of days in survey period */ #define INTERVAL SURVEYPERIOD*60*60*24 #define HEADBYTES 1024 + #define MAXINODETRACK 8000 struct sInodeTrack { int inodeNum; /* File inode number */ *************** *** 41,46 #define INTERVAL SURVEYPERIOD*60*60*24 #define HEADBYTES 1024 main (argc,argv) int argc; char **argv; --- 43,53 ----- #define HEADBYTES 1024 #define MAXINODETRACK 8000 + struct sInodeTrack { + int inodeNum; /* File inode number */ + int count; /* Number of references remaining. */ + } inodeTrack[MAXINODETRACK]; + main (argc,argv) int argc; char **argv; *************** *** 47,53 { char linebuf[1024], jc, *lptr, *cp, *cp1, *cp2; char rightdelim; ! char *pathfield; char artbuf[HEADBYTES]; char * scanlimit; char *hostname; --- 54,60 ----- { char linebuf[1024], jc, *lptr, *cp, *cp1, *cp2; char rightdelim; ! 
char *pathfield,*malloc(); char artbuf[HEADBYTES]; char * scanlimit; char *hostname; *************** *** 76,81 } ; struct nrec *hosthash[128], *hnptr, *list, *relay; struct trec *rlist; int i, article, gotbytes, c; extern errno; --- 83,89 ----- } ; struct nrec *hosthash[128], *hnptr, *list, *relay; struct trec *rlist; + struct stat sbuf; int i, article, gotbytes, c; int ignoreArticle; extern errno; *************** *** 77,82 struct nrec *hosthash[128], *hnptr, *list, *relay; struct trec *rlist; int i, article, gotbytes, c; extern errno; hostname = "unknown"; --- 85,91 ----- struct trec *rlist; struct stat sbuf; int i, article, gotbytes, c; + int ignoreArticle; extern errno; for (i=0;i<MAXINODETRACK;i++) { *************** *** 79,84 int i, article, gotbytes, c; extern errno; hostname = "unknown"; verbose = 2; while (( c=getopt(argc, argv, "sml" )) != EOF) --- 88,97 ----- int ignoreArticle; extern errno; + for (i=0;i<MAXINODETRACK;i++) { + inodeTrack[i].inodeNum = 0; + inodeTrack[i].count = 0; + } hostname = "unknown"; verbose = 2; while (( c=getopt(argc, argv, "sml" )) != EOF) *************** *** 126,131 /* Open the file for reading */ article = open(lptr, O_RDONLY); isopen = (article > 0); /* Read in the first few bytes of the article; find the end of the header */ gotbytes = read(article, artbuf, HEADBYTES); --- 139,153 ----- /* Open the file for reading */ article = open(lptr, O_RDONLY); isopen = (article > 0); + if (!isopen) + goto bypass; /* Go back and read another line */ + fstat(article,&sbuf); + for (i=0;i<MAXINODETRACK && inodeTrack[i].inodeNum!=0 && + inodeTrack[i].inodeNum!=sbuf.st_ino;i++) + ; + ignoreArticle = 0; + if (i<MAXINODETRACK && inodeTrack[i].inodeNum == sbuf.st_ino) { + /* Found a cross-posted article */ ignoreArticle = 1; inodeTrack[i].count--; *************** *** 127,132 article = open(lptr, O_RDONLY); isopen = (article > 0); /* Read in the first few bytes of the article; find the end of the header */ gotbytes = read(article, artbuf, 
HEADBYTES); if (gotbytes < 10) goto bypass; --- 149,182 ----- if (i<MAXINODETRACK && inodeTrack[i].inodeNum == sbuf.st_ino) { /* Found a cross-posted article */ + ignoreArticle = 1; + inodeTrack[i].count--; + if (inodeTrack[i].count==0) { + /* Last occurance of this inode - delete it */ + int j; + + inodeTrack[i].inodeNum = 0; + for (j=i+1;inodeTrack[j].inodeNum!=0;j++) { + inodeTrack[j-1].inodeNum = + inodeTrack[j].inodeNum; + inodeTrack[j-1].count = inodeTrack[j].count; + } + } + } else { + /* It wasn't in the list - Maybe add it?? */ + if (sbuf.st_nlink>1) { + /* It wasn't in the list and we should see it again- + add it */ + if (i<MAXINODETRACK-1) { + /* We have enough room to add it */ + inodeTrack[i].inodeNum = sbuf.st_ino; + inodeTrack[i].count = sbuf.st_nlink-1; + } + } /* else no room to add it - maybe later */ + } + if (ignoreArticle) + goto bypass; /* Go back and read another file name */ + /* Read in the first few bytes of the article; find the end of the header */ gotbytes = read(article, artbuf, HEADBYTES); if (gotbytes < 10) goto bypass; *************** *** 188,194 if(list == NULL) { /* get storage and splice in a new one */ hnptr = (struct nrec *) malloc(sizeof (struct nrec)); ! hnptr->id = (char *) strcpy(malloc(1+strlen(cp1)),cp1); hnptr->link = hosthash[*cp1]; hnptr->rlink = (struct trec *) NULL; hnptr->sentto = (long) 0; --- 238,244 ----- if(list == NULL) { /* get storage and splice in a new one */ hnptr = (struct nrec *) malloc(sizeof (struct nrec)); ! (void) strcpy((hnptr->id = malloc(1+strlen(cp1))),cp1); hnptr->link = hosthash[*cp1]; hnptr->rlink = (struct trec *) NULL; hnptr->sentto = (long) 0; -- Jim Anderson (612) 636-2869 Anderson O'Brien, Inc New mail:jim@aob.mn.org 2575 N. Fairview Ave. Old mail:{rutgers,gatech,amdahl}!bungia!aob!jim St. Paul, MN 55113 "Fireball... Let me see... How did that go?"