FAC2@dayton.saic.com (Earle Ake) (11/23/89)
The following is the inpaths program that Brian Reid wrote to run at UNIX sites to report on NEWS flow and other things. I have it working at my site, although I have made no effort yet to optimize it. Try it out and see what you think. There is a command procedure in comments to run it. Let me know if the code has problems or if you can find a way to speed it up. I have also sent a copy to Brian Reid so he can check to see if I haven't messed it up for the UNIX folks. -- _____________________________________________________________________________ ____ ____ ___ Earle Ake /___ /___/ / / Science Applications International Corporation ____// / / /__ Dayton, Ohio ----------------------------------------------------------------------------- Internet: fac2%dayton.saic.com@uunet.uu.net uucp: uunet!dayvb!fac2 /* inpaths.c -- track the paths of incoming news articles and prepare * in a format suitable for decwrl pathsurveys * * * This program inputs a list of filenames of news articles, and outputs a * data report which should be mailed to the decwrl Network Monitoring * Project at address "pathsurvey@decwrl.dec.com". Please run it once a month * if you can, in time so that the results arrive at decwrl by the 1st * day of the month. * * * Run it like this: * * cd /usr/spool/news * find . -type f -print | inpaths "yourhost" | mail pathsurvey@decwrl.dec.com * * where "yourhost" is the host name of your computer, e.g. "decwrl". * * The input to "inpaths" must be a list of the file names of news articles, * relative to the spooling directory. "./news/config/2771" and * "news/config/2771" are both legal inputs, but "/usr/spool/news/config/2771" * is not. * If you have some other way of generating a list of news file * names, such as running a script over the history file, you can use that * instead. Inpaths handles crossposting regardless of which technique * you use. * * If you get an error message "no traffic found. 
Check $CWD", then the * problem is most likely that the path names you are giving it are not * relative to the spooling directory, e.g. you are feeding it lines like * "/usr/spool/news/news/config/2771" instead of "./news/config/2771" * * There are 3 options: -s, -m, and -l for short, medium, and long report. * The default is to produce a long report. If you are worried about mail * expenses you can send a shorter report. The long report is typically * about 50K bytes for a major site, and perhaps 25K bytes for a smaller * site. * * Brian Reid * V1 Sep 1986 * V2.4 May 1989 * * Special thanks to Mel Pleasant and Bob Thrush for significant help with * portability bugs. * */ /* inpaths.c V3.0 -- ported to VMS by Earle Ake 22 Nov 1989 * * EMail to: fac2@Dayton.SAIC.COM ..uunet!dayvb!fac2 * * This is the inpaths program by Brian Reid which I ported to VMS so that * the DECUS UUCP sites could also participate in the inpaths reports. I have * left the original code intact but have enclosed my VMS specific code * within #ifdef VMS statements. Compile and link this and put the executable * into the UUCP_BIN directory. Execute it a few days before the first of the * month and it will generate a file called inpaths.report and automatically * mail it to pathsurvey@decwrl.dec.com. * */ /* Below is a file I call inpaths.com which when run near the end of the month will generate an inpaths.report and send it to decwrl. I have put inpaths.exe into UUCP_BIN. Put it wherever you want and then edit the line below to reflect that change. $! Cut out this command procedure and call it inpaths.com $! $! INPATHS.COM - Generate and mail the inpaths report to decwrl. $! 
$ inpaths := $UUCP_BIN:inpaths $ site = f$trnlnm("UUCP_HOST_NAME") $ define/user sys$output inpaths.report $ inpaths 'site $ mail/subject="INPATHS report from site: ''site'" - inpaths.report uucp%"""pathsurvey@decwrl.dec.com""" $ delete inpaths.report;* $ exit */ /* if you are compiling on a USG machine (SysV, etc), please uncomment the following line: */ /* #define SYSV */ #define VERSION "3.0" #include <stdio.h> #ifndef VMS #include <fcntl.h> #endif #include <ctype.h> #ifdef VMS #include <types.h> #include <stat.h> #include <unixio.h> #include <file.h> #else #include <sys/types.h> #include <sys/stat.h> #endif #define HEADBYTES 1024 #ifdef SYSV long time(); #else SYSV time_t time(); #endif SYSV extern void exit(); extern char *malloc(); extern char *strcpy(); /* this is index() or strchr() included here for portability */ char *index(ptr,chr) char *ptr,chr; { do {if (*ptr==chr) return(ptr);} while (*ptr++); return ( (char *) NULL); } main (argc,argv) int argc; char **argv; { char linebuf[1024], jc, *lptr, *cp, *cp1, *cp2; char rightdelim; char *pathfield, *groupsfield; int crossposted; char artbuf[HEADBYTES], ngfilename[256]; struct stat statbuf, *sbptr; char *scanlimit; #ifdef VMS char *bufstart; #endif char *hostname; char hostString[128]; int needHost; static int passChar[256]; int isopen,columns,verbose,totalTraffic; long nowtime,age,agesum; float avgAge; #ifdef VMS static char *nextname(); #endif /* definitions for getopt */ extern int optind; extern char *optarg; /* structure used to tally the traffic between two hosts */ typedef struct trec { struct trec *rlink; struct nrec *linkid; int tally; } ; /* structure to hold the information about a host */ typedef struct nrec { struct nrec *link; struct trec *rlink; char *id; long sentto; /* tally of articles sent to somebody from here */ } ; struct nrec *hosthash[128], *hnptr, *list, *relay; struct trec *rlist; int i, article, gotbytes, c; extern errno; hostname = "unknown"; verbose = 2; while (( c=getopt(argc, 
argv, "sml" )) != EOF) switch (c) { case 's': verbose=0; break; case 'm': verbose=1; break; case 'l': verbose=2; break; case '?': fprintf(stderr, "usage: %s [-s] [-m] [-l] hostname\n",argv[0]); exit(1); } if (optind < argc) { hostname = argv[optind]; } else { fprintf(stderr,"usage: %s [-s] [-m] [-l] `hostname`\n",argv[0]); exit(1); } fprintf(stderr,"computing %s inpaths for host %s\n", verbose==0 ? "short" : (verbose==1 ? "medium" : "long"),hostname); for (i = 0; i<128; i++) hosthash[i] = (struct nrec *) NULL; /* precompute character types to speed up scan */ for (i = 0; i<=255; i++) { passChar[i] = 0; if (isalpha(i) || isdigit(i)) passChar[i] = 1; if (i == '-' || i == '.' || i == '_') passChar[i] = 1; } totalTraffic = 0; nowtime = (long) time(0L); agesum = 0; #ifdef VMS while ((lptr = nextname("news_device:[*...]*.itm")) != 0) { isopen = 0; #else while (gets(linebuf) != (char *) NULL) { lptr = linebuf; isopen = 0; /* Skip blank lines */ if (linebuf[0] == '\0') goto bypass; /* Skip files that do not have pure numeric names */ i = strlen(lptr)-1; do { if (!isdigit(linebuf[i])) { if (linebuf[i]=='/') break; goto bypass; } i--; } while (i>=0); #endif /* Open the file for reading */ #ifdef VMS article = open(lptr, O_RDONLY, 0); #else article = open(lptr, O_RDONLY); #endif isopen = (article > 0); if (!isopen) goto bypass; sbptr = &statbuf; if (fstat(article, sbptr) == 0) { /* Record age of file in hours */ age = (nowtime - statbuf.st_mtime) / 3600; agesum += age; /* Reject names that are not ordinary files */ if ((statbuf.st_mode & S_IFREG) == 0) goto bypass; /* Pick the file name apart into an equivalent newsgroup name */ #ifdef VMS while (*lptr != '[') { lptr++; } lptr++; cp = ngfilename; while (*lptr != ']') { *cp++ = tolower(*lptr); lptr++; } *cp = NULL; #else if (*lptr == '.') { lptr++; if (*lptr == '/') lptr++; } cp = ngfilename; while (*lptr != 0) { if (*lptr == '/') *cp++ = '.'; else *cp++ = *lptr; lptr++; } cp--; while (isdigit(*cp)) *cp-- = NULL; if (*cp == 
'.') *cp = NULL; #endif } else goto bypass; /* Read in the first few bytes of the article; find the end of the header */ #ifdef VMS bufstart = artbuf; /* Find "Path:" header field */ pathfield = (char *) 0; groupsfield = (char *) 0; readrec: gotbytes = read(article, bufstart, HEADBYTES); #else gotbytes = read(article, artbuf, HEADBYTES); #endif if (gotbytes < 10) goto bypass; #ifndef VMS /* Find "Path:" header field */ pathfield = (char *) 0; groupsfield = (char *) 0; #endif #ifndef VMS scanlimit = &artbuf[gotbytes]; #endif #ifdef VMS if (strncmp(bufstart, "Path: ", 6) == 0) { pathfield = bufstart; bufstart = artbuf + gotbytes; } else if (strncmp(bufstart, "Newsgroups: ", 12) == 0) { groupsfield = bufstart; goto gotpath; } goto readrec; #else for (cp=artbuf; cp <= scanlimit; cp++) { if (*cp == '\n') break; if (pathfield && groupsfield) goto gotpath; if (strncmp(cp, "Path: ", 6) == 0) { pathfield = cp; goto nextgr; } if (strncmp(cp, "Newsgroups: ", 12) == 0) { groupsfield = cp; goto nextgr; } goto readrec; nextgr: while (*cp != '\n' && cp <= scanlimit) cp++; } if (groupsfield == (char *) 0 || (pathfield == (char *) 0)) goto bypass; #endif gotpath: ; /* Determine the name of the newsgroup to which this is charged. It is not necessarily the name of the file in which we found it; rather, use the "Newsgroups:" field. */ crossposted = 0; groupsfield += 12; /* skip 'Newsgroups: ' */ while (*groupsfield == ' ') groupsfield++; cp= (char *) index(groupsfield,'\n'); *cp = 0; cp=(char *) index(groupsfield,','); if (cp) { crossposted++; *cp = 0; } /* To avoid double-billing, only charge the newsgroup if the pathname matches the contents of the Newsgroups: field. This will also prevent picking up junk and control messages. */ if (strcmp(ngfilename,groupsfield)) goto bypass; /* Extract all of the host names from the "Path:" field and put them in our host table. 
*/ cp = pathfield; while (*cp != NULL && *cp != '\n') cp++; if (cp == NULL) { fprintf(stderr,"%s: end of Path line not in buffer.\n",lptr); goto bypass; } totalTraffic++; *cp = 0; pathfield += 5; /* skip 'Path:' */ cp1 = pathfield; relay = (struct nrec *) NULL; rightdelim = '!'; while (cp1 < cp) { /* get next field */ while (*cp1=='!') cp1++; cp2 = ++cp1; while (passChar[(int) (*cp2)]) cp2++; rightdelim = *cp2; *cp2 = 0; if (rightdelim=='!' && *cp1 != (char) NULL) { /* see if already in the table */ list = hosthash[*cp1]; while (list != NULL) { /* * Attempt to speed things up here a bit. Since we hash * on the first char, we see if the second char is a match * before calling strcmp() */ if (list->id[1] == cp1[1] && !strcmp(list->id, cp1)) { hnptr = list; break; /* I hate unnecessary goto's */ } list = list->link; } if(list == NULL) { /* get storage and splice in a new one */ hnptr = (struct nrec *) malloc(sizeof (struct nrec)); hnptr->id = (char *) strcpy(malloc(1+strlen(cp1)),cp1); hnptr->link = hosthash[*cp1]; hnptr->rlink = (struct trec *) NULL; hnptr->sentto = (long) 0; hosthash[*cp1] = hnptr; } } /* At this point "hnptr" points to the host record of the current host. If there was a relay host, then "relay" points to its host record (the relay host is just the previous host on the Path: line. Since this Path means that news has flowed from host "hnptr" to host "relay", we want to tally one message in a data structure corresponding to that link. We will increment the tally record that is attached to the source host "hnptr". 
*/ if (relay != NULL && relay != hnptr) { rlist = relay->rlink; while (rlist != NULL) { if (rlist->linkid == hnptr) goto have2; rlist = rlist->rlink; } rlist = (struct trec *) malloc(sizeof (struct trec)); rlist->rlink = relay->rlink; relay->rlink = rlist; rlist->linkid = hnptr; rlist->tally = 0; have2: rlist->tally++; hnptr->sentto++; } cp1 = cp2; relay = hnptr; if (rightdelim == ' ' || rightdelim == '(') break; } bypass: if (isopen) close(article) ; } /* Now dump the host table */ if (!totalTraffic) { fprintf(stderr,"%s: error--no traffic found. Check $CWD.\n",argv[0]); exit(1); } avgAge = ((double) agesum) / (24.0*(double) totalTraffic); printf("ZCZC begin inhosts %s %s %d %d %3.1f\n", VERSION,hostname,verbose,totalTraffic,avgAge); for (jc=0; jc<127; jc++) { list = hosthash[jc]; while (list != NULL) { if (list->rlink != NULL) { if (verbose > 0 || (100*list->sentto > totalTraffic)) printf("%ld\t%s\n",list->sentto, list->id); } list = list->link; } } printf("ZCZC end inhosts %s\n",hostname); printf("ZCZC begin inpaths %s %s %d %d %3.1f\n", VERSION,hostname,verbose,totalTraffic,avgAge); for (jc=0; jc<127; jc++) { list = hosthash[jc]; while (list != NULL) { if (verbose > 1 || (100*list->sentto > totalTraffic)) { if (list->rlink != NULL) { columns = 3+strlen(list->id); sprintf(hostString,"%s H ",list->id); needHost = 1; rlist = list->rlink; while (rlist != NULL) { if ( (100*rlist->tally > totalTraffic) || ((verbose > 1)&&(5000*rlist->tally>totalTraffic)) ) { if (needHost) printf("%s",hostString); needHost = 0; relay = rlist->linkid; if (columns > 70) { printf("\n%s",hostString); columns = 3+strlen(list->id); } printf("%d Z %s U ", rlist->tally, relay->id); columns += 9+strlen(relay->id); } rlist = rlist->rlink; } if (!needHost) printf("\n"); } } list = list->link; } } printf("ZCZC end inpaths %s\n",hostname); fclose(stdout); exit(0); } #ifdef VMS #include <descrip.h> #include <errno.h> #include <nam.h> #include <rmsdef.h> static int find_file_context = 0; /* Class D 
tells the RTL to allocate space; Class S says that we provide. */ static struct dsc$descriptor_s fn_desc = {0,DSC$K_DTYPE_T,DSC$K_CLASS_D,0}; static struct dsc$descriptor_s wn_desc = {0,DSC$K_DTYPE_T,DSC$K_CLASS_S,0}; static $DESCRIPTOR(null_character,"\0"); extern int lib$find_file(); extern int lib$find_file_end(); static char* nextname(wildname) char *wildname; { char errbuf[100]; int status; if (find_file_context == 0) { /* New file name to process */ wn_desc.dsc$w_length = strlen(wildname); wn_desc.dsc$a_pointer = wildname; } if ((status = lib$find_file(&wn_desc,&fn_desc,&find_file_context,0,0,0,0)) & 1) { /* VMS RTL doesn't use '\0' terminated strings. */ str$append(&fn_desc,&null_character); return fn_desc.dsc$a_pointer; } else if (status = RMS$_NMF) { /* No more files */ lib$find_file_end(&find_file_context); return 0; } /* else { */ /* Help perror() */ /* errno = EVMSERR; vaxc$errno = status; sprintf(errbuf, "%s: %s", Argv[0], Cfile); perror(errbuf); return 0; } */ } /* got this off net.sources */ #include <stdio.h> #define index strchr /* * get option letter from argument vector */ int opterr = 1, /* useless, never set or used */ optind = 1, /* index into parent argv vector */ optopt; /* character checked for validity */ char *optarg; /* argument associated with option */ #define BADCH (int)'?' #define EMSG "" #define tell(s) fputs(*nargv,stderr);fputs(s,stderr); \ fputc(optopt,stderr);fputc('\n',stderr);return(BADCH); getopt(nargc,nargv,ostr) int nargc; char **nargv, *ostr; { static char *place = EMSG; /* option letter processing */ register char *oli; /* option letter list index */ char *index(); if(!*place) { /* update scanning pointer */ if(optind >= nargc || *(place = nargv[optind]) != '-' || !*++place) return(EOF); if (*place == '-') { /* found "--" */ ++optind; return(EOF); } } /* option letter okay? 
*/ if ((optopt = (int)*place++) == (int)':' || !(oli = index(ostr,optopt))) { if(!*place) ++optind; tell(": illegal option -- "); } if (*++oli != ':') { /* don't need argument */ optarg = NULL; if (!*place) ++optind; } else { /* need an argument */ if (*place) optarg = place; /* no white space */ else if (nargc <= ++optind) { /* no arg */ place = EMSG; tell(": option requires an argument -- "); } else optarg = nargv[optind]; /* white space */ place = EMSG; ++optind; } return(optopt); /* dump back option letter */ } #endif
gih900@UUNET.UU.NET (Geoff Huston) (11/25/89)
> The following is the inpaths program that Brian Reid wrote to run >at UNIX sites to report on NEWS flow and other things. I have it working >at my site, although I have made no effort yet to optimize it. Try it out >and see what you think. There is a command procedure in comments to run >it. Let me know if the code has problems or if you can find a way to speed >it up. I have also sent a copy to Brian Reid so he can check to see if I >haven't messed it up for the UNIX folks. I too have put it up - that's a nice port as the compilation, link and execution were all painless on VMS! Geoff Huston