FAC2@dayton.saic.com (Earle Ake) (11/23/89)
The following is the inpaths program that Brian Reid wrote to run
at UNIX sites to report on NEWS flow and other things. I have it working
at my site, although I have made no effort yet to optimize it. Try it out
and see what you think. There is a command procedure in comments to run
it. Let me know if the code has problems or if you can find a way to speed
it up. I have also sent a copy to Brian Reid so he can check to see if I
haven't messed it up for the UNIX folks.
--
_____________________________________________________________________________
____ ____ ___
Earle Ake /___ /___/ / / Science Applications International Corporation
____// / / /__ Dayton, Ohio
-----------------------------------------------------------------------------
Internet: fac2%dayton.saic.com@uunet.uu.net uucp: uunet!dayvb!fac2
/* inpaths.c -- track the paths of incoming news articles and prepare
* in a format suitable for decwrl pathsurveys
*
*
* This program inputs a list of filenames of news articles, and outputs a
* data report which should be mailed to the decwrl Network Monitoring
* Project at address "pathsurvey@decwrl.dec.com". Please run it once a month
* if you can, in time so that the results arrive at decwrl by the 1st
* day of the month.
*
*
* Run it like this:
*
* cd /usr/spool/news
* find . -type f -print | inpaths "yourhost" | mail pathsurvey@decwrl.dec.com
*
* where "yourhost" is the host name of your computer, e.g. "decwrl".
*
* The input to "inpaths" must be a list of the file names of news articles,
* relative to the spooling directory. "./news/config/2771" and
* "news/config/2771" are both legal inputs, but "/usr/spool/news/config/2771"
* is not. If you have some other way of generating a list of news file
* names, such as running a script over the history file, you can use that
* instead. Inpaths handles crossposting regardless of which technique
* you use.
*
* If you get an error message "no traffic found. Check $CWD", then the
* problem is most likely that the path names you are giving it are not
* relative to the spooling directory, e.g. you are feeding it lines like
* "/usr/spool/news/news/config/2771" instead of "./news/config/2771"
*
* There are 3 options: -s, -m, and -l for short, medium, and long report.
* The default is to produce a long report. If you are worried about mail
* expenses you can send a shorter report. The long report is typically
* about 50K bytes for a major site, and perhaps 25K bytes for a smaller
* site.
*
* Brian Reid
* V1 Sep 1986
* V2.4 May 1989
*
* Special thanks to Mel Pleasant and Bob Thrush for significant help with
* portability bugs.
*
*/
/* inpaths.c V3.0 -- ported to VMS by Earle Ake 22 Nov 1989
*
* EMail to: fac2@Dayton.SAIC.COM ..uunet!dayvb!fac2
*
* This is the inpaths program by Brian Reid which I ported to VMS so that
* the DECUS UUCP sites could also participate in the inpaths reports. I have
* left the original code intact but have enclosed my VMS specific code
* within #ifdef VMS statements. Compile and link this and put the executable
* into the UUCP_BIN directory. Execute it a few days before the first of the
* month and it will generate a file called inpaths.report and automatically
* mail it to pathsurvey@decwrl.dec.com.
*
*/
/*
Below is a file I call inpaths.com which when run near the end of the month
will generate an inpaths.report and send it to decwrl.
I have put inpaths.exe into UUCP_BIN. Put it wherever you want and then
edit the line below to reflect that change.
$! Cut out this command procedure and call it inpaths.com
$!
$! INPATHS.COM - Generate and mail the inpaths report to decwrl.
$!
$ inpaths := $UUCP_BIN:inpaths
$ site = f$trnlnm("UUCP_HOST_NAME")
$ define/user sys$output inpaths.report
$ inpaths 'site
$ mail/subject="INPATHS report from site: ''site'" -
inpaths.report uucp%"""pathsurvey@decwrl.dec.com"""
$ delete inpaths.report;*
$ exit
*/
/* if you are compiling on a USG machine (SysV, etc),
please uncomment the following line: */
/* #define SYSV */
#define VERSION "3.0"
#include <stdio.h>
#ifndef VMS
#include <fcntl.h>
#endif
#include <ctype.h>
#ifdef VMS
#include <types.h>
#include <stat.h>
#include <unixio.h>
#include <file.h>
#else
#include <sys/types.h>
#include <sys/stat.h>
#endif
/* Number of bytes read from the front of each article when looking for the
   "Path:" and "Newsgroups:" header lines. */
#define HEADBYTES 1024
/* time() is declared by hand; its return type depends on the system flavour.
   The labels after #else/#endif are written as comments because ISO C does
   not permit bare tokens after those directives. */
#ifdef SYSV
long time();
#else /* !SYSV */
time_t time();
#endif /* SYSV */
/* Old-style declarations of the library routines used below. */
extern void exit();
extern char *malloc();
extern char *strcpy();
/*
 * Local copy of index()/strchr(), kept in this file for portability.
 *
 * Walks the NUL-terminated string at "ptr" and returns a pointer to the
 * first character equal to "chr".  The terminating NUL takes part in the
 * comparison, so searching for '\0' yields a pointer to the terminator.
 * Returns a null pointer when the character does not occur.
 */
char *index(ptr,chr)
char *ptr,chr;
{
    for (;;) {
        if (*ptr == chr)
            return(ptr);
        if (*ptr == '\0')
            break;          /* reached the end without a match */
        ptr++;
    }
    return ( (char *) NULL);
}
#ifdef VMS
/* Directory walker defined further down in this file.  Declared at file
   scope because a block-scope "static" function declaration is not valid
   ISO C. */
static char *nextname();
#endif

/*
 * inpaths main program.
 *
 * Usage: inpaths [-s] [-m] [-l] hostname
 *
 * Obtains a list of news article files (one name per line on standard input,
 * or by walking news_device:[*...]*.itm on VMS).  For every article whose
 * first "Newsgroups:" entry matches the newsgroup implied by its file name,
 * the "Path:" line is split into host names and one article is tallied for
 * every pair of adjacent hosts.  The host table and the link tallies are
 * then written to standard output between "ZCZC begin/end" marker lines.
 *
 * Exits with status 1 on a usage error, when no article was counted, or
 * when memory runs out; exits with status 0 otherwise.
 */
int main (argc,argv)
int argc;
char **argv;
{
    /* structure used to tally the traffic between two hosts */
    struct trec {
        struct trec *rlink;     /* next tally record on the same host's list */
        struct nrec *linkid;    /* the host at the other end of the link */
        int tally;              /* number of articles seen on this link */
    };
    /* structure to hold the information about a host */
    struct nrec {
        struct nrec *link;      /* next host in the same hash bucket */
        struct trec *rlink;     /* head of this host's list of tally records */
        char *id;               /* host name */
        long sentto;            /* tally of articles sent to somebody from here */
    };
    char linebuf[1024], *lptr, *fname, *cp, *cp1, *cp2;
    char rightdelim;
    char *pathfield, *groupsfield;
    /* one spare byte so the header text can always be NUL-terminated */
    char artbuf[HEADBYTES+1], ngfilename[256];
    struct stat statbuf;
#ifdef VMS
    char *bufstart;
    int room;
#else
    char *scanlimit;
#endif
    char *hostname;
    int needHost;
    static int passChar[256];
    int isopen,columns,verbose,totalTraffic;
    long nowtime,age,agesum;
    float avgAge;
    /* definitions for getopt */
    extern int optind;
    struct nrec *hosthash[128], *hnptr, *list, *relay;
    struct trec *rlist;
    int i, article, gotbytes, c, slot;

    hostname = "unknown";
    verbose = 2;
    while (( c=getopt(argc, argv, "sml" )) != EOF)
        switch (c) {
        case 's': verbose=0; break;
        case 'm': verbose=1; break;
        case 'l': verbose=2; break;
        case '?': fprintf(stderr,
                      "usage: %s [-s] [-m] [-l] hostname\n",argv[0]);
                  exit(1);
        }
    if (optind < argc) {
        hostname = argv[optind];
    } else {
        fprintf(stderr,"usage: %s [-s] [-m] [-l] `hostname`\n",argv[0]);
        exit(1);
    }
    fprintf(stderr,"computing %s inpaths for host %s\n",
        verbose==0 ? "short" : (verbose==1 ? "medium" : "long"),hostname);

    for (i = 0; i<128; i++) hosthash[i] = (struct nrec *) NULL;
    /* precompute character types to speed up scan */
    for (i = 0; i<=255; i++) {
        passChar[i] = 0;
        if (isalpha(i) || isdigit(i)) passChar[i] = 1;
        if (i == '-' || i == '.' || i == '_') passChar[i] = 1;
    }
    totalTraffic = 0;
    nowtime = (long) time((time_t *) 0);
    agesum = 0;

#ifdef VMS
    while ((lptr = nextname("news_device:[*...]*.itm")) != 0) {
        isopen = 0;
        fname = lptr;
#else
    /* fgets() rather than gets(): the input line can never overrun linebuf */
    while (fgets(linebuf, (int) sizeof linebuf, stdin) != (char *) NULL) {
        isopen = 0;
        lptr = fname = linebuf;
        i = (int) strlen(linebuf);
        if (i > 0 && linebuf[i-1] == '\n') {
            linebuf[--i] = '\0';
        } else if (i == (int) sizeof linebuf - 1) {
            /* Line too long for the buffer: throw the rest of it away so
               the tail is not mistaken for another file name. */
            while ((c = getchar()) != EOF && c != '\n')
                ;
            goto bypass;
        }
        /* Skip blank lines */
        if (i == 0) goto bypass;
        /* Skip files that do not have pure numeric names */
        for (i--; i >= 0 && linebuf[i] != '/'; i--) {
            if (!isdigit((unsigned char) linebuf[i])) goto bypass;
        }
#endif
        /* Open the file for reading */
#ifdef VMS
        article = open(fname, O_RDONLY, 0);
#else
        article = open(fname, O_RDONLY);
#endif
        /* open() reports failure with a negative value; 0 is a valid descriptor */
        isopen = (article >= 0);
        if (!isopen) goto bypass;
        if (fstat(article, &statbuf) != 0) goto bypass;
        /* Record age of file in hours */
        /* NOTE(review): the age is added for every file that can be opened,
           including files that are bypassed below, while the average is
           later divided by the number of counted articles only.  Kept as
           in the original; confirm whether that is intended. */
        age = (nowtime - (long) statbuf.st_mtime) / 3600;
        agesum += age;
        /* Reject names that are not ordinary files */
        if ((statbuf.st_mode & S_IFREG) == 0) goto bypass;

        /* Pick the file name apart into an equivalent newsgroup name */
#ifdef VMS
        /* the newsgroup is the lower-cased text between '[' and ']' */
        while (*lptr != '\0' && *lptr != '[') lptr++;
        if (*lptr == '\0') goto bypass;
        lptr++;
        cp = ngfilename;
        while (*lptr != '\0' && *lptr != ']') {
            if (cp >= ngfilename + sizeof ngfilename - 1) goto bypass;
            *cp++ = tolower(*lptr);
            lptr++;
        }
        *cp = '\0';
#else
        if (*lptr == '.') {
            lptr++;
            if (*lptr == '/') lptr++;
        }
        cp = ngfilename;
        while (*lptr != '\0') {
            /* a name too long for ngfilename cannot match any newsgroup */
            if (cp >= ngfilename + sizeof ngfilename - 1) goto bypass;
            *cp++ = (*lptr == '/') ? '.' : *lptr;
            lptr++;
        }
        *cp = '\0';
        /* strip the trailing article number and the dot in front of it,
           without ever stepping in front of the buffer */
        while (cp > ngfilename && isdigit((unsigned char) cp[-1])) *--cp = '\0';
        if (cp > ngfilename && cp[-1] == '.') *--cp = '\0';
#endif

        /* Read in the first few bytes of the article and find the "Path:"
           and "Newsgroups:" header fields */
        pathfield = (char *) 0;
        groupsfield = (char *) 0;
#ifdef VMS
        /* Each read() is treated as delivering one header line.  Lines of
           interest are kept by advancing bufstart past them; any other
           line is overwritten by the next read.  A line shorter than 10
           bytes (such as the blank line ending the header) stops the
           search. */
        bufstart = artbuf;
        for (;;) {
            room = HEADBYTES - (int) (bufstart - artbuf);
            if (room <= 0) goto bypass;
            gotbytes = read(article, bufstart, room);
            if (gotbytes < 10) goto bypass;
            bufstart[gotbytes] = '\0';
            if (strncmp(bufstart, "Path: ", 6) == 0) {
                pathfield = bufstart;
                bufstart += gotbytes;
            }
            else if (strncmp(bufstart, "Newsgroups: ", 12) == 0) {
                groupsfield = bufstart;
                bufstart += gotbytes;
            }
            if (pathfield != (char *) 0 && groupsfield != (char *) 0) break;
        }
#else
        gotbytes = read(article, artbuf, HEADBYTES);
        if (gotbytes < 10) goto bypass;
        artbuf[gotbytes] = '\0';
        scanlimit = &artbuf[gotbytes];
        /* cp is at the start of a header line at the top of each pass */
        for (cp=artbuf; cp < scanlimit; cp++) {
            if (*cp == '\n') break;             /* blank line: end of header */
            if (pathfield && groupsfield) break;
            if (strncmp(cp, "Path: ", 6) == 0) {
                pathfield = cp;
            }
            else if (strncmp(cp, "Newsgroups: ", 12) == 0) {
                groupsfield = cp;
            }
            /* move to the end of this line */
            while (cp < scanlimit && *cp != '\n') cp++;
        }
#endif
        if (groupsfield == (char *) 0 || (pathfield == (char *) 0))
            goto bypass;

        /* Determine the name of the newsgroup to which this is charged. It
           is not necessarily the name of the file in which we found it;
           rather, use the "Newsgroups:" field. */
        groupsfield += 12;      /* skip 'Newsgroups: ' */
        while (*groupsfield == ' ') groupsfield++;
        cp = (char *) index(groupsfield,'\n');
        if (cp == (char *) NULL) goto bypass;   /* line cut off by the buffer */
        *cp = 0;
        cp = (char *) index(groupsfield,',');
        if (cp) *cp = 0;        /* crossposted: keep the first group only */

        /* To avoid double-billing, only charge the newsgroup if the pathname
           matches the contents of the Newsgroups: field. This will also
           prevent picking up junk and control messages. */
        if (strcmp(ngfilename,groupsfield)) goto bypass;

        /* Extract all of the host names from the "Path:" field and put them
           in our host table. */
        cp = pathfield;
        while (*cp != '\0' && *cp != '\n') cp++;
        if (*cp == '\0') {
            fprintf(stderr,"%s: end of Path line not in buffer.\n",fname);
            goto bypass;
        }
        totalTraffic++;
        *cp = 0;
        pathfield += 5;         /* skip 'Path:' */
        cp1 = pathfield;
        relay = (struct nrec *) NULL;
        /* no host seen yet in this article; never reuse the last host of
           the previous article */
        hnptr = (struct nrec *) NULL;
        rightdelim = '!';
        while (cp1 < cp) {
            /* get next field */
            while (*cp1=='!') cp1++;
            cp2 = ++cp1;        /* step over the blank or the zeroed delimiter */
            if (cp1 > cp) break;
            while (passChar[(unsigned char) *cp2]) cp2++;
            rightdelim = *cp2; *cp2 = 0;
            if (rightdelim=='!' && *cp1 != '\0') {
                /* hash on the first character, kept inside the table */
                slot = ((unsigned char) *cp1) & 0x7f;
                /* see if already in the table */
                for (list = hosthash[slot]; list != NULL; list = list->link) {
                    /*
                     * Attempt to speed things up here a bit. Since we hash
                     * on the first char, we see if the second char is a
                     * match before calling strcmp()
                     */
                    if (list->id[1] == cp1[1] && !strcmp(list->id, cp1))
                        break;
                }
                if (list != NULL) {
                    hnptr = list;
                } else {
                    /* get storage and splice in a new one */
                    hnptr = (struct nrec *) malloc(sizeof (struct nrec));
                    if (hnptr != NULL)
                        hnptr->id = (char *) malloc(1+strlen(cp1));
                    if (hnptr == NULL || hnptr->id == NULL) {
                        fprintf(stderr,"%s: out of memory\n",argv[0]);
                        exit(1);
                    }
                    strcpy(hnptr->id, cp1);
                    hnptr->link = hosthash[slot];
                    hnptr->rlink = (struct trec *) NULL;
                    hnptr->sentto = (long) 0;
                    hosthash[slot] = hnptr;
                }
            }
            /*
             * At this point "hnptr" points to the host record of the current
             * host. If there was a relay host, then "relay" points to its
             * host record (the relay host is just the previous host on the
             * Path: line). Since this Path means that news has flowed from
             * host "hnptr" to host "relay", we tally one message on that
             * link: the tally record hangs off "relay" and names "hnptr",
             * and the "sentto" count of "hnptr" goes up by one.
             */
            if (relay != NULL && relay != hnptr) {
                for (rlist = relay->rlink; rlist != NULL; rlist = rlist->rlink) {
                    if (rlist->linkid == hnptr) break;
                }
                if (rlist == NULL) {
                    rlist = (struct trec *) malloc(sizeof (struct trec));
                    if (rlist == NULL) {
                        fprintf(stderr,"%s: out of memory\n",argv[0]);
                        exit(1);
                    }
                    rlist->rlink = relay->rlink;
                    relay->rlink = rlist;
                    rlist->linkid = hnptr;
                    rlist->tally = 0;
                }
                rlist->tally++;
                hnptr->sentto++;
            }
            cp1 = cp2;
            relay = hnptr;
            if (rightdelim == ' ' || rightdelim == '(') break;
        }
bypass: if (isopen) close(article) ;
    }

    /* Now dump the host table */
    if (!totalTraffic) {
        fprintf(stderr,"%s: error--no traffic found. Check $CWD.\n",argv[0]);
        exit(1);
    }
    /* agesum is in hours; report the average in days */
    avgAge = ((double) agesum) / (24.0*(double) totalTraffic);
    printf("ZCZC begin inhosts %s %s %d %d %3.1f\n",
        VERSION,hostname,verbose,totalTraffic,avgAge);
    for (i = 0; i < 128; i++) {
        for (list = hosthash[i]; list != NULL; list = list->link) {
            if (list->rlink == NULL) continue;
            if (verbose > 0 || (100*list->sentto > totalTraffic))
                printf("%ld\t%s\n",list->sentto, list->id);
        }
    }
    printf("ZCZC end inhosts %s\n",hostname);

    printf("ZCZC begin inpaths %s %s %d %d %3.1f\n",
        VERSION,hostname,verbose,totalTraffic,avgAge);
    for (i = 0; i < 128; i++) {
        for (list = hosthash[i]; list != NULL; list = list->link) {
            if (!(verbose > 1 || (100*list->sentto > totalTraffic))) continue;
            if (list->rlink == NULL) continue;
            /* The "<host> H " prefix is printed straight from the host
               record, so its length is not limited by a fixed buffer. */
            columns = 3+(int) strlen(list->id);
            needHost = 1;
            for (rlist = list->rlink; rlist != NULL; rlist = rlist->rlink) {
                if (
                    (100L*rlist->tally > totalTraffic)
                    || ((verbose > 1)&&(5000L*rlist->tally>totalTraffic))
                   ) {
                    if (needHost) printf("%s H ",list->id);
                    needHost = 0;
                    relay = rlist->linkid;
                    if (columns > 70) {
                        /* start a continuation line for the same host */
                        printf("\n%s H ",list->id);
                        columns = 3+(int) strlen(list->id);
                    }
                    printf("%d Z %s U ", rlist->tally, relay->id);
                    columns += 9+(int) strlen(relay->id);
                }
            }
            if (!needHost) printf("\n");
        }
    }
    printf("ZCZC end inpaths %s\n",hostname);
    fclose(stdout);
    exit(0);
    return(0);  /* not reached */
}
#ifdef VMS
#include <descrip.h>
#include <errno.h>
#include <nam.h>
#include <rmsdef.h>
/* Search context shared by successive nextname() calls; its address is
   handed to lib$find_file() and lib$find_file_end().  nextname() treats the
   value 0 as "set up the wildcard descriptor for a new search". */
static int find_file_context = 0;
/* Class D tells the RTL to allocate space; Class S says that we provide. */
/* fn_desc is the result descriptor passed to lib$find_file(); nextname()
   returns its dsc$a_pointer to the caller. */
static struct dsc$descriptor_s fn_desc = {0,DSC$K_DTYPE_T,DSC$K_CLASS_D,0};
/* wn_desc describes the wildcard file specification; nextname() fills in
   its length and pointer from the caller's string. */
static struct dsc$descriptor_s wn_desc = {0,DSC$K_DTYPE_T,DSC$K_CLASS_S,0};
/* One-character descriptor holding '\0'; appended to fn_desc so the result
   can be used as a C string. */
static $DESCRIPTOR(null_character,"\0");
/* Run-time library routines used by nextname(). */
extern int lib$find_file();
extern int lib$find_file_end();
/*
 * nextname -- return the next file name matching a wildcard specification.
 *
 * wildname: NUL-terminated wildcard file specification.  It is only looked
 *           at when no search is in progress (find_file_context == 0).
 *
 * Returns a pointer to the NUL-terminated name held in fn_desc, or 0 when
 * the search is over.  A search that ends for any reason other than "no
 * more files" (RMS$_NMF) is reported on stderr before 0 is returned.
 */
static char* nextname(wildname)
char *wildname;
{
    int status;

    if (find_file_context == 0)
    {
        /* New file name to process */
        wn_desc.dsc$w_length = strlen(wildname);
        wn_desc.dsc$a_pointer = wildname;
    }
    status = lib$find_file(&wn_desc,&fn_desc,&find_file_context,0,0,0,0);
    if (status & 1)
    {
        /* VMS RTL doesn't use '\0' terminated strings. */
        str$append(&fn_desc,&null_character);
        return fn_desc.dsc$a_pointer;
    }
    /* The original tested "status = RMS$_NMF" (an assignment), which hid
       every other failure; compare instead and report anything unexpected. */
    if (status != RMS$_NMF)
    {
        fprintf(stderr,"inpaths: file search for %s stopped, VMS status %d\n",
            wildname,status);
    }
    /* No more files (or the search failed): release the search context */
    lib$find_file_end(&find_file_context);
    find_file_context = 0;
    return 0;
}
/* got this off net.sources */
#include <stdio.h>
#define index strchr
/*
 * get option letter from argument vector
 */
int opterr = 1, /* useless, never set or used */
optind = 1, /* index into parent argv vector */
optopt; /* character checked for validity */
char *optarg; /* argument associated with option */
#define BADCH (int)'?'
#define EMSG ""
/* Print "<program><s><option letter>" on stderr and make getopt() return
   BADCH.  Wrapped in do/while(0) so it behaves as a single statement. */
#define tell(s) do { \
    fputs(*nargv,stderr); fputs(s,stderr); \
    fputc(optopt,stderr); fputc('\n',stderr); \
    return(BADCH); \
} while (0)
/*
 * getopt -- return the next option letter found in nargv.
 *
 * nargc, nargv: the argument count and vector, as passed to main().
 * ostr:         the accepted option letters; a letter followed by ':'
 *               takes an argument, which is returned through optarg.
 *
 * Returns the option letter, BADCH ('?') for an unknown letter or a missing
 * argument, or EOF when the options are exhausted (first non-option
 * argument, a lone "-", or an argument starting with "--").  optind is
 * left at the index of the first argument that was not consumed.  Once EOF
 * has been returned, further calls keep returning EOF until optind is
 * changed.
 */
int getopt(nargc,nargv,ostr)
int nargc;
char **nargv,
*ostr;
{
    static char *place = EMSG;  /* option letter processing */
    register char *oli;         /* option letter list index */
    char *index();

    if (!*place) {              /* update scanning pointer */
        if (optind >= nargc)
            return(EOF);
        place = nargv[optind];
        if (*place != '-' || !*++place) {
            /* Not an option.  Forget the argument so that a later call
               does not scan its characters as option letters. */
            place = EMSG;
            return(EOF);
        }
        if (*place == '-') {    /* found "--" */
            ++optind;
            place = EMSG;
            return(EOF);
        }
    }
    /* option letter okay? */
    if ((optopt = (int)*place++) == (int)':' || !(oli = index(ostr,optopt))) {
        if (!*place) ++optind;
        tell(": illegal option -- ");
    }
    if (*++oli != ':') {        /* don't need argument */
        optarg = NULL;
        if (!*place) ++optind;
    }
    else {                      /* need an argument */
        if (*place) optarg = place;     /* no white space */
        else if (nargc <= ++optind) {   /* no arg */
            place = EMSG;
            tell(": option requires an argument -- ");
        }
        else optarg = nargv[optind];    /* white space */
        place = EMSG;
        ++optind;
    }
    return(optopt);             /* dump back option letter */
}
#endif