[news.software.anu-news] 'inpaths' program for VMS sites

FAC2@dayton.saic.com (Earle Ake) (11/23/89)

	The following is the inpaths program that Brian Reid wrote to run
at UNIX sites to report on NEWS flow and other things.  I have it working
at my site, although I have made no effort yet to optimize it.  Try it out
and see what you think.  There is a command procedure in comments to run
it.  Let me know if the code has problems or if you can find a way to speed
it up.  I have also sent a copy to Brian Reid so he can check to see if I
haven't messed it up for the UNIX folks.

-- 
_____________________________________________________________________________
             ____ ____    ___
Earle Ake   /___ /___/ / /     Science Applications International Corporation
           ____//   / / /__                 Dayton, Ohio
-----------------------------------------------------------------------------
Internet: fac2%dayton.saic.com@uunet.uu.net    uucp: uunet!dayvb!fac2



/* inpaths.c -- track the paths of incoming news articles and prepare
 *	      in a format suitable for decwrl pathsurveys
 *
 *
 * This program inputs a list of filenames of news articles, and outputs a
 * data report which should be mailed to the decwrl Network Monitoring
 * Project at address "pathsurvey@decwrl.dec.com". Please run it once a month
 * if you can, in time so that the results arrive at decwrl by the 1st
 * day of the month.
 *
 *
 * Run it like this:
 *
 *  cd /usr/spool/news
 *  find . -type f -print | inpaths "yourhost" | mail pathsurvey@decwrl.dec.com
 *
 *  where "yourhost" is the host name of your computer, e.g. "decwrl".
 *
 * The input to "inpaths" must be a list of the file names of news articles,
 * relative to the spooling directory. "./news/config/2771" and
 * "news/config/2771" are both legal inputs, but "/usr/spool/news/config/2771"
 * is not.  If you have some other way of generating a list of news file
 * names, such as running a script over the history file, you can use that
 * instead. Inpaths handles crossposting regardless of which technique
 * you use.
 *
 * If you get an error message "no traffic found. Check $CWD", then the
 * problem is most likely that the path names you are giving it are not
 * relative to the spooling directory, e.g. you are feeding it lines like
 * "/usr/spool/news/news/config/2771" instead of "./news/config/2771"
 * 
 * There are 3 options: -s, -m, and -l for short, medium, and long report.
 * The default is to produce a long report. If you are worried about mail
 * expenses you can send a shorter report. The long report is typically
 * about 50K bytes for a major site, and perhaps 25K bytes for a smaller
 * site. 
 *
 * Brian Reid
 *	V1	 Sep 1986
 *	V2.4	 May 1989
 *
 * Special thanks to Mel Pleasant and Bob Thrush for significant help with
 * portability bugs.
 *     
 */

/* inpaths.c V3.0 -- ported to VMS by Earle Ake 22 Nov 1989
 *
 * EMail to: fac2@Dayton.SAIC.COM    ..uunet!dayvb!fac2
 *
 * This is the inpaths program by Brian Reid which I ported to VMS so that
 * the DECUS UUCP sites could also participate in the inpaths reports.  I have
 * left the original code intact but have enclosed my VMS specific code
 * within #ifdef VMS statements.  Compile and link this and put the executable
 * into the UUCP_BIN directory.  Run the command procedure shown below a few
 * days before the first of the month; it captures the program's output in a
 * file called inpaths.report and mails it to pathsurvey@decwrl.dec.com.
 *
 */

/*

Below is a file I call inpaths.com which when run near the end of the month
will generate an inpaths.report and send it to decwrl.

I have put inpaths.exe into UUCP_BIN.  Put it wherever you want and then
edit the line below to reflect that change.

$! Cut out this command procedure and call it inpaths.com
$!
$! INPATHS.COM - Generate and mail the inpaths report to decwrl.
$!
$ inpaths := $UUCP_BIN:inpaths
$ site = f$trnlnm("UUCP_HOST_NAME")
$ define/user sys$output inpaths.report
$ inpaths 'site
$ mail/subject="INPATHS report from site: ''site'" -
	inpaths.report uucp%"""pathsurvey@decwrl.dec.com"""
$ delete inpaths.report;*
$ exit

 */


/* if you are compiling on a USG machine (SysV, etc),
   please uncomment the following line: */

/* #define SYSV		*/



#define VERSION "3.0"
#include <stdio.h>

#ifndef VMS
#include <fcntl.h>
#endif

#include <ctype.h>

#ifdef VMS
#include <types.h>
#include <stat.h>
#include <unixio.h>
#include <file.h>
#else
#include <sys/types.h>
#include <sys/stat.h>
#endif

/* Number of bytes of each article that are read when looking for headers. */
#define HEADBYTES 1024

/*
 * Old-style declaration of time(); the return type differs between the
 * SYSV and non-SYSV builds.  The conditional's name is repeated in
 * comments: bare tokens after #else/#endif are not valid ISO C.
 */
#ifdef SYSV
    long time();
#else /* !SYSV */
    time_t time();
#endif /* SYSV */

/* Library routines used below, declared K&R style (no prototypes). */
extern void exit();
extern char *malloc();
extern char *strcpy();

/* this is index() or strchr() included here for portability */

/*
 * index -- locate the first occurrence of chr in the string ptr.
 *
 * Returns a pointer to the matching character, or a null pointer when the
 * string ends without a match.  The terminating '\0' takes part in the
 * comparison, so searching for '\0' returns a pointer to the terminator.
 */
char *index(ptr,chr)
char *ptr,chr;
 {
    while (*ptr != chr) {
	if (*ptr == '\0') return ( (char *) NULL);
	ptr++;
    }
    return(ptr);
 }

#ifdef VMS
/*
 * Declared at file scope: ISO C does not allow a block-scope function
 * declaration to carry the "static" storage class.
 */
static char *nextname();
#endif

/*
 * main -- tally the Path: lines of the news articles on this machine and
 * print an inhosts/inpaths report on standard output.
 *
 * Usage:  inpaths [-s] [-m] [-l] hostname
 *	-s, -m, -l select a short, medium or long report (default long);
 *	hostname is copied into the report header and trailer lines.
 *
 * On UNIX the article file names are read from standard input, one per
 * line; on VMS they come from nextname().  An article is counted only when
 * the directory it was found in matches the first group of its Newsgroups:
 * header, so a crossposted article is charged once.
 *
 * Exits with status 0 after writing the report, or 1 on a usage error,
 * when memory runs out, or when no article was counted.
 */
int main (argc,argv)
  int argc;
  char **argv;
 {
    char linebuf[1024], *lptr, *cp, *cp1, *cp2;
    char *artname;		/* article name as received, for messages */
    char rightdelim;
    char *pathfield, *groupsfield;
    char artbuf[HEADBYTES+1];	/* +1 leaves room for a terminating '\0' */
    char ngfilename[256];
    struct stat statbuf;
#ifdef VMS
    char *bufstart;
#else
    char *scanlimit;
#endif
    char *hostname;
    int needHost;
    static int passChar[256];
    int isopen,columns,verbose,totalTraffic;
    long nowtime,age,agesum;
    float avgAge;

	/* definitions for getopt */
    extern int getopt();
    extern int optind;
    extern char *optarg;

 /* structure used to tally the traffic between two hosts */
    struct trec {
	struct trec *rlink;
	struct nrec *linkid;
	int tally;
    } ;

 /* structure to hold the information about a host */
    struct nrec {
	struct nrec *link;
	struct trec *rlink;
	char *id;
	long sentto; /* tally of articles sent to somebody from here */
    } ;
    struct nrec *hosthash[128], *hnptr, *list, *relay;
    struct trec *rlist;
    int i, article, gotbytes, c;

    hostname = "unknown";
    verbose = 2;
    while (( c=getopt(argc, argv, "sml" )) != EOF)
    switch (c) {
	case 's': verbose=0; break;
	case 'm': verbose=1; break;
	case 'l': verbose=2; break;
	case '?': fprintf(stderr,
	"usage: %s [-s] [-m] [-l] hostname\n",argv[0]);
	exit(1);
    }
    if (optind < argc) {
        hostname = argv[optind];
    } else {
	fprintf(stderr,"usage: %s [-s] [-m] [-l] `hostname`\n",argv[0]);
	exit(1);
    }

    fprintf(stderr,"computing %s inpaths for host %s\n",
	verbose==0 ? "short" : (verbose==1 ? "medium" : "long"),hostname);
    for (i = 0; i<128; i++) hosthash[i] = (struct nrec *) NULL;

/* Precompute character types to speed up the scan.  Only 7-bit characters
   may be part of a host name: the first character of a name is used
   directly as an index into the 128-entry hosthash table.		 */
    for (i = 0; i<=255; i++) {
	passChar[i] = 0;
	if (i < 128) {
	    if (isalpha(i) || isdigit(i)) passChar[i] = 1;
	    if (i == '-' || i == '.' || i == '_') passChar[i] = 1;
	}
    }
    totalTraffic = 0;
    nowtime = (long) time(0L);
    agesum = 0;

#ifdef VMS
    while ((lptr = nextname("news_device:[*...]*.itm")) != 0) {
#else
    /* fgets() rather than gets(): a long input line must not overrun
       linebuf.							 */
    while (fgets(linebuf, sizeof linebuf, stdin) != (char *) NULL) {
	lptr = linebuf;
#endif
	artname = lptr;
	isopen = 0;
	/* No host has been seen on this article's Path: line yet. */
	hnptr = (struct nrec *) NULL;

#ifndef VMS
/* Drop the newline that fgets() keeps */
	i = strlen(linebuf);
	if (i > 0 && linebuf[i-1] == '\n') linebuf[--i] = '\0';

/* Skip blank lines */
	if (i == 0) goto bypass;

/* Skip files that do not have pure numeric names */
	i--;
	do {
	    if (!isdigit((unsigned char) linebuf[i])) {
	        if (linebuf[i]=='/') break;
		goto bypass;
	    }
	    i--;
	} while (i>=0);
#endif

/* Open the file for reading */
#ifdef VMS
	article = open(lptr, O_RDONLY, 0);
#else
	article = open(lptr, O_RDONLY);
#endif
	isopen = (article >= 0);	/* descriptor 0 is a valid result */
	if (!isopen) goto bypass;
	if (fstat(article, &statbuf) != 0) goto bypass;

/* Age of the file in hours; added to agesum only if the article is counted */
	age = (nowtime - statbuf.st_mtime) / 3600;
/* Reject names that are not ordinary files 		*/
	if ((statbuf.st_mode & S_IFREG) == 0) goto bypass;

/* Pick the file name apart into an equivalent newsgroup name */
#ifdef VMS
	/* The newsgroup is the lower-cased text between '[' and ']'. */
	while (*lptr != '\0' && *lptr != '[') lptr++;
	if (*lptr == '\0') goto bypass;
	lptr++;
	i = 0;
	while (*lptr != '\0' && *lptr != ']') {
	    if (i >= (int) sizeof(ngfilename) - 1) goto bypass;
	    ngfilename[i++] = tolower((unsigned char) *lptr);
	    lptr++;
	}
	if (*lptr != ']') goto bypass;
	ngfilename[i] = '\0';
#else
	if (*lptr == '.') {
	    lptr++;
	    if (*lptr == '/') lptr++;
	}
	i = 0;
	while (*lptr != '\0') {
	    if (i >= (int) sizeof(ngfilename) - 1) goto bypass;
	    ngfilename[i++] = (*lptr == '/') ? '.' : *lptr;
	    lptr++;
	}
	/* Strip the trailing article number and the dot in front of it. */
	while (i > 0 && isdigit((unsigned char) ngfilename[i-1])) i--;
	if (i > 0 && ngfilename[i-1] == '.') i--;
	ngfilename[i] = '\0';
#endif

/* Read in the first few bytes of the article and find the "Path:" and
   "Newsgroups:" header fields.					 */
	pathfield = (char *) 0;
	groupsfield = (char *) 0;
#ifdef VMS
	/*
	 * Each read() is handled as one header line.  A line that is needed
	 * is kept in artbuf, '\0'-terminated, and the next read goes after
	 * it; any other line is overwritten by the next read.  A read of
	 * fewer than 10 bytes abandons the article, as does running out of
	 * room in artbuf.
	 * NOTE(review): the code below relies on each line read this way
	 * ending in '\n' -- confirm against the VAX C RTL documentation.
	 */
	bufstart = artbuf;
	do {
	    i = HEADBYTES - (int) (bufstart - artbuf);
	    if (i < 10) goto bypass;
	    gotbytes = read(article, bufstart, i);
	    if (gotbytes < 10) goto bypass;
	    bufstart[gotbytes] = '\0';
	    if (pathfield == (char *) 0
		&& strncmp(bufstart, "Path: ", 6) == 0) {
		pathfield = bufstart;
		bufstart += gotbytes + 1;
	    }
	    else if (groupsfield == (char *) 0
		&& strncmp(bufstart, "Newsgroups: ", 12) == 0) {
		groupsfield = bufstart;
		bufstart += gotbytes + 1;
	    }
	} while (pathfield == (char *) 0 || groupsfield == (char *) 0);
#else
	gotbytes = read(article, artbuf, HEADBYTES);
	if (gotbytes < 10) goto bypass;
	artbuf[gotbytes] = '\0';	/* keeps every string scan in bounds */

	scanlimit = &artbuf[gotbytes];
	for (cp=artbuf; cp < scanlimit; cp++) {
	    if (*cp == '\n') break;		/* blank line: end of header */
	    if (pathfield && groupsfield) break;
	    if (strncmp(cp, "Path: ", 6) == 0) pathfield = cp;
	    else if (strncmp(cp, "Newsgroups: ", 12) == 0) groupsfield = cp;
	    /* move to the end of this header line */
	    while (cp < scanlimit && *cp != '\n') cp++;
	}
	if (groupsfield == (char *) 0 || (pathfield == (char *) 0))
	    goto bypass;
#endif

/* Determine the name of the newsgroup to which this is charged. It is not
   necessarily the name of the file in which we found it; rather, use the
   first group named in the "Newsgroups:" field.			 */

	groupsfield += 12;	/* skip 'Newsgroups: ' */
	while (*groupsfield == ' ') groupsfield++;
	cp = (char *) index(groupsfield,'\n');
	if (cp != (char *) NULL) *cp = '\0';
	cp = (char *) index(groupsfield,',');
	if (cp != (char *) NULL) *cp = '\0';	/* crossposted: keep first */

/* To avoid double-billing, only charge the newsgroup if the pathname matches
   the contents of the Newsgroups: field. This will also prevent picking up
   junk and control messages.
 */
	if (strcmp(ngfilename,groupsfield)) goto bypass;

/* Extract all of the host names from the "Path:" field and put them in our
host table.								 */
	cp = pathfield;
	while (*cp != '\0' && *cp != '\n') cp++;
	if (*cp == '\0') {
	    fprintf(stderr,"%s: end of Path line not in buffer.\n",artname);
	    goto bypass;
	}

	totalTraffic++;
	agesum += age;		/* only counted articles enter the average */
	*cp = '\0';
	pathfield += 5;	/* skip 'Path:' */
	cp1 = pathfield;
	relay = (struct nrec *) NULL;
	rightdelim = '!';
	while (cp1 < cp) {
	    /* get next field; cp1 rests on the blank after "Path:" or on
	       the (already zeroed) delimiter of the previous field	 */
	    while (*cp1=='!') cp1++;
	    cp2 = ++cp1;
	    while (passChar[(unsigned char) *cp2]) cp2++;

	    rightdelim = *cp2; *cp2 = '\0';
	    if (rightdelim=='!' && *cp1 != '\0') {
	    /* see if already in the table; *cp1 passed passChar, so it is
	       a 7-bit character and a valid hosthash index		 */
		i = (unsigned char) *cp1;
		list = hosthash[i];
		while (list != NULL) {
		    /*
		     * Attempt to speed things up here a bit.  Since we hash
		     * on the first char, we see if the second char is a match
		     * before calling strcmp()
		     */
		    if (list->id[1] == cp1[1] && !strcmp(list->id, cp1)) {
			hnptr = list;
			break;
		    }
		    list = list->link;
		}
		if (list == NULL) {
		    /* get storage and splice in a new one */
		    hnptr = (struct nrec *) malloc(sizeof (struct nrec));
		    if (hnptr != NULL)
			hnptr->id = (char *) malloc(1+strlen(cp1));
		    if (hnptr == NULL || hnptr->id == NULL) {
			fprintf(stderr,"%s: out of memory\n",argv[0]);
			exit(1);
		    }
		    strcpy(hnptr->id, cp1);
		    hnptr->link = hosthash[i];
		    hnptr->rlink = (struct trec *) NULL;
		    hnptr->sentto = (long) 0;
		    hosthash[i] = hnptr;
		}
	    }
/*
At this point "hnptr" points to the host record of the most recent host
found on this Path: line (null if none yet), and "relay" points to the
record of the host before it.  The Path means that news has flowed from
host "hnptr" to host "relay", so we tally one message for that link.  The
tally record lives on the list hanging off "relay" and names "hnptr" as
its source; the source's "sentto" count is incremented as well.
*/

	    if (relay != NULL && relay != hnptr) {
		rlist = relay->rlink;
		while (rlist != NULL && rlist->linkid != hnptr)
		    rlist = rlist->rlink;
		if (rlist == NULL) {
		    rlist = (struct trec *) malloc(sizeof (struct trec));
		    if (rlist == NULL) {
			fprintf(stderr,"%s: out of memory\n",argv[0]);
			exit(1);
		    }
		    rlist->rlink = relay->rlink;
		    relay->rlink = rlist;
		    rlist->linkid = hnptr;
		    rlist->tally = 0;
		}
		rlist->tally++;
		hnptr->sentto++;
	    }

	    cp1 = cp2;
	    relay = hnptr;
	    if (rightdelim == ' ' || rightdelim == '(') break;
	}
bypass: if (isopen) close(article) ;
    }
/* Now dump the host table */
    if (!totalTraffic) {
	fprintf(stderr,"%s: error--no traffic found. Check $CWD.\n",argv[0]);
	exit(1);
    }

    /* agesum is in hours; report the average age in days */
    avgAge = ((double) agesum) / (24.0*(double) totalTraffic);
    printf("ZCZC begin inhosts %s %s %d %d %3.1f\n",
    	VERSION,hostname,verbose,totalTraffic,avgAge);
    for (i = 0; i < 128; i++) {
	for (list = hosthash[i]; list != NULL; list = list->link) {
	    if (list->rlink != NULL) {
		if (verbose > 0 || (100*list->sentto > totalTraffic))
		    printf("%ld\t%s\n",list->sentto, list->id);
	    }
	}
    }
    printf("ZCZC end inhosts %s\n",hostname);

    printf("ZCZC begin inpaths %s %s %d %d %3.1f\n",
        VERSION,hostname,verbose,totalTraffic,avgAge);
    for (i = 0; i < 128; i++) {
	for (list = hosthash[i]; list != NULL; list = list->link) {
	    if (!(verbose > 1 || (100*list->sentto > totalTraffic)))
		continue;
	    if (list->rlink == NULL) continue;

	    /* "<host> H " starts every output line for this host; it is
	       printed straight from list->id, so no fixed-size scratch
	       buffer is involved.					 */
	    columns = 3+strlen(list->id);
	    needHost = 1;
	    for (rlist = list->rlink; rlist != NULL; rlist = rlist->rlink) {
		if (
		     (100*rlist->tally > totalTraffic)
		  || ((verbose > 1)&&(5000*rlist->tally>totalTraffic))
		   ) {
		    if (needHost) printf("%s H ",list->id);
		    needHost = 0;
		    relay = rlist->linkid;
		    if (columns > 70) {
			printf("\n%s H ",list->id);
			columns = 3+strlen(list->id);
		    }
		    printf("%d Z %s U ", rlist->tally, relay->id);
		    columns += 9+strlen(relay->id);
		}
	    }
	    if (!needHost) printf("\n");
	}
    }
    printf("ZCZC end inpaths %s\n",hostname);
    fclose(stdout);
    exit(0);
    return 0;	/* not reached */
}
#ifdef	VMS
#include	<descrip.h>
#include	<errno.h>
#include	<nam.h>
#include	<rmsdef.h>

static	int	find_file_context = 0;

/* Class D tells the RTL to allocate space; Class S says that we provide. */
static	struct dsc$descriptor_s	fn_desc = {0,DSC$K_DTYPE_T,DSC$K_CLASS_D,0};
static	struct dsc$descriptor_s	wn_desc = {0,DSC$K_DTYPE_T,DSC$K_CLASS_S,0};

static	$DESCRIPTOR(null_character,"\0");

extern	int	lib$find_file();
extern	int	lib$find_file_end();

static char* nextname(wildname)
	char	*wildname;
{
	char	errbuf[100];
	int	status;

if (find_file_context == 0)
	{
	/* New file name to process */
	wn_desc.dsc$w_length = strlen(wildname);
	wn_desc.dsc$a_pointer = wildname;
	}

if ((status = lib$find_file(&wn_desc,&fn_desc,&find_file_context,0,0,0,0)) & 1)
	{
	/* VMS RTL doesn't use '\0' terminated strings. */
	str$append(&fn_desc,&null_character);
	return fn_desc.dsc$a_pointer;
	}
 else if (status = RMS$_NMF)
	{
	/* No more files */
	lib$find_file_end(&find_file_context);
	return 0;
	}
/*
 else	{
*/
	/* Help perror() */
/*
	errno = EVMSERR;
	vaxc$errno = status;

	sprintf(errbuf, "%s: %s", Argv[0], Cfile);
	perror(errbuf);
	return 0;
	}
*/
}
/* got this off net.sources */
#include <stdio.h>

#define	index	strchr

/*
 * get option letter from argument vector
 */
int	opterr = 1,		/* useless, never set or used */
	optind = 1,		/* index into parent argv vector */
	optopt;			/* character checked for validity */
char	*optarg;		/* argument associated with option */

#define BADCH	(int)'?'
#define EMSG	""
#define tell(s)	fputs(*nargv,stderr);fputs(s,stderr); \
		fputc(optopt,stderr);fputc('\n',stderr);return(BADCH);

/*
 * getopt -- return the next option letter found in nargv.
 *
 * nargc, nargv	the argument count and vector as passed to main()
 * ostr		the legal option letters; a letter followed by ':' takes
 *		an argument, which is returned through optarg
 *
 * Returns the option letter, BADCH ('?') after printing a message on
 * stderr for an unknown letter or a missing argument, or EOF when the
 * options are exhausted: optind has reached nargc, the next argument does
 * not start with '-', is a lone "-", or starts with "--" (which is
 * skipped).  optind is left at the index of the first unprocessed
 * argument.
 *
 * The scan position is reset whenever EOF is returned, so a further call
 * returns EOF again rather than treating the letters of the first
 * non-option argument as options.
 */
int getopt(nargc,nargv,ostr)
int	nargc;
char	**nargv,
	*ostr;
{
	static char	*place = EMSG;	/* option letter processing */
	register char	*oli;		/* option letter list index */
	char	*index();

	if(!*place) {			/* update scanning pointer */
		if(optind >= nargc || *(place = nargv[optind]) != '-' || !*++place) {
			place = EMSG;	/* do not keep pointing into a non-option */
			return(EOF);
		}
		if (*place == '-') {	/* found "--" */
			++optind;
			place = EMSG;
			return(EOF);
		}
	}				/* option letter okay? */
	if ((optopt = (int)*place++) == (int)':' || !(oli = index(ostr,optopt))) {
		if(!*place) ++optind;
		tell(": illegal option -- ");
	}
	if (*++oli != ':') {		/* don't need argument */
		optarg = NULL;
		if (!*place) ++optind;
	}
	else {				/* need an argument */
		if (*place) optarg = place;	/* no white space */
		else if (nargc <= ++optind) {	/* no arg */
			place = EMSG;
			tell(": option requires an argument -- ");
		}
	 	else optarg = nargv[optind];	/* white space */
		place = EMSG;
		++optind;
	}
	return(optopt);			/* dump back option letter */
}
#endif

gih900@UUNET.UU.NET (Geoff Huston) (11/25/89)

>        The following is the inpaths program that Brian Reid wrote to run
>at UNIX sites to report on NEWS flow and other things.  I have it working
>at my site, although I have made no effort yet to optimize it.  Try it out
>and see what you think.  There is a command procedure in comments to run
>it.  Let me know if the code has problems or if you can find a way to speed
>it up.  I have also sent a copy to Brian Reid so he can check to see if I
>haven't messed it up for the UNIX folks.
     
I too have put it up - that's a nice port, as the compilation, link and execution
were all painless on VMS!
     
Geoff Huston