[net.sources] arbitron program for notes readership survey

reid@decwrl.UUCP (Brian Reid) (11/01/86)

This notesfile version of the arbitron program was written by Rich Salz.
It's actually faster and better than the original news arbitron.
# This is a shell archive.  Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
#
# Wrapped by mirror!rs on Fri Mar 28 16:08:02 EST 1986
# Contents:  Makefile arbitron.1 arbitron.sh arbprog.c Shortnames
 
echo x - Makefile
sed 's/^XX//' > "Makefile" <<'@//E*O*F Makefile//'
XX##
XX##  MAKEFILE FOR NOTESFILE ARBITRON
XX##
XX##  The default target (ALL) builds arbprog and copies arbitron, arbprog
XX##  and the manual page to their destinations.
XX##

XX# If you don't have getopt, enable the next line.
XXGETOPT	= getopt.o

XX# If you have the portable directory routines and the ndir.h header
XX# file, leave the next two lines commented out.  Note:  only part
XX# of the library is used.
XX#DIRS	= closedir.o opendir.o readdir.o
XX#DIRFLG	= -DNDIR

XX# Arbprog needs some notes header files.  Put the path to the
XX# notes $HOME here.
XXHDR	= -I/src/local/notes/src
XX# The variable expanded here must be spelled exactly like the DIRFLG
XX# definition above, otherwise -DNDIR never reaches the compiler.
XXCFLAGS	= $(DIRFLG) $(HDR) -O

XX# Everything that gets linked into arbprog.
XXSTUFF	= arbprog.o $(GETOPT) $(DIRS)

XX# Where you want the programs to end up -- don't forget the trailing slash!
XX#DEST	= /usr/spool/notes/.utilities/
XXDEST	= ./

XX# Where you want the manpage to end up.
XXMANPAGE	= /usr/man/man1/arbitron.1

XX# How you want them to get there.  The leading '-' tells make to carry
XX# on even if the copy fails (e.g., copying a file onto itself when
XX# DEST is "./").
XXCP	= -cp

XXALL:		$(DEST)arbitron $(DEST)arbprog $(MANPAGE)

XX$(DEST)arbitron:	arbitron.sh
XX	$(CP) arbitron.sh $(DEST)arbitron
XX	chmod 755 $(DEST)arbitron

XX$(DEST)arbprog:		arbprog
XX	$(CP) arbprog $(DEST)arbprog
XX	strip $(DEST)arbprog

XX$(MANPAGE):		arbitron.1
XX	$(CP) arbitron.1 $(MANPAGE)

XX# Link with whatever compiler make was told to use (cc by default).
XXarbprog:		$(STUFF)
XX	$(CC) -o arbprog $(STUFF)

XX# Shar files
XXSHAR1	= Makefile arbitron.1 arbitron.sh arbprog.c Shortnames
XXSHAR2	= getopt.c ndir.h closedir.c opendir.c readdir.c seekdir.c telldir.c
XXSHAR:		SHAR.1 SHAR.2
XXSHAR.1:		$(SHAR1)
XX	shar $(SHAR1) >SHAR.1
XXSHAR.2:		$(SHAR2)
XX	shar $(SHAR2) >SHAR.2
@//E*O*F Makefile//
chmod u=rw,g=rw,o=rw Makefile
 
echo x - arbitron.1
sed 's/^XX//' > "arbitron.1" <<'@//E*O*F arbitron.1//'
XX.TH ARBITRON 1L "28 March 1985"
XX.SH NAME
XXarbitron, arbprog \- generate notesfiles readership report
XX.SH SYNOPSIS
XX.B arbitron
XX[
XX.BI \-c #readers
XX] [
XX.BI \-t #days
XX] [
XX.BI \-x debug_level
XX]
XX.SH DESCRIPTION
XX.I Arbitron
XXcollects notesfile readership statistics and formats a report that can
XXbe sent to Brian Reid's ``net survey.''
XXIt will typically be run from
XX.IR cron (8)
XXon the first or second day of each month.
XX.PP
XX.I Arbitron
XXitself is a small
XX.IR sh (1)
XXscript that calls
XX.I arbprog
XXto generate most of the statistics.
XX.PP
XXThere are a number of configuration parameters in the script that must
XXbe modified before the program is installed and used.
XX.PP
XX.I Arbprog
XXfirst scans through all file names in the notes spool directory,
XXcollecting the names of ``interesting'' notesfiles, along with the date
XXthey were last modified.
XX(An interesting notesfile \(em one that should be reported to the
XXsurvey \(em is one whose name starts with ``net.'' or ``mod.''; this is
XXa macro in the source that can be easily changed.)
XXThe program then scans through all sequencer entries and adds
XXup the readership of each notesfile gathered in the first pass.
XX.PP
XXThe metric that
XX.I arbprog
XXuses to determine if a notesfile is being read is perhaps somewhat
XXcrude:  the user must have been in the notesfile with the sequencer
XXon within some threshold number of days.
XXThe default threshold is fourteen; this can be changed with the ``\-t'' switch.
XX.PP
XX.I Arbprog
XXwill not report notesfiles with fewer than some number of readers.
XXThe default is one; this can be changed with the ``\-c'' (for cutoff)
XXswitch.
XX.PP
XXVarious levels of debugging can be turned on with the ``\-x'' switch.
XXThe argument should be a number between one and nine; the higher the
XXnumber the more output
XX.I arbprog
XXwill generate.
XXWith ``\-x9'' the output is truly staggering, but perhaps useful.
XX(Note:  You will probably want to run
XX.I arbprog
XXby itself if you do this, rather than
XX.IR arbitron .)
XX.SH BUGS
XXThe system as a whole is really a bit of a hack, although it's probably
XXgood enough for its intended purpose.
XX.SH AUTHOR
XXRich $alz, Mirror Systems (mirror!rs).
XX.SH "SEE ALSO"
XXBrian Reid's ``ratings sweeps'' in the
XX.I net.news.group
XXnotesfile.
@//E*O*F arbitron.1//
chmod u=rw,g=rw,o=rw arbitron.1
 
echo x - arbitron.sh
sed 's/^XX//' > "arbitron.sh" <<'@//E*O*F arbitron.sh//'
XX#! /bin/sh
XX# arbitron -- this program produces rating sweeps for USENET.
XX# This version, and the accompanying program, are for NOTES systems.
XX#
XX# To participate in the international monthly ratings sweeps, run
XX# "arbitron" every month, ON OR AFTER the first. I combine the results
XX# and post the information to net.news.group
XX#
XX# This script is based on one written by Brian Reid at Stanford, and
XX# updated and bugfixed by Spencer Thomas at Univ. of Utah and Geoff
XX# Kuenning at SAH Consulting.  It was written by Rich $alz at Mirror
XX# Systems (complete address in the arbprog source).
XX#
XX# All command-line arguments are handed to arbprog unchanged.

XX# Set up a call to the mailer so that the results go to the "ratings board."
XX# You may want to receive a copy also, hence the "usenet," below.
XX# uucp path: {sun, pyramid, hplabs, bellcore}!decwrl!netsurvey
XX#NIELSON=netsurvey@decwrl
XXNIELSON=cca!decvax!decwrl!netsurvey
XX#destination=/usr/ucb/more
XXdestination="${MAILER-mail} $NIELSON usenet"

XX# Where to find arbprog.  The default is relative to the current
XX# directory, so it only works when arbitron is started from the directory
XX# that holds arbprog; use a full path name if you run this from cron.
XXARBPROG=./arbprog

XX# We need to find the uucp name of your host
XX# BSD way to do it:
XXHOST=`(uuname -l ||  hostname) 2>&-`

XX# USG way to do it:
XX#HOST=`(uname -n || uuname -l ||  hostname) 2>&-`

XX# Here are several expressions, each of which figures out approximately how
XX# many people use this machine. Comment out all but 1 of them; pick the one
XX# you like best.
XX# # ###### Scheme #1: fast but usually returns too big a number
XX#lowUID=5
XX#highUID=9999
XX#NUSERS=`awk -F: "BEGIN {N=0}\\$3>=$lowUID && \\$3<=$highUID{N=N+1}END{print N}" </etc/passwd`
XX# # ###### Scheme #2 (works with BSD systems)
XXNUSERS=`last | sort -u +0 -1 | wc -l`
XX# # ###### Scheme #3 (works with USG systems)
XX#NUSERS=`who /etc/wtmp | sort -u +0 -1 | grep -v '^mp' | wc -l`

XX# If your awk blows up, below, you might need this.
XXNUSERS=`echo $NUSERS | tr -dc "0-9"`

XX# Remove our scratch files if we are interrupted.
XXtrap "rm -f /tmp/arb?$$; exit 1" 1 2 3 15

XX# Raw arbprog output:  a "NetReaders n" line plus one "count name" line
XX# per notesfile.  ${1+"$@"} passes the arguments through exactly as given
XX# (no re-splitting or globbing) and expands to nothing when there are none.
XX$ARBPROG ${1+"$@"} >/tmp/arbA$$

XX# From here on the positional parameters are the words of the date;
XX# $2 and $6 are used below for the ReportDate line.
XXset `date`
XXgrep NetReaders /tmp/arbA$$ |
XXawk '{ printf "Host\t\t'$HOST'\nUsers\t\t'$NUSERS'\nNetReaders\t%s\nReportDate\t'$2$6'\n", $2 }' >/tmp/arbB$$

XX# You may want to add sed commands to change short names into the legal
XX# ones if you're a non-4.[23] site.  See the "Shortnames" file.
XXsed </tmp/arbA$$ 			\
XX	-e '/NetReaders/d'		| sort -rn >>/tmp/arbB$$

XX$destination < /tmp/arbB$$
XXexec rm -f /tmp/arb?$$
@//E*O*F arbitron.sh//
chmod u=rw,g=rw,o=rw arbitron.sh
 
echo x - arbprog.c
sed 's/^XX//' > "arbprog.c" <<'@//E*O*F arbprog.c//'
XX/*
XX**  ARB_PROG
XX**
XX**  This program is intended to be called from the arbitron script.
XX**  It reads notesfiles descriptors to find the last time they've
XX**  been modified.  Then, it reads the sequencer entries to determine
XX**  how many active readers of each notesfile there are.
XX**
XX**  There are options to specify the threshold, the minimum number of
XX**  readers, and control the level of debugging output.  The info with
XX**  maximum output is staggering, but useful.  Anyhow, look at the code.
XX**
XX**  I know this program will work on Notes1.7; it should work on earlier
XX**  versions (1.6, at least); see OLD_STYLE, below.  It uses getopt and
XX**  the portable directory-accessing routines.  Public-domain copies of
XX**  both, if not found in the shar file, are available from me.  Note,
XX** though, that I have only run this program with Notes1.7 on BSD4.2.
XX**
XX**  --
XX**  Rich $alz	{mit-eddie, ihnp4!inmet, wjh12, cca, datacube}!mirror!rs
XX**  Mirror Systems	2067 Massachusetts Avenue  Cambridge, MA, 02140
XX**  Telephone:	617-661-0777
XX*/


XX#include "parms.h"
XX#include "structs.h"
XX#include <pwd.h>
XX#include <sys/types.h>
XX#include <sys/stat.h>
XX#ifdef NDIR
XX#include "ndir.h"
XX#else
XX#include <sys/dir.h>
XX#endif


XX/*
XX**  The notes Makefile normally defines this macro via a -D on the
XX**  cc command line...
XX*/
XX#ifndef MSTDIR
XX#define MSTDIR "/usr/spool/notes"
XX#endif

XX/*
XX**  Print a one-argument diagnostic on stderr when the debug level is at
XX**  least X.  The macro ends in a dangling "else" so that the semicolon
XX**  the caller writes after DEBUG(...) becomes the (empty) else branch;
XX**  this keeps the macro safe inside an enclosing if/else.
XX*/
XX#define DEBUG(X, Text, Arg)		\
XX	if (Debug >= (X)) fprintf(stderr, (Text), (Arg)); else

XX/*
XX**  One entry per interesting notesfile found in the spool directory.
XX*/
XXtypedef struct {
XX    time_t	 When;			/* Time of last update		*/
XX    char	 Name[NNLEN];		/* Notesfile name		*/
XX    int		 Readers;		/* Number of readers		*/
XX} NOTESFILE;

XXint			 Actives;	/* Users for whom Do() succeeded */
XXint			 Bad;		/* Users skipped: dead/inactive	*/
XXint			 Ignored;	/* Sequencer names with a ':'	*/
XXint			 Debug;		/* Debug level (verbosity)	*/
XXNOTESFILE		*Table;		/* List of active notesfiles	*/
XXint			 Tcount;	/* Number of active notesfiles	*/
XXint			 Cutoff = 0;	/* Report if Readers > Cutoff	*/

XXextern int		 errno;
XXextern int		 optind;
XXextern char		*optarg;
XXextern struct passwd	*getpwnam();
XXextern char		*ctime();
XXextern char		*strcpy();
XXextern char		*calloc();
XX


XX/*
XX**  INTERESTING(N) is true when the name N belongs to a notesfile that
XX**  should be reported to Nielson, i.e., when it begins with "mod." or
XX**  "net."; sites with other naming conventions will want to edit this.
XX**  Only the name is examined here -- callers walking a directory test
XX**  for an active slot themselves.
XX*/
XX#define INTERESTING(N)			\
XX	(strncmp((N), "mod.", 4) == 0 || strncmp((N), "net.", 4) == 0)



XX/*
XX**  ACTIVE(Np, Seq) is true when the owner of sequencer entry Seq counts
XX**  as a reader of notesfile *Np:  the time he was last in the notesfile,
XX**  plus the threshold, must reach the notesfile's last-update time.
XX*/
XX#define ACTIVE(Np, Seq)			\
XX	(GMTIME((Seq).lastin) + Threshold >= (Np)->When)


XX/*
XX**  The threshold used by ACTIVE.  It starts out as a number of days
XX**  (settable from the command line) and is scaled to seconds by main().
XX*/
XXtime_t		 Threshold = 14;


XX/*
XX**  As I recall, notesfiles versions earlier than 1.7 kept only the
XX**  broken-down time in their "when_f" structure.  If you're one of
XX**  those lame people using old software, try enabling the next line.
XX**
XX**  GMTIME(W) yields a time_t for the when_f structure W:  either the
XX**  stored w_gmttime field, or (OLD_STYLE) a value computed by _convert().
XX*/
XX/* #define OLD_STYLE			/*  */
XX#ifndef OLD_STYLE
XX#define GMTIME(W)		((W).w_gmttime)
XX#else
XX#define GMTIME(W)		_convert(&(W))


XX/*
XX**  Return a "close enough" approximation to Unix-style GMT time.
XX**  N.B.:  ignores time zones and leap years, but I believe this is
XX**  ok since we're mainly doing compares.  Only the year, month, hours
XX**  and minutes fields are used; the structure passed in is not modified.
XX*/
XXtime_t
XX_convert(W)
XX    register struct when_f	*W;
XX{
XX    static int			 Months[] = {
XX	31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
XX    };
XX    register time_t		 t;
XX    register int		 m;

XX    /* Work on a copy of the month, and never index past the table. */
XX    m = W->w_month;
XX    if (m > 12)
XX	m = 12;

XX    /* Days since 1970 -- whole years, then whole months. */
XX    for (t = (time_t)(W->w_year - 1970) * 365; --m >= 0; )
XX	t += Months[m];

XX    /* Long constants keep the arithmetic from overflowing a short int. */
XX    t *= 24L * 60L * 60L;
XX    t += W->w_hours * 60L * 60L;
XX    t += W->w_mins * 60L;
XX    return(t);
XX}
XX#endif	/* OLD_STYLE */
XX


XX/*
XX**  Scan the spool directory (the current directory), filling in the
XX**  Table with the names and modification dates of INTERESTING notesfiles.
XX**  The directory is read twice:  once to size the table, once to fill it.
XX**  On return Tcount is the number of entries actually filled in;
XX**  notesfiles whose descriptor can't be opened or read are reported on
XX**  stderr and left out.  Exits if the directory can't be opened or the
XX**  table can't be allocated.
XX*/
XXGetNames()
XX{
XX    register DIR		*Dir;
XX    register struct direct	*F;
XX    register NOTESFILE		*p;
XX    register int		 i;
XX    register int		 fd;
XX    struct descr_f		 Descr;
XX    char			 Name[NNLEN + NNLEN];

XX    if ((Dir = opendir(".")) == NULL) {
XX	fprintf(stderr, "opendir(\".\") in GetNames failed (%d).\n", errno);
XX	exit(1);
XX    }

XX    /* Pass one:  count the candidates so we know how much to allocate. */
XX    for (i = 0; F = readdir(Dir); )
XX	if (F->d_ino && F->d_namlen > 4 && INTERESTING(F->d_name))
XX	    i++;
XX    DEBUG(1, "Found %d interesting notesfiles.\n", i);

XX    Tcount = 0;
XX    Table = NULL;
XX    if (i == 0) {
XX	/* Nothing to do; don't ask calloc for zero elements. */
XX	closedir(Dir);
XX	return;
XX    }
XX    Table = (NOTESFILE *)calloc((unsigned int)i, sizeof(NOTESFILE));
XX    if (Table == NULL) {
XX	fprintf(stderr, "Can't get memory for %d notesfiles.\n", i);
XX	exit(1);
XX    }
XX    rewinddir(Dir);

XX    /*
XX    **  Pass two:  read each descriptor.  Stop once the table is full, in
XX    **  case notesfiles were created between the two passes.
XX    */
XX    for (p = Table; p < Table + i && (F = readdir(Dir)); ) {
XX	if (!F->d_ino || F->d_namlen <= 4 || !INTERESTING(F->d_name))
XX	    continue;
XX	/* The name has to fit in p->Name, trailing NUL included. */
XX	if (F->d_namlen >= NNLEN) {
XX	    fprintf(stderr, "Name %s is too long, ignored.\n", F->d_name);
XX	    continue;
XX	}
XX	(void)sprintf(Name, "%s/%s", F->d_name, INDEXN);
XX	if ((fd = open(Name, 0)) < 0) {
XX	    fprintf(stderr, "Can't open %s (%d)\n", Name, errno);
XX	    continue;
XX	}
XX	if (read(fd, (char *)&Descr, sizeof Descr) != sizeof Descr) {
XX	    fprintf(stderr, "Short read of %s (%d).\n", Name, errno);
XX	    (void)close(fd);
XX	    continue;
XX	}
XX	(void)close(fd);
XX	(void)strcpy(p->Name, F->d_name);
XX	p->When = GMTIME(Descr.d_lastm);
XX	/* Gross!  Hack!  DEBUG takes one argument, so print in two steps. */
XX	DEBUG(6, "%20s ", F->d_name);
XX	DEBUG(6, "%s", ctime(&p->When));
XX	p++;
XX    }
XX    closedir(Dir);

XX    /* Count only the slots that really hold a notesfile. */
XX    Tcount = (int)(p - Table);
XX}


XX/*
XX**  For each INTERESTING entry in the luser's sequencer file, bump the
XX**  readership count if he's an ACTIVE reader of that notesfile.
XX**  Name is the sequencer file name (found under SEQUENCER).  Returns 1
XX**  if the user is an active reader of at least one notesfile in the
XX**  Table, and 0 otherwise -- including when the file can't be opened.
XX*/
XXDo(Name)
XX    char		*Name;
XX{
XX    register NOTESFILE	*p;
XX    register FILE	*Stream;
XX    register int	 i;
XX    register int	 Valid;
XX    char		 Temp[100];
XX    struct seq_f	 Entry;

XX    /* Need room for SEQUENCER, the slash, Name, and the trailing NUL. */
XX    if (strlen(SEQUENCER) + strlen(Name) + 2 > (int)sizeof Temp) {
XX	fprintf(stderr, "Sequencer file name \"%s\" is too long.\n", Name);
XX	return(0);
XX    }
XX    (void)sprintf(Temp, "%s/%s", SEQUENCER, Name);
XX    if ((Stream = fopen(Temp, "r")) == NULL) {
XX	fprintf(stderr, "fopen(\"%s\") failed (%d).\n", Temp, errno);
XX	return(0);
XX    }

XX    DEBUG(2, "Doing %s\n", Name);
XX    for (Valid = 0; fread((char *)&Entry, sizeof Entry, 1, Stream) == 1; )
XX	if (INTERESTING(Entry.nfname)) {
XX	    /* Linear search; i ends up negative if there is no match. */
XX	    for (p = Table, i = Tcount; --i >= 0; p++)
XX		if (!strcmp(Entry.nfname, p->Name)) {
XX		    if (ACTIVE(p, Entry)) {
XX			Valid = 1;
XX			p->Readers++;
XX			DEBUG(9, "\t%s++\n", p->Name);
XX		    }
XX		    else
XX			DEBUG(9, "\t%s--\n", p->Name);
XX		    break;
XX		}
XX	    if (i < 0)
XX		DEBUG(9, "\t%s not found\n", Entry.nfname);
XX	}
XX    (void)fclose(Stream);
XX    return(Valid);
XX}


XXmain(ac, av)
XX    int				 ac;
XX    char			*av[];
XX{
XX    register DIR		*Dir;
XX    register NOTESFILE		*Note;
XX    register struct direct	*F;
XX    register int		 C;

XX    /* Parse JCL. */
XX    while ((C = getopt(ac, av, "c:x:t:")) != EOF)
XX	switch (C) {
XX	    default:
XX		fprintf(stderr, "Usage: %s [-v] [-t$days]\n", av[0]);
XX		exit(1);
XX		break;
XX	    case 'c':
XX		Cutoff = atoi(optarg);
XX		break;
XX	    case 'x':
XX		Debug = atoi(optarg);
XX		break;
XX	    case 't':
XX		Threshold = atoi(optarg);
XX		break;
XX	}
XX    Threshold *= 24 * 60 * 60;

XX    /* Move into the spool directory. */
XX    if (chdir(MSTDIR) < 0) {
XX	fprintf(stderr, "chdir(\"%s\") failed (%d).\n", MSTDIR, errno);
XX	exit(1);
XX    }

XX    /* Get the active notesfiles. */
XX    GetNames();

XX    /* Scan through all files in the sequencer directory. */
XX    if ((Dir = opendir(SEQUENCER)) == NULL) {
XX	fprintf(stderr, "opendir(\"%s\") failed (%d).\n", SEQUENCER, errno);
XX	exit(1);
XX    }
XX    while (F = readdir(Dir)) {
XX	/* Ignore empty slots and the . and .. entries. */
XX	if (!F->d_namlen
XX	 || !F->d_ino
XX	 || (F->d_name[0] == '.' && !F->d_name[1])
XX	 || (F->d_name[0] == '.' && F->d_name[1] == '.' && !F->d_name[2]))
XX	    continue;
XX	if (index(F->d_name, ':')) {
XX	    DEBUG(5, "Ignoring sequencer for %s\n", F->d_name);
XX	    Ignored++;
XX	    continue;
XX	}
XX	/* Ignore people who have "died." */
XX	if (!getpwnam(F->d_name)) {
XX	    DEBUG(5, "%s not in /etc/passwd, ignoring...\n", F->d_name);
XX	    Bad++;
XX	    continue;
XX	}
XX	if (Do(F->d_name))
XX	    Actives++;
XX	else {
XX	    DEBUG(5, "Problems with %s, ignoring...\n", F->d_name);
XX	    Bad++;
XX	}
XX    }
XX    closedir(Dir);

XX    /* Print the report. */
XX    printf("NetReaders %d\n", Actives);
XX    for (Note = Table, C = Tcount; --C >= 0; Note++)
XX	if (Note->Readers > Cutoff)
XX	    printf("%d %s\n", Note->Readers, Note->Name);

XX    exit(0);
XX}
@//E*O*F arbprog.c//
chmod u=rw,g=rw,o=rw arbprog.c
 
echo x - Shortnames
sed 's/^XX//' > "Shortnames" <<'@//E*O*F Shortnames//'
XXI am not sure about these names.  It's kind of cheating to escape
XXonly the first period, but it works...
XX	-e 's/mod\.c\.laser-pr/mod.c.laser-printers/'
XX	-e 's/mod\.c\.workstat/mod.c.workstations/'
XX	-e 's/mod\.pro.applet/mod.pro.appletalk/'
XX	-e 's/net\.announce\.a/net.announce.arpanet/'
XX	-e 's/net\.announce\.n/net.announce.newusers/'
XX	-e 's/net\.astro.expe/net.astro.expert/'
XX	-e 's/net\.games.boar/net.games.board/'
XX	-e 's/net\.games.ches/net.games.chess/'
XX	-e 's/net\.games.rogu/net.games.rogue/'
XX	-e 's/net\.games.triv/net.games.trivia/'
XX	-e 's/net\.games.vide/net.games.video/'
XX	-e 's/net\.ham-radio./net.ham-radio.packet/'
XX	-e 's/net\.lang.pasca/net.lang.pascal/'
XX	-e 's/net\.lang.prolo/net.lang.prolog/'
XX	-e 's/net\.mail.heade/net.mail.header/'
XX	-e 's/net\.math.symbo/net.math.symbolic/'
XX	-e 's/net\.micro.amig/net.micro.amiga/'
XX	-e 's/net\.micro.appl/net.micro.apple/'
XX	-e 's/net\.micro.atar/net.micro.atari/'
XX	-e 's/net\.micro.gdea/net.micro.gdead/'
XX	-e 's/net\.micro.trs-/net.micro.trs-80/'
XX	-e 's/net\.music.clas/net.music.classical/'
XX	-e 's/net\.music.gdea/net.music.gdead/'
XX	-e 's/net\.music.synt/net.music.synth/'
XX	-e 's/net\.news.confi/net.news.config/'
XX	-e 's/net\.news.newsi/net.news.newsite/'
XX	-e 's/net\.nlang.celt/net.nlang.celtic/'
XX	-e 's/net\.nlang.gree/net.nlang.greek/'
XX	-e 's/net\.nlang.indi/net.nlang.india/'
XX	-e 's/net\.politics.t/net.politics.t/'
XX	-e 's/net\.rec.skydiv/net.rec.skydive/'
XX	-e 's/net\.religion.c/net.religion.christian/'
XX	-e 's/net\.religion.j/net.religion.jewish/'
XX	-e 's/net\.sources.bu/net.sources.bugs/'
XX	-e 's/net\.sources.ga/net.sources.games/'
XX	-e 's/net\.sources.ma/net.sources.mac/'
XX	-e 's/net\.sport.base/net.sport.baseball/'
XX	-e 's/net\.sport.foot/net.sport.football/'
XX	-e 's/net\.sport.hock/net.sport.hockey/'
XX	-e 's/net\.unix-wizar/net.unix-wizards/'
XX	-e 's/net\.wanted.sou/net.wanted.sources/'
@//E*O*F Shortnames//
chmod u=rw,g=rw,o=rw Shortnames
 
exit 0