[comp.sources.d] arbitron bug fixes

news@becker.UUCP (UseNet News) (06/08/89)

The arbitron program for reporting news readership has a
couple of small bugs in it which
	
	1). cause it to fail under "ksh"
	2). exclude newsgroup hierarchies like
	    "unix-pc" & "u3b" from the report.

I have included below a shar file of the revised
version of arbitron. I've given a copy of this to the
original author, who tells me he will test it & make an
official release later this summer.

 --------- 8< --------- 8< --------- 8< --------- 8< --------- 8< ---------

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of shell archive."
# Contents:  arbitron
# Wrapped by news@becker on Wed Jun  7 19:05:02 1989
#
# How the unpacking below works:
#   * Every line of the archived file is stored with a leading "X"; sed
#     strips that prefix while writing the file.  The here-document
#     delimiter is quoted, so the shell performs no expansion on the
#     archived text.
#   * The byte count announced during extraction and tested afterwards
#     is the size of the extracted file.  It must be kept in step with the
#     X-prefixed text whenever that text is edited.
#   * An existing "arbitron" is left untouched unless the first argument
#     is "-c".
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f arbitron && test "${1}" != "-c" ; then
  echo shar: Will not over-write existing file \"arbitron\"
else
echo shar: Extracting \"arbitron\" \(7941 characters\)
sed "s/^X//" >arbitron <<'END_OF_arbitron'
X#! /bin/sh
X# @(#)arbitron	2.4.3	Mon Jun  5 05:03:55 EDT 1989
X# arbitron -- this program produces rating sweeps for USENET.
X#
X# Usage: arbitron
X#
X# To use this program, edit the "configuration" section below so that the
X# information is correct for your site, and then run it. It will produce a
X# readership survey for your machine and mail that survey to decwrl.dec.com,
X# with a cc to you.
X#
X# To participate in the international monthly ratings sweeps, 
X# run "arbitron" every month. I will run the statistics program on the first
X# day of each month; it will include any report that has reached it by that
X# time. To make sure your site's data is included, run the survey program no
X# later than the 20th day of each month.
X#
X# Brian Reid, DEC Western Research Lab, reid@decwrl.dec.com
X# Updated and bugfixed by 
X#	Spencer Thomas, U.of Utah
X#	Geoff Kuenning, SAH Consulting
X# Updated to work with 2.10.1 and older news systems by
X#	Lindsay Cleveland, AT&T Technologies/Bell Labs
X# Made to work with 16-bit address spaces by
X#	Andy Walker, Maths Dept., University of Nottingham, UK
X# Nagging Bourne shell bug fixed by
X#	Tom Donahue, Rabbit Software Corp
X# Newsgroup inclusion bug fix by
X#	Bruce Becker, G.T.S.
X#
X# Note that the results of this program are dependent on the rate at which
X# you expire news.  If you are a small site that expires news rapidly, the
X# results may indicate fewer active readers than you actually have.
X#
X###########################################################################
X# Configuration information. Edit this section to reflect your site data. #
XTMPDIR=/tmp
XNEWS=/usr/lib/news
XSPOOL=/usr/spool/news
X
X# Make a crude stab at determining the system type. If your installation has
X# only one type of system, you can edit out the "if" statement and just turn
X# this into an assignment statement of the correct value.
Xif [ -d /usr/ucb ]; then
X	STYPE="bsd"
Xelse
X	STYPE="usg"
Xfi
X
X# Range of /etc/passwd UID's that represent actual people (rather than
X# maintenance accounts or daemons or whatever)
XlowUID=100
XhighUID=9999
X
X# If you aren't running a distributed news system (nntpd & rrn, usually),
X# leave NEWSHOST blank. Else set it to the name of the host from which you
X# can rcp a copy of the active file.
XNEWSHOST=
X
X# uucp path: {sun, hplabs, pyramid, decvax, ucbvax}!decwrl!netsurvey
Xsummarypath="netsurvey@decwrl.dec.com $USER"
X
X
X# We need to find the uucp name of your host. If this code doesn't work,
X# then just put it in literally like this:
X#	hostname="ihnp4"
X
Xcase $STYPE in
X	bsd)	cmd='hostname || uuname -l';;
X	usg|sysv)	cmd='uname -n || uuname -l || hostname';;
X	*)	cmd='uuname -l';;
Xesac
X
Xhostname=`sh -c "$cmd" 2>&-`
X
XPATH=$NEWS:/usr/local/bin:/usr/ucb:/usr/bin:/bin
X############################################################################
Xexport PATH
X# ---------------------------------------------------------------------------
Xtrap "rm -f $TMPDIR/arb.*.$$; exit" 0 1 2 3 15
Xset `date`
Xdat="$2$6"
Xdestination="${MAILER-mail} $summarypath"
X
X################################
X# Here are several expressions, each of which figures out approximately how
X# many people use this machine. Comment out all but 1 of them; pick the one
X# you like best. Initially the most universal but least reliable of them is
X# uncommented.
X# # ###### Scheme #1: fast but usually returns too big a number
Xnusers=`awk -F: '
XBEGIN { N = 0 }
X$3 >= '"$lowUID"' && $3 <= '"$highUID"' { N = N + 1 }
XEND { print N }' </etc/passwd`
X
X# # ###### Scheme #2 (works with BSD systems)
X#nusers=`last | sort -u +0 -1 | wc -l`
X
X# # ###### Scheme #3 (works with USG systems)
X#nusers=`who /etc/wtmp | sort -u +0 -1 | wc -l`
X
X################################
X#
X# Set up awk scripts;  these are too large to pass as arguments on most
X# systems.
X#
X# This awk script generates the actual output report.
X# We use 'sed' to substitute in the shell variables to save ourselves
X# endless hassle trying to find quoting/backslashing problems.
X#
X# The input to this script consists of two types of lines (pre-sorted):
X#
X#	(1) Active-file lines.  These have four fields:  newsgroup name,
X#	    last article number, first existing article, 'y' or 'n'
X#	    to allow/disallow posting.
X#			mod.mac 00001 00001 y
X#
X#	(2) .newsrc-derived lines.  These have three fields:  the newsgroup
X#	    name, the user name and the articles-read information.  The latter
X#	    can be arbitrarily complex.  It can also be arbitrarily long;
X#	    this can potentially break either awk or sed, in which
X#	    case the script will not work.
X#			mod.map joe 1-199
X#
X#	The script uses the type 1 lines to define the newsgroups
X#	and their active article ranges.  The .newsrc (type 2) lines are
X#	then used to deduce which users are reading that group (a group
X#	is being read if the last article seen is in that group's active
X#	article range).
X#
Xsed    "/^#/d
X	s/NUSERS/$nusers/g
X	s/HOSTNAME/$hostname/g
X	s/DATE/$dat/g" > $TMPDIR/arb.fmt.$$ << 'DOG'
X# makereport -- utility for "arbitron". Early versions were copied from a
X# similar script distributed with "subscribers.sh" by Blonder, McCreery, and
X# Herron.
X#
XBEGIN	{
X	rdrcount  = 0
X	grpcount  = 0
X	realusers = 0
X}
X#
X# Active file line:  dispose of previous group (if any), record group, and
X# record first and last article numbers.  Set group's reader count to none.
XNF == 4 {
X	if (grpname != "") {
X		printf("%d %s\n", grpcount, grpname)
X	}
X	grpname  = $1
X	grpfirst = $3
X	grplast  = $2
X	grpcount = 0
X}
X#
X# .newsrc line.  Break out the final number, which is the last article that
X# has actually been read.  This is a pretty good indicator of the person's
X# true interest in the group.  If 'lastread' for the group is a current
X# (unexpired) article, record a reader for that group.  Finally, record
X# the user as a "real" user of the news system.
X#
XNF == 3 {
X	if ($1 != grpname) next;
X	n1 = split($3, n2, "-")
X	n3 = split(n2[n1], n4, ",")
X	lastread = n4[n3]
X	if ((grpfirst != grplast) && (lastread >= grpfirst) && \
X		(lastread <= grplast)) {
X		grpcount++
X		if (realuser[$2] != 1) {
X		    realuser[$2] = 1
X		    realusers++
X		}
X	}
X}
X#
X# End of file.  Print the report in 2 columns.
XEND	{
X	# For reorganized network, report a group even if nobody reads it.
X	# This will help us keep track of where the groups propagate.
X	if (grpname != "") {
X		printf("%d %s\n", grpcount, grpname)
X	}
X	printf("9999 Host\t\t%s\n", "HOSTNAME")
X	printf("9998 Users\t\t%d\n", NUSERS)
X	printf("9997 NetReaders\t%d\n", realusers)
X	printf("9996 ReportDate\t%s\n", "DATE")
X	printf("9995 SystemType\tnews-arbitron-2.4.3\n")
X}
XDOG
X
Xcat >$TMPDIR/arb.pwd.$$ <<'MOUSE'
XBEGIN	{
X	seen["/"] = 1
X	seen[""]  = 1
X}
X{
X	if (seen[$6] != 1)	{
X		printf("if [ -r %s/.newsrc ] ; then ", $6)
X		printf("sed -n '/: [0-9]/s/:/ %s/p' <%s/.newsrc; fi\n", $1, $6)
X		seen[$6] = 1
X	}
X}
XMOUSE
X
X# First, make sure we have an active file
Xif [ -z "$NEWSHOST" ]; then
X	ACTIVE=$NEWS/active
Xelse
X	ACTIVE=$TMPDIR/arb.active.$$
X	rcp $NEWSHOST:$NEWS/active $ACTIVE
Xfi
X
Xif [ ! -s $ACTIVE ]; then
X	echo arbitron: ACTIVE file missing or empty. Cannot continue.
X	exit 1
Xfi
X
X# Next, get the list of .newsrc files with duplicates and unreadable files
X# removed.
Xawk -F: -f $TMPDIR/arb.pwd.$$ </etc/passwd | sh >$TMPDIR/arb.tmp.$$
X
X# Check to make sure that we found some
Xif [ -s $TMPDIR/arb.tmp.$$ ]; then
X	# See if "active" file has 4 fields or only two (pre-2.10.2)
X	set `sed 1q < $ACTIVE`
X	if [ $# -eq 2 ]; then
X		egrep  '^[a-z][-a-z0-9_]*\.' $ACTIVE |
X		while read group last; do
X			dir=`echo "$group" | sed 's;\.;/;g'`
X			first=`ls $SPOOL/$dir | grep '^[0-9][0-9]*$' | sort -n | sed 1q`
X			case $STYPE in
X				usg)	echo "$group $last ${first:-$last} X";;
X		  		*)	echo "$group $last ${first-$last} X";;
X		   	esac
X		done
X	else
X		egrep '^[a-z][-a-z0-9_]*\.' $ACTIVE
X	fi |
X	sort - $TMPDIR/arb.tmp.$$ |
X	awk -f $TMPDIR/arb.fmt.$$ |
X	sort -nr |
X	sed    '/^$/d
X		/^[0-9]* to\./d
X		s/^999[0-9] //' |
X	$destination
Xelse
X	echo Unable to find any readable .newsrc files 1>&2
X	exit 1
Xfi
END_OF_arbitron
# Integrity check: compare the extracted size with the expected byte count.
# The command substitution is left unquoted on purpose so that any padding
# around the number printed by wc is removed by word splitting.
if test 7941 -ne `wc -c <arbitron`; then
    echo shar: \"arbitron\" unpacked with wrong size!
fi
chmod +x arbitron
# end of overwriting check
fi
# Completion banner promised in the archive header.  The explicit exit stops
# the shell here, so any text that follows the archive is never interpreted.
echo 'shar: End of shell archive.'
exit 0

 --------- 8< --------- 8< --------- 8< --------- 8< --------- 8< ---------

Cheers,
-- 
Bruce Becker	Toronto, Ont.
Internet: news@becker.UUCP	UUCP: ...!uunet!mnetor!becker!news
"'Comefrom' considered useful" - the Anti-Dijkstra