[news.admin] arbitron program

reid@decwrl.DEC.COM (Brian Reid) (11/01/87)

This is the source for the "arbitron" program that is used to produce the
data for the monthly USENET readership surveys in news.lists. It is posted to
these newsgroups rather than to a source group so that it will be widely
available but will not be archived.

#! /bin/sh
# @(#)arbitron	2.4.2	06/05/87
# arbitron -- this program produces rating sweeps for USENET.
#
# Usage: arbitron
#
# To use this program, edit the "configuration" section below so that the
# information is correct for your site, and then run it. It will produce a
# readership survey for your machine and mail that survey to decwrl, with
# a cc to you.
#
# To participate in the international monthly ratings sweeps, 
# run "arbitron" every month. I will run the statistics program on the last
# day of each month; it will include any report that has reached it by that
# time. To make sure your site's data is included, run the survey program no
# later than the 20th day of each month.
#
# Brian Reid, DEC Western Research Lab, reid@decwrl
# Updated and bugfixed by 
#	Spencer Thomas, U.of Utah
#	Geoff Kuenning, SAH Consulting
# Updated to work with 2.10.1 and older news systems by
#	Lindsay Cleveland, AT&T Technologies/Bell Labs
# Made to work with 16-bit address spaces by
#	Andy Walker, Maths Dept., University of Nottingham, UK
# Nagging Bourne shell bug fixed by
#	Tom Donahue, Rabbit Software Corp
#
# Note that the results of this program are dependent on the rate at which
# you expire news.  If you are a small site that expires news rapidly, the
# results may indicate fewer active readers than you actually have.
#
###########################################################################
# Configuration information. Edit this section to reflect your site data. #
TMPDIR=/tmp
NEWS=/usr/lib/news
SPOOL=/usr/spool/news

# Make a crude stab at determining the system type. If your installation has
# only one type of system, you can edit out the "if" statement and just turn
# this into an assignment statement of the correct value.
if [ -d /usr/ucb ]
then
    STYPE="bsd"
else
    STYPE="usg"
fi

# Range of /etc/passwd UID's that represent actual people (rather than
# maintenance accounts or daemons or whatever)
lowUID=5
highUID=9999

# If you aren't running a distributed news system (nntpd & rrn, usually),
# leave NEWSHOST blank. Else set it to the name of the host from which you
# can rcp a copy of the active file.
NEWSHOST=

# uucp path: {ihnp4, decvax, ucbvax}!decwrl!netsurvey
# summarypath="netsurvey@decwrl.dec.com $USER"
summarypath="ihnp4!decwrl!netsurvey $USER"

# We need to find the uucp name of your host. If this code doesn't work,
# then just put it in literally like this:
#	hostname="ihnp4"

case $STYPE in
	bsd) cmd='hostname || uuname -l';;
	sysv)cmd='uname -n || uuname -l || hostname';;
	*)   cmd='uuname -l';;
esac;

hostname=`sh -c "$cmd" 2>&-`

PATH=$NEWS:/usr/local/bin:/usr/ucb:/usr/bin:/bin
############################################################################
export PATH
# ---------------------------------------------------------------------------
trap "rm -f $TMPDIR/arb.*.$$; exit" 0 1 2 3 15
set `date`
dat="$2$6"
destination="${MAILER-mail} $summarypath"

################################
# Here are several expressions, each of which figures out approximately how
# many people use this machine. Comment out all but 1 of them; pick the one
# you like best. Initially the most universal but least reliable of them is
# uncommented.
# # ###### Scheme #1: fast but usually returns too big a number
nusers=`awk -F: "BEGIN {N=0}\\$3>=$lowUID && \\$3<=$highUID{N=N+1}END{print N}" </etc/passwd`

# # ###### Scheme #2 (works with BSD systems)
#nusers=`last | sort -u +0 -1 | wc -l`

# # ###### Scheme #3 (works with USG systems)
#nusers=`who /etc/wtmp | sort -u +0 -1 | wc -l`

################################
#
# Set up awk scripts;  these are too large to pass as arguments on most
# systems.
#
# This awk script generates the actual output report.
# We use 'sed' to substitute in the shell variables to save ourselves
# endless hassle trying to find quoting/backslashing problems.
#
# The input to this script consists of two types of lines (pre-sorted):
#
#	(1) Active-file lines.  These have four fields:  newsgroup name,
#	    first existing article, last article number, 'y' or 'n'
#	    to allow/disallow posting.
#			mod.mac 00001 00001 y
#
#	(2) .newsrc-derived lines.  These have three fields:  the newsgroup
#	    name, the user name and the articles-read information.  The latter
#	    can be arbitrarily complex.  It can also be arbitrarily long;
#	    this can potentially break either awk or sed, in which
#	    case the script will not work.
#			mod.map joe 1-199
#
#	The script uses the type 1 lines to define the newsgroups
#	and their active article ranges.  The .newsrc (type 2) lines are
#	then used to deduce which users are reading that group (a group
#	is being read if the last article seen is in that group's active
#	article range).
#
sed "/^#/d
     s/NUSERS/$nusers/g
     s/HOSTNAME/$hostname/g
     s/DATE/$dat/g" > $TMPDIR/arb.fmt.$$ << 'DOG'
# makereport -- utility for "arbitron". Early versions were copied from a
# similar script distributed with "subscribers.sh" by Blonder, McCreery, and
# Herron.
#
	BEGIN	{ rdrcount = 0 ; reader = "" ; grpcount = 0 ; realusers = 0}
#
# Active file line:  dispose of previous group (if any), record group, and
# record first and last article numbers.  Set group's reader count to none.
	NF == 4 { if (grpname != "") {
			printf("%d %s\n",grpcount, grpname)
		  }
		  grpname = $1
		  grpfirst = $3
		  grplast = $2
		  grpcount = 0
		}
#
# .newsrc line.  Break out the final number, which is the last article that
# has actually been read.  This is a pretty good indicator of the person's
# true interest in the group.  If 'lastread' for the group is a current
# (unexpired) article, record a reader for that group.  Finally, record
# the user as a "real" user of the news system.
#
	NF == 3 { if ($1 != grpname) next;
		  n1 = split($3, n2, "-")
		  n3 = split(n2[n1], n4, ",")
		  lastread = n4[n3]
	if ((grpfirst != grplast) && (lastread >= grpfirst) && (lastread <= grplast)) {
			grpcount++
			if (realuser[$2] != 1) {
			    realuser[$2] = 1
			    realusers++
			}
		  }
		}
#
# End of file.  Print the report in 2 columns.
	END	{ printf("9999 Host\t\t%s\n","HOSTNAME")
		  printf("9998 Users\t\t%d\n",NUSERS)
		  printf("9997 NetReaders\t%d\n",realusers)
		  printf("9996 ReportDate\t%s\n","DATE")
		  printf("9995 SystemType\tnews-arbitron-2.4\n")
# For reorganized network, report a group even if nobody reads it. This will
# help us keep track of where the groups propagate.
		  printf("%d %s\n",grpcount, grpname)
		}
DOG

cat >$TMPDIR/arb.pwd.$$ <<'MOUSE'
BEGIN	{ seen["/"]=1; seen[""] = 1; }
	{ if (seen[$6]!=1) {
		printf("if [ -r %s/.newsrc ] ; then ", $6)
		printf("sed -n '/: [0-9]/s/:/ %s/p' <%s/.newsrc; fi\n",$1,$6)
		seen[$6]=1;
	  }
}
MOUSE

# First, make sure we have an active file
if [ -z "$NEWSHOST" ]
then ACTIVE=$NEWS/active
else ACTIVE=/tmp/arb.active.$$
     rcp $NEWSHOST:$NEWS/active $ACTIVE
fi

if [ ! -s $ACTIVE ]
then
    echo arbitron: ACTIVE file missing or empty. Cannot continue.
    exit 1
fi

# Next, get the list of .newsrc files with duplicates and unreadable files
# removed.
awk -F: -f $TMPDIR/arb.pwd.$$ </etc/passwd | sh >$TMPDIR/arb.tmp.$$

# Check to make sure that we found some
if [ -s $TMPDIR/arb.tmp.$$ ]
then # See if "active" file has 4 fields or only two (pre-2.10.2)
     set `sed 1q < $ACTIVE`
     if [ $# -eq 2 ]
     then egrep  '^[a-z]*\.' $ACTIVE |
	  while read group last
	  do dir=`echo "$group" | sed 's;\.;/;g'`
	     first=`ls $SPOOL/$dir | grep '^[0-9]*' | sort -n | sed 1q`
	     case $STYPE in
		usg) echo "$group $last ${first:-$last} X";;
		  *) echo "$group $last ${first-$last} X"
	     esac
	  done
     else egrep '^[a-z]*\.' $ACTIVE
     fi |
     sort - $TMPDIR/arb.tmp.$$ |
     awk -f $TMPDIR/arb.fmt.$$ |
     sort -nr |
     sed '/^$/d
	  s/^999[0-9] //' |
     $destination
else echo Unable to find any readable .newsrc files 2>&1
     exit 1
fi

tom@tsdiag.UUCP (12/12/87)

this may be a repost (i haven't seen it yet)

I am trying to get arbitron running and parm.h and structs.h were not
in the dist file...

any help?

-- 
Thomas A. Moulton, W2VY          Life is too short to be mad about things.
Home: (201) 779-W2VY             Packet: w2vy@kd6th  Voice: 145.190 (r)
Work: (201) 492-4880 x3226       FAX:  (201) 493-9167
Concurrent Computer Corp.        uucp: ...!ihnp4!hotps!ka2qhd!w2vy

reid@decwrl.dec.com (Brian Reid) (12/15/87)

In article <147@tsdiag.UUCP> tom@tsdiag.UUCP writes:
>this may be a repost (i haven't seen it yet)
>I am trying to get arbitron running and parm.h and structs.h were not
>in the dist file...
>any help?

When the "arbitron program" leaves this site, it is a shell script that does
not call for any compilation or linking. I'm puzzled as to why it would need
header files. Can you elaborate?

wunder@hpcea.CE.HP.COM (Walter Underwood) (12/16/87)

>I am trying to get arbitron running and parm.h and structs.h were not
>in the dist file...

You are obviously trying to install the Notesfiles version of
arbitron.  parms.h and structs.h are header files in the Notes source.
If you are running Netnews, you probably want the News version of
arbitron, which is shell-based (no C code).

wunder

tom@tsdiag.UUCP (12/22/87)

After looking at what You posted i guess the two items were unrelated...

item #1:
>From: reid@decwrl.UUCP
Newsgroups: comp.sources.d,news.admin
Subject: arbitron program (v2.4.2--last updated 4 June 1987)
Message-ID: <12163@decwrl.DEC.COM>
Date: 1 Dec 87 15:59:46 GMT
Sender: reid@decwrl.DEC.COM
Organization: DEC Western Research Laboratory
Lines: 237
Xref: tsdiag comp.sources.d:1318 news.admin:1107

This is the script for the "arbitron" system that is used to produce the
data for the monthly USENET readership surveys in news.lists. It is not so
much a "source" as it is a piece of USENET folklore, posted every month so
that new people will be sure to find it. (CF the "History of USENET" articles
posted every month). It is posted to these newsgroups rather than to a source
group so that it will be widely available but will not be archived.

#! /bin/sh
# @(#)arbitron	2.4.2	06/05/87
# arbitron -- this program produces rating sweeps for USENET.
#
# Usage: arbitron
-------------------------
item #2:
>From: reid@decwrl.UUCP
Newsgroups: comp.sources.d,news.admin
Subject: notesfile arbitron (v2--last updated 6 November 1986)
Message-ID: <12164@decwrl.DEC.COM>
Date: 1 Dec 87 15:59:56 GMT
Sender: reid@decwrl.DEC.COM
Organization: DEC Western Research Laboratory
Lines: 525
Xref: tsdiag comp.sources.d:1319 news.admin:1108

This notesfile version of the arbitron program was written by Rich Salz.
It is probably somewhat obsolete by now (software rot sets in rapidly
in the USENET world) but it seems to work.
# This is a shell archive.  Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
#
# Wrapped by decwrl!reid on Thu Nov  6 11:15:18 PST 1986
# Contents:  Makefile arbitron.1 arbitron.sh arbprog.c Shortnames
 
echo x - Makefile
sed 's/^XX//' > "Makefile" <<'@//E*O*F Makefile//'
---------------------------------------------------------

I was asking for the header files for item #2
I assumed it was part of the package since the names are about the same
and they were posted together.  And your comment about #2 being obsolete
looks like humor (due to the extra comment)...

End of confusion? (maybe the fun has just begun...)
-- 
Thomas A. Moulton, W2VY          Life is too short to be mad about things.
Home: (201) 779-W2VY             Packet: w2vy@kd6th  Voice: 145.190 (r)
Work: (201) 492-4880 x3226       FAX:  (201) 493-9167
Concurrent Computer Corp.        uucp: ...!ihnp4!hotps!ka2qhd!w2vy

reid@decwrl.DEC.COM (Brian Reid) (07/02/88)

This is the script for the "arbitron" system that is used to produce the
data for the monthly USENET readership surveys in news.lists. It is not so
much a "source" as it is a piece of USENET folklore, posted every month so
that new people will be sure to find it. (CF the "History of USENET" articles
posted every month). It is posted to these newsgroups rather than to a source
group so that it will be widely available but will not be archived.

#! /bin/sh
# @(#)arbitron	2.4.2	06/05/87
# arbitron -- this program produces rating sweeps for USENET.
#
# Usage: arbitron
#
# To use this program, edit the "configuration" section below so that the
# information is correct for your site, and then run it. It will produce a
# readership survey for your machine and mail that survey to decwrl, with
# a cc to you.
#
# To participate in the international monthly ratings sweeps, 
# run "arbitron" every month. I will run the statistics program on the last
# day of each month; it will include any report that has reached it by that
# time. To make sure your site's data is included, run the survey program no
# later than the 20th day of each month.
#
# Brian Reid, DEC Western Research Lab, reid@decwrl
# Updated and bugfixed by 
#	Spencer Thomas, U.of Utah
#	Geoff Kuenning, SAH Consulting
# Updated to work with 2.10.1 and older news systems by
#	Lindsay Cleveland, AT&T Technologies/Bell Labs
# Made to work with 16-bit address spaces by
#	Andy Walker, Maths Dept., University of Nottingham, UK
# Nagging Bourne shell bug fixed by
#	Tom Donahue, Rabbit Software Corp
#
# Note that the results of this program are dependent on the rate at which
# you expire news.  If you are a small site that expires news rapidly, the
# results may indicate fewer active readers than you actually have.
#
###########################################################################
# Configuration information. Edit this section to reflect your site data. #
TMPDIR=/tmp
NEWS=/usr/lib/news
SPOOL=/usr/spool/news

# Make a crude stab at determining the system type. If your installation has
# only one type of system, you can edit out the "if" statement and just turn
# this into an assignment statement of the correct value.
if [ -d /usr/ucb ]
then
    STYPE="bsd"
else
    STYPE="usg"
fi

# Range of /etc/passwd UID's that represent actual people (rather than
# maintenance accounts or daemons or whatever)
lowUID=5
highUID=9999

# If you aren't running a distributed news system (nntpd & rrn, usually),
# leave NEWSHOST blank. Else set it to the name of the host from which you
# can rcp a copy of the active file.
NEWSHOST=

# uucp path: {ihnp4, decvax, ucbvax}!decwrl!netsurvey
# summarypath="netsurvey@decwrl.dec.com $USER"
summarypath="ihnp4!decwrl!netsurvey $USER"

# We need to find the uucp name of your host. If this code doesn't work,
# then just put it in literally like this:
#	hostname="ihnp4"

case $STYPE in
	bsd) cmd='hostname || uuname -l';;
	sysv)cmd='uname -n || uuname -l || hostname';;
	*)   cmd='uuname -l';;
esac;

hostname=`sh -c "$cmd" 2>&-`

PATH=$NEWS:/usr/local/bin:/usr/ucb:/usr/bin:/bin
############################################################################
export PATH
# ---------------------------------------------------------------------------
trap "rm -f $TMPDIR/arb.*.$$; exit" 0 1 2 3 15
set `date`
dat="$2$6"
destination="${MAILER-mail} $summarypath"

################################
# Here are several expressions, each of which figures out approximately how
# many people use this machine. Comment out all but 1 of them; pick the one
# you like best. Initially the most universal but least reliable of them is
# uncommented.
# # ###### Scheme #1: fast but usually returns too big a number
nusers=`awk -F: "BEGIN {N=0}\\$3>=$lowUID && \\$3<=$highUID{N=N+1}END{print N}" </etc/passwd`

# # ###### Scheme #2 (works with BSD systems)
#nusers=`last | sort -u +0 -1 | wc -l`

# # ###### Scheme #3 (works with USG systems)
#nusers=`who /etc/wtmp | sort -u +0 -1 | wc -l`

################################
#
# Set up awk scripts;  these are too large to pass as arguments on most
# systems.
#
# This awk script generates the actual output report.
# We use 'sed' to substitute in the shell variables to save ourselves
# endless hassle trying to find quoting/backslashing problems.
#
# The input to this script consists of two types of lines (pre-sorted):
#
#	(1) Active-file lines.  These have four fields:  newsgroup name,
#	    first existing article, last article number, 'y' or 'n'
#	    to allow/disallow posting.
#			mod.mac 00001 00001 y
#
#	(2) .newsrc-derived lines.  These have three fields:  the newsgroup
#	    name, the user name and the articles-read information.  The latter
#	    can be arbitrarily complex.  It can also be arbitrarily long;
#	    this can potentially break either awk or sed, in which
#	    case the script will not work.
#			mod.map joe 1-199
#
#	The script uses the type 1 lines to define the newsgroups
#	and their active article ranges.  The .newsrc (type 2) lines are
#	then used to deduce which users are reading that group (a group
#	is being read if the last article seen is in that group's active
#	article range).
#
sed "/^#/d
     s/NUSERS/$nusers/g
     s/HOSTNAME/$hostname/g
     s/DATE/$dat/g" > $TMPDIR/arb.fmt.$$ << 'DOG'
# makereport -- utility for "arbitron". Early versions were copied from a
# similar script distributed with "subscribers.sh" by Blonder, McCreery, and
# Herron.
#
	BEGIN	{ rdrcount = 0 ; reader = "" ; grpcount = 0 ; realusers = 0}
#
# Active file line:  dispose of previous group (if any), record group, and
# record first and last article numbers.  Set group's reader count to none.
	NF == 4 { if (grpname != "") {
			printf("%d %s\n",grpcount, grpname)
		  }
		  grpname = $1
		  grpfirst = $3
		  grplast = $2
		  grpcount = 0
		}
#
# .newsrc line.  Break out the final number, which is the last article that
# has actually been read.  This is a pretty good indicator of the person's
# true interest in the group.  If 'lastread' for the group is a current
# (unexpired) article, record a reader for that group.  Finally, record
# the user as a "real" user of the news system.
#
	NF == 3 { if ($1 != grpname) next;
		  n1 = split($3, n2, "-")
		  n3 = split(n2[n1], n4, ",")
		  lastread = n4[n3]
	if ((grpfirst != grplast) && (lastread >= grpfirst) && (lastread <= grplast)) {
			grpcount++
			if (realuser[$2] != 1) {
			    realuser[$2] = 1
			    realusers++
			}
		  }
		}
#
# End of file.  Print the report in 2 columns.
	END	{ printf("9999 Host\t\t%s\n","HOSTNAME")
		  printf("9998 Users\t\t%d\n",NUSERS)
		  printf("9997 NetReaders\t%d\n",realusers)
		  printf("9996 ReportDate\t%s\n","DATE")
		  printf("9995 SystemType\tnews-arbitron-2.4\n")
# For reorganized network, report a group even if nobody reads it. This will
# help us keep track of where the groups propagate.
		  printf("%d %s\n",grpcount, grpname)
		}
DOG

cat >$TMPDIR/arb.pwd.$$ <<'MOUSE'
BEGIN	{ seen["/"]=1; seen[""] = 1; }
	{ if (seen[$6]!=1) {
		printf("if [ -r %s/.newsrc ] ; then ", $6)
		printf("sed -n '/: [0-9]/s/:/ %s/p' <%s/.newsrc; fi\n",$1,$6)
		seen[$6]=1;
	  }
}
MOUSE

# First, make sure we have an active file
if [ -z "$NEWSHOST" ]
then ACTIVE=$NEWS/active
else ACTIVE=/tmp/arb.active.$$
     rcp $NEWSHOST:$NEWS/active $ACTIVE
fi

if [ ! -s $ACTIVE ]
then
    echo arbitron: ACTIVE file missing or empty. Cannot continue.
    exit 1
fi

# Next, get the list of .newsrc files with duplicates and unreadable files
# removed.
awk -F: -f $TMPDIR/arb.pwd.$$ </etc/passwd | sh >$TMPDIR/arb.tmp.$$

# Check to make sure that we found some
if [ -s $TMPDIR/arb.tmp.$$ ]
then # See if "active" file has 4 fields or only two (pre-2.10.2)
     set `sed 1q < $ACTIVE`
     if [ $# -eq 2 ]
     then egrep  '^[a-z]*\.' $ACTIVE |
	  while read group last
	  do dir=`echo "$group" | sed 's;\.;/;g'`
	     first=`ls $SPOOL/$dir | grep '^[0-9]*' | sort -n | sed 1q`
	     case $STYPE in
		usg) echo "$group $last ${first:-$last} X";;
		  *) echo "$group $last ${first-$last} X"
	     esac
	  done
     else egrep '^[a-z]*\.' $ACTIVE
     fi |
     sort - $TMPDIR/arb.tmp.$$ |
     awk -f $TMPDIR/arb.fmt.$$ |
     sort -nr |
     sed '/^$/d
	  s/^999[0-9] //' |
     $destination
else echo Unable to find any readable .newsrc files 2>&1
     exit 1
fi

ccea3@rivm.UUCP (Adri Verhoef) (11/02/88)

># arbitron -- this program produces rating sweeps for USENET.

I made a few enhancements to the script, so that it runs more secure,
e.g.:
1) Instead of temporary awk-scripts that are placed in $TMPDIR (/tmp),
   the default value for $TMPDIR will now be /usr/lib/news (i.e. $NEWS).
   This will make it more secure, as the awk-script cannot be renamed and
   replaced by other programs by others than root and news/usenet.
2) I introduced two variables, $ARB_PWD and $ARB_FMT, that don't make
   use of $$, so that these files will be overwritten the next time
   after a system crash, or if they were not removed.
Now you will actually see which and how many tempfiles are used.

Diffs follow now:
35d34
< TMPDIR=/tmp
36a36
> TMPDIR=$NEWS
37a38,41
> ARB_PWD=arb.pwd
> ARB_FMT=arb.fmt
> ARB_ACT=arb.active
> ARB_TMP=arb.tmp
79c83
< trap "rm -f $TMPDIR/arb.*.$$; exit" 0 1 2 3 15
---
> trap "rm -f $TMPDIR/arb.*; exit" 0 1 2 3 15
130c134
<      s/DATE/$dat/g" > $TMPDIR/arb.fmt.$$ << 'DOG'
---
>      s/DATE/$dat/g" > $TMPDIR/$ARB_FMT << 'DOG'
179c183
< cat >$TMPDIR/arb.pwd.$$ <<'MOUSE'
---
> cat >$TMPDIR/$ARB_PWD <<'MOUSE'
192c196
< else ACTIVE=/tmp/arb.active.$$
---
> else ACTIVE=$TMPDIR/$ARB_ACT
204c208
< awk -F: -f $TMPDIR/arb.pwd.$$ </etc/passwd | sh >$TMPDIR/arb.tmp.$$
---
> awk -F: -f $TMPDIR/$ARB_PWD </etc/passwd | sh >$TMPDIR/$ARB_TMP
207c211
< if [ -s $TMPDIR/arb.tmp.$$ ]
---
> if [ -s $TMPDIR/$ARB_TMP ]
222,223c226,227
<      sort - $TMPDIR/arb.tmp.$$ |
<      awk -f $TMPDIR/arb.fmt.$$ |
---
>      sort - $TMPDIR/$ARB_TMP |
>      awk -f $TMPDIR/$ARB_FMT |


You might think that the current version (v2.4.2) of arbitron is the same
as the one as of 4 June 1987, but I noticed some slight differences:
9,10c9,10
< # readership survey for your machine and mail that survey to decwrl, with
< # a cc to you.
---
> # readership survey for your machine and mail that survey to decwrl.dec.com,
> # with a cc to you.
13c13
< # run "arbitron" every month. I will run the statistics program on the last
---
> # run "arbitron" every month. I will run the statistics program on the first
18c18
< # Brian Reid, DEC Western Research Lab, reid@decwrl
---
> # Brian Reid, DEC Western Research Lab, reid@decwrl.dec.com
59,61c59,60
< # uucp path: {ihnp4, decvax, ucbvax}!decwrl!netsurvey
< # summarypath="netsurvey@decwrl.dec.com $USER"
< summarypath="ihnp4!decwrl!netsurvey $USER"
---
> # uucp path: {sun, hplabs, pyramid, decvax, ucbvax}!decwrl!netsurvey
> summarypath="netsurvey@decwrl.dec.com $USER"
62a62
> 
230a231,232
> 
> 

keithe@tekgvs.GVS.TEK.COM (Keith Ericson) (11/04/88)

Ah, yes, the monthly posting of the arbitron script. OK, Brian, I do
a ^Z to pt rn to sleep, do an suid to change to root, cd to where I
keep my copy of arbitron, check to see that it's the same version
(let's see, fg to get rn back, look and try to remember 3 numerals
and two dots - it's getting harder as the years go by...), verify
it's the same one, start it running, bg it so I can go back to work
(reading news?!?!?), scan through a few more articles, get my rn
prompt that I have mail, ^Z rn to sleep again, read the mail (my
copy of the arbitron report), fg my rn again, read a few more
articles, notice the rn "mail waiting" prompt, put rn to sleep (^Z),
read the mail..... and guess which machine by the name of ihnp4
decided not to forward the arbitron report!!! Thanks, you jerks!!!

OK, screw it, no arbitron report from tekgvs this month (big deal),
read the news 2 days later, and see the message that arbitron 2.4.2
(lessee, did I remember 'em right this time?) is NOT the same as
arbitron 2.4.2; huh? Well, whaddya; know - he's right: go back to
find the "new and improved arbitron 2.4.2," save it away, put rn to
sleep (^Z), cd to where the NEW arbitron is saved, do an suid to
root, ..., ..., ...

Ain't computers fun.

keith

bob@allosaur.cis.ohio-state.edu (Bob Sutterfield) (11/04/88)

In article <4213@tekgvs.GVS.TEK.COM> keithe@tekgvs.GVS.TEK.COM (Keith Ericson) writes:
>read the mail..... and guess which machine by the name of ihnp4
>decided not to forward the arbitron report!!! Thanks, you jerks!!!

Which "jerks" are you flaming?  The folks who didn't maintain your
system's pathalias database correctly so that it would know that ihnp4
doesn't forward external mail any more; or the folks at ihnp4 who,
with the rest of ATT, decided to use their own computers for their
company's own business, rather than donating their resources to their
competitors?

If you're going to flame someone, please be more specific!
-=-
Zippy sez,								--Bob
Kids, the seven basic food groups are GUM, PUFF PASTRY, PIZZA,
 PESTICIDES, ANTIBIOTICS, NUTRA-SWEET and MILK DUDS!!

root@conexch.UUCP (Larry Dighera) (11/07/88)

In article <26642@tut.cis.ohio-state.edu> bob@allosaur.cis.ohio-state.edu (Bob Sutterfield) writes:
>it would know that ihnp4
>doesn't forward external mail any more; or the folks at ihnp4 who,
>with the rest of ATT, decided to use their own computers for their
>company's own business, rather than donating their resources to their
>competitors?


Instead of "donating their resources to their competitors" [sic], AT&T
now uses their competitors computing resources to offer UUCP based
E-Mail facilities to their ATT-Mail cu$tomers.  They deny the rest of
the UUCP network the ability to route mail through AT&T sites, while
expecting the net to allow AT&T traffic to pass through, so they can 
make a profit on it.
 
Sounds fair to me :-(


-- 
USPS: The Consultants' Exchange, PO Box 12100, Santa Ana, CA  92712
TELE: (714) 842-6348: BBS (N81); (714) 842-5851: Xenix guest account (E71)
UUCP: conexch Any ACU 2400 17148425851 ogin:-""-ogin:-""-ogin: nuucp
UUCP: ...!uunet!turnkey!conexch!root || ...!trwrb!ucla-an!conexch!root

zzw0039@dcrlg1.UUCP (Al Ethridge) (11/09/88)

From article <11697@conexch.UUCP>, by root@conexch.UUCP (Larry Dighera):
 
> 
> Instead of "donating their resources to their competitors" [sic], AT&T
> now uses their competitors computing resources to offer UUCP based
> E-Mail facilities to their ATT-Mail cu$tomers.  They deny the rest of
> the UUCP network the ability to route mail through AT&T sites, while
> expecting the net to allow AT&T traffic to pass through, so they can 
> make a profit on it.
>  
If ATT is going to do stuff like that then why not cut them off from the 
outside world. It works both ways. Maybe that will wake them up (but I
doubt it.

mrm@sceard.UUCP (M.R.Murphy) (11/11/88)

In article <807@dcrlg1.UUCP> zzw0039@dcrlg1.UUCP (Al Ethridge) writes:
>From article <11697@conexch.UUCP>, by root@conexch.UUCP (Larry Dighera):
> 
>> 
>> Instead of "donating their resources to their competitors" [sic], AT&T
>> now uses their competitors computing resources to offer UUCP based
>> E-Mail facilities to their ATT-Mail cu$tomers.  They deny the rest of
>> the UUCP network the ability to route mail through AT&T sites, while
>> expecting the net to allow AT&T traffic to pass through, so they can 
>> make a profit on it.
>>  
>If ATT is going to do stuff like that then why not cut them off from the 
>outside world. It works both ways. Maybe that will wake them up (but I
>doubt it.

I just received the following mail bounce as a result of someone's routing:


[deleted...]
Date: 9 Nov 88 19:57:45 EST (Wed)
From: ucsd!gatech!att.ATT.COM!MAILER-DAEMON
Subject: warning
To: gatech!ucsd!sceard!mrm
Message-Id: <8811091957.AA11068@att.ATT.COM>
Status: R

=======     command failed      =======

 COMMAND: [...deleted]

======= standard error follows  =======


	Forwarding through this system (att) is no longer allowed.


======= text of message follows =======
[deleted...]

Being just a tad frosted at this response from MAILER-DAEMON@att.ATT.COM,
I first thought a standard error follows of

	Forwarding of att traffic through this system is no longer allowed.

and a bounce of the item to root@att.ATT.COM would be appropriate for all
mail and news that originates from att and passes through here.

On sober:-) and considered reflection, a more reasonable thing to do,
it seems, is to mark att as dead in d.AProject. I just looked, and
att isn't mentioned in d.AProject. Comments?
--
Mike Murphy  Sceard Systems, Inc.  544 South Pacific St.  San Marcos, CA  92069
UUCP: {nosc,ucsd}!sceard!mrm     INTERNET: mrm%sceard.UUCP@ucsd.ucsd.edu

reid@decwrl.DEC.COM (Brian Reid) (07/01/89)

This is the script for the "arbitron" system that is used to produce the
data for the monthly USENET readership surveys in news.lists. It is not so
much a "source" as it is a piece of USENET folklore, posted every month so
that new people will be sure to find it. (CF the "History of USENET" articles
posted every month). It is posted to these newsgroups rather than to a source
group so that it will be widely available but will not be archived.

#! /bin/sh
# @(#)arbitron	2.4.2	06/05/87
# arbitron -- this program produces rating sweeps for USENET.
#
# Usage: arbitron
#
# To use this program, edit the "configuration" section below so that the
# information is correct for your site, and then run it. It will produce a
# readership survey for your machine and mail that survey to decwrl.dec.com,
# with a cc to you.
#
# To participate in the international monthly ratings sweeps, 
# run "arbitron" every month. I will run the statistics program on the first
# day of each month; it will include any report that has reached it by that
# time. To make sure your site's data is included, run the survey program no
# later than the 20th day of each month.
#
# Brian Reid, DEC Western Research Lab, reid@decwrl.dec.com
# Updated and bugfixed by 
#	Spencer Thomas, U.of Utah
#	Geoff Kuenning, SAH Consulting
# Updated to work with 2.10.1 and older news systems by
#	Lindsay Cleveland, AT&T Technologies/Bell Labs
# Made to work with 16-bit address spaces by
#	Andy Walker, Maths Dept., University of Nottingham, UK
# Nagging Bourne shell bug fixed by
#	Tom Donahue, Rabbit Software Corp
#
# Note that the results of this program are dependent on the rate at which
# you expire news.  If you are a small site that expires news rapidly, the
# results may indicate fewer active readers than you actually have.
#
###########################################################################
# Configuration information. Edit this section to reflect your site data. #
TMPDIR=/tmp
NEWS=/usr/lib/news
SPOOL=/usr/spool/news

# Make a crude stab at determining the system type. If your installation has
# only one type of system, you can edit out the "if" statement and just turn
# this into an assignment statement of the correct value.
if [ -d /usr/ucb ]
then
    STYPE="bsd"
else
    STYPE="usg"
fi

# Range of /etc/passwd UID's that represent actual people (rather than
# maintenance accounts or daemons or whatever)
lowUID=5
highUID=9999

# If you aren't running a distributed news system (nntpd & rrn, usually),
# leave NEWSHOST blank. Else set it to the name of the host from which you
# can rcp a copy of the active file.
NEWSHOST=

# uucp path: {sun, hplabs, pyramid, decvax, ucbvax}!decwrl!netsurvey
summarypath="netsurvey@decwrl.dec.com $USER"


# We need to find the uucp name of your host. If this code doesn't work,
# then just put it in literally like this:
#	hostname="ihnp4"

case $STYPE in
	bsd) cmd='hostname || uuname -l';;
	sysv)cmd='uname -n || uuname -l || hostname';;
	*)   cmd='uuname -l';;
esac;

hostname=`sh -c "$cmd" 2>&-`

PATH=$NEWS:/usr/local/bin:/usr/ucb:/usr/bin:/bin
############################################################################
export PATH
# ---------------------------------------------------------------------------
trap "rm -f $TMPDIR/arb.*.$$; exit" 0 1 2 3 15
set `date`
dat="$2$6"
destination="${MAILER-mail} $summarypath"

################################
# Here are several expressions, each of which figures out approximately how
# many people use this machine. Comment out all but 1 of them; pick the one
# you like best. Initially the most universal but least reliable of them is
# uncommented.
# # ###### Scheme #1: fast but usually returns too big a number
nusers=`awk -F: "BEGIN {N=0}\\$3>=$lowUID && \\$3<=$highUID{N=N+1}END{print N}" </etc/passwd`

# # ###### Scheme #2 (works with BSD systems)
#nusers=`last | sort -u +0 -1 | wc -l`

# # ###### Scheme #3 (works with USG systems)
#nusers=`who /etc/wtmp | sort -u +0 -1 | wc -l`

################################
#
# Set up awk scripts;  these are too large to pass as arguments on most
# systems.
#
# This awk script generates the actual output report.
# We use 'sed' to substitute in the shell variables to save ourselves
# endless hassle trying to find quoting/backslashing problems.
#
# The input to this script consists of two types of lines (pre-sorted):
#
#	(1) Active-file lines.  These have four fields:  newsgroup name,
#	    first existing article, last article number, 'y' or 'n'
#	    to allow/disallow posting.
#			mod.mac 00001 00001 y
#
#	(2) .newsrc-derived lines.  These have three fields:  the newsgroup
#	    name, the user name and the articles-read information.  The latter
#	    can be arbitrarily complex.  It can also be arbitrarily long;
#	    this can potentially break either awk or sed, in which
#	    case the script will not work.
#			mod.map joe 1-199
#
#	The script uses the type 1 lines to define the newsgroups
#	and their active article ranges.  The .newsrc (type 2) lines are
#	then used to deduce which users are reading that group (a group
#	is being read if the last article seen is in that group's active
#	article range).
#
sed "/^#/d
     s/NUSERS/$nusers/g
     s/HOSTNAME/$hostname/g
     s/DATE/$dat/g" > $TMPDIR/arb.fmt.$$ << 'DOG'
# makereport -- utility for "arbitron". Early versions were copied from a
# similar script distributed with "subscribers.sh" by Blonder, McCreery, and
# Herron.
#
	BEGIN	{ rdrcount = 0 ; reader = "" ; grpcount = 0 ; realusers = 0}
#
# Active file line:  dispose of previous group (if any), record group, and
# record first and last article numbers.  Set group's reader count to none.
	NF == 4 { if (grpname != "") {
			printf("%d %s\n",grpcount, grpname)
		  }
		  grpname = $1
		  grpfirst = $3
		  grplast = $2
		  grpcount = 0
		}
#
# .newsrc line.  Break out the final number, which is the last article that
# has actually been read.  This is a pretty good indicator of the person's
# true interest in the group.  If 'lastread' for the group is a current
# (unexpired) article, record a reader for that group.  Finally, record
# the user as a "real" user of the news system.
#
	NF == 3 { if ($1 != grpname) next;
		  n1 = split($3, n2, "-")
		  n3 = split(n2[n1], n4, ",")
		  lastread = n4[n3]
	if ((grpfirst != grplast) && (lastread >= grpfirst) && (lastread <= grplast)) {
			grpcount++
			if (realuser[$2] != 1) {
			    realuser[$2] = 1
			    realusers++
			}
		  }
		}
#
# End of file.  Print the report in 2 columns.
	END	{ printf("9999 Host\t\t%s\n","HOSTNAME")
		  printf("9998 Users\t\t%d\n",NUSERS)
		  printf("9997 NetReaders\t%d\n",realusers)
		  printf("9996 ReportDate\t%s\n","DATE")
		  printf("9995 SystemType\tnews-arbitron-2.4\n")
# For reorganized network, report a group even if nobody reads it. This will
# help us keep track of where the groups propagate.
		  printf("%d %s\n",grpcount, grpname)
		}
DOG

cat >$TMPDIR/arb.pwd.$$ <<'MOUSE'
BEGIN	{ seen["/"]=1; seen[""] = 1; }
	{ if (seen[$6]!=1) {
		printf("if [ -r %s/.newsrc ] ; then ", $6)
		printf("sed -n '/: [0-9]/s/:/ %s/p' <%s/.newsrc; fi\n",$1,$6)
		seen[$6]=1;
	  }
}
MOUSE

# First, make sure we have an active file
if [ -z "$NEWSHOST" ]
then ACTIVE=$NEWS/active
else ACTIVE=/tmp/arb.active.$$
     rcp $NEWSHOST:$NEWS/active $ACTIVE
fi

if [ ! -s $ACTIVE ]
then
    echo arbitron: ACTIVE file missing or empty. Cannot continue.
    exit 1
fi

# Next, get the list of .newsrc files with duplicates and unreadable files
# removed.
awk -F: -f $TMPDIR/arb.pwd.$$ </etc/passwd | sh >$TMPDIR/arb.tmp.$$

# Check to make sure that we found some
if [ -s $TMPDIR/arb.tmp.$$ ]
then # See if "active" file has 4 fields or only two (pre-2.10.2)
     set `sed 1q < $ACTIVE`
     if [ $# -eq 2 ]
     then egrep  '^[a-z]*\.' $ACTIVE |
	  while read group last
	  do dir=`echo "$group" | sed 's;\.;/;g'`
	     first=`ls $SPOOL/$dir | grep '^[0-9]*' | sort -n | sed 1q`
	     case $STYPE in
		usg) echo "$group $last ${first:-$last} X";;
		  *) echo "$group $last ${first-$last} X"
	     esac
	  done
     else egrep '^[a-z]*\.' $ACTIVE
     fi |
     sort - $TMPDIR/arb.tmp.$$ |
     awk -f $TMPDIR/arb.fmt.$$ |
     sort -nr |
     sed '/^$/d
	  s/^999[0-9] //' |
     $destination
else echo Unable to find any readable .newsrc files 2>&1
     exit 1
fi

reid@decwrl.DEC.COM (Brian Reid) (10/01/89)

This is the script for the "arbitron" system that is used to produce the
data for the monthly USENET readership surveys in news.lists. It is not so
much a "source" as it is a piece of USENET folklore, posted every month so
that new people will be sure to find it. (CF the "History of USENET" articles
posted every month). It is posted to these newsgroups rather than to a source
group so that it will be widely available but will not be archived.

#! /bin/sh
# @(#)arbitron	2.4.3	07/22/89
# arbitron -- this program produces rating sweeps for USENET.
#
# Usage: arbitron
#
# To use this program, edit the "configuration" section below so that the
# information is correct for your site, and then run it. It will produce a
# readership survey for your machine and mail that survey to decwrl.dec.com,
# with a cc to you.
#
# To participate in the international monthly ratings sweeps, 
# run "arbitron" every month. I will run the statistics program on the first
# day of each month; it will include any report that has reached it by that
# time. To make sure your site's data is included, run the survey program no
# later than the 20th day of each month.
#
# Brian Reid, DEC Western Research Lab, reid@decwrl.dec.com
# Updated and bugfixed by 
#	Spencer Thomas, U.of Utah
#	Geoff Kuenning, SAH Consulting
# Updated to work with 2.10.1 and older news systems by
#	Lindsay Cleveland, AT&T Technologies/Bell Labs
# Made to work with 16-bit address spaces by
#	Andy Walker, Maths Dept., University of Nottingham, UK
# Nagging Bourne shell bug fixed by
#	Tom Donahue, Rabbit Software Corp
# Various suggestions provided by:
#	Karl Tombre, CRIN, Vandoeuvre France
#	Dan Kolkowitz, Stanford
#	Paul Eggert, Unisys
#
# Note that the results of this program are dependent on the rate at which
# you expire news.  If you are a small site that expires news rapidly, the
# results may indicate fewer active readers than you actually have.
#
###########################################################################
# Configuration information. Edit this section to reflect your site data. #
TMPDIR=/tmp
NEWS=/usr/lib/news
SPOOL=/usr/spool/news

# Make a crude stab at determining the system type. If your installation has
# only one type of system, you can edit out the "if" statement and just turn
# this into an assignment statement of the correct value.
if [ -d /usr/ucb ]
then
    STYPE="bsd"
else
    STYPE="usg"
fi

# Range of /etc/passwd UID's that represent actual people (rather than
# maintenance accounts or daemons or whatever)
lowUID=5
highUID=9999

# If you aren't running a distributed news system (nntpd & rrn, usually),
# leave NEWSHOST blank. Else set it to the name of the host from which you
# can rcp a copy of the active file.
NEWSHOST=

# uucp path: {sun, hplabs, pyramid, decvax, ucbvax}!decwrl!netsurvey
summarypath="netsurvey@decwrl.dec.com $USER"


# We need to find the uucp name of your host. If this code doesn't work,
# then just put it in literally like this:
#	hostname="decwrl"

case $STYPE in
	bsd) cmd='hostname || uuname -l';;
	sysv)cmd='uname -n || uuname -l || hostname';;
	*)   cmd='uuname -l';;
esac;

hostname=`sh -c "$cmd" 2>&-`

PATH=$NEWS:$PATH
############################################################################
export PATH
# ---------------------------------------------------------------------------
trap exit 1 2 3 15
trap 's=$?; rm -f $TMPDIR/arb.*.$$; exit $s' 0
set `date`
dat="$2$6"
destination="${MAILER-mail} $summarypath"

################################
# Here are several expressions, each of which figures out approximately how
# many people use this machine. Comment out all but 1 of them; pick the one
# you like best. Initially the most universal but least reliable of them is
# uncommented.
PASSWD=$TMPDIR/arb.passwd.$$
(ypcat passwd || cat /etc/passwd) > $PASSWD

# # ###### Scheme #1: fast but usually returns too big a number
nusers=`awk -F: "BEGIN {N=0}\\$3>=$lowUID && \\$3<=$highUID{N=N+1}END{print N}" <$PASSWD`

# # ###### Scheme #2 (works with BSD systems)
#nusers=`last | sed '/^wtmp begins/d; s/ .*//; /^$/d' | sort -u | wc -l`

# # ###### Scheme #3 (works with USG systems)
#nusers=`who /etc/wtmp | sort -u +0 -1 | wc -l`

################################
#
# Set up awk scripts;  these are too large to pass as arguments on most
# systems.
#
# This awk script generates the actual output report.
# We use 'sed' to substitute in the shell variables to save ourselves
# endless hassle trying to find quoting/backslashing problems.
#
# The input to this script consists of two types of lines (pre-sorted):
#
#	(1) Active-file lines.  These have four fields:  newsgroup name,
#	    first existing article, last article number, 'y' or 'n'
#	    to allow/disallow posting.
#			mod.mac 00001 00001 y
#
#	(2) .newsrc-derived lines.  These have three fields:  the newsgroup
#	    name, the user name and the articles-read information.  The latter
#	    can be arbitrarily complex.  It can also be arbitrarily long;
#	    this can potentially break either awk or sed, in which
#	    case the script will not work.
#			mod.map joe 1-199
#
#	The script uses the type 1 lines to define the newsgroups
#	and their active article ranges.  The .newsrc (type 2) lines are
#	then used to deduce which users are reading that group (a group
#	is being read if the last article seen is in that group's active
#	article range).
#
sed "/^#/d
     s/NUSERS/$nusers/g
     s/HOSTNAME/$hostname/g
     s/DATE/$dat/g" > $TMPDIR/arb.fmt.$$ << 'DOG'
# makereport -- utility for "arbitron". Early versions were copied from a
# similar script distributed with "subscribers.sh" by Blonder, McCreery, and
# Herron.
#
	BEGIN	{ rdrcount = 0 ; reader = "" ; grpcount = 0 ; realusers = 0}
#
# Active file line:  dispose of previous group (if any), record group, and
# record first and last article numbers.  Set group's reader count to none.
	NF == 4 { if (grpname != "") {
			printf("%d %s\n",grpcount, grpname)
		  }
		  grpname = $1
		  grpfirst = $3
		  grplast = $2
		  grpcount = 0
		}
#
# .newsrc line.  Break out the final number, which is the last article that
# has actually been read.  This is a pretty good indicator of the person's
# true interest in the group.  If 'lastread' for the group is a current
# (unexpired) article, record a reader for that group.  Finally, record
# the user as a "real" user of the news system.
#
	NF == 3 { if ($1 != grpname) next;
		  n1 = split($3, n2, "-")
		  n3 = split(n2[n1], n4, ",")
		  lastread = n4[n3]
	if ((grpfirst != grplast) && (lastread >= grpfirst) && (lastread <= grplast)) {
			grpcount++
			if (realuser[$2] != 1) {
			    realuser[$2] = 1
			    realusers++
			}
		  }
		}
#
# End of file.  Print the report in 2 columns.
	END	{ printf("9999 Host\t\t%s\n","HOSTNAME")
		  printf("9998 Users\t\t%d\n",NUSERS)
		  printf("9997 NetReaders\t%d\n",realusers)
		  printf("9996 ReportDate\t%s\n","DATE")
		  printf("9995 SystemType\tnews-arbitron-2.4\n")
# For reorganized network, report a group even if nobody reads it. This will
# help us keep track of where the groups propagate.
		  printf("%d %s\n",grpcount, grpname)
		}
DOG

cat >$TMPDIR/arb.pwd.$$ <<'MOUSE'
BEGIN	{ seen["/"]=1; seen[""] = 1; }
	{ if (seen[$6]!=1) {
		printf("if [ -r %s/.newsrc ] ; then ", $6)
		printf("sed -n '/: [0-9]/s/:/ %s/p' <%s/.newsrc; fi\n",$1,$6)
		seen[$6]=1;
	  }
}
MOUSE

# First, make sure we have an active file
if [ -z "$NEWSHOST" ]
then ACTIVE=$NEWS/active
else ACTIVE=/tmp/arb.active.$$
     rcp $NEWSHOST:$NEWS/active $ACTIVE
fi

if [ ! -s $ACTIVE ]
then
    echo arbitron: ACTIVE file missing or empty. Cannot continue. >&2
    (exit 1); exit
fi

# Next, get the list of .newsrc files with duplicates and unreadable files
# removed.
awk -F: -f $TMPDIR/arb.pwd.$$ <$PASSWD | sh >$TMPDIR/arb.tmp.$$

# Check to make sure that we found some
if [ -s $TMPDIR/arb.tmp.$$ ]
then # See if "active" file has 4 fields or only two (pre-2.10.2)
     set `sed 1q < $ACTIVE`
     if [ $# -eq 2 ]
     then egrep  '^[a-z][-0-9_a-z]*\.' $ACTIVE |
	  while read group last
	  do dir=`echo "$group" | sed 's;\.;/;g'`
	     first=`ls $SPOOL/$dir | grep '^[0-9]*' | sort -n | sed 1q`
	     case $STYPE in
		usg) echo "$group $last ${first:-$last} X";;
		  *) echo "$group $last ${first-$last} X"
	     esac
	  done
     else egrep '^[a-z][-0-9_a-z]*\.' $ACTIVE
     fi |
     sort - $TMPDIR/arb.tmp.$$ |
     awk -f $TMPDIR/arb.fmt.$$ |
     sort -nr |
     sed '/^$/d
	  s/^999[0-9] //' |
     $destination
else echo arbitron: Unable to find any readable .newsrc files >&2
     (exit 1); exit
fi

reid@decwrl.DEC.COM (Brian Reid) (06/02/91)

This is the script for the "arbitron" system that is used to produce the
data for the monthly USENET readership surveys in news.lists. It is not so
much a "source" as it is a piece of USENET folklore, posted every month so
that new people will be sure to find it. (CF the "History of USENET" articles
posted every month). It is posted to these newsgroups rather than to a source
group so that it will be widely available but will not be archived.

#! /bin/sh
# @(#)arbitron	2.4.4	10/20/89
# arbitron -- this program produces rating sweeps for USENET.
#
# Usage: arbitron
#
# To use this program, edit the "configuration" section below so that the
# information is correct for your site, and then run it. It will produce a
# readership survey for your machine and mail that survey to decwrl.dec.com,
# with a cc to you.
#
# To participate in the international monthly ratings sweeps, 
# run "arbitron" every month. I will run the statistics program on the first
# day of each month; it will include any report that has reached it by that
# time. To make sure your site's data is included, run the survey program no
# later than the 20th day of each month.
#
# Brian Reid, DEC Western Research Lab, reid@decwrl.dec.com
# Updated and bugfixed by 
#	Spencer Thomas, U.of Utah
#	Geoff Kuenning, SAH Consulting
# Updated to work with 2.10.1 and older news systems by
#	Lindsay Cleveland, AT&T Technologies/Bell Labs
# Made to work with 16-bit address spaces by
#	Andy Walker, Maths Dept., University of Nottingham, UK
# Nagging Bourne shell bug fixed by
#	Tom Donahue, Rabbit Software Corp
# Various suggestions provided by:
#	Karl Tombre, CRIN, Vandoeuvre France
#	Dan Kolkowitz, Stanford
#	Paul Eggert, Unisys
# Fixes for YP and NN by
#	Dan Kolkowitz, Stanford
#	Brent Chapman, Capital Market Technology
#
# Note that the results of this program are dependent on the rate at which
# you expire news.  If you are a small site that expires news rapidly, the
# results may indicate fewer active readers than you actually have.
#
###########################################################################
# Configuration information. Edit this section to reflect your site data. #
TMPDIR=/tmp
NEWS=/usr/lib/news
SPOOL=/usr/spool/news

# Make a crude stab at determining the system type. If your installation has
# only one type of system, you can edit out the "if" statement and just turn
# this into an assignment statement of the correct value.
if [ -d /usr/ucb ]
then
    STYPE="bsd"
else
    STYPE="usg"
fi

# Range of /etc/passwd UID's that represent actual people (rather than
# maintenance accounts or daemons or whatever)
lowUID=5
highUID=9999

# If you aren't running a distributed news system (nntpd & rrn, usually),
# leave NEWSHOST blank. Else set it to the name of the host from which you
# can rcp a copy of the active file.
NEWSHOST=

# uucp path: {sun, hplabs, pyramid, decvax, ucbvax}!decwrl!netsurvey
summarypath="netsurvey@decwrl.dec.com $USER"


# We need to find the uucp name of your host. If this code doesn't work,
# then just put it in literally like this:
#	hostname="decwrl"

case $STYPE in
	bsd) cmd='hostname || uuname -l';;
	sysv)cmd='uname -n || uuname -l || hostname';;
	*)   cmd='uuname -l';;
esac;

hostname=`sh -c "$cmd" 2>&-`

PATH=$NEWS:$PATH
############################################################################
export PATH
# ---------------------------------------------------------------------------
trap exit 1 2 3 15
trap 's=$?; rm -f $TMPDIR/arb.*.$$; exit $s' 0
set `date`
dat="$2$6"
destination="${MAILER-mail} $summarypath"

################################
# Here are several expressions, each of which figures out approximately how
# many people use this machine. Comment out all but 1 of them; pick the one
# you like best. Initially the most universal but least reliable of them is
# uncommented.
PASSWD=$TMPDIR/arb.passwd.$$
(ypcat passwd || cat /etc/passwd) > $PASSWD

# # ###### Scheme #1: fast but usually returns too big a number
nusers=`awk -F: "BEGIN {N=0}\\$3>=$lowUID && \\$3<=$highUID{N=N+1}END{print N}" <$PASSWD`

# # ###### Scheme #2 (works with BSD systems)
#nusers=`last | sed '/^wtmp begins/d; s/ .*//; /^$/d' | sort -u | wc -l`

# # ###### Scheme #3 (works with USG systems)
#nusers=`who /etc/wtmp | sort -u +0 -1 | wc -l`

################################
#
# Set up awk scripts;  these are too large to pass as arguments on most
# systems.
#
# This awk script generates the actual output report.
# We use 'sed' to substitute in the shell variables to save ourselves
# endless hassle trying to find quoting/backslashing problems.
#
# The input to this script consists of two types of lines (pre-sorted):
#
#	(1) Active-file lines.  These have four fields:  newsgroup name,
#	    first existing article, last article number, 'y' or 'n'
#	    to allow/disallow posting.
#			mod.mac 00001 00001 y
#
#	(2) .newsrc-derived lines.  These have three fields:  the newsgroup
#	    name, the user name and the articles-read information.  The latter
#	    can be arbitrarily complex.  It can also be arbitrarily long;
#	    this can potentially break either awk or sed, in which
#	    case the script will not work.
#			mod.map joe 1-199
#
#	The script uses the type 1 lines to define the newsgroups
#	and their active article ranges.  The .newsrc (type 2) lines are
#	then used to deduce which users are reading that group (a group
#	is being read if the last article seen is in that group's active
#	article range).
#
sed "/^#/d
     s/NUSERS/$nusers/g
     s/HOSTNAME/$hostname/g
     s/DATE/$dat/g" > $TMPDIR/arb.fmt.$$ << 'DOG'
# makereport -- utility for "arbitron". Early versions were copied from a
# similar script distributed with "subscribers.sh" by Blonder, McCreery, and
# Herron.
#
	BEGIN	{ rdrcount = 0 ; reader = "" ; grpcount = 0 ; realusers = 0}
#
# Active file line:  dispose of previous group (if any), record group, and
# record first and last article numbers.  Set group's reader count to none.
	NF == 4 { if (grpname != "") {
			printf("%d %s\n",grpcount, grpname)
		  }
		  grpname = $1
		  grpfirst = $3
		  grplast = $2
		  grpcount = 0
		}
#
# .newsrc line.  Break out the final number, which is the last article that
# has actually been read.  This is a pretty good indicator of the person's
# true interest in the group.  If 'lastread' for the group is a current
# (unexpired) article, record a reader for that group.  Finally, record
# the user as a "real" user of the news system.
#
	NF == 3 { if ($1 != grpname) next;
		  n1 = split($3, n2, "-")
		  n3 = split(n2[n1], n4, ",")
		  lastread = n4[n3]
	if ((grpfirst != grplast) && (lastread >= grpfirst) && (lastread <= grplast)) {
			grpcount++
			if (realuser[$2] != 1) {
			    realuser[$2] = 1
			    realusers++
			}
		  }
		}
#
# End of file.  Print the report in 2 columns.
	END	{ printf("9999 Host\t\t%s\n","HOSTNAME")
		  printf("9998 Users\t\t%d\n",NUSERS)
		  printf("9997 NetReaders\t%d\n",realusers)
		  printf("9996 ReportDate\t%s\n","DATE")
		  printf("9995 SystemType\tnews-arbitron-2.4\n")
# For reorganized network, report a group even if nobody reads it. This will
# help us keep track of where the groups propagate.
		  printf("%d %s\n",grpcount, grpname)
		}
DOG

cat >$TMPDIR/arb.pwd.$$ <<'MOUSE'
BEGIN	{ seen["/"]=1; seen[""] = 1; }
	{ if (seen[$6]!=1) {
		printf("if [ -r %s/.nn/rc ] ; then ", $6)
		printf("sed -n '/^+/s/^. \([0-9]*\) \(.*\)/\2 %s \1/p'",$1)
		printf(" <%s/.nn/rc;\n",$6)
		printf("elif [ -r %s/.newsrc ] ; then ", $6)
		printf("sed -n '/: [0-9]/s/:/ %s/p' <%s/.newsrc; fi\n",$1,$6)
		seen[$6]=1;
	  }
}
MOUSE

# First, make sure we have an active file
if [ -z "$NEWSHOST" ]
then ACTIVE=$NEWS/active
else ACTIVE=/tmp/arb.active.$$
     rcp $NEWSHOST:$NEWS/active $ACTIVE
fi

if [ ! -s $ACTIVE ]
then
    echo arbitron: ACTIVE file missing or empty. Cannot continue. >&2
    (exit 1); exit
fi

# Next, get the list of .newsrc files with duplicates and unreadable files
# removed.
awk -F: -f $TMPDIR/arb.pwd.$$ <$PASSWD | sh >$TMPDIR/arb.tmp.$$

# Check to make sure that we found some
if [ -s $TMPDIR/arb.tmp.$$ ]
then # See if "active" file has 4 fields or only two (pre-2.10.2)
     set `sed 1q < $ACTIVE`
     if [ $# -eq 2 ]
     then egrep  '^[a-z][-0-9_a-z]*\.' $ACTIVE |
	  while read group last
	  do dir=`echo "$group" | sed 's;\.;/;g'`
	     first=`ls $SPOOL/$dir | grep '^[0-9]*' | sort -n | sed 1q`
	     case $STYPE in
		usg) echo "$group $last ${first:-$last} X";;
		  *) echo "$group $last ${first-$last} X"
	     esac
	  done
     else egrep '^[a-z][-0-9_a-z]*\.' $ACTIVE
     fi |
     sort - $TMPDIR/arb.tmp.$$ |
     awk -f $TMPDIR/arb.fmt.$$ |
     sort -nr |
     sed '/^$/d
	  s/^999[0-9] //' |
     $destination
else echo arbitron: Unable to find any readable .newsrc files >&2
     (exit 1); exit
fi