reid@glacier.ARPA (Brian Reid) (03/08/86)
I'm tired of speculation about how many people read USENET, and how big the
audiences are. Here is a program that does a pretty good job of determining
that information. Please install this program on your site, and run it with
the -m option that will mail the results back to glacier. I will tally and
total the results and post them. We've been running this program at about
20 sites in the San Francisco area for 6 months now, and the results are
remarkably counterintuitive.
This shell script is named "arbitron". Edit the "configuration information"
to work at your site, then type
arbitron -m netsurvey@glacier.ARPA
to mail the results to me. If you want to keep a copy of the results, type
arbitron -m "netsurvey@glacier myname"
where "myname" is your login name.
On Glacier, which is a Vax 750 with 250 user accounts, this program takes
about 5 minutes to run on a lightly-loaded system.
#! /bin/sh
# arbitron -- this program produces rating sweeps for USENET.
#
# Usage: arbitron > filename      for local data
#        arbitron -p net.foo      to post to USENET
#        arbitron -m person       to mail results to an accumulator
#
# Run "arbitron -p" at the end of each month, which will post your site's
# arbitron report.
# To participate in the international monthly ratings sweeps,
# run "arbitron -m net-survey@glacier" every month. I combine the results
# and post the information to net.news.
#
# The names of users reading individual groups are not posted to the network,
# only the summary counts. Whether or not somebody reads a group is private
# information.
#
# By Brian Reid, Stanford.
# v1.2 September 18, 1985
# v1.3 March 8, 1986
#
# copied to a certain extent from the "subscribers"
# script posted by Blonder, McCreery, and Herron. The awk script to format
# the report was especially stolen from them.
#
# How it works:
#   1. scan_newsrc walks the password file and emits, for every account, the
#      login name followed by the colon-bearing lines of its ~/.newsrc.
#   2. select_newsrc reduces that stream to login names and "group list"
#      pairs for net.* and mod.* groups.
#   3. format_report reads the net.*/mod.* lines of $news/active followed by
#      the output of step 2 and prints one line per group that has readers.
#   4. The lines are sorted by reader count and handed to deliver, which
#      prints, posts or mails them depending on the first argument.
############################################################################
# Configuration information:
tmpdir=/tmp
news=/usr/lib/news
# For uucp, try {sun, pyramid, decwrl, hplabs, bellcore}!glacier
# NOTE(review): summarypath is not referenced anywhere below, and it is
# spelled differently from the built-in -m default ("netsurvey@glacier").
# Confirm which address is correct before wiring it in.
summarypath="net-survey@glacier.ARPA"
# Fall back to uname -n on systems that have no hostname command.
hostname=$(hostname 2>/dev/null || uname -n)
PATH=$news:/usr/local/bin:/usr/ucb:/usr/bin:/bin
############################################################################
export PATH
# ---------------------------------------------------------------------------
# Scratch space: a private directory instead of predictable /tmp file names,
# so that nobody can pre-create or symlink the files this script writes.
workdir=$(mktemp -d "$tmpdir/arb.XXXXXX") || {
  echo "arbitron: cannot create a work directory under $tmpdir" >&2
  exit 1
}
trap 'rm -rf "$workdir"' 0
# A signal must stop the run; exiting also fires the cleanup trap above.
trap 'exit 1' 1 2 15

# ---------------------------------------------------------------------------
# Command line.  opt1 selects the mode, opt2 is the newsgroup (-p) or the
# mail recipient list (-m).
#   poption=0  local report: reader names are listed next to each group
#   poption=1  names are suppressed (anything that leaves the machine)
#   moption=1  terse "count group" records for the mail accumulator
opt1=${1-0}
opt2=
poption=0
moption=0
case $opt1 in
  -p) opt2=${2-ba.news.ratings}
      poption=1 ;;
  -m) opt2=${2-netsurvey@glacier}
      poption=1
      moption=1 ;;
esac

# "Mon YYYY" for the article title.  Asking date for exactly these two
# fields avoids depending on the layout of its default output.
dat=$(LC_ALL=C date '+%b %Y')

#######################################
# Send the finished report (stdin) to its destination.
# Globals:   opt1, opt2, news, hostname, dat (all read)
# Outputs:   -p posts through inews, -m mails to $opt2, otherwise stdout
#######################################
deliver() {
  case $opt1 in
    -p)
      # The title is passed as a single argument so it does not depend on
      # inews gluing separate words back together.
      "$news/inews" -t "Monthly arbitron ratings for $hostname ($dat)" \
        -n "$opt2" ;;
    -m)
      # $opt2 is deliberately unquoted: it may name several recipients,
      # as in: arbitron -m "netsurvey@glacier myname"
      # shellcheck disable=SC2086
      mail $opt2 ;;
    *)
      cat ;;
  esac
}

################################
# Here are several expressions, each of which figures out approximately how
# many people use this machine. Comment out all but 1 of them; pick the one
# you like best
########## fast but usually returns too big a number
lowUID=5
highUID=999
nusers=$(awk -F: -v lo="$lowUID" -v hi="$highUID" \
  '$3 >= lo && $3 <= hi { n++ } END { print n + 0 }' /etc/passwd)
########## slow but accurate if /usr/adm/wtmp has enough data
## We just want to count the lines in the standard output of sort -u
# nusers=$(last | colrm 9 | sort -u | wc -l)
################################

#######################################
# Emit each login name followed by the lines of that account's .newsrc
# that contain a colon.
# Arguments: $1 - password file to walk (default /etc/passwd)
# Outputs:   the interleaved stream on stdout
#######################################
scan_newsrc() {
  # The "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
  while IFS=: read -r sn_user sn_pw sn_uid sn_gid sn_gecos sn_home sn_rest ||
    [ -n "$sn_user" ]; do
    printf '%s\n' "$sn_user"
    # Most accounts have no .newsrc; that is expected, so stay quiet.
    grep : "$sn_home/.newsrc" 2>/dev/null
  done <"${1-/etc/passwd}"
}

#######################################
# Filter the scan_newsrc stream (stdin) down to what the report needs:
#   "group list"  for net.* / mod.* entries that have an article list
#   "login"       for login-name lines
# A login name is recognised as a one-word line without a colon: every
# .newsrc line that reaches this filter contains one, and a login name
# cannot.  (Matching only lower-case letters used to drop names such as
# "user1", whose groups were then credited to the previous user.)
#######################################
select_newsrc() {
  awk '
/^(net|mod)\..*: *[0-9]/ { split($0, part, ":"); print part[1], part[2] }
NF == 1 && $1 !~ /:/     { print $1 }
'
}

#######################################
# makereport -- utility for "arbitron". Shamelessly copied from the
# similar script distributed with "subscribers.sh" by Blonder, McCreery, and
# Herron.
# Globals:   poption, moption, nusers, hostname (read, passed to awk)
# Arguments: input files; active-file lines must come before the
#            select_newsrc output ("-" means stdin)
# Outputs:   unsorted report lines.  Header lines carry a 9999..9995 prefix
#            so that "sort -nr" floats them to the top; the caller strips it.
#######################################
format_report() {
  awk -v poption="$poption" -v moption="$moption" \
    -v nusers="$nusers" -v hostname="$hostname" '
BEGIN { grpcount = 0; rdrcount = 0; reader = ""; realusers = 0 }

# 4 fields: an active-file entry.  Groups are numbered from 1.
NF == 4 {
    grpcount++
    grpname[grpcount] = $1
    grpnumber[$1] = grpcount
    grplast[grpcount] = $2
    grpfirst[grpcount] = $3
    grpcounts[grpcount] = 0
    grpreaders[grpcount] = ""
}

# 1 field: a login name; the .newsrc lines that follow belong to it.
NF == 1 { rdrcount++; reader = $1 }

# 2 fields: a .newsrc line, "group article-list".
NF == 2 {
    # A group that is not in the active file cannot be rated, and must not
    # turn its reader into a "net reader" either.
    if (!($1 in grpnumber)) next
    gnum = grpnumber[$1]
    # The highest article read is the last number of the list: take the
    # piece after the final "-", then the piece after its final ",".
    nranges = split($2, ranges, "-")
    nitems = split(ranges[nranges], items, ",")
    lastread = items[nitems]
    # "+ 0" forces a numeric comparison; the active file pads with zeros.
    if (lastread + 0 >= grpfirst[gnum] + 0) {
        grpcounts[gnum]++
        grpreaders[gnum] = (grpreaders[gnum] " " reader)
        if (realuser[rdrcount] == 0) {
            realuser[rdrcount] = 1
            realusers++
        }
    }
}

END {
    printf("9999 Host\t\t%s\n", hostname)
    printf("9998 Users\t\t%d\n", nusers)
    printf("9997 NetReaders\t%d\n", realusers)
    if (0 == moption) printf("9996 \n9995 rdrs rating share traffic M/R newsgroup\n")
    # Indices run 1..grpcount; starting at 0 and stopping short of grpcount
    # would silently drop the last group of the active file.
    for (i = 1; i <= grpcount; i++) {
        if (grpcounts[i] <= 0) continue
        traffic = grplast[i] - grpfirst[i]
        if (0 == poption) {
            # Local report: fixed 35-column prefix, then the reader names.
            obuf = sprintf("%-35.35s", sprintf("%4d %5d %s", grpcounts[i], traffic, grpname[i]))
            nf = split(grpreaders[i], rdrs, " ")
            for (j = 1; j <= nf; j++) obuf = (obuf rdrs[j] " ")
        } else if (0 == moption) {
            # Posted report.  nusers can be 0 when no account falls inside
            # the UID range; report 0 rather than dividing by it.
            rating = (nusers + 0 > 0) ? (100 * grpcounts[i]) / nusers : 0
            share = (100 * grpcounts[i]) / realusers
            ratio = traffic / grpcounts[i]
            obuf = sprintf("%4d %5.1f%% %4d%% %5d %5.1f %s", grpcounts[i], rating, share, traffic, ratio, grpname[i])
        } else {
            # Mailed record.  NOTE(review): the embedded newline produces a
            # blank line per record (they sort to the end).  It is kept as
            # is because the accumulator that parses this is not visible.
            obuf = sprintf("%d %s\n", grpcounts[i], grpname[i])
        }
        printf("%s\n", obuf)
    }
}
' "$@"
}

# ---------------------------------------------------------------------------
# Main pipeline.
scan_newsrc | select_newsrc >"$workdir/newsrc"
grep -E '^net\.|^mod\.' "$news/active" | sort |
  format_report - "$workdir/newsrc" | sort -nr |
  sed -e 's/^999[0-9] //' | deliver
--
Brian Reid decwrl!glacier!reid
Stanford reid@SU-Glacier.ARPA
reid@glacier.ARPA (Brian Reid) (03/10/86)
A couple of days ago I posted a rating-sweep program and asked people to run
it with the -m option, which will mail its output to a collector I set up on
glacier. I've gotten 11 responses so far, which, while not earth-shaking,
provide a fascinating look at what's going on. Here's the output so far.
(The file from one place (BU) was empty--somehow the program didn't work
there).
The definition that this software uses of "X reads group Y" is that X has
read at least one of the messages posted to this group since the last
expiration was run, or else that there has been no traffic. You will see
some names from the past, such as "mod.ber", in here.
This is roughly a 0.4 % sample of net sites. If you don't like these
numbers, then contribute to them by mailing in your site's arbitron results.
Summary: 1701 users at 10 sites, 15.99% of them (272) netreaders.
# of % of all % of news
readers users users
58 3.41 21.32 mod.movies
50 2.94 18.38 net.announce
47 2.76 17.28 net.jokes
47 2.76 17.28 net.general
47 2.76 17.28 mod.sources
43 2.53 15.81 net.sources
40 2.35 14.71 mod.unix
33 1.94 12.13 mod.std
32 1.88 11.76 net.lang.c++
32 1.88 11.76 net.audio
32 1.88 11.76 mod.os.unix
31 1.82 11.40 net.singles
30 1.76 11.03 mod.newprod
29 1.70 10.66 net.arch
29 1.70 10.66 mod.recipes
28 1.65 10.29 net.jobs
27 1.59 9.93 net.usenix
27 1.59 9.93 net.sources.bugs
26 1.53 9.56 net.rec.nude
26 1.53 9.56 mod.test
25 1.47 9.19 net.sources.mac
25 1.47 9.19 net.micro.mac
25 1.47 9.19 net.announce.newusers
25 1.47 9.19 mod.os
25 1.47 9.19 mod.general
24 1.41 8.82 net.movies
24 1.41 8.82 net.cooks
24 1.41 8.82 net.consumers
23 1.35 8.46 net.sources.games
23 1.35 8.46 net.rumor
23 1.35 8.46 net.news
23 1.35 8.46 net.med
23 1.35 8.46 net.jokes.d
22 1.29 8.09 net.women
22 1.29 8.09 net.emacs
22 1.29 8.09 net.bugs
22 1.29 8.09 net.bicycle
22 1.29 8.09 net.announce.arpa-internet
21 1.23 7.72 net.unix
21 1.23 7.72 net.sci
21 1.23 7.72 net.rec
21 1.23 7.72 net.astro.expert
20 1.18 7.35 net.travel
20 1.18 7.35 net.research
20 1.18 7.35 net.music.classical
19 1.12 6.99 net.ai
19 1.12 6.99 mod.std.mumps
19 1.12 6.99 mod.std.c
19 1.12 6.99 mod.human-nets
19 1.12 6.99 mod.ber
18 1.06 6.62 net.taxes
18 1.06 6.62 net.social
18 1.06 6.62 net.lang
18 1.06 6.62 net.astro
18 1.06 6.62 mod.singles
18 1.06 6.62 mod.protocols.kermit
18 1.06 6.62 mod.politics
18 1.06 6.62 mod.compilers
17 1.00 6.25 net.news.stargate
17 1.00 6.25 net.news.group
17 1.00 6.25 net.net-people
17 1.00 6.25 net.lsi
17 1.00 6.25 net.lan
17 1.00 6.25 net.invest
17 1.00 6.25 net.auto.tech
17 1.00 6.25 net.auto
17 1.00 6.25 mod.computers.macintosh
17 1.00 6.25 mod.computers.laser-printers
16 0.94 5.88 net.text
16 0.94 5.88 net.rec.ski
16 0.94 5.88 net.music.folk
16 0.94 5.88 net.micro.pc
16 0.94 5.88 net.micro
16 0.94 5.88 net.games.hack
16 0.94 5.88 net.followup
16 0.94 5.88 net.books
16 0.94 5.88 net.bio
15 0.88 5.51 net.wines
15 0.88 5.51 net.wanted
15 0.88 5.51 net.unix-wizards
15 0.88 5.51 net.puzzle
15 0.88 5.51 net.music
15 0.88 5.51 net.math
15 0.88 5.51 net.mail
15 0.88 5.51 net.lang.c
15 0.88 5.51 net.graphics
15 0.88 5.51 net.games
15 0.88 5.51 net.bugs.4bsd
15 0.88 5.51 mod.protocols
15 0.88 5.51 mod.graphics
15 0.88 5.51 mod.computers.sun
15 0.88 5.51 mod.ai
14 0.82 5.15 net.tv
14 0.82 5.15 net.rec.photo
14 0.82 5.15 net.misc
14 0.82 5.15 net.legal
14 0.82 5.15 net.columbia
14 0.82 5.15 mod.techreports
14 0.82 5.15 mod.music
14 0.82 5.15 mod.computers.workstations
13 0.76 4.78 net.space
13 0.76 4.78 net.physics
13 0.76 4.78 net.music.gdead
13 0.76 4.78 net.mag
13 0.76 4.78 net.lang.mod2
13 0.76 4.78 net.internat
13 0.76 4.78 net.games.video
13 0.76 4.78 net.dcom
13 0.76 4.78 net.bugs.v7
13 0.76 4.78 mod.telecom
12 0.71 4.41 net.religion.jewish
12 0.71 4.41 net.periphs
12 0.71 4.41 net.nlang.india
12 0.71 4.41 net.news.sa
12 0.71 4.41 net.math.symbolic
12 0.71 4.41 net.mail.headers
12 0.71 4.41 net.lang.prolog
12 0.71 4.41 net.cse
12 0.71 4.41 net.crypt
12 0.71 4.41 net.challenger
12 0.71 4.41 mod.computers.ibm-pc
11 0.65 4.04 net.wanted.sources
11 0.65 4.04 net.veg
11 0.65 4.04 net.startrek
11 0.65 4.04 net.sf-lovers
11 0.65 4.04 net.rec.wood
11 0.65 4.04 net.os
11 0.65 4.04 net.nlang
11 0.65 4.04 net.news.b
11 0.65 4.04 net.micro.68k
11 0.65 4.04 net.kids
11 0.65 4.04 net.games.chess
11 0.65 4.04 net.database
11 0.65 4.04 net.comics
11 0.65 4.04 net.college
11 0.65 4.04 net.cog-eng
11 0.65 4.04 mod.protocols.tcp-ip
11 0.65 4.04 mod.newslists
10 0.59 3.68 net.wobegon
10 0.59 3.68 net.religion
10 0.59 3.68 net.news.adm
10 0.59 3.68 net.motss
10 0.59 3.68 net.micro.amiga
10 0.59 3.68 net.ham-radio
10 0.59 3.68 net.bugs.uucp
9 0.53 3.31 net.tv.drwho
9 0.53 3.31 net.micro.atari
9 0.53 3.31 net.lang.lisp
9 0.53 3.31 net.decus
9 0.53 3.31 mod.protocols.appletalk
9 0.53 3.31 mod.motss
9 0.53 3.31 mod.map
8 0.47 2.94 net.video
8 0.47 2.94 net.sport.baseball
8 0.47 2.94 net.sport
8 0.47 2.94 net.sources.d
8 0.47 2.94 net.pets
8 0.47 2.94 net.news.notes
8 0.47 2.94 net.news.newsite
8 0.47 2.94 net.micro.16k
8 0.47 2.94 net.lang.apl
8 0.47 2.94 net.ham-radio.packet
8 0.47 2.94 net.games.board
8 0.47 2.94 net.aviation
8 0.47 2.94 net.analog
8 0.47 2.94 mod.std.unix
8 0.47 2.94 mod.computers.pyramid
7 0.41 2.57 net.religion.christian
7 0.41 2.57 net.politics
7 0.41 2.57 net.origins
7 0.41 2.57 net.news.config
7 0.41 2.57 net.micro.hp
7 0.41 2.57 net.micro.att
7 0.41 2.57 net.micro.atari16
7 0.41 2.57 net.info-terms
7 0.41 2.57 net.flame
7 0.41 2.57 mod.computers.sequent
6 0.35 2.21 net.philosophy
6 0.35 2.21 net.nlang.africa
6 0.35 2.21 net.music.synth
6 0.35 2.21 net.lang.st80
6 0.35 2.21 net.lang.forth
6 0.35 2.21 net.lang.f77
6 0.35 2.21 net.lang.ada
6 0.35 2.21 net.games.trivia
6 0.35 2.21 net.games.frp
6 0.35 2.21 net.abortion
6 0.35 2.21 mod.vlsi
6 0.35 2.21 mod.risks
6 0.35 2.21 mod.legal
6 0.35 2.21 mod.computers.gould
6 0.35 2.21 mod.computers.apollo
5 0.29 1.84 net.roots
5 0.29 1.84 net.poems
5 0.29 1.84 net.math.stat
5 0.29 1.84 net.lang.pascal
5 0.29 1.84 net.garden
5 0.29 1.84 net.games.rogue
5 0.29 1.84 net.games.emp
5 0.29 1.84 net.cycle
5 0.29 1.84 net.bio.slime
5 0.29 1.84 mod.os.os9
4 0.24 1.47 net.railroad
4 0.24 1.47 net.micro.trs-80
4 0.24 1.47 net.micro.cpm
4 0.24 1.47 net.micro.atari8
4 0.24 1.47 net.micro.apple
4 0.24 1.47 net.eunice
4 0.24 1.47 net.bugs.usg
4 0.24 1.47 mod.rec.guns
4 0.24 1.47 mod.politics.arms-d
4 0.24 1.47 mod.computers.vax
3 0.18 1.10 net.tv.soaps
3 0.18 1.10 net.test
3 0.18 1.10 net.suicide
3 0.18 1.10 net.sport.hockey
3 0.18 1.10 net.sport.football
3 0.18 1.10 net.rec.scuba
3 0.18 1.10 net.rec.birds
3 0.18 1.10 net.movies.sw
3 0.18 1.10 net.micro.ti
3 0.18 1.10 net.games.pbm
3 0.18 1.10 net.games.go
3 0.18 1.10 net.bugs.2bsd
3 0.18 1.10 mod.map.uucp
3 0.18 1.10 mod.computers.ridge
2 0.12 0.74 net.sport.hoops
2 0.12 0.74 net.rec.bridge
2 0.12 0.74 net.rec.boat
2 0.12 0.74 net.politics.theory
2 0.12 0.74 net.micro.cbm
--
Brian Reid decwrl!glacier!reid
Stanford reid@SU-Glacier.ARPA