[net.wanted.sources] local netnews usage statistics

richard@islenet.UUCP (Richard Foulk) (11/25/84)

Does anyone have some kind of utility that pokes around in people's .newsrc
files and produces meaningful (?) statistics on general news readership
for that site?

Thanks
-- 
Richard Foulk		...{dual,vortex,ihnp4}!islenet!richard
Honolulu, Hawaii

chuqui@nsc.UUCP (Cheshire Chuqui) (11/27/84)

In article <772@islenet.UUCP> richard@islenet.UUCP (Richard Foulk) writes:
>Does anyone have some kind of utility that pokes around in people's .newsrc
>files and produces meaningful (?) statistics on general news readership
>for that site?

This is something Fred Blonder wrote a while back and passed around. It
does come in handy, especially if you are short on disk space and trying to
figure out what groups to shorten...


#! /bin/sh
#
# @(#)subscribers.sh	(University of Maryland) Fred Blonder 19-Aug-1983
#
# Find out how many people subscribe to each newsgroup.
#
# Looks at the .newsrc in every distinct home directory listed in
# /etc/passwd, collects each net.* group whose line has the form
# "group: <digit>...", and prints "group count" lines, largest count first.

sub_tmp=/tmp/#s.$$

# Remove the scratch file whenever the script exits.  On hangup, interrupt
# or terminate, exit explicitly: a trap that only removes the file would let
# the script resume and keep writing to it.
trap 'rm -f "$sub_tmp"' 0
trap 'exit 1' 1 2 15

# Start from an empty scratch file so the final awk has a file to read even
# when no readable .newsrc is found.
: > "$sub_tmp"

# Visit every distinct login directory.
for dir in `awk -F: '{ print $6 }' /etc/passwd | sort -u`
do
	if [ -r "$dir/.newsrc" ]
	then
		# Append the name of every matching newsgroup to the scratch file.
		awk -F: '/^net\..*: [0-9].*$/ { print $1 }' \
			"$dir/.newsrc" >> "$sub_tmp"
	fi
done

# Count all occurrences of all newsgroups.
# Print result sorted by decreasing number of subscribers.
awk '{ x[$1] = x[$1] + 1 }
	END { for (i in x) print i " " x[i] }' "$sub_tmp" | sort -k 2,2nr

-- 
From the center of a Plaid pentagram:		Chuq Von Rospach
{cbosgd,decwrl,fortune,hplabs,ihnp4,seismo}!nsc!chuqui  nsc!chuqui@decwrl.ARPA

  ~But you know, monsieur, that as long as she wears the claw of the dragon
  upon her breast you can do nothing-- her soul belongs to me!~

whm@arizona.UUCP (whm) (12/01/84)

Here's an Icon program that does some .newsrc investigations.

					Bill Mitchell
					whm.arizona@csnet-relay
					{noao,mcnc,utah-cs}!arizona!whm
----- Cut Here -----
#
# newscount--read .newsrc files and produce information about them.  Accepts
#  .newsrc files as arguments or reads one on standard input.  Not a sterling
#  example of Icon coding by any means.
#
# ginfo -- what is known about one newsgroup:
#   number   article count computed by count() from the group's article list
#   reading  the separator that followed the group name, ":" or "!"
#   nsubs    incremented for each line seen whose separator is not "!"
#   nusubs   incremented for each line seen whose separator is "!"
record ginfo(number,reading,nsubs,nusubs)
# main -- read each .newsrc named in a (or standard input when a is empty),
#  accumulate a ginfo record per newsgroup in table nt, then write one line
#  per group followed by a line of totals.
procedure main(a)

    digs := '0123456789'
    nt := table()
    # With no arguments, the only "file" processed is standard input.
    if *a = 0 then
        a := [&input]
    every f := !a do {
        # Arguments are names and must be opened (&input is already a file).
        # A name that cannot be opened is reported and /dev/null is read
        # in its place, so the loop below simply sees no lines.
        if type(f) ~== "file" then
            f := open(f) |
	    	 (write("Can't open '",f,"'") & f := open("/dev/null","r"))
        while line := read(f) do {
            line ? {
                # group is the leading run of non-blank characters minus its
                # last character; that last character must be ":" or "!" and
                # is kept in sep.  Any other line is skipped.
                group := 1(tab(many(~' ')-1),
                    sep := (move(1) == (":"|"!"))) | next
                # Skip to the first digit; lines with no digits are skipped.
                tab(upto(digs)) | next
                # Count the articles listed in the remainder of the line.
                nr := ginfo(count(tab(0)),sep,0,0)
                # Group already seen and several files given: add the counts.
                # The merged entry stays "!" only when both the old and the
                # new entry are "!"; otherwise it becomes ":".
                if \nt[group] & *a > 1 then {
                    nt[group].number +:= nr.number
		    (nr.reading == nt[group].reading == "!") |
		    	(nt[group].reading := ":")
                    }
                else
                    nt[group] := nr
                # Tally this line under nsubs or nusubs by its separator.
		if nr.reading ~== "!" then
		    nt[group].nsubs +:= 1
		else
		    nt[group].nusubs +:= 1
                }
            }
        close(f)
        }
    nt := sort(nt)
    n := g := act := 0
    # One output line per group: the name (with "(!)" appended when the entry
    # is "!", otherwise act is incremented) left-justified in 20 columns, the
    # article count right-justified in 6, and, when more than one file was
    # given, "nsubs/nusubs" right-justified in 10.
    every e := !nt do {
        write(left((e[1] ||
		   ((e[2].reading == "!","(!)") | 1("",act+:=1))),20),
	    right(e[2].number,6),
	    (*a > 1 & right(e[2].nsubs||"/"||e[2].nusubs,10))|"")
        g +:= 1
        n +:= e[2].number
        }
    write("Total of ",n," articles in ",g," groups.  ",
    	   act," groups currently active.")
end
# count -- total the values nread() gives for each comma-separated item of s.
procedure count(s)
    local total
    total := 0
    s ? repeat {
        # take the next run of non-comma characters; stop when there is none
        (total +:= nread(tab(many(~',')))) | break
        # step over the comma; stop at the end of the string
        move(1) | break
        }
    return total
end
# nread -- size of one article-list item s: "lo-hi" yields hi - lo + 1,
#  and an item containing no '-' yields 1.
procedure nread(s)
    local lo, hi
    if s ? ((lo := tab(upto('-'))) & move(1) & (hi := tab(0))) then
        return hi - lo + 1
    return 1
end

kaufman@yale.ARPA (Qux the Barbarian) (12/03/84)

As long as we're all posting our News statistics generators...

Unlike the others, this is a C program not a shell script (sorry, Henry!).
Since it computes disk usages, it takes a while to run.  I run it out of
/usr/lib/news/trimlib.

shar+enjoy,
    Qux

Kaufman@Yale.Arpa       Kaufman@YaleCS.Bitnet       ..!decvax!yale!kaufman

: to unbundle, "sh" this file -- DO NOT use csh
:  SHAR archive format.  Archive created Sun Dec 2 22:03:08 EST 1984
echo x - ngstats.c
sed 's/^X//' >ngstats.c <<'+FUNKY+STUFF+'
X#include    <stdio.h>
X#include    <sys/types.h>
X#include    <sys/dir.h>
X#include    <sys/stat.h>
X#include    <pwd.h>
X
X/* A hack!  A hack!  My kingdom for a hack! */
X
X/* ngstats.c
X *
X * Generate news readership statistics.  Best run by cron, perhaps from
X * /usr/lib/news/trimlib.
X *
X * Written by Qux the Barbarian (Kaufman@Yale.Arpa, Kaufman@YaleCS.Bitnet,
X * ..!decvax!yale!kaufman).  Hereby placed in the Public Domain, for what
X * that's worth;  I would appreciate hearing about bugs/fixes/extensions
X * and ports to other Operating Systems or versions of Unix.
X *
X * Tested under 4.2 BSD; should work without changes under 4.1 BSD with the
X * ndir upward compatibility routines.
X */
X
X#define ACTIVE "/usr/lib/news/active"
X#define SPOOLDIR "/usr/spool/news"
X
X#define WEEKS 7 * 24 * 60 * 60
X#define READ_DELTA (2*WEEKS)
X/*#define RAPIDOFLEX      /* don't actually gather stats */
X
X/* Per-newsgroup record; ngs[] is filled from the ACTIVE file by main(). */
Xstruct NG {
X    char name[100];	/* newsgroup name, first field of the active line */
X    int rdrs;		/* .newsrc lines naming this group, bumped by readnewsrc() */
X    int first_art;	/* third numeric field of the active line */
X    int last_art;	/* second numeric field of the active line */
X    int du;		/* disk usage in 512-byte blocks, summed by ngdu() */
X    int artcnt;		/* non-directory spool entries counted by ngdu() */
X} ngs[512];
X
X/* ngcnt: number of ngs[] entries in use.
X * unknownngcnt: .newsrc lines whose group name matched no ngs[] entry. */
Xint ngcnt = 0, unknownngcnt = 0;
X/* Sort key used by ngcmp(): non-zero compares reader counts, zero compares
X * blocks per reader.  main() clears it before the second sort. */
Xint cmprdrs = -1;
X
X/* Blocks per reader; a group with no readers yields its whole disk usage. */
X#define rdr_du(a) ((a).rdrs ? (a).du / (a).rdrs : (a).du)
X
Xngcmp(a, b)
Xstruct NG *a, *b; {
X
X    if ((cmprdrs && a->rdrs == b->rdrs) ||
X       (!cmprdrs && rdr_du(*a) == rdr_du(*b)))
X       return(strcmp(a->name, b->name));
X    else if (cmprdrs)
X       return(a->rdrs < b->rdrs ? 1 : -1);
X    else
X       return(rdr_du(*a) < rdr_du(*b) ? 1 : -1);
X}
X
Xmain (argc, argv)
Xint argc;
Xchar **argv; {
X    struct passwd *pp, *getpwent();
X    FILE *fp;
X    int newsrccnt = 0, i, noreadcnt = 0;
X    int notreadcnt = 0, norccnt = 0;
X    int accounts = 0, dusum = 0;
X    char buf[100], c;
X    long last_read_time;
X
X    /* set up some time junk */
X    time(&last_read_time);
X    last_read_time -= READ_DELTA;
X
X    /* read in active file */
X    if ((fp = fopen(ACTIVE, "r")) == NULL) {
X       perror(ACTIVE);
X       exit(1);
X    }
X    while (!feof(fp)) {
X       fscanf(fp, "%s %d %d %c\n",
X                  ngs[ngcnt].name, &ngs[ngcnt].last_art,
X                  &ngs[ngcnt].first_art, &c);
X       ngs[ngcnt].rdrs = 0;
X       ngs[ngcnt++].du = 0;
X    }
X    fclose(fp);
X
X    /* Compute disk usages (hack hack hack) */
X    for (i=0; i <ngcnt; i++) {
X       ngs[i].artcnt = 0;
X       dusum += ngdu(&ngs[i]);
X    }
X
X    /* Now read .newsrc's */
X    while (pp = getpwent()) {
X       accounts++;
X       sprintf(buf, "%s/.newsrc", pp->pw_dir);
X       if (access(buf, 0))
X           norccnt++;
X       else {
X           struct stat stbuf;
X
X           if (!stat(buf, &stbuf) && stbuf.st_mtime < last_read_time)
X               notreadcnt++;
X           else if ((fp = fopen(buf, "r")) == NULL)
X               noreadcnt++;
X           else {
X               newsrccnt++;
X               readnewsrc(fp, buf);
X               fclose(fp);
X           }
X       }
X    }
X
X    /* sort the stats */
X    qsort(ngs, ngcnt, sizeof(struct NG), ngcmp);
X
X    /* Now, print the statistics */
X    printf("\nFor %d accounts on system %s:\n", accounts, sysname());
X    printf("\t%d have no .newsrc\n", norccnt);
X    printf("\t%d have an unreadable .newsrc\n", noreadcnt);
X    printf("\t%d have an out of date .newsrc\n", notreadcnt);
X    printf("\t%d .newsrc's were read\n", newsrccnt);
X    printf("\nFor %d newsgroups received:\n", ngcnt);
X    printf("\t%d unknown newsgroups referenced in .newsrc's read.\n", unknownngcnt);
X    printf("\t%d blocks total disk usage\n\n", dusum);
X    puts("Statistics sorted by decreasing number of readers per newsgroup:\n");
X
X    puts("Readers\t  # Articles\tDisk Space\tBlocks/Rdr\tNewsgroup");
X    for (i=0; i < ngcnt; i++)
X       printf("%5d\t   %5d\t  %6d\t  %5d\t\t%s\n",
X           ngs[i].rdrs, ngs[i].artcnt,
X           ngs[i].du, rdr_du(ngs[i]),
X           ngs[i].name);
X
X
X    /* resort the stats */
X    cmprdrs = 0;
X    qsort(ngs, ngcnt, sizeof(struct NG), ngcmp);
X
X    /* Do it again sorted differently */
X    puts("\nSorted by decreasing number of blocks per newsgroup reader:\n");
X    puts("Readers\t  # Articles\tDisk Space\tBlocks/Rdr\tNewsgroup");
X    for (i=0; i < ngcnt; i++)
X       printf("%5d\t   %5d\t  %6d\t  %5d\t\t%s\n",
X           ngs[i].rdrs, ngs[i].artcnt,
X           ngs[i].du, rdr_du(ngs[i]),
X           ngs[i].name);
X
X}
X
Xreadnewsrc(rcp, filename)
Xchar *filename;
XFILE *rcp; {
X    char buf[100], *cp, *index();
X    int i;
X
X#ifdef RAPIDOFLEX
X    return;
X#endif RAPIDOFLEX
X
X    while (fgets(buf, 100, rcp)) {
X
X       if ((cp = index(buf, ':')) == NULL)
X           continue;
X
X       *cp = '\0';
X       for (i = 0; i < ngcnt; i++)
X           if (!strcmp(buf, ngs[i].name)) {
X               ngs[i].rdrs++;
X               break;
X           }
X       if (i == ngcnt)
X           unknownngcnt++;
X    }
X}
X
Xngdu(ng)
Xstruct NG *ng; {
X    char ngdir[256], ngart[256], *cp;
X    struct stat stbuf;
X    DIR *dir;
X    struct direct *dirent;
X    int pad = 0;    /* hack hack hack */
X    int i;
X
X#ifdef RAPIDOFLEX
X    return(0);
X#endif RAPIDOFLEX
X
X    /* create directory name */
X    sprintf(ngdir, "%s/%s", SPOOLDIR, ng->name);
X    cp = ngdir;
X    while (cp = index(cp, '.'))
X       *cp = '/';
X
X    /* calculate usage of all articles combined */
X    if ((dir = opendir(ngdir)) == NULL) {
X       perror(ngdir);
X       return(0);
X    }
X    seekdir(dir, 2);
X    while (dirent = readdir(dir)) {
X       /* skip deleted files */
X       if (!dirent->d_ino)
X           continue;
X
X       /* skip directories */
X       sprintf(ngart, "%s/%s", ngdir, dirent->d_name);
X       if (stat(ngart, &stbuf) || (stbuf.st_mode & S_IFMT) == S_IFDIR)
X           continue;
X
X       /* increment article count and disk usage */
X       ng->artcnt++;
X       if ((stbuf.st_mode & S_IFMT) == S_IFREG)
X           ng->du += (stbuf.st_size + 511) / 512;
X    }
X    closedir(dir);
X
X    /* compare article counts */
X    if (ng->last_art == 0 && ng->first_art == 1)
X       i = 0;
X    else
X       i = ng->last_art - ng->first_art;
X    if (ng->artcnt != i  && ng->artcnt != i+1) {
X          printf("Article count mismatch for newsgroup %s: ", ng->name);
X          printf("active claims %d, actual is %d.\n", i, ng->artcnt);
X    }
X    return(ng->du);
X}
+FUNKY+STUFF+
ls -l ngstats.c
echo x - sysname.c
sed 's/^X//' >sysname.c <<'+FUNKY+STUFF+'
X#include <stdio.h>
X
X/* sysname()
X * Ala 4.1, but now only an interface to gethostname().
X * David H. Kaufman
X */
X
X#define SYSNAMELEN 255
Xstatic char systemname[SYSNAMELEN];
X
Xchar *sysname() {
X    if (!*systemname)
X       if (gethostname(systemname, SYSNAMELEN))
X           return(NULL);
X
X    return(systemname);
X}
+FUNKY+STUFF+
ls -l sysname.c
exit 0

chuqui@nsc.UUCP (Cheshire Chuqui) (12/04/84)

References <6644@yale.ARPA>
Reply-To: chuqui@nsc.UUCP (Cheshire Chuqui)
Distribution: net
Organization: Plaid Heaven
Keywords:
Summary:

One word of warning on all of these programs for local netnews stats-- rn
now allows you to move the .newsrc out of $HOME, so there is no guarantee
that you'll find all of them unless you want to build enough intelligence
into your program to track RNINIT variables in the environment and all
sorts of other wonderful stuff. The other thing these programs don't do but
probably should is recognize .newsrc files that haven't been used in a long
time-- if someone hasn't accessed news in 30 days there is a good chance he
probably isn't reading news anymore and you can ignore that file.

chuq (no, I haven't implemented either...)

-- 
From the center of a Plaid pentagram:		Chuq Von Rospach
{cbosgd,decwrl,fortune,hplabs,ihnp4,seismo}!nsc!chuqui  nsc!chuqui@decwrl.ARPA

  ~But you know, monsieur, that as long as she wears the claw of the dragon
  upon her breast you can do nothing-- her soul belongs to me!~