richard@islenet.UUCP (Richard Foulk) (11/25/84)
Does anyone have some kind of utility that pokes around in people's .newsrc files and produces meaningful (?) statistics on general news readership for that site? Thanks -- Richard Foulk ...{dual,vortex,ihnp4}!islenet!richard Honolulu, Hawaii
chuqui@nsc.UUCP (Cheshire Chuqui) (11/27/84)
In article <772@islenet.UUCP> richard@islenet.UUCP (Richard Foulk) writes: >Does anyone have some kind of utility that pokes around in peoples .newsrc >files and produces meaningful (?) statistics on general news readership >for that site? This is something Fred Blonder wrote a while back and passed around. It does come in handy, especially if you are short on disk space and trying to figure out what groups to shorten... #! /bin/sh # # @(#)subscribers.sh (University of Maryland) Fred Blonder 19-Aug-1983 # # Find out how many people subscribe to each newsgroup sub_tmp=/tmp/#s.$$ trap "rm -f $sub_tmp" 0 1 2 15 for dir in `awk -F: '{ print $6 }' /etc/passwd | sort -u` do # locate all login directories if # if .newsrc exists [ -r $dir/.newsrc ] then # find all newsgroups subscribed to, append to $sub_tmp awk -F: '/^net\..*: [0-9].*$/ { print $1 }' \ $dir/.newsrc >> $sub_tmp fi done # Count all ocurrences of all newsgroups. # Print result sorted by decreasing number of subscribers. awk '{ x[$1] = x[$1] + 1 } \ END { for (i in x) print i " " x[i] }' $sub_tmp | sort +1 -rn -- From the center of a Plaid pentagram: Chuq Von Rospach {cbosgd,decwrl,fortune,hplabs,ihnp4,seismo}!nsc!chuqui nsc!chuqui@decwrl.ARPA ~But you know, monsieur, that as long as she wears the claw of the dragon upon her breast you can do nothing-- her soul belongs to me!~
whm@arizona.UUCP (whm) (12/01/84)
Here's an Icon program that does some .newsrc investigations. Bill Mitchell whm.arizona@csnet-relay {noao,mcnc,utah-cs}!arizona!whm ----- Cut Here ----- # # newscount--read .newsrc files and produce information about them. Accepts # .newsrc files as arguments or reads one on standard input. Not a sterling # example of Icon coding by any means. # record ginfo(number,reading,nsubs,nusubs) procedure main(a) digs := '0123456789' nt := table() if *a = 0 then a := [&input] every f := !a do { if type(f) ~== "file" then f := open(f) | (write("Can't open '",f,"'") & f := open("/dev/null","r")) while line := read(f) do { line ? { group := 1(tab(many(~' ')-1), sep := (move(1) == (":"|"!"))) | next tab(upto(digs)) | next nr := ginfo(count(tab(0)),sep,0,0) if \nt[group] & *a > 1 then { nt[group].number +:= nr.number (nr.reading == nt[group].reading == "!") | (nt[group].reading := ":") } else nt[group] := nr if nr.reading ~== "!" then nt[group].nsubs +:= 1 else nt[group].nusubs +:= 1 } } close(f) } nt := sort(nt) n := g := act := 0 every e := !nt do { write(left((e[1] || ((e[2].reading == "!","(!)") | 1("",act+:=1))),20), right(e[2].number,6), (*a > 1 & right(e[2].nsubs||"/"||e[2].nusubs,10))|"") g +:= 1 n +:= e[2].number } write("Total of ",n," articles in ",g," groups. ", act," groups currently active.") end procedure count(s) n := 0 s ? while (n +:= nread(tab(many(~','))),move(1)) return n end procedure nread(s) s ? { first := tab(upto('-')) & move(1) & last := tab(0) } | (last := first := 0) return last - first + 1 end
kaufman@yale.ARPA (Qux the Barbarian) (12/03/84)
As long as we're all posting our News statistics generators... Unlike the others, this is a C program not a shell script (sorry, Henry!). Since it computes disk usages, it takes a while to run. I run it out of /usr/lib/news/trimlib. shar+enjoy, Qux Kaufman@Yale.Arpa Kaufman@YaleCS.Bitnet ..!decvax!yale!kaufman : to unbundle, "sh" this file -- DO NOT use csh : SHAR archive format. Archive created Sun Dec 2 22:03:08 EST 1984 echo x - ngstats.c sed 's/^X//' >ngstats.c <<'+FUNKY+STUFF+' X#include <stdio.h> X#include <sys/types.h> X#include <sys/dir.h> X#include <sys/stat.h> X#include <pwd.h> X X/* A hack! A hack! My kingdom for a hack! */ X X/* ngstats.c X * X * Generate news readership statistics. Best run by cron, perhaps from X * /usr/lib/news/trimlib. X * X * Written by Qux the Barbarian (Kaufman@Yale.Arpa, Kaufman@YaleCS.Bitnet, X * ..!decvax!yale!kaufman). Hereby placed in the Public Domain, for what X * that's worth; I would appreciate hearing about bugs/fixes/extensions X * and ports to other Operating Systems or versions of Unix. X * X * Tested under 4.2 BSD; should work without changes under 4.1 BSD with the X * ndir upward compatibility routines. X */ X X#define ACTIVE "/usr/lib/news/active" X#define SPOOLDIR "/usr/spool/news" X X#define WEEKS 7 * 24 * 60 * 60 X#define READ_DELTA (2*WEEKS) X/*#define RAPIDOFLEX /* don't actually gather stats */ X Xstruct NG { X char name[100]; X int rdrs; X int first_art; X int last_art; X int du; X int artcnt; X} ngs[512]; X Xint ngcnt = 0, unknownngcnt = 0; Xint cmprdrs = -1; X X#define rdr_du(a) ((a).rdrs ? (a).du / (a).rdrs : (a).du) X Xngcmp(a, b) Xstruct NG *a, *b; { X X if ((cmprdrs && a->rdrs == b->rdrs) || X (!cmprdrs && rdr_du(*a) == rdr_du(*b))) X return(strcmp(a->name, b->name)); X else if (cmprdrs) X return(a->rdrs < b->rdrs ? 1 : -1); X else X return(rdr_du(*a) < rdr_du(*b) ? 
1 : -1); X} X Xmain (argc, argv) Xint argc; Xchar **argv; { X struct passwd *pp, *getpwent(); X FILE *fp; X int newsrccnt = 0, i, noreadcnt = 0; X int notreadcnt = 0, norccnt = 0; X int accounts = 0, dusum = 0; X char buf[100], c; X long last_read_time; X X /* set up some time junk */ X time(&last_read_time); X last_read_time -= READ_DELTA; X X /* read in active file */ X if ((fp = fopen(ACTIVE, "r")) == NULL) { X perror(ACTIVE); X exit(1); X } X while (!feof(fp)) { X fscanf(fp, "%s %d %d %c\n", X ngs[ngcnt].name, &ngs[ngcnt].last_art, X &ngs[ngcnt].first_art, &c); X ngs[ngcnt].rdrs = 0; X ngs[ngcnt++].du = 0; X } X fclose(fp); X X /* Compute disk usages (hack hack hack) */ X for (i=0; i <ngcnt; i++) { X ngs[i].artcnt = 0; X dusum += ngdu(&ngs[i]); X } X X /* Now read .newsrc's */ X while (pp = getpwent()) { X accounts++; X sprintf(buf, "%s/.newsrc", pp->pw_dir); X if (access(buf, 0)) X norccnt++; X else { X struct stat stbuf; X X if (!stat(buf, &stbuf) && stbuf.st_mtime < last_read_time) X notreadcnt++; X else if ((fp = fopen(buf, "r")) == NULL) X noreadcnt++; X else { X newsrccnt++; X readnewsrc(fp, buf); X fclose(fp); X } X } X } X X /* sort the stats */ X qsort(ngs, ngcnt, sizeof(struct NG), ngcmp); X X /* Now, print the statistics */ X printf("\nFor %d accounts on system %s:\n", accounts, sysname()); X printf("\t%d have no .newsrc\n", norccnt); X printf("\t%d have an unreadable .newsrc\n", noreadcnt); X printf("\t%d have an out of date .newsrc\n", notreadcnt); X printf("\t%d .newsrc's were read\n", newsrccnt); X printf("\nFor %d newsgroups received:\n", ngcnt); X printf("\t%d unknown newsgroups referenced in .newsrc's read.\n", unknownngcnt); X printf("\t%d blocks total disk usage\n\n", dusum); X puts("Statistics sorted by decreasing number of readers per newsgroup:\n"); X X puts("Readers\t # Articles\tDisk Space\tBlocks/Rdr\tNewsgroup"); X for (i=0; i < ngcnt; i++) X printf("%5d\t %5d\t %6d\t %5d\t\t%s\n", X ngs[i].rdrs, ngs[i].artcnt, X ngs[i].du, 
rdr_du(ngs[i]), X ngs[i].name); X X X /* resort the stats */ X cmprdrs = 0; X qsort(ngs, ngcnt, sizeof(struct NG), ngcmp); X X /* Do it again sorted differently */ X puts("\nSorted by decreasing number of blocks per newsgroup reader:\n"); X puts("Readers\t # Articles\tDisk Space\tBlocks/Rdr\tNewsgroup"); X for (i=0; i < ngcnt; i++) X printf("%5d\t %5d\t %6d\t %5d\t\t%s\n", X ngs[i].rdrs, ngs[i].artcnt, X ngs[i].du, rdr_du(ngs[i]), X ngs[i].name); X X} X Xreadnewsrc(rcp, filename) Xchar *filename; XFILE *rcp; { X char buf[100], *cp, *index(); X int i; X X#ifdef RAPIDOFLEX X return; X#endif RAPIDOFLEX X X while (fgets(buf, 100, rcp)) { X X if ((cp = index(buf, ':')) == NULL) X continue; X X *cp = '\0'; X for (i = 0; i < ngcnt; i++) X if (!strcmp(buf, ngs[i].name)) { X ngs[i].rdrs++; X break; X } X if (i == ngcnt) X unknownngcnt++; X } X} X Xngdu(ng) Xstruct NG *ng; { X char ngdir[256], ngart[256], *cp; X struct stat stbuf; X DIR *dir; X struct direct *dirent; X int pad = 0; /* hack hack hack */ X int i; X X#ifdef RAPIDOFLEX X return(0); X#endif RAPIDOFLEX X X /* create directory name */ X sprintf(ngdir, "%s/%s", SPOOLDIR, ng->name); X cp = ngdir; X while (cp = index(cp, '.')) X *cp = '/'; X X /* calculate usage of all articles combined */ X if ((dir = opendir(ngdir)) == NULL) { X perror(ngdir); X return(0); X } X seekdir(dir, 2); X while (dirent = readdir(dir)) { X /* skip deleted files */ X if (!dirent->d_ino) X continue; X X /* skip directories */ X sprintf(ngart, "%s/%s", ngdir, dirent->d_name); X if (stat(ngart, &stbuf) || (stbuf.st_mode & S_IFMT) == S_IFDIR) X continue; X X /* increment article count and disk usage */ X ng->artcnt++; X if ((stbuf.st_mode & S_IFMT) == S_IFREG) X ng->du += (stbuf.st_size + 511) / 512; X } X closedir(dir); X X /* compare article counts */ X if (ng->last_art == 0 && ng->first_art == 1) X i = 0; X else X i = ng->last_art - ng->first_art; X if (ng->artcnt != i && ng->artcnt != i+1) { X printf("Article count mismatch for newsgroup %s: 
", ng->name); X printf("active claims %d, actual is %d.\n", i, ng->artcnt); X } X return(ng->du); X} +FUNKY+STUFF+ ls -l ngstats.c echo x - sysname.c sed 's/^X//' >sysname.c <<'+FUNKY+STUFF+' X#include <stdio.h> X X/* sysname() X * Ala 4.1, but now only an interface to gethostname(). X * David H. Kaufman X */ X X#define SYSNAMELEN 255 Xstatic char systemname[SYSNAMELEN]; X Xchar *sysname() { X if (!*systemname) X if (gethostname(systemname, SYSNAMELEN)) X return(NULL); X X return(systemname); X} +FUNKY+STUFF+ ls -l sysname.c exit 0
chuqui@nsc.UUCP (Cheshire Chuqui) (12/04/84)
References <6644@yale.ARPA> Reply-To: chuqui@nsc.UUCP (Cheshire Chuqui) Distribution: net Organization: Plaid Heaven Keywords: Summary: One word of warning on all of these programs for local netnews stats-- rn now allows you to move the .newsrc out of $HOME, so there is no guarantee that you'll find all of them unless you want to build enough intelligence into your program to track RNINIT variables in the environment and all sorts of other wonderful stuff. The other thing these programs don't do but probably should is recognize .newsrc files that haven't been used in a long time-- if someone hasn't accessed news in 30 days there is a good chance he probably isn't reading news anymore and you can ignore that file. chuq (no, I haven't implemented either...) -- From the center of a Plaid pentagram: Chuq Von Rospach {cbosgd,decwrl,fortune,hplabs,ihnp4,seismo}!nsc!chuqui nsc!chuqui@decwrl.ARPA ~But you know, monsieur, that as long as she wears the claw of the dragon upon her breast you can do nothing-- her soul belongs to me!~