[alt.sources] News Traffic Generator reports

tale@cs.rpi.edu (David C Lawrence) (12/14/89)

In article <9A~{L|@rpi.edu> tale@cs.rpi.edu (David C Lawrence) writes:
> I made a few changes to Erik Fair's NNTP syslog summarizer; it's
> available to anyone that wants it.  I run it as part of my localdaily
> script invoked by cron every morning at 7am.

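Well, I've received eleven requests for this today, so I'll post.  The
only site configuration that should need to be changed is the ``local''
array defined in the BEGIN block, plus the special case for nameless
128.213.* (cs dept) hosts in the readership section of the END block.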

People who like time/cpu information are advised to use the original
version of this script, perhaps with the addition of the domain summary
for readers.  The "polled" section still uses the time/cpu format, so you
can use it as a model for converting the other sections back if you don't
have access to the original.

# an awk script 
# an NNTP log summary report generator
#
# NOTE: for systems that are not as yet using the new 4.3 BSD syslog
# (and therefore have nntp messages lumped with everything else), it
# would be best to invoke this script thusly:
#
#	egrep nntp syslog.old | awk -f nntp_awk > report_of_the_week
#
# because this script will include in the report all messages in the log
# that it does not recognize (on the assumption that they are errors to
# be dealt with by a human).
#
# Erik E. Fair <fair@ucbarpa.berkeley.edu>
# May 17, 1986 - Norwegian Independence Day
#
# Recognize some new things - February 22, 1987
# Erik E. Fair <fair@ucbarpa.berkeley.edu>
#
# fix "xmt is not an array" bug - March 11, 1987
# Change Elapsed/CPU fields to break out time values, HH:MM:SS
# Erik E. Fair <fair@ucbarpa.berkeley.edu>
#
# Add reporting for newnews commands - August 27, 1987
# Erik E. Fair <fair@ucbarpa.berkeley.edu>
#
# Add nntpxmit connection attempt counting/reporting - December 7, 1987
# Erik E. Fair <fair@ucbarpa.berkeley.edu>
#
# Some hacking on 11 Nov 89, tale.  Deal with newsxd and change output
# format a little.  Left the output for pollers alone.
#
# More whacking early December, to stop listing readers on individual machines
# but instead summarize the domain.

BEGIN {
  # Local subdomains whose reader machines are folded into one "*.<domain>"
  # line of the readership report.  Site configuration: edit the list of
  # subdomain names below.  Every entry is created with the value 0.
  ndomains = split("its pawl cs ecs ecse cie ipl rdrc ral", domlist, " ");
  for (k = 1; k <= ndomains; k++)
    local[domlist[k] ".rpi.edu"] = 0;
}
### Runs for every line: drop stderr chatter from rnews, otherwise remember
### field 6 as the host that the remaining rules account against.
{
  last = split($6, piece, "/");
  if (piece[last] == "rnews:") next;   # last path component of $6
  last = split($7, piece, "/");
  if (piece[last] == "rnews") next;    # last path component of $7
  host = $6;
}
# Lines logged by newsxd[pid]: tally starts, reinitialisations and shutdowns
# (keyed by field 10); echo any other newsxd message into the report.
$5 ~ /^newsxd\[[0-9]+\]:$/ {
  newsxds = 1;
  if ($6 == "starting") {
    newsxd["start"] += 1;
  } else if ($6 == "reinitializing") {
    newsxd["reinit"] += 1;
  } else if ($6 == "shut" && $7 == "down") {
    newsxd[$10] += 1;
  } else {
    print;
  }
  next;
}
  
# A "group" command marks reader activity; count uses per newsgroup ($8).
$7 == "group" {
  ng[$8] += 1;
  readers = 1;
  next;
}
# One "ihave" line per offered article.  An offer logged as
# "accepted failed" is counted in both the accepted and the failed tallies.
$7 == "ihave" {
  receive = 1;
  rec[host] += 1;
  if ($9 == "rejected") {
    rec_refuse[host] += 1;
  } else if ($9 == "accepted") {
    rec_accept[host] += 1;
    if ($10 == "failed") rec_failed[host] += 1;
  }
  next;
}
# Summary line written by version 1.4 of nntpd: fields 9, 11 and 13 carry
# the accepted, rejected and failed counts; their sum is the number offered.
$7 == "ihave_stats" {
  rec_accept[host] += $9;
  rec_refuse[host] += $11;
  rec_failed[host] += $13;
  rec[host] += $9 + $11 + $13;
  receive = 1;
  next;
}
# Count incoming connections per host.
$7 == "connect" {
  systems[host] += 1;
  next;
}
# nntpxmit connection errors.  Only the class of failure is kept:
# "Connection refused" is charged to the remote end, anything else to the
# network.  The error text itself is discarded because awk arrays here are
# one-dimensional.
$7 == "hello:" {
  conn[host] += 1;
  if ($8 != "Connection" || $9 != "refused")
    open_fail[host] += 1;
  else
    rmt_fail[host] += 1;
  next;
}
# Transfer error: a stats line is expected to account for the connection, so
# conn[] is not bumped here.  There is deliberately no "next;" -- these are
# expected to be rare, so the line falls through and the exact error is
# still printed in the report.
$7 == "xfer:" {
  open_fail[host] += 1;
}
# A "greeted" line is counted as a connection attempt that failed at the
# remote end.
$7 == "greeted" {
  rmt_fail[host] += 1;
  conn[host] += 1;
  next;
}
# "host unknown": a connection attempt that failed in name lookup.
$7 == "host" && $8 == "unknown" {
  ns_fail[host] += 1;
  conn[host] += 1;
  next;
}
# Lines that are silently dropped from the report:
#   "disconnect:"  nntpd connection aborts (all "broken pipe" right now)
#   "repeated"     syslogd repeat notices
#   "spooled"      inews messages (matched on field 11)
$7 == "disconnect:" || $7 == "repeated" || $11 == "spooled" { next }
# Session summary: field 8 is added to the host's article count and field 10
# to its group count.  A non-zero article count marks reader activity.
$7 == "exit" {
  articles[host] += $8;
  groups[host] += $10;
  if ($8 > 0) readers = 1;
  next;
}
# Transmit timing: fields 9 and 11 are summed as CPU time, field 13 is
# accumulated as elapsed time.
$7 == "xmit" {
  xmt_ela[host] += $13;
  xmt_cpu[host] += $9 + $11;
  next;
}
# Session timing: fields 9 and 11 are summed as CPU time, field 13 is
# accumulated as elapsed time.
$7 == "times" {
  ela[host] += $13;
  cpu[host] += $9 + $11;
  next;
}
# Transmission summary: counts one connection and accumulates the
# offered (field 8), accepted (10), refused (12) and failed (14) totals.
$7 == "stats" {
  xmt[host] += $8;
  xmt_accept[host] += $10;
  xmt_refuse[host] += $12;
  xmt_failed[host] += $14;
  conn[host] += 1;
  transmit = 1;
  next;
}
#
#  awk has no two dimensional associative arrays, so only the most recent
#  newnews request (field 8) is kept per host, on the assumption that it is
#  the same as all the others in this section of logfile.
#
$7 == "newnews" {
  poll_asked[host] = $8;
  poll[host] += 1;
  polled = 1;
  next;
}
# Poll summary: field 9 is accumulated as articles offered, field 11 as
# articles taken.
$7 == "newnews_stats" {
  poll_took[host] += $11;
  poll_offered[host] += $9;
  next;
}
# A "post" line counts as reader activity and is tallied per host.
$7 == "post" {
  post[host] += 1;
  readers = 1;
  next;
}
# Count timeouts per host and flag that the timeout section is needed.
$7 == "timeout" {
  timeouts = 1;
  timeout[host] += 1;
  next;
}
# Unrecognised command: count it against the host and echo the line up to
# field 8, the first word, which is really the unrecognised part.
# Erik's original also set "curious = 1" here; that is left out because the
# line already shows up at the top of the report without the host being
# listed as an Unknown Explorer.
$7 == "unrecognized" {
  unknown[host] += 1;
  print $1, $2, $3, $4, $5, $6, $7, $8;
  next;
}
# Count refused connexions per host and flag that the section is needed.
$7 == "refused" {
  refused[host] += 1;
  splut = 1;
  next;
}
### Anything no earlier rule consumed is copied into the report verbatim
{
  print $0;
}
### Report generator.  Runs once after the last log line and prints one
### section for each kind of activity that was seen.  Sections are gated by
### flags (newsxds, receive, polled, transmit, readers, timeouts, splut) set
### by the per-line rules; "curious" is set inside this block.  Percentages
### get 0.5 added before the %d conversion so that they round.
END {
  printf("\n");

### News transmission daemon (newsxd) activity
  if (newsxds) {
    printf("News Transmission Daemon Activity:\n");
    for (s in newsxd) {
      # any key other than "start"/"reinit" is reported as a signal number
      if (s == "start") printf("newsxd starts: %d\n",newsxd["start"]);
      else if (s== "reinit")
        printf("newsxd reinitialisations: %d\n",newsxd["reinit"]);
      else printf("newsxd shut downs by signal %d: %d\n",s,newsxd[s]);
    }
  }

  printf("\n");

### Article Exchange With Peers (other servers) Statistics
  # servers[] marks every host that exchanged articles with us in any
  # direction; the readership section uses it to leave peers out.
  if (polled) for(s in poll) servers[s]++;
  if (receive) for(s in rec) servers[s]++;
  if (transmit) for(s in xmt) servers[s]++;

  if (receive) {
    printf("Article Reception        Offered      Took         Toss         Fail\n");
    printf("Contacting Host           To Us    Total  Pct   Total  Pct   Total  Pct\n");
    for(s in rec) {
      nrec += rec[s];
      nrec_accept += rec_accept[s];
      nrec_refuse += rec_refuse[s];
      nrec_failed += rec_failed[s];

      # avoid dividing by zero when a host offered nothing
      they_offered = rec[s];
      if (they_offered == 0) they_offered = 1;
      we_toss = (rec_refuse[s] / they_offered) * 100 + 0.5;
      we_took = (rec_accept[s] / they_offered) * 100 + 0.5;
      we_fail = (rec_failed[s] / they_offered) * 100 + 0.5;

      printf("%-25s %5d    %5d %3d%%   %5d %3d%%   %5d %3d%%\n", s, rec[s], rec_accept[s], we_took, rec_refuse[s], we_toss, rec_failed[s], we_fail);
    }

    they_offered = nrec;
    if (they_offered == 0) they_offered = 1;
    we_toss = (nrec_refuse / they_offered) * 100 + 0.5;
    we_took = (nrec_accept / they_offered) * 100 + 0.5;
    we_fail = (nrec_failed / they_offered) * 100 + 0.5;
    printf("------------------------- -----    ----------   ----------   ----------\n");
    printf("%-25s %5d    %5d %3d%%   %5d %3d%%   %5d %3d%%\n\n", "TOTALS", nrec, nrec_accept, we_took, nrec_refuse, we_toss, nrec_failed, we_fail);
  }

###############################################################################
### Peers that poll us with newnews.  This section keeps the Elapsed/CPU
### columns, broken out as HH:MM:SS.
  if (polled) {
    printf("Article Transmission (they poll us)\n");
    printf("System                     Conn Offrd  Took   Elapsed       CPU  Pct  Groups\n");
    npoll = 0;
    npoll_offered = 0;
    npoll_took = 0;
    npoll_cpu = 0;
    npoll_ela = 0;

    for(s in poll) {
      npoll += poll[s];
      npoll_offered += poll_offered[s];
      npoll_took += poll_took[s];

      if (rec[s]) {
        # this host also fed us articles, so no separate times are shown
        printf("%-25s %5d %5d %5d  (see Article Reception)  %s\n", s, poll[s], poll_offered[s], poll_took[s], poll_asked[s]);
      } else {
        npoll_ela += ela[s];
        npoll_cpu += cpu[s];

        e_hours = ela[s] / 3600;
        e_sec   = ela[s] % 3600;
        e_min   = e_sec / 60;
        e_sec   %= 60;

        c_hours = cpu[s] / 3600;
        c_sec   = cpu[s] % 3600;
        c_min   = c_sec / 60;
        c_sec   %= 60;

        # CPU time as a percentage of elapsed time
        tmp = ela[s];
        if (tmp == 0) tmp = 1;
        pct = ((cpu[s] / tmp) * 100.0 + 0.5);

        printf("%-25s %5d %5d %5d %3d:%02d:%02d %3d:%02d:%02d %3d%%  %s\n", s, poll[s], poll_offered[s], poll_took[s], e_hours, e_min, e_sec, c_hours, c_min, c_sec, pct, poll_asked[s]);
      }
    }
    printf("\n%-25s %5d %5d %5d", "TOTALS", npoll, npoll_offered, npoll_took);
    if (npoll_ela > 0 && npoll_cpu > 0) {

      e_hours = npoll_ela / 3600;
      e_sec   = npoll_ela % 3600;
      e_min   = e_sec / 60;
      e_sec   %= 60;

      c_hours = npoll_cpu / 3600;
      c_sec   = npoll_cpu % 3600;
      c_min   = c_sec / 60;
      c_sec   %= 60;

      # npoll_ela is known to be positive here, so no zero guard is needed
      pct = ((npoll_cpu / npoll_ela) * 100.0 + 0.5);

      printf(" %3d:%02d:%02d %3d:%02d:%02d %3d%%\n\n", e_hours, e_min, e_sec, c_hours, c_min, c_sec, pct);
    } else
      printf("\n\n");
  }

###############################################################################
### Articles we offered to other hosts
  if (transmit) {
    printf("Article Transmission    Offered       Took         Toss         Fail\n");
    printf("Host Contacted          To Them    Total  Pct   Total  Pct   Total  Pct\n");
    for(s in xmt) {
      we_offered = xmt[s];
      if (we_offered == 0) we_offered = 1;
      they_toss = (xmt_refuse[s] / we_offered) * 100 + 0.5;
      they_took = (xmt_accept[s] / we_offered) * 100 + 0.5;
      they_fail = (xmt_failed[s] / we_offered) * 100 + 0.5;

      printf("%-25s %5d    %5d %3d%%   %5d %3d%%   %5d %3d%%\n", s, xmt[s], xmt_accept[s], they_took, xmt_refuse[s], they_toss, xmt_failed[s], they_fail);

      nxmt        += xmt[s];
      nxmt_accept += xmt_accept[s];
      nxmt_refuse += xmt_refuse[s];
      nxmt_failed += xmt_failed[s];
    }

    we_offered = nxmt;
    if (we_offered == 0) we_offered = 1;
    they_toss = (nxmt_refuse / we_offered) * 100 + 0.5;
    they_took = (nxmt_accept / we_offered) * 100 + 0.5;
    they_fail = (nxmt_failed / we_offered) * 100 + 0.5;
    printf("------------------------- -----    ----------   ----------   ----------\n");
    printf("%-25s %5d    %5d %3d%%   %5d %3d%%   %5d %3d%%\n\n", "TOTALS", nxmt, nxmt_accept, they_took, nxmt_refuse, they_toss, nxmt_failed, they_fail);
  }

### Outgoing connection attempts and their failures.
### This table is driven by conn[] rather than xmt[]: xmt[] only has entries
### for hosts that produced a "stats" line, so a peer whose every attempt
### failed (hello:, greeted, host unknown) would otherwise never be listed
### in the one table meant to show those failures.
  attempts = 0;
  for(s in conn) attempts = 1;
  if (attempts) {
    printf("Outgoing Transmission Connexions         ------errors-------\n");
    printf("System                     Conn    OK    NS   Net   Rmt  Pct\n");
    for(s in conn) {
      tot = conn[s];
      if (tot == 0) tot = 1;
      errs = rmt_fail[s] + ns_fail[s] + open_fail[s];
      ok = (conn[s] - errs);
      printf("%-25s %5d %5d %5d %5d %5d %3d%%\n", s, conn[s], ok, ns_fail[s], open_fail[s], rmt_fail[s], (100.0 * errs / tot + 0.5));
      ct_tot += conn[s];
      ct_ok  += ok;
      ct_ns  += ns_fail[s];
      ct_net += open_fail[s];
      ct_rmt += rmt_fail[s];
    }
    tot = ct_tot;
    if (tot == 0) tot = 1;
    errs = ct_ns + ct_net + ct_rmt;
    printf("------------------------- ----- ----- ----- ----- ----- ----\n");
    printf("%-25s %5d %5d %5d %5d %5d %3d%%\n\n", "TOTALS", ct_tot, ct_ok, ct_ns, ct_net, ct_rmt, (100.0 * errs / tot + 0.5));
  }

### Article Readership Statistics

  if (readers) {
    printf("NNTP readership statistics\n");
    printf("System                     Conn Articles Groups Post\n");
    for(s in systems) {

### servers are different animals; they don't belong in this part of the report

      if (servers[s] > 0 && groups[s] == 0 && articles[s] == 0)
        continue;

### report the curious server pokers elsewhere

      if (groups[s] == 0 && articles[s] == 0 && post[s] == 0 && refused[s] != systems[s]) {
        unknown[s] += systems[s];
        curious = 1;
        continue;
      }

      nconn += systems[s];
      nart += articles[s];
      ngrp += groups[s];
      npost += post[s];

      # V7 awk can't match against variable patterns, so instead break
      # apart the host name and each local domain on "." and compare the
      # elements from the rear.
      domain = "";
      nso = split(s, sp, ".");
      for (l in local) {
        nl = split(l, lp, ".");
        ns = nso;
        found = 1;
        while ( nl > 0 ) {
          if ( lp[nl--] != sp[ns--] ) {
            found = 0; nl=0;    # mismatch: stop comparing this domain
          }
        }
        if (found) domain = "*." l;
      }
      # site-specific special case: hosts logged by address under 128.213
      # (cs dept machines that don't report a name) are counted as cs.rpi.edu
      if (!domain && sp[1] == "128" && sp[2] == "213") domain = "*.cs.rpi.edu";
      if (domain) {
        # fold this machine into its domain's summary line
        rep_sys[domain] += systems[s];
        rep_art[domain] += articles[s];
        rep_grp[domain] += groups[s];
        rep_pst[domain] += post[s];
      } else {
        # hosts outside the local domains are listed individually
        rep_sys[s] = systems[s];
        rep_art[s] = articles[s];
        rep_grp[s] = groups[s];
        rep_pst[s] = post[s];
      }
    }
    for (r in rep_sys) {
      printf("%-25s %5d %8d %6d %4d\n", r, rep_sys[r], rep_art[r], rep_grp[r], rep_pst[r]);
    }
    printf("------------------------- ----- -------- ------ ----\n");
    printf("%-25s %5d %8d %6d %4d\n\n", "TOTALS", nconn, nart, ngrp, npost);
  }

###############################################################################
### Hosts that connected but never read, posted or were refused every time.
### unknown[] also holds counts from the "unrecognized" rule, so those hosts
### are listed here too whenever this section is printed.
  if (curious) {
    printf("Unknown NNTP server explorers\n\n");
    printf("System                     Conn\n");
    for(s in unknown) {
      printf("%-25s %5d\n", s, unknown[s]);
    }
    printf("\n");
  }
###############################################################################
  if (timeouts) {
    printf("nntpd timeouts\n");
    for(s in timeout) {
      printf("%-25s %5d\n", s, timeout[s]);
    }
    printf("\n");
  }
  if (splut) {
    printf("Refused connexions\n");
    for(s in refused) {
      # refused[] gains zero-valued entries when the readership section
      # looks hosts up, so only real refusals are printed
      if (refused[s] > 0)
        printf("%-25s %5d\n", s, refused[s]);
    }
    printf("\n");
  }
}