[alt.sources] YACLRG

nagel@paris.ics.uci.edu (Mark Nagel) (11/30/89)

Here is a report generator for straight C-news logs written in
Perl.  It has very few 3.0 dependencies and should work fine under
2.0.18.  Specifically, I did not use dbmopen since many C-news sites
might be using dbz instead.  Like another report generator I saw,
this one caches message-ids in a list to minimize the number of
calls out to newshist.  Seems to work quite fast considering the
fact that C-news fails to provide useful information directly in the
log file. 

Be sure to tune the program to your local system before trying
this.  There are three variables you will may to modify:

$MAXARGLEN	Set this to the largest number of characters that
		can appear in an argument list submitted to exec.
		The larger the better.

@local		Set this to a list of regular expressions that match
		sites that really should be local (like if you have
		sites posting via NNTP and you'd like them all to 
		show up as local in the log file).

@gateway	Set this to a list of regular expressions that match
		sites that really should be gateway sites.  For
		example, we have a bunch of mailing lists forwarded
		to local groups and the sending machine always shows
		up as 'local-<something>', so I alias those names to
		be gateways.

Usage
	rep_log [-s] logfile ...

	or

	rep_log [-s] < logfile

The optional -s flag supresses the output of erroneous lines prior
to the main statistics area -- this is useful for submitting log
reports to regional statistics groups or whatever.  The script can
take all log file names on the command line or from standard input.
The log report appears on standard output.

Anyway, that's not much documentation, but hopefully it's enough to
use the script.  Please send any comments or suggestions my way --
I'd be happy to make this even better, faster, stronger...

#! /bin/perl

#
# C News Log Report Generator
#
# Mark Nagel <nagel@ics.uci.edu>
# $Id: rep_log.pl,v 1.6 89/11/29 11:00:51 news Exp $
#
# Large parts of this script were based on the B News log report
# awk script.
#

$NEWSBIN = $ENV{"NEWSBIN"} || "/usr/local/lib/newsbin";
$NEWSCTL = $ENV{"NEWSCTL"} || "/usr/local/lib/news";
$newshist = "$NEWSBIN/maint/newshist";

##############################################################################
# 				customization				     #
##############################################################################

#
# The MAXARGLEN variable controls how many message-ids will be queried
# for at one time via the newshist program.  Tune to your system (make
# as large as allowed).  The length here is the total length in
# characters of all the arguments.
#
$MAXARGLEN = 2048;

#
# The "local" array contains a list of regular expressions that
# identify a site entry in the log file as local.  Each regular
# expression will be matched case-independently and anchored at the
# beginning/end.
#
@local = (
  "uci-ics",			# news server name
  "[^.]*.ics.uci.edu"		# other local client names
);

#
# The "gateway" array contains a list of regular expressions that
# identify a site entry in the log file as a gateway.  Each regular
# expression will be matched case-independently and anchored at the
# beginning/end.
#
@gateway = (
  "local-.*",
  "gateway"
);

##############################################################################
# 			       initialization				     #
##############################################################################

$duplicates = 0;
@msgids = ();
$arglen = 0;

$silent = 0;
while ($_ = $ARGV[0], /^-/) {
  shift;
  last if (/^--$/);
  /^-s/ && ($silent = 1);
}

##############################################################################
# 				log file scan				     #
##############################################################################

while (<>) {
  next if /^$/;			# skip blank lines
  chop;

  #
  # extract fields from line
  #
  ($month,$date,$time,$site,$code,$msgid,@logent) = split;

  #
  # fix up the site name as necessary
  #
  for $regexp (@gateway) {
    if ($site =~ /^$regexp$/i) {
      $site = "(GATEWAY)";
      last;
    }
  }
  for $regexp (@local) {
    if ($site =~ /^$regexp$/i) {
      $site = "local";
      last;
    }
  }
  $site =~ s/\..*$//;

  #
  # check the receipt code
  #
  if ($code eq "-") {			# rejected article
    $reject{$site}++;
    if ($logent[0] eq "duplicate") {
      $duplicates++;
    } elsif ($logent[0] eq "no" && $logent[1] eq "subscribed") {
      #
      # "no subscribed groups in `...'"
      #
      $ng = $logent[4];
      $ng =~ s/`([^']*)'/$1/;
      @ng = split(/,/, $ng);
      for $i (@ng) {
        $unsub{$i}++;
      }
    } elsif ($logent[0] eq "all" && $logent[3] eq "excluded") {
      #
      # "all groups `...' excluded in active"
      #
      $ng = $logent[2];
      $ng =~ s/`([^']*)'/$1/;
      @ng = split(/,/, $ng);
      for $i (@ng) {
        $excluded{$i}++;
      }
    } else {
      #
      # print any others as-is for inspection
      #
      print "$_\n" unless ($silent);
    }
  } elsif ($code eq "+") {		# accepted article
    $accept{$site}++;
    if ($arglen + length($msgid) > $MAXARGLEN) {
      do recordgroups(@msgids);
      @msgids = ($msgid);
      $arglen = length($msgid);
    } else {
      push(@msgids, $msgid);
      $arglen += length($msgid);
    }
    for ($i = 0; $i <= $#logent; $i++) {
      $n = $logent[$i];
      $neighbor{$n} = 1;
      $xmited{$n}++;
    }
  } elsif ($code eq "j") {		# junked after accepted
    $junked{$site}++;
    if ($logent[0] eq "junked") {
      $ng = $logent[4];
      $ng =~ s/`([^']*)'/$1/;
      @ng = split(/,/, $ng);
      for $i (@ng) {
        $badng{$i}++;
      }
    }
  } elsif ($code eq "i") {		# ihave message
    $ihave++;
  } elsif ($code eq "s") {		# sendme message
    $sendme++;
  } else {				# illegal/unknown code
    print "$_\n" unless ($silent);
  }
}
do recordgroups(@msgids) if ($#msgids >= 0);

##############################################################################
# 			    statistics generation			     #
##############################################################################

#
# rejected messages
#
$rtot = 0;
while (($key, $val) = each(reject)) {
  if ($val > 0) {
    $list{$key} = 1;
    $rtot += $val;
  }
}

#
# accepted messages
#
$atot = 0;
while (($key, $val) = each %accept) {
  if ($val > 0) {
    $list{$key} = 1;
    $atot += $val;
  }
}

#
# transmitted messages
#
$xtot = 0;
while (($key, $val) = each(xmited)) {
  if ($val > 0) {
    $list{$key} = 1;
    $xtot += $val;
  }
}

#
# junked messages
#
$jtot = 0;
while (($key, $val) = each(junked)) {
  if ($val > 0) {
    $list{$key} = 1;
    $jtot += $val;
  }
}

##############################################################################
# 			      report generation				     #
##############################################################################

#
# Transmission Statistics
#
$totalarticles = $atot + $rtot;
$totalarticles++ if ($totalarticles == 0);
print "\n" unless ($silent);
print "System      \tAccept\tReject\tJunked\tXmit to\t %total\t%reject\n";
for $i (sort(keys(list))) {
  $sitetot = $accept{$i} + $reject{$i};
  $sitetot++ if ($sitetot == 0);
  $articles{$i} = $sitetot;

  printf "%-14s\t%6d\t%6d\t%6d\t%7d\t%6d%%\t%6d%%\n",
	$i, $accept{$i}, $reject{$i}, $junked{$i}, $xmited{$i},
	($sitetot * 100) / $totalarticles, ($reject{$i} * 100) / $sitetot;
}
printf "\nTOTALS        \t%6d\t%6d\t%6d\t%7d\t%6d%%\t%6d%%\n",
	$atot, $rtot, $jtot, $xtot, 100, ($rtot * 100) / $totalarticles;
print "\nTotal Articles processed $totalarticles";
print " (1 duplicate)" if ($duplicates == 1);
print " ($duplicates duplicates)" if ($duplicates > 1);
print "\n";

#
# Netnews Categories
#
if ($atot > 0) {
  print "\nNetnews Categories Received\n";
  $l = 0;
  for $i (keys(ngcount)) {
    $l = length($i) if ($l < length($i));
  }
  $fmt = "%-${l}s %d\n";
  while (1) {
    $max = 0;
    for $j (keys(ngcount)) {
      if ($ngcount{$j} > $max) {
	$max = $ngcount{$j};
	$i = $j;
      }
    }
    last if ($max == 0);
    printf $fmt, $i, $ngcount{$i};
    $ngcount{$i} = 0;
  }
}

#
# Bad Newsgroups
#
@keys = sort(keys(badng));
if ($#keys >= 0) {
  print "\nBad Newsgroups Received\n";
  $l = 0;
  for $i (@keys) {
    $l = length($i) if ($l < length($i));
  }
  $fmt = "%-${l}s %d\n";
  for $i (@keys) {
    printf $fmt, $i, $badng{$i};
  }
}

#
# Unsubscribed Newsgroups
#
@keys = sort(keys(unsub));
if ($#keys >= 0) {
  print "\nUnsubscribed Newsgroups Received\n";
  $l = 0;
  for $i (@keys) {
    $l = length($i) if ($l < length($i));
  }
  $fmt = "%-${l}s %d\n";
  for $i (@keys) {
    printf $fmt, $i, $unsub{$i};
  }
}

#
# Excluded Newsgroups
#
@keys = sort(keys(excluded));
if ($#keys >= 0) {
  print "\nExcluded Newsgroups Received\n";
  $l = 0;
  for $i (@keys) {
    $l = length($i) if ($l < length($i));
  }
  $fmt = "%-${l}s %d\n";
  for $i (@keys) {
    printf $fmt, $i, $excluded{$i};
  }
}

##############################################################################
# recordgroups(msgid)
#
# Given a list of message-ids, retrieve the newsgroups associated with each
# message-id and update the global ngcount table appropriately.

sub recordgroups {
  local(@msgids) = @_;
  local($i, @groups);

  for ($i = 0; $i <= $#msgids; $i++) {
    $msgids[$i] =~ s/<([^>]*)>/$1/;
  }
  open(NH, "-|") || exec $newshist, '--', @msgids;
  while (<NH>) {
    chop;
    ($_, $_, @groups) = split;
    foreach $i (@groups) {
      $i =~ s/\/.*$//;
      if ($i =~ /\./) {
        $i =~ s/\..*//;
        $ngcount{$i}++;
      }
    }
  }
  close(NH) || warn("exec($newshist): $!\n");
}
-- 
Mark Nagel
UC Irvine Department of ICS   +----------------------------------------+
ARPA: nagel@ics.uci.edu       | Help!  Somebody!  I'm being oppressed! |
UUCP: ucbvax!ucivax!nagel     +----------------------------------------+