[news.software.b] Wanted: C news log file report generator

tar@ksuvax1.cis.ksu.edu (Tim Ramsey) (02/16/90)

I'm looking for an awk/perl script that will produce a report summary from
the C news log file.  Does anyone have something they could send to me?

If it matters, I'm running C news at the latest patchdate on an ATT 3B15
running SysV 2.1.2.

Thanks in advance.
--
Tim Ramsey                         Dept. of Computing and Information Sciences
Internet: tar@ksuvax1.cis.ksu.edu  Kansas State University, Manhattan KS 66506
UUCP:  ...!{rutgers,texbell}!ksuvax1!tar   (913) 539-4977 (voice) 2-7114 (FAX)

eastick@me.utoronto.ca (Doug Eastick) (02/17/90)

tar@ksuvax1.cis.ksu.edu (Tim Ramsey) writes:
>I'm looking for an awk/perl script that will produce a report summary from
>the C news log file.  Does anyone have something they could send to me?

I assume there will be some ``me too'' requests, so here's the one I
grabbed a while ago.

-----

#!/usr/local/bin/perl
#
# C News Log Report Generator
#
# Mark Nagel <nagel@ics.uci.edu>
# $Id: rep_log.pl,v 1.6 89/11/29 11:00:51 news Exp $
#
# Large parts of this script were based on the B News log report
# awk script.
#

#
# Locations of the C news program and control directories.  Each is taken
# from the environment variable of the same name when that is set, and
# falls back to the path given here otherwise.
#
$NEWSBIN = $ENV{"NEWSBIN"} || "/usr/lib/newsbin";
$NEWSCTL = $ENV{"NEWSCTL"} || "/usr/lib/news";
$newshist = "$NEWSBIN/maint/newshist";

##############################################################################
# 				customization				     #
##############################################################################

#
# The MAXARGLEN variable controls how many message-ids will be queried
# for at one time via the newshist program.  Tune to your system (make
# as large as allowed).  The length here is the total length in
# characters of all the arguments.
#
$MAXARGLEN = 2048;

#
# The "local" array contains a list of regular expressions that
# identify a site entry in the log file as local.  Each regular
# expression will be matched case-independently and anchored at the
# beginning/end.
#
# The patterns are single-quoted so that backslashes reach the regular
# expression engine untouched; a literal dot in a host name must be
# written as \. or it will match any character.
#
@local = (
  'me',				# news server name
  '[^.]*\.ics\.uci\.edu'	# other local client names
);

#
# The "gateway" array contains a list of regular expressions that
# identify a site entry in the log file as a gateway.  Each regular
# expression will be matched case-independently and anchored at the
# beginning/end.
#
@gateway = (
  'local-.*',
  'gateway'
);

##############################################################################
# 			       initialization				     #
##############################################################################

# Pending newshist query batch, its accumulated length, and the count of
# duplicate rejections.
@msgids = ();
$arglen = 0;
$duplicates = 0;

# Consume leading option words from the command line.  "--" ends option
# processing, a word beginning with "-s" turns on silent mode, and any
# other word beginning with "-" is simply dropped.
$silent = 0;
for (;;) {
  $_ = $ARGV[0];
  last unless (/^-/);
  shift(@ARGV);
  last if (/^--$/);
  $silent = 1 if (/^-s/);
}

##############################################################################
# 				log file scan				     #
##############################################################################

#
# Split a quoted group-list token taken from a log entry, such as
# `comp.lang.c,comp.misc', into the individual newsgroup names.  The
# surrounding `...' quoting is removed first.
#
sub quotedgroups {
  my ($token) = @_;
  $token =~ s/`([^']*)'/$1/;
  return split(/,/, $token);
}

#
# Read every log line from the files named on the command line (or from
# standard input) and tally it by receipt code.  Message-ids of accepted
# articles are collected in @msgids and handed to recordgroups() in
# batches whose combined length stays within $MAXARGLEN.
#
while (<>) {
  next if /^$/;			# skip blank lines
  chomp;			# newline only: a final line without one keeps its last character

  #
  # extract fields from line
  #
  my ($month, $date, $time, $site, $code, $msgid, @logent) = split;

  #
  # fix up the site name as necessary
  #
  for my $regexp (@gateway) {
    if ($site =~ /^$regexp$/i) {
      $site = "(GATEWAY)";
      last;
    }
  }
  for my $regexp (@local) {
    if ($site =~ /^$regexp$/i) {
      $site = "local";
      last;
    }
  }
  # $site =~ s/\..*$//;

  #
  # check the receipt code
  #
  if ($code eq "-") {			# rejected article
    $reject{$site}++;
    if ($logent[0] eq "duplicate") {
      $duplicates++;
    } elsif ($logent[0] eq "no" && $logent[1] eq "subscribed") {
      #
      # "no subscribed groups in `...'"
      #
      for my $group (quotedgroups($logent[4])) {
        $unsub{$group}++;
      }
    } elsif ($logent[0] eq "all" && $logent[3] eq "excluded") {
      #
      # "all groups `...' excluded in active"
      #
      for my $group (quotedgroups($logent[2])) {
        $excluded{$group}++;
      }
    } else {
      #
      # print any others as-is for inspection
      #
      print "$_\n" unless ($silent);
    }
  } elsif ($code eq "+") {		# accepted article
    $accept{$site}++;
    #
    # Flush the pending batch when this id would push it past the limit.
    # The batch is only flushed when it holds something, so an oversized
    # first id never causes a query with no ids at all.
    #
    if (@msgids && $arglen + length($msgid) > $MAXARGLEN) {
      recordgroups(@msgids);
      @msgids = ();
      $arglen = 0;
    }
    push(@msgids, $msgid);
    $arglen += length($msgid);

    # the remaining fields name the sites the article was passed on to
    for my $n (@logent) {
      $neighbor{$n} = 1;
      $xmited{$n}++;
    }
  } elsif ($code eq "j") {		# junked after accepted
    $junked{$site}++;
    if ($logent[0] eq "junked") {
      # the quoted group list is the fifth word of the entry
      for my $group (quotedgroups($logent[4])) {
        $badng{$group}++;
      }
    }
  } elsif ($code eq "i") {		# ihave message
    $ihave++;
  } elsif ($code eq "s") {		# sendme message
    $sendme++;
  } else {				# illegal/unknown code
    print "$_\n" unless ($silent);
  }
}
# query for whatever is left in the final, partial batch
recordgroups(@msgids) if (@msgids);

##############################################################################
# 			    statistics generation			     #
##############################################################################

#
# rejected messages
#
#
# Sum each per-site tally into its grand total and note, in %list, every
# site that has a non-zero count in at least one tally.  %list later
# supplies the rows of the transmission report.
#
#   %reject -> $rtot	rejected messages
#   %accept -> $atot	accepted messages
#   %xmited -> $xtot	transmitted messages
#   %junked -> $jtot	junked messages
#
$rtot = $atot = $xtot = $jtot = 0;
for my $tally ([\%reject, \$rtot], [\%accept, \$atot],
	       [\%xmited, \$xtot], [\%junked, \$jtot]) {
  my ($counts, $total) = @$tally;
  while (my ($name, $n) = each %$counts) {
    next unless ($n > 0);
    $list{$name} = 1;
    $$total += $n;
  }
}

##############################################################################
# 			      report generation				     #
##############################################################################

#
# Transmission Statistics
#
# Print one row per site recorded in %list, then a totals row and the
# overall article count.  Percentages are integer-formatted.
$totalarticles = $atot + $rtot;
# Percentages divide by a non-zero stand-in kept separate from the real
# count, so an empty log still reports "Total Articles processed 0".
my $totaldiv = $totalarticles || 1;
print "\n" unless ($silent);
print "System      \tAccept\tReject\tJunked\tXmit to\t %total\t%reject\n";
for my $name (sort(keys(%list))) {
  my $sitetot = $accept{$name} + $reject{$name};
  # same guard per site: a site that only had articles sent to it has
  # nothing received, and must show 0% of the total rather than 1 article
  my $sitediv = $sitetot || 1;
  $articles{$name} = $sitediv;

  printf "%-14.14s\t%6d\t%6d\t%6d\t%7d\t%6d%%\t%6d%%\n",
	$name, $accept{$name}, $reject{$name}, $junked{$name}, $xmited{$name},
	($sitetot * 100) / $totaldiv, ($reject{$name} * 100) / $sitediv;
}
printf "\nTOTALS        \t%6d\t%6d\t%6d\t%7d\t%6d%%\t%6d%%\n",
	$atot, $rtot, $jtot, $xtot, 100, ($rtot * 100) / $totaldiv;
print "\nTotal Articles processed $totalarticles";
print " (1 duplicate)" if ($duplicates == 1);
print " ($duplicates duplicates)" if ($duplicates > 1);
print "\n";

#
# Netnews Categories
#
# When any article was accepted, list the counts held in %ngcount from
# the largest down.  Equal counts are ordered by name so the output is
# repeatable.  The table itself is left unmodified.
if ($atot > 0) {
  print "\nNetnews Categories Received\n";

  # pad the name column to the longest category name
  my $width = 0;
  for my $cat (keys(%ngcount)) {
    $width = length($cat) if ($width < length($cat));
  }
  my $fmt = "%-${width}s %d\n";

  my @cats = grep { $ngcount{$_} > 0 } keys(%ngcount);
  for my $cat (sort { $ngcount{$b} <=> $ngcount{$a} || $a cmp $b } @cats) {
    printf $fmt, $cat, $ngcount{$cat};
  }
}

#
# Bad Newsgroups
#
#
# printgroupcounts(title, name => count, ...)
#
# Print a titled table of newsgroup names and their counts, sorted by
# name, with the name column padded to the longest name.  Prints nothing
# at all, not even the title, when there are no entries.
#
sub printgroupcounts {
  my ($title, %count) = @_;
  my @names = sort(keys(%count));
  return unless (@names);

  print "\n$title\n";
  my $width = 0;
  for my $name (@names) {
    $width = length($name) if ($width < length($name));
  }
  my $fmt = "%-${width}s %d\n";
  for my $name (@names) {
    printf $fmt, $name, $count{$name};
  }
  return;
}

printgroupcounts("Bad Newsgroups Received", %badng);

#
# Unsubscribed Newsgroups
#
printgroupcounts("Unsubscribed Newsgroups Received", %unsub);

#
# Excluded Newsgroups
#
printgroupcounts("Excluded Newsgroups Received", %excluded);

##############################################################################
# recordgroups(msgids)
#
# Given a list of message-ids, run the newshist program ($newshist) on them
# and read its output.  The first two fields of each output line are
# skipped; every remaining field is treated as a newsgroup entry.  Anything
# from the first "/" on is removed, and for names that contain a dot the
# part before the first dot is counted in the global ngcount table.
#
# Does nothing when the list is empty.  Failures to fork, to run newshist,
# or a non-zero exit from it are reported with warn(); the caller's $_ is
# left untouched.  Returns nothing.

sub recordgroups {
  my @ids = @_;
  return unless (@ids);

  # ids are passed to newshist without their surrounding <>
  for my $id (@ids) {
    $id =~ s/<([^>]*)>/$1/;
  }

  # Forking open: the parent reads the child's standard output.  The
  # result must be tested with defined(), because undef (fork failed) and
  # 0 (we are the child) are both false and only the child may exec.
  my $pid = open(my $nh, "-|");
  unless (defined($pid)) {
    warn("cannot fork to run $newshist: $!\n");
    return;
  }
  if ($pid == 0) {
    # child: exec returns only on failure, and the child must then exit
    # instead of carrying on with the parent's report
    exec($newshist, '--', @ids) or die("exec($newshist): $!\n");
  }

  while (my $line = <$nh>) {
    my (undef, undef, @groups) = split(' ', $line);
    for my $group (@groups) {
      $group =~ s/\/.*$//;
      next unless ($group =~ /\./);
      $group =~ s/\..*//;
      $ngcount{$group}++;
    }
  }

  # close() on a pipe is also false when the child exits non-zero, in
  # which case $! is empty and the exit status is in $?
  unless (close($nh)) {
    warn($! ? "error closing pipe from $newshist: $!\n"
	    : "$newshist exited with status " . ($? >> 8) . "\n");
  }
  return;
}
--
Doug Eastick -- eastick@me.utoronto.ca

ross@contact.uucp (Ross Ridge) (02/18/90)

This is an awk file I wrote for generating a simple report of the C news log
file. You'll probably want to edit this to taste; for instance, if you
junk a lot of newsgroups you may want to remove the part that lists them.
Also, you may want to change the size of some of the fields according to
the number of articles your system processes.

This is an excerpt from our $NEWSBIN/maint/newsdaily file. It mails the
generated report daily and appends the table part of the report to
$NEWSCTL/stats.

# Rotate the log, keeping a single old generation (the log is big).  The
# fresh, empty log is only created when the rename succeeded.
rm -f log.o
mv log log.o && : >log

# Build the report from the rotated log and mail all of it out.
awk -f $NEWSBIN/maint/newsstats.awk log.o > stats.last
mail -s "News statistics" $gurus < stats.last

# Append a dated copy of the report's leading table (everything before
# the first blank line) to the running stats file, then a separator line.
{
	date
	sed '/^$/,$d' stats.last
	echo ""
} >> stats

I believe this awk file should work under old awks, but I don't have
one handy to try it out. Let me know if you find any bugs or compatibility
problems. (We don't do ihave/sendme so it may not report this correctly.)

								Ross Ridge

---- cut here ---
#
# newsstats.awk -- by Ross Ridge (ross@contact.uucp) Public Domain
#

# Accepted line: count it for the site in field 4, and count one "sent"
# for every site named in fields 7 onward.  Each such site is also
# entered in name[] so that it gets a row in the report.
$5 == "+" {
	accepted[$4]++
	f = 7
	while (f <= NF) {
		name[$f] = $f
		sent[$f]++
		f++
	}
}
# Rejected line: count it for the site in field 4 and tally the reason,
# which is fields 7 onward joined with a leading space before each word.
$5 == "-" {
	reason = ""
	for (f = 7; f <= NF; f++)
		reason = reason " " $f
	why[reason]++
	rejected[$4]++
}
# Junked line: the last field is tallied as the offending newsgroups.
$5 == "j" {
	badngs[$NF]++
	junked[$4]++
}
# ihave / sendme lines are counted against field 8.
$5 == "i" {
	ihave[$8]++
}
$5 == "s" {
	sendme[$8]++
}
               # Runs for every line: add it to the per-site total and make
               # sure the site has a row in the report.  Accept, reject and
               # junk lines are keyed by field 4; ihave and sendme lines by
               # field 8, matching the rules that count them.  The code is
               # matched as a whole field: the bracket expression [is] means
               # "i or s", whereas a plain /is/ would only match the two
               # letters "is" together and send every ihave/sendme line to
               # the unknown count.
               {
		if ($5 ~ /^[-+j]$/) {
			total[$4]++;
			name[$4] = $4;
		} else if ($5 ~ /^[is]$/) {
			total[$8]++;
			name[$8] = $8;
		} else
			unknown++;
	       }
# Final report: a table with one row per site in name[] and a totals row,
# then the junked newsgroups, the rejection reasons, and the number of
# unrecognised lines.  A column is left blank when nothing was counted
# for it.  Blank separator lines are written with print "" because a bare
# print outputs $0, which in an END action may still hold the last input
# record.
END	       {
#		       123456789012345678901234567890123456789012345678901234567890112345678901
		print "Sitename              Accept Reject Junked I-Have Sendme   Total    Sent"
		print "------------------------------------------------------------------------"
		for (s in name) {
			printf("%-20s  ", s);
			if (accepted[s] == "")
				printf("       ");
			else {
				printf("%6d ", accepted[s]);
				atotal += accepted[s];
			}
			if (rejected[s] == "")
				printf("       ");
			else {
				printf("%6d ", rejected[s]);
				rtotal += rejected[s];
			}
			if (junked[s] == "")
				printf("       ");
			else {
				printf("%6d ", junked[s]);
				jtotal += junked[s];
			}
			if (ihave[s] == "")
				printf("       ");
			else {
				printf("%6d ", ihave[s]);
				itotal += ihave[s];
			}
			if (sendme[s] == "")
				printf("       ");
			else {
				printf("%6d ", sendme[s]);
				stotal += sendme[s];
			}
			if (total[s] == "")
				printf("        ");
			else {
				printf(" %6d ", total[s]);
				thetotal += total[s];
			}
			if (sent[s] == "") 
				printf("\n");
			else {
				printf(" %6d\n", sent[s]);
				senttotal += sent[s];
			}
		}
		# totals row, with the same column layout as the site rows
		printf("%20s  ", "Totals:");
		if (atotal == "")
			printf("       ");
		else
			printf("%6d ", atotal);
		if (rtotal == "")
			printf("       ");
		else
			printf("%6d ", rtotal);
		if (jtotal == "")
			printf("       ");
		else
			printf("%6d ", jtotal);
		if (itotal == "")
			printf("       ");
		else
			printf("%6d ", itotal);
		if (stotal == "")
			printf("       ");
		else
			printf("%6d ", stotal);
		if (thetotal == "")
			printf("        ");
		else
			printf(" %6d ", thetotal);
		if (senttotal == "")
			printf("\n");
		else
			printf(" %6d\n", senttotal);
		# first blank line of the report: it ends the table
		print ""
		print "Junked Newsgroups:"
		for (s in badngs)
			printf("\t%s: %d\n", s, badngs[s]);
		print ""
		print "Why articles were rejected:"
		for (s in why)
			printf("\t%s: %d\n", s, why[s]);
		print ""
		if (unknown != "")
			printf("\nUnknown lines: %d\n", unknown);
	       }

---- cut here ----
-- 
Ross Ridge								 //
"The Great HTMU"							[oo]
ross@contact.uucp							-()-
ross@watcsc.waterloo.edu						 //