[comp.sources.misc] v09i059: analysis of Bnews logfile

allbery@uunet.UU.NET (Brandon S. Allbery - comp.sources.misc) (12/13/89)

Posting-number: Volume 9, Issue 59
Submitted-by: jv@mh.nl (Johan Vromans)
Archive-name: scanlog.pl

The following perl program reads a Bnews logfile, and produces reports.

#---------------------------------- cut here ----------------------------------
#!/bin/sh
# This is a shell archive.  Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
#
# Wrapped by Johan Vromans <jv@mhres> on Sun Dec 10 14:53:47 1989
#
# This archive contains:
#	scanlog.pl	
#
# Error checking via wc(1) will be performed.

# Run the commands below with an empty LANG -- presumably so that sed
# and wc behave the same whatever the unpacker's locale is.
LANG=""; export LANG

echo x - scanlog.pl
# Write the payload, up to the @EOF line, to scanlog.pl.  The sed
# command removes one leading '@' from every line, which is why payload
# lines that really begin with '@' are stored with an extra one.
sed 's/^@//' >scanlog.pl <<'@EOF'
#!/usr/bin/perl -s

# This program requires perl version 3.0, patchlevel 4 or higher

# @(#)@ scanlog	1.2 - scanlog.pl

# This program scans a BNews logfile (default "/usr/lib/news/log"),
# and produces statistics.
#
# It is able to generate three different reports.  The reports are
# written to standard output. Unrecognized log entries are written to
# standard error, together with their line numbers in the logfile. These
# entries usually need manual inspection, or may be used to enhance the
# program logic.
# 
# These are the reports:
# 
#  1. Report of articles received, posted, ... for each host
# 
#     For each news host, the following messages are tallied and
#     reported:
#       Recv'd: # of articles received
#       Posted: # of articles posted
#       Xmit'd: # of articles sent to other hosts
#       Ctrl:   # of control messages
#       Cancel: # of cancel messages
#       Junked: # of articles moved to junk
#       Dupl:   # of duplicate, hence rejected, articles
# 
#     Report format:
# 
#       News logfile report from Dec  3 05:21 to Dec 10 04:27
# 
#       Host     Recv'd  Posted  Xmit'd    Ctrl  Cancel  Junked    Dupl
#       ------- ------- ------- ------- ------- ------- ------- -------
#       hp4nl     11653       0    1140      37      30      48       9
#       local         0      18      11       0       0       0       0
#       ------- ------- ------- ------- ------- ------- ------- -------
#       Total     11653      18    1151      37      30      48       9
# 
#  2. Report of non-local newsgroups referenced
# 
#     For each newsgroup which was not localized, the references are
#     tallied and reported:
# 
#       Unrecognized newsgroups from Dec  3 05:21 to Dec 10 04:27
# 
#       Newsgroup                   Refs
#       --------------------------- ----
#       tue.humour                     1
#       philnet.general                1
# 
#  3. Report of new newsgroups created
# 
#     For each newsgroup is indicated if the group was created (i.e.
#     via a newgroup message), and if the spool area was created (i.e.
#     upon receipt of the first article).
# 
#       Newsgroups created from Dec  3 05:21 to Dec 10 04:27
# 
#       Newsgroup          created: group dir
#       --------------------------- ----- ---
#       alt.prose.d                         +
#       alt.recovery                    +   +
# 
# Usage:
# 
#     scanlog [ options ] [ logfile ]
# 
#       options:
#         -report     generate standard report
#         -foreign    generate report of unknown groups
#         -new        generate report of new groups
#        default is all reports.
# 
#       logfile       name of the logfile to use
#        default is "/usr/lib/news/log"
# 
# Copyright 1989 Johan Vromans
# Declared public domain in the hope it may be useful to others as
# well.

# Report selection.  The -s switch on the #! line turns each command
# line option (-report, -foreign, -new) into a variable of the same
# name, so an option was given exactly when its variable is defined.
#
$do_new = defined ($new);
$do_foreign = defined ($foreign);
$do_report = defined ($report);

# No report selected explicitly: produce all three.
if ( !$do_report && !$do_foreign && !$do_new ) {
  $do_report = 1;
  $do_foreign = 1;
  $do_new = 1;
}

# No logfile named on the command line: read the standard one.
unshift (@ARGV, "/usr/lib/news/log") if $#ARGV == -1;

# Timestamps of the first and the last log entry; filled in while the
# logfile is being read.
$lastdate = "";
$firstdate = "";

# process logfile
#
# Each entry is expected to consist of tab-separated fields: a
# timestamp, the name of the host concerned, and the message text.
# Messages are classified by pattern and tallied per host; entries
# that cannot be classified are echoed to standard error, preceded by
# "?" and their line number, for manual inspection.
while ( $line = <> ) {

  # Remove the line terminator only, so that a final line without a
  # newline does not lose its last character.
  $line =~ s/\n$//;
  @a = split (/\t/, $line);
  if ( $#a >= 2 ) {             # need three fields
    $lastdate = $a[0];
    $firstdate = $lastdate unless $firstdate;
    $host = $a[1];
    $used{$host}++;             # every host seen gets a report row

    $msg = join(" ",@a[2..$#a]);        # join, in case the message was split

    # Analysis of messages
    if ( $msg =~ /^received / ) {
      $received{$host}++;
    }
    elsif ( $msg =~ /^Duplicate / ) {
      $duplicate{$host}++;
    }
    elsif ( $msg =~ /^Cancelling / ) {
      $cancelled{$host}++;
    }
    elsif ( $msg =~ /^Ctl Msg / ) {
      $ctrl{$host}++;
      # $' is the text that follows the "Ctl Msg " prefix.
      if ( $' =~ /^(\S+) .*: newgroup / ) {
        $created{$1} = "+";
        # Make sure the group is listed in the new-groups report, but
        # keep a "+" that an earlier "make newsgroup" entry for the
        # same group may already have recorded.
        $brandnew{$1} = "" unless defined ($brandnew{$1});
      }
    }
    elsif ( $msg =~ /^linecount expected / ) {
      # ignore
    }
    elsif ( $msg =~ /^No valid newsgroups / ) {
      $junked{$host}++;
    }
    elsif ( $msg =~ /^posted / ) {
      $posted{$host}++;
    }
    elsif ( $msg =~ / newsgroup (\S+) not localized$/ ) {
      $foreign{$1}++;
    }
    elsif ( $msg =~ / sent to / ) {
      $xmit{$host}++;
    }

    # Misc
    elsif ( $msg =~ /^make newsgroup (\S+) in dir / ) {
      $brandnew{$1} = "+";
    }

    # These entries are ignored
    elsif ( $msg =~ /^Expired article / ) {
    }
    elsif ( $msg =~ /^Can't cancel .* non-existent$/ ) {
    }

    # Notify
    else {
      print STDERR "? $. $line\n";
    }
  }
  else {
    # Fewer than three fields.  Use print, not printf: the entry is
    # data and must not be interpreted as a format string, or any "%"
    # in it would garble the diagnostic.
    print STDERR "? $. $line\n";
  }
}

# Page header and detail line of the standard (per host) report.
format std_hdr =
News logfile report from @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
"$firstdate to $lastdate"

Host     Recv'd  Posted  Xmit'd    Ctrl  Cancel  Junked    Dupl
@.
format std_out =
@@<<<<<< @>>>>>> @>>>>>> @>>>>>> @>>>>>> @>>>>>> @>>>>>> @>>>>>>
$host, $received, $posted, $xmit, $ctrl, $cancelled, $junked, $duplicate
@.

# Report 1: one line per host with its message counts, followed by a
# line of totals.  Every line is produced by loading the variables
# named in std_out and calling write.
if ( $do_report ) {

  $^ = "std_hdr";
  $~ = "std_out";

  # Rule under the column headings: a detail line of dashes.
  $host = $received = $posted = $ctrl = $cancelled = 
    $duplicate = $xmit = $junked = "--------";
  write;

  $Treceived = $Tposted = $Tduplicate = $Txmit = 
    $Tcancelled = $Tctrl = $Tjunked = 0;
  foreach $host ( sort (keys (%used))) {
    # A host without any entry of some kind has no element in that
    # table.  Adding zero turns the missing value into a number, so
    # the column shows 0 instead of being left blank.
    $Treceived += ($received = $received{$host} + 0);
    $Tposted += ($posted = $posted{$host} + 0);
    $Tduplicate += ($duplicate = $duplicate{$host} + 0);
    $Tcancelled += ($cancelled = $cancelled{$host} + 0);
    $Tctrl += ($ctrl = $ctrl{$host} + 0);
    $Txmit += ($xmit = $xmit{$host} + 0);
    $Tjunked += ($junked = $junked{$host} + 0);
    write;
  }

  # Rule above the totals.
  $host = $received = $posted = $ctrl = $cancelled = 
    $duplicate = $xmit = $junked = "--------";
  write;

  # Totals over all hosts.
  $host = "Total";
  $received = $Treceived;
  $posted = $Tposted;
  $duplicate = $Tduplicate;
  $xmit = $Txmit;
  $junked = $Tjunked;
  $ctrl = $Tctrl;
  $cancelled = $Tcancelled;
  write;
}

# Page header and detail line of the report of non-localized groups.
format for_hdr =
Unrecognized newsgroups from @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
"$firstdate to $lastdate"

Newsgroup                      Refs
---------------------------- ------
@.
format for_out =
@@<<<<<<<<<<<<<<<<<<<<<<<<<<< @>>>>>
$group, $refs
@.

# Report 2: the newsgroups that were reported as not localized, in
# sorted order, each with its number of references.  Nothing at all is
# printed when there are no such groups.
if ( $do_foreign ) {

  # Zero lines left on the page: the next write begins a new page and
  # so prints this report's own header first.
  $- = 0;
  $~ = "for_out";
  $^ = "for_hdr";

  @foreign = sort (keys (%foreign));
  foreach $group ( @foreign ) {
    $refs = $foreign{$group};
    write;
  }
}

# Page header and detail line of the report of newly created groups.
format new_hdr =
Newsgroups created from @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
"$firstdate to $lastdate"

Newsgroup           Created: group dir
---------------------------- ----- ---
@.
format new_out =
@@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<  @   @
$group, $cr, $dir
@.

# Report 3: the newsgroups for which a newgroup control message or a
# "make newsgroup" entry was logged, in sorted order.  The "group"
# column takes its mark from %created, the "dir" column from
# %brandnew.  Nothing at all is printed when there are no such groups.
if ( $do_new ) {

  # Zero lines left on the page: the next write begins a new page and
  # so prints this report's own header first.
  $- = 0;
  $~ = "new_out";
  $^ = "new_hdr";

  @brandnew = sort (keys (%brandnew));
  foreach $group ( @brandnew ) {
    $cr = $created{$group};
    $dir = $brandnew{$group};
    write;
  }
}
@EOF
# Verify the extracted file: wc prints its line, word and character
# counts, which "set" stores in $1, $2 and $3.  A mismatch is only
# reported; unpacking continues.
# NOTE: the expected totals below describe the payload exactly as it
# was wrapped; they must be recomputed whenever the payload is edited.
set `wc -lwc <scanlog.pl`
if test $1$2$3 != 26910176847
then
	echo ERROR: wc results of scanlog.pl are $* should be 269 1017 6847
fi

# Make the extracted file read-only.
chmod 444 scanlog.pl

# Stop here, so that the signature lines that follow are never read as
# shell commands.
exit 0
-- 
Johan Vromans				       jv@mh.nl via internet backbones
Multihouse Automatisering bv		       uucp: ..!{uunet,hp4nl}!mh.nl!jv
Doesburgweg 7, 2803 PL Gouda, The Netherlands  phone/fax: +31 1820 62944/62500
------------------------ "Arms are made for hugging" -------------------------