[alt.sources] newsgroup information

nagel@ics.uci.edu (Mark Nagel) (03/23/90)

Very few (read: none that I know of) newsreaders actually use the
newsgroups file to show users what's what in the world of Usenet.
This is sometimes done by local-only newsreaders, but for distributed
newsreaders, where users read news via a central NNTP server, finding
out which group is for what is hard.  I was (and still am) posting a
monthly list to a local group, but then I thought, "Hey!  NNTP has
the LIST newsgroups command now.  Why not use it?"  So, here is my
initial version of nginfo.  It is in perl.  It allows users to query
the NNTP server with a string.  The default behavior (no flags
given) is to use the arguments as regexps to match against newsgroup
descriptions.  This seems to be desirable.  Options allow users to
use soundex matching (slow and I'm not sure exactly why I put it in
there :-) and/or matching against newsgroup names instead.  Please
send comments and/or enhancements my way.  I'd be especially
interested in speeding up the search.  I get the feeling that
associative tables are not always the best way of approaching
things...

Mark

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of shell archive."
# Contents:  nginfo.1 nginfo
# Wrapped by nagel@wintermute.ics.uci.edu on Thu Mar 22 19:08:58 1990
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
# Unpack 'nginfo.1', the manual page.  An existing file is left untouched
# unless "-c" was given as the first argument to this script.
if test -f 'nginfo.1' -a "${1}" != "-c" ; then
  echo shar: Will not clobber existing file \"'nginfo.1'\"
else
echo shar: Extracting \"'nginfo.1'\" \(1354 characters\)
# Everything up to the END_OF_FILE marker is the file body; sed removes the
# leading "X" from each line before writing it out.
sed "s/^X//" >'nginfo.1' <<'END_OF_FILE'
X.\" $Id: nginfo.p,v 1.1 90/03/08 15:06:48 sources Exp Locker: sources $
X.\" Process with `*roff -man`.
X.if n .nr FI 1
X.if t .nr FI .25
X.TH NGINFO P "8 March 1990" "UC Irvine"
X.SH NAME
Xnginfo - retrieve newsgroup descriptions
X.SH SYNOPSIS
X.B nginfo
X[-ns] pattern [pattern...]
X.br
X.SH DESCRIPTION
X.I Nginfo
Xallows you to look up information on one or more newsgroups
Xbased either on the newsgroup name or on the newsgroup description.
XThe -n flag specifies that the pattern(s) given as arguments are
Xto be used to match against the newsgroup name.  The default is to
Xmatch against the newsgroup description.  If the -s flag is given,
Xthe patterns will be searched for using the soundex algorithm,
Xotherwise, each pattern will be interpreted as a regular
Xexpression like in perl(p).  In either case, all patterns must
Xmatch in order for
X.I nginfo
Xto display a newsgroup description.
X.PP
X.I Nginfo
Xworks by contacting the NNTP server and retrieving the newsgroup
Xdescription list.  The default server is obtained by looking at
Xthe file /usr/local/lib/rn/server, but an alternate server may be
Xspecified by setting the NNTPSERVER environment variable.
X.SH AUTHOR
X.nf
XMark Nagel <nagel@ics.uci.edu>
XDepartment of Information and Computer Science
XUniversity of California
XIrvine, CA  92717
X.fi
X.SH "SEE ALSO"
Xperl(p)
X.SH DIAGNOSTICS
XSelf-explanatory.
X.SH BUGS
END_OF_FILE
# Compare the size of what was written with the 1354 bytes announced above;
# a mismatch is reported but does not stop the unpacking.
if test 1354 -ne `wc -c <'nginfo.1'`; then
    echo shar: \"'nginfo.1'\" unpacked with wrong size!
fi
# end of 'nginfo.1'
fi
# Unpack 'nginfo', the perl script itself.  An existing file is left
# untouched unless "-c" was given as the first argument to this script.
# sed removes the leading "X" from each payload line; the byte count that is
# announced and then verified below must equal the size of the stripped
# payload, so it has to be updated whenever the payload is edited.
#
# Payload fixes relative to the 4116-byte version of the script:
#   - the usage message now shows the real synopsis, [-ns] pattern [pattern...]
#   - the server version is compared numerically, component by component
#     (sub version_lt), instead of with string "lt", which rejected
#     e.g. 1.5.10 as older than 1.5.7
#   - LIST response lines are stripped of an optional CR plus the LF instead
#     of blindly chopping two characters, so the "." terminator is still
#     recognised when a line ends in a bare LF
if test -f 'nginfo' -a "${1}" != "-c" ; then
  echo shar: Will not clobber existing file \"'nginfo'\"
else
echo shar: Extracting \"'nginfo'\" \(4578 characters\)
sed "s/^X//" >'nginfo' <<'END_OF_FILE'
X#! /usr/bin/perl
X
X$rcs = '$Id: nginfo,v 1.1 90/03/08 15:06:47 sources Exp Locker: sources $';
X
X##############################################################################
X# nginfo
X#
X# Look up newsgroup information by contacting a NNTP server.  Match
X# based on newsgroup name(s) or newsgroup description.
X#
X# usage: nginfo [-ns] pattern [pattern...]
X#
X# Mark Nagel <nagel@ics.uci.edu>
X# Department of Information and Computer Science
X# University of California
X# Irvine, CA  92717
X##############################################################################
X
X($prog = $0) =~ s/.*\///;
X
Xdo "sys/socket.h" || die "unable to load socket package\n";
Xdo "getopts.pl" || die "unable to load getopts package\n";
X
X&Getopts("ns") || die "usage: $prog [-ns] pattern [pattern...]\n";
X$match = $opt_s ? "soundex_match" : "regex_match";
X
Xchop($hostname = `hostname`);
Xchop($server = `cat /usr/local/lib/rn/server`);
X$server = $ENV{"NNTPSERVER"} if ($ENV{"NNTPSERVER"} ne "");
X
X#
X# get socket information
X#
X$sockaddr_t = "S n a4 x8";
X($name, $aliases, $proto) = getprotobyname("tcp");
X($name, $aliases, $port) = getservbyname("nntp", "tcp");
X($name, $aliases, $type, $len, $thisaddr) = gethostbyname($hostname);
X($name, $aliases, $type, $len, $thataddr) = gethostbyname($server);
X$this = pack($sockaddr_t, &AF_INET, 0, $thisaddr);
X$that = pack($sockaddr_t, &AF_INET, $port, $thataddr);
X
X#
X# connect to NNTP server
X#
Xsocket(S, &PF_INET, &SOCK_STREAM, $proto) || die "socket: $!\n";
Xbind(S, $this) || die "bind: $!\n";
X$| = 1;
Xprint "Connecting to NNTP server on $server...";
Xconnect(S, $that) || die "connect: $!\n";
Xselect(S); $| = 1; select(STDOUT);
X
X#
X# retrieve opening banner
X#
Xchop($banner = <S>);
Xdie "(rejected by server)\n" if ($banner !~ /^20[01]/);
Xif ($banner =~ / version ([0-9.]+)/) {
X  die "(version too old)\n" if (&version_lt($1, "1.5.7"));
X}
Xprint "connected.\n";
X$| = 0;
X
X#
X# retrieve the list of newsgroup names
X#
Xprint S "LIST active\n";
X$response = <S>;
Xdie "unable to retrieve list of newsgroups\n" if ($response !~ /^215 /);
Xwhile (<S>) {
X  s/\r?\n$//;
X  last if ($_ eq ".");
X  ($ng) = /([^ \t]*)/;
X  $description{$ng} = "unknown (probably a new group)";
X}
X
X#
X# retrieve the list of newsgroup descriptions
X#
Xprint S "LIST newsgroups\n";
X$response = <S>;
Xdie "unable to retrieve newsgroup descriptions\n" if ($response !~ /^215 /);
Xwhile (<S>) {
X  s/\r?\n$//;
X  last if ($_ eq ".");
X  ($ng, $text) = /([^ \t]*)[ \t]*(.*)/;
X  $description{$ng} = $text;
X}
X
X#
X# close NNTP server connection
X#
Xclose(S);
X
X#
X# use information to determine a newsgroup name or to describe a group
X#
Xif ($opt_n) {
X  while (($ng, $text) = each(%description)) {
X    print "$ng\n\t$text\n" if (&$match($ng, @ARGV));
X  }
X} else {
X  while (($ng, $text) = each(%description)) {
X    print "$ng\n\t$text\n" if (&$match($text, @ARGV));
X  }
X}
X
X#############################################################################
X# search for words in a line using soundex matching
X
Xsub soundex_match
X{
X  local($text, @words) = @_;
X  local(%soundkey);
X  local($w, $retval);
X
X  $retval = 1;
X  for $w (split(/[ \t]+/, $text)) {
X    $soundkey{&soundex($w)} = 1;
X  }
X  for $w (@words) {
X    $retval = 0 unless ($soundkey{&soundex($w)});
X  }
X  $retval;
X}
X
X#############################################################################
X# search for regex's in a line
X
Xsub regex_match
X{
X  local($text, @words) = @_;
X  local($w, $retval);
X
X  $retval = 1;
X  for $w (@words) {
X    $retval = 0 unless ($text =~ /$w/i);
X  }
X  $retval;
X}
X
X#############################################################################
X# soundex algorithm
X
Xsub soundex
X{
X  local($word) = @_;
X  local(@code) = (0,1,2,3,0,1,2,0,0,2,2,4,5,5,0,1,2,6,2,3,0,1,0,2,0,2);
X  local($key) = "Z";
X  local($last, $next);
X
X  $word =~ y/a-z/A-Z/;
X  $word =~ s/^[^A-Z]*//;
X  $word =~ s/(.)// && ($key = $1);
X  $last = $code[ord($key) - ord("A")];
X  while ($word ne "" && length($key) != 4) {
X    $word =~ s/^[^A-Z]*//;
X    if ($word =~ s/(.)//) {
X      next if (($next = $code[ord($1) - ord("A")]) == $last);
X      $key .= $last if (($last = $next) != 0);
X    }
X  }
X  $key .= "0" x (4 - length($key));
X}
X
X#
X# true if dotted version $have is numerically older than $want; needed
X# because a string "lt" would wrongly rank "1.5.10" below "1.5.7"
X#
Xsub version_lt
X{
X  local($have, $want) = @_;
X  local(@have) = split(/\./, $have);
X  local(@want) = split(/\./, $want);
X  local($i, $older);
X
X  $older = 0;
X  for ($i = 0; $i <= $#have || $i <= $#want; $i++) {
X    next if ($have[$i] == $want[$i]);
X    $older = ($have[$i] < $want[$i]);
X    last;
X  }
X  $older;
X}
END_OF_FILE
# Compare the size of what was written with the 4578 bytes announced above;
# a mismatch is reported but does not stop the unpacking.
if test 4578 -ne `wc -c <'nginfo'`; then
    echo shar: \"'nginfo'\" unpacked with wrong size!
fi
chmod +x 'nginfo'
# end of 'nginfo'
fi
echo shar: End of shell archive.
exit 0
--
Mark Nagel
UC Irvine Department of ICS   +----------------------------------------+
ARPA: nagel@ics.uci.edu       | You are in a twisty maze of little     |
UUCP: ucbvax!ucivax!nagel     | newsgroups, all looking alike.         |