[comp.sources.misc] v19i063: log_archie - Script for easy access to Archie, Part01/01

dank@blacks.jpl.nasa.gov (Dan Kegel) (05/14/91)

Submitted-by: Dan Kegel <dank@blacks.jpl.nasa.gov>
Posting-number: Volume 19, Issue 63
Archive-name: log_archie/part01

log_archie is a csh script that logs in to quiche.cs.mcgill.ca to access 
Archie, the Archive Server Listing Service.  Just about any publicly 
available program posted to the net can be located with archie.

A history of the session is saved in the file archie.log, and a summarized 
version of the log is placed in the file archie.logg.  Each line in archie.logg
gives the FTP hostname, the directory name, the file name, and the file size 
and date. Since each line in archie.logg contains complete information, you can
operate on it with grep and sort conveniently.  

Dan
----
#! /bin/sh
# This is a shell archive.  Remove anything before this line, then feed it
# into a shell via "sh file" or similar.  To overwrite existing files,
# type "sh file -c".
# The tool that generated this appeared in the comp.sources.unix newsgroup;
# send mail to comp-sources-unix@uunet.uu.net if you want that tool.
# Contents:  Makefile clipline.1 clipline.c log_archie log_archie.1
#   postarchie.awk
# Wrapped by kent@sparky on Tue May 14 11:28:29 1991
# Look commands up only in /bin, /usr/bin and /usr/ucb while unpacking.
PATH=/bin:/usr/bin:/usr/ucb
export PATH
# Tell the user which line marks a complete, untruncated archive.
echo 'If this archive is complete, you will see the following message:'
echo "          \"shar: End of archive 1 (of 1).\""
# Unpack 'Makefile' unless it already exists and "-c" was not given.
# The install rule runs catman only when the `sun` command succeeds; a
# host without that command (or where it reports false) simply skips the
# step instead of making `make install` end with an error.
if test -f 'Makefile' && test "${1}" != "-c" ; then
  echo shar: Will not clobber existing file \"'Makefile'\"
else
  echo shar: Extracting \"'Makefile'\" \(612 characters\)
  sed "s/^X//" >'Makefile' <<'END_OF_FILE'
X# Makefile for log_archie utility
X
X# Set BIN and MAN to the directories you want to install log_archie executable
X# and man pages.
XBIN = ../bin
XMAN = ../man
XSECTION = man1
X
Xall: clipline log_archie
X
Xinstall: all
X	cp clipline $(BIN)
X	chmod +x log_archie
X	cp log_archie $(BIN)
X	chmod +x postarchie.awk
X	cp postarchie.awk $(BIN)
X	cp clipline.1 log_archie.1 $(MAN)/$(SECTION)
X	# Catman -M works properly on SunOS, creating $(MAN)/whatis, but on
X	# Concentrix this tries to overwrite /usr/lib/whatis.
X	# Let's be careful and only run it if running on a Sun workstation.
X	if sun 2>/dev/null; then catman -M $(MAN); fi
END_OF_FILE
  # The expected size must match the payload above byte for byte.
  if test 612 -ne `wc -c <'Makefile'`; then
    echo shar: \"'Makefile'\" unpacked with wrong size!
  fi
  # end of 'Makefile'
fi
# clipline.1: manual page for clipline.  A copy already on disk is left
# alone unless "-c" was passed as the first argument.
if test "${1}" = "-c" || test ! -f 'clipline.1' ; then
  echo 'shar: Extracting "clipline.1" (540 characters)'
  sed "s/^X//" >'clipline.1' <<'END_OF_FILE'
X.TH CLIPLINE 1
X.SH NAME
Xclipline \- clip long lines, filter out ^M and backspace
X.SH SYNOPSIS
Xclipline < infile > outfile
X.SH DESCRIPTION
X\fIclipline\fR
Xis a stupid C program used to postprocess script files from logins to
Xremote hosts to execute backspaces and delete ^M characters, and to clip 
Xlines longer than 200 characters.
XLong lines make all Unix tools (vi, awk, cut, etc.) choke.
X.LP
XThis is especially useful for the output of Archie, as its running status
Xmessage becomes a single output line many thousands of characters long.
END_OF_FILE
  # Warn when the unpacked byte count differs from the advertised one.
  if test 540 -ne `wc -c <'clipline.1'`
  then
    echo 'shar: "clipline.1" unpacked with wrong size!'
  fi
  # end of 'clipline.1'
else
  echo 'shar: Will not clobber existing file "clipline.1"'
fi
# Unpack 'clipline.c' unless it already exists and "-c" was not given.
# Relative to the original posting the C source:
#   - includes <stdlib.h> for exit() and declares main() as int, since
#     current compilers reject implicit declarations and implicit int;
#   - keeps counting columns beyond COLS, so a backspace on an over-long
#     line removes a clipped character rather than one that was kept.
if test -f 'clipline.c' && test "${1}" != "-c" ; then
  echo shar: Will not clobber existing file \"'clipline.c'\"
else
  echo shar: Extracting \"'clipline.c'\" \(738 characters\)
  sed "s/^X//" >'clipline.c' <<'END_OF_FILE'
X/*
X Program to display the first COLS columns of a bunch of text lines.
X Deletes ^M and backspace characters.
X No line length limit (unlike cut or awk).
X */
X#include <stdio.h>
X#include <stdlib.h>
X
X#define COLS 200
X
Xint main()
X{
X    char obuf[COLS+1];
X    int c;
X    int i;	/* column of next char; may exceed COLS */
X
X    i = 0;
X    while ((c=getchar()) != EOF) {
X	switch (c) {
X	case '\n':	/* newline */
X	    obuf[i < COLS ? i : COLS] = 0;
X	    puts(obuf);
X	    i = 0;
X	    break;
X	case '\r':	/* return */
X	    break;
X	case '\010':	/* backspace */
X	    if (i > 0) i--;
X	    break;
X	default:
X	    if (i < COLS)
X		obuf[i] = c;
X	    i++;
X	    break;
X	}
X    }
X    if (i > 0) {
X	obuf[i < COLS ? i : COLS] = 0;
X	puts(obuf);
X    }
X    exit(0);
X}
END_OF_FILE
  # The expected size must match the payload above byte for byte.
  if test 738 -ne `wc -c <'clipline.c'`; then
    echo shar: \"'clipline.c'\" unpacked with wrong size!
  fi
  # end of 'clipline.c'
fi
# log_archie: the csh front end itself.  A copy already on disk is left
# alone unless "-c" was passed as the first argument.
if test "${1}" = "-c" || test ! -f 'log_archie' ; then
  echo 'shar: Extracting "log_archie" (365 characters)'
  sed "s/^X//" >'log_archie' <<'END_OF_FILE'
X#!/bin/csh -f
X# Log in to Archie
Xset LOG=archie.log
Xecho Logging in to archie.  Saving session as ${LOG}.
Xrsh quiche.cs.mcgill.ca -l archie | tee ${LOG}.tmp
X# Postprocess log to delete ^M and clip long lines:
Xclipline < ${LOG}.tmp > ${LOG}
Xpostarchie.awk ${LOG} > ${LOG}g
Xrm ${LOG}.tmp
Xecho Session complete.  Session saved in ${LOG}, grep-able version in ${LOG}g.
END_OF_FILE
  # Warn when the unpacked byte count differs from the advertised one.
  if test 365 -ne `wc -c <'log_archie'`
  then
    echo 'shar: "log_archie" unpacked with wrong size!'
  fi
  # end of 'log_archie'
else
  echo 'shar: Will not clobber existing file "log_archie"'
fi
if test -f 'log_archie.1' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'log_archie.1'\"
else
  echo shar: Extracting \"'log_archie.1'\" \(13950 characters\)
  sed "s/^X//" >'log_archie.1' <<'END_OF_FILE'
X.TH ARCHIE 1
X.SH NAME
Xlog_archie \- log in to archive server, save session in log file
X.SH SYNOPSIS
Xlog_archie
X.SH DESCRIPTION
X\fIlog_archie\fR
Xlogs in to quiche.cs.mcgill.ca to access Archie, the totally awesome database
Xof all source code in the universe.  Just about any public-domain program
Xposted to the net can be located with this interactive tool.
X.SH OUTPUT FILES
XA history of the session is saved in the file archie.log, and a summarized version 
Xof the log is placed in the file archie.logg.  Each line in archie.logg gives 
Xthe FTP hostname, the directory name, the file name, and the file size and date.
XSince each line in archie.logg contains complete information, you can operate on
Xit with grep and sort conveniently.  (The lines are quite wide, though, which
Xsome may find annoying.)
X.SH EXAMPLE
XLet's say you're looking for a program to convert a certain kind of file
Xto Postscript.  This kind of program tends to have the word '2ps' in the title.
XTo search for all programs like this, type
X.nf
Xlog_archie
Xprog 2ps
Xquit
X.fi
X.LP
XAll the matching programs and their FTP sites and directories are displayed on
Xthe screen and saved in archie.log and archie.logg.  
XTo look for only sites in Japan, and sort by filename, you would then type
X.nf
Xgrep '\\.jp' archie.logg | sort \-b +2
X.fi
X(Only users of SunOS need the \-b option; this works around a bug in SunOS's sort
Xutility.)
X
X.SH README
XHere's the information posted by the Archie Group about their server:
X.nf
X.DS
X
XArchie 2.0
X----------
X
XThe "Archie Group" of McGill University is pleased to announce Archie,
Xthe "Archive Server Server" Version 2.0.
X
X
X
X
X
XMcGill University Operating "archie"
X------------------------------------
X- An Internet  Archive Server Listing Service
X----------------------------------------------
X
XGiven the number of hosts being used as archive sites nowadays, there can
Xbe great difficulty in finding needed software in a distributed
Xenvironment. You may know that the software that you need is out there,
Xbut it can sometimes be difficult to find.  The School of Computer
XScience at McGill University has one solution to the problem - "archie".
X
XSince the announcement of the dedicated-database version of archie in
XNovember, the popularity of the program has grown by leaps and bounds.
X>From an average of about 30 logins/day in November we are now averaging
Xover 500 with our all-time high coming in at 700 for a single day.
X(Our ex-boss owes us lunch for the 500+ mark :-). Archie's email
Xinterface averages about 40/day and anonymous ftp to quiche (for
Xretrieval of the compressed site listings files in ~ftp/archie/listings)
Xis over 70/day. Needless to say, quiche is a well-used system right
Xabout now :-)
X
X
XGetting To The Point:
X---------------------
X
XSo how do you get to use archie? If you are Internet connected, it's
Xeasy. Telnet to quiche.cs.mcgill.ca (132.206.2.3 or 132.206.51.1) and
Xlogin as user "archie". You should get a banner message and status
Xreport on our latest additions (there's no password, although we do log
Xthe sessions to provide rudimentary stats). "help" gets a list of valid
Xcommands. Feedback welcome and can be sent to archie-l@cs.mcgill.ca
X
X
XNOTE:  The following changes only apply to the interactive version of
Xarchie (the one you see when you telnet or rlogin to quiche) and NOT to
Xthe E-Mail interface. We will hopefully be overhauling that interface in
Xthe coming week(s).
X
X
XQuick Summary
X-------------
X
XFor those of you who don't want to read the whole thing, here's a quick
Xsummary of what's new in V2.0. If you want the full explanation, skip to
Xthe next section. Otherwise, see the archie online help facility.
X
X
X(a) Speed and performance under load should be improved. Feedback (to
Xarchie-l@cs.mcgill.ca) on this would be appreciated.
X
X(b) 3 new searching methods added. See help section under "set search".
X
X(c) Output may now be sorted. See help under "set sortby".
X
X(d) New Software Description database to help you find the names of
Xpackages to do what you want done, as well as an RFC index and other
Xuseful information. See help under "whatis".
X
X(e) New "mail" command allows you to mail archie results back to you.
XSay goodbye to those hated script sessions :-). See help under
X"mail" and "set mailto".
X
X(f) "list" command now tells the truth. Help "list".
X
X(g) A "status" variable allows you to turn on or off search progress
Xinformation. Help "set status".
X
X
X
XChanges in Version 2.0 
X---------------------- 
X
XThanks to all the feedback we've gotten over the past couple of months,
Xwe have modified archie into what we hope will be a more friendly and
Xefficient service. 
X
X
XThe changes in V2.0 are:
X
X
X
X(1) Speed & Implementation
X----------------------
X
XFor faster execution, Archie has been rewritten using a shared memory
Xmodel which greatly improves execution times especially when the
Xhost on which archie is running is under load [which, for those of
Xyou who use archie regularly, know that quiche has been for some
Xtime now :-]. This model also allows for much faster database
Xupdates. We'd appreciate feedback on what kind of response times you
Xare getting (subjective rather than objective).
X
X
X(2) Searching
X---------
X
XWider range of search methods. Until this point, archie could only
Xsearch using regular expressions (as defined in ed(1)). Since most
Xusers don't require the power of regex's (and many who don't use them
Xregularly have (understandably) trouble composing them), 3 new search
Xmethods have been added, bringing the total to 4.
X
X
XTo change the search method, set the "search" variable and use the
X"prog" command per usual.  Command line options are in the works
Xbut have not yet been incorporated into this version of archie. The
Xvalue of the search variable for each method is listed in brackets
X'[ ]' below.  Type "help set search" at the "archie>" prompt if you
Xwant more info.
X
X(1) Substring (case insensitive) ["sub"]. As above but ignoring
Xthe case of the strings involved. Speed about on par with the
Xregex equivalent.
X
X(2) Substring (case sensitive) ["subcase"]. A simple, everyday substring
Xsearch. A match occurs if the the file (or directory) name in
Xthe database contains the user-given substring. Slightly faster
Xthan the equivalent regex.
X
X(3) Exact match ["exact"]. The fastest search method of all.  The
Xrestriction is that the user (search) string has to exactly
Xmatch (including case) the string in the database. Provided for
Xthose of you who know just what you are looking for. For example,
Xif you wanted to know where all the xlock.tar.Z files were, this
Xis the kind of search to use. [For those of you that are
Xinterested, the search is O(1) in this case via the magic of
Xdbm].
X
X(4) Regex ["regex"]. The "old" method. Searches the database with
Xthe user (search) string which is given in the form of an ed(1)
Xregular expression. This is the DEFAULT search method.
X
X
XNote: The "status" line that used to appear when the "pager" variable
Xwas set and the search was proceeding (showing the number of
Xmatches found and the percentage of the database) can be enabled
Xor disabled by the use of the "status" variable, which can
Xeither be set or unset depending on if you want the line to be
Xdisplayed or not. Therefore there will be no search output
Xdisplayed until the search is complete or aborted by the user.
X
X
X(3) Sorting
X-------
X
XOrdering the output. Archie V1.X had no concept of sorted output,
Xexcept for the fact that we tried to do the updates in lexical order
Xso that the output would be (mostly) sorted in that order. It didn't
Xwork. Consequently, you may now sort your 'prog' command output in 5
Xdifferent ways.  For each method, the "natural" sort order (or at
Xleast, what we consider to be the natural order) is the default.
X
XTo change the sort method, set the "sortby" variable.  The value of
Xthe sortby variable for each method is listed in brackets '[ ]'
Xbelow. Command line options are not available at this time. 
X
XThe reverse sorting orders from those described here are obtained by
Xprepending "r" to the sortby value given. (Eg. reverse hostname
Xorder "hostname" is "rhostname").
X
X(1) Hostname order ["hostname"]. Output is sorted on the archive
Xhostname in lexical order. 
X
X(2) File/Directory name modification time ["time"]. Output is sorted
Xwith the most recent modifcation times of the found
Xfile/directory names coming first (youngest -> oldest).
X
X(3) File/Directory size ["size"]. Output is sorted by the size of
Xthe found files/directories, largest first.
X
X(4) File/Directory name lexical order ["filename"]. 
X
X(5) Database order ["none"]. In other words, effectively non sorted.
XThis is the default order and is the one that most users of
Xarchie 1.X versions will be used to.
X
X
X
XNote: Typing the keyboard interrupt character ( Ctl-C for most people on
XUNIX) during a search will cause the search to aborted. The
Xresults up to that time will be sorted (determined by the value of
Xthe sortby variable) and the results output. Typing an abort character
Xduring the sort will cause that to be aborted. Results up to that
Xpoint will be output.
X
X
X(4) PD Software Description Database
X--------------------------------
X
X
XA new database, similar to the one that the man(1) UNIX command uses
Xwhen doing a "keyword" ( -k option ) lookup has been added to
Xarchie. The database currently contains about 2600 entries that we
Xhave gleaned from various sources (such as the comp.sources.*,
Xalt.sources and RFC indices).  The format is basically the name of a
XPD program, document, or software package followed by a short
Xdescription of said object. 
X
XThe command is "whatis" and takes a (sub)string as an argument. All
Xlines in the database containing that substring (case insensitive)
Xwill be printed.
X
XI think such a beast would be very useful if it were properly
Xmaintained. These current entries should be considered the mere start
Xof the database and I'm depending on all you authors and maintainers
Xout there to send me additions, corrections and updates to the
Xvarious entries in the database. All such info should be sent to 
X
X
Xarchie-admin@cs.mcgill.ca
X
X
XAll entries are welcome, and I'll endeavour to keep the database
Xuptodate. I have not finalized what will and will not be in it so
Xsend whatever you have along and I'll make up the policy as we go
Xalong.
X
X
X
X
X(5) Getting rid of those crummy "script" sessions
X-------------------------------------------
X
XYour days of typing "script" before every interactive archie session
Xare now over: archie can now mail you the results of your interactive 
Xsessions. It works like this: 
X
X(a) Set the "mailto" variable to your E-mail address
X
X(b) Run archie as you normally would. When you get a result that you
Xwant to keep a record of (and after you have finished browsing
Xthrough it if you have the pager set on) type "mail". Archie will
Xautomatically forward the results of the last request (site,
Xprog, etc) to the email address set before. If you have not set
Xthe address in the mailto variable you may specify one on the
Xcommand line to the "mail" command. [If you do neither, and type
X"mail", archie will tell you].
X
X(c) The mail is sent asynchronously (you don't have to wait for it to
Xbe sent). You will be informed when it is complete.
X
XIf the generated output from archie is greater than 45K bytes, it
Xwill automatically be split it into as many parts as required to get
Xit to you in chunks this size or less. This is so as to cooperate with
Xcertain mail systems which don't handle 50+ K chunks. [Many thanks to
XMark Crispin's c-client library of mail routines which made this code
XSOOOO much easier]
X
XNote: For those of you who have to do source routing for your email,
Xremember that the mail address given has to be a path from our
Xmachines to yours. Our mail setup here is pretty darned good
X(if I might say so myself) so the results should get to you in
Xreasonable time (there's no queueing on our part unless the load
Xgets abnormally high).
X
X
X(6)  What archive sites does archie know about?
X-----------------------------------
X
XThe "list" command which has been out for a couple of weeks under
Xversion 1.3 is now formally part of archie. This command allows you
Xto specify a regular expression as an argument and prints the site
Xnames in the database which match that expression, along with the
Xprimary IP address of the site and the date that archie last updated
Xthe site for the database. "list" without an argument prints the
Xdata on all sites that archie knows about.
X
X
X
X(7)  Getting kicked off for loitering
X--------------------------------
X
XArchie now has an autologout feature (well, actually it has had one
Xfor the past couple of weeks, but we're now telling you about it
X:-). If you hang around for too long without doing anything, we'll
Xbump you off and free up the resources for the next person along. We
Xaren't very strict on this and, in fact, you can set the autologout
Xperiod yourself, varying from 1 minute to 5 hours, with 1 hour being
Xthe default. The variable "autologout" controls this feature.
X
X
X
XThings to be done
X-----------------
X
XA couple of things on our wishlist that still haven't been done:
X
X(1) Restricting searches to specific sites (soon hopefully).
X
X(2) Non UNIX sites aren't in the database (soon, maybe).
X
X(3) GUI interface (a little further off).
X
X
XThe email interface will have to be brought up to the level of the
Xinteractive interface (as well as fixing some pretty annoying bugs
Xin it), and hopefully that will be done fairly soon.
X
X
XThat's all for the moment folks. We would really like to see that
X"whatis" database get off of the ground and all contributions are welcome.
X
XIf you have any comment, suggestions or constructive criticism, please
Xdon't hesitate to drop us a line at
X
Xarchie-l@cs.mcgill.ca
X
X
XIt was your comments which led to the above improvements and we'd like to
Xkeep hearing from you.
X
X
X- The "Archie Group":  Bill Heelan (wheelan@cs.mcgill.ca)
XPeter Deutsch (peterd@cc.mcgill.ca)
XAlan Emtage (bajan@cs.mcgill.ca)
X.DE
X.fi
END_OF_FILE
  if test 13950 -ne `wc -c <'log_archie.1'`; then
    echo shar: \"'log_archie.1'\" unpacked with wrong size!
  fi
  # end of 'log_archie.1'
fi
# Unpack 'postarchie.awk' unless it already exists and "-c" was not given.
# The awk rules always emit one tab after the host, directory and file
# name and then add int((N-length)/8) more for alignment.  The original
# pad count could be zero or negative for long names (and depended on how
# the awk in use rounds a fractional substr length), which let adjacent
# fields run together and broke field-based sort/grep on the output.
if test -f 'postarchie.awk' && test "${1}" != "-c" ; then
  echo shar: Will not clobber existing file \"'postarchie.awk'\"
else
  echo shar: Extracting \"'postarchie.awk'\" \(664 characters\)
  sed "s/^X//" >'postarchie.awk' <<'END_OF_FILE'
X#!/bin/awk -f
X# Awk script to produce a single-line-per-file output format for archie.
X# Output format is
X#  hostname	directory	filename	size	date
X# padded with many tabs.
X# A tab always follows each name, so over-long names stay separate fields.
X# You WILL run into a bug in SunOS<=4.1.1b's sort utility if you run this stuff
X# through 'sort +2'; sort fails to ignore whitespace properly.
X/^Host / {host=$2 "\t" substr("\t\t\t\t\t\t", 1, int((47-length($2))/8)); next}
X/^ *Location:/ {dir=$2 "\t" substr("\t\t\t\t\t\t\t\t", 1, int((63-length($2))/8)); next}
X/^ *FILE/ { print host dir $7 "\t" substr("\t\t\t\t", 1, int((31-length($7))/8)) $3 " " $4 " " $5; next}
END_OF_FILE
  # The expected size must match the payload above byte for byte.
  if test 664 -ne `wc -c <'postarchie.awk'`; then
    echo shar: \"'postarchie.awk'\" unpacked with wrong size!
  fi
  # end of 'postarchie.awk'
fi
# Report that this part ran to the end and drop a marker file for it.
echo 'shar: End of archive 1 (of 1).'
cp /dev/null ark1isdone
# Collect the numbers of any parts whose marker file is absent.
MISSING=""
for I in 1
do
  test -f ark${I}isdone || MISSING="${MISSING} ${I}"
done
# With every part accounted for, the markers are removed; otherwise the
# outstanding part numbers are listed.
case "${MISSING}" in
"")
  echo 'You have the archive.'
  rm -f ark[1-9]isdone
  ;;
*)
  echo 'You still must unpack the following archives:'
  echo "        " ${MISSING}
  ;;
esac
# Stop the shell here with a success status: the lines that follow in the
# posting are the moderator's signature, not part of the archive script.
exit 0
exit 0 # Just in case...
-- 
Kent Landfield                   INTERNET: kent@sparky.IMD.Sterling.COM
Sterling Software, IMD           UUCP:     uunet!sparky!kent
Phone:    (402) 291-8300         FAX:      (402) 291-4362
Please send comp.sources.misc-related mail to kent@uunet.uu.net.