[alt.sources] better man && makewhatis

tchrist@convex.com (Tom Christiansen) (02/21/90)

Here is a reworked version of /usr/ucb/man and /usr/lib/makewhatis.
This is the beta version.  The final version will make its way to
comp.sources.misc; please send bug reports and enhancement requests
to me.  (Thanks in advance!)

They both use (and require) DBM versions of the whatis database.
you probably want to have NDBM to use this because I keep separate
databases for each element in the colon-delimited $MANPATH.  This
makes for extremely rapid location of man pages and whatis information.
A text whatis file is kept around for apropos.

Here are other features of this version of man:

    *	no restrictions on what subsections go in what sections.
	if you want subsection 8v or even 9j, just add man pages
	for it.

    *   you may define your own section search order.

    *   you don't need to have links to man pages or 
	short duplicate entries with just .so's in them
	for multiple references (saving inodes and obsoleting
	the recently posted .so burster).

    *	a -w flag to tell you which man pages are out there
	on something; somewhat like 'which' for man pages.

    *   a -t flag for troff'ing the man page.  if you set your
	TROFF envariable to a previewer, you can get troff'd
	docs at your workstation.

Here are some features of this version of makewhatis:

    *	tries hard to make pretty output, stripping troff
	directives.

    *   doesn't blow up on more files in a man directory 
	than the shell will glob.  (this happened to me!)

    *   accepts troff string macros for the dashes in the
	the NAME section

    *   prints a diagnostic for a malformed NAME section

    *   finds *all* references in the NAME sectoin

    *   recognizes MH's man macros (and .Sh from lwall)

    *   man other things that makewhatis used to do wrong

Here's one disadvantage:

    *   You *MUST* run makewhatis because I only look in 
	the database.  A way to run makewhatis incrementally
	will be provided for the final version so you can just
	add a few pages without running all of makewhatis.


See the configuration section at the start for some things
you'll want to tweak.  I've included the version of getopts.pl
that returns true or false to indicate parsing success.  


I hope you like it.

--tom

#!/bin/sh
#    This is a shell archive.
#    Run the following text with /bin/sh to extract.

echo x man
sed -e 's/^X//' << \EOFMARK > man
X#!/usr/bin/perl 
X# 
X# man - perl rewrite of man, whatis, apropos
X#
X# tom christiansen <tchrist@convex.com>
X# beta version
X#
X
X($program = $0) =~ s,.*/,,;
X
X###########################################################################
X# begin configuration section
X
X$PAGER     = "/usr/ucb/more" 		unless $PAGER   = $ENV{'PAGER'};
X$PAGER    .= ($PAGER =~ /less$/) ? ' -sf' : ' -s';
X
X$MANPATH  = "/usr/man:/usr/local/man" 	unless $MANPATH = $ENV{'MANPATH'};
X$MANSECT  = "l168n23457pot" 		unless $MANSECT = $ENV{'MANSECT'};
X$TROFF    = "/usr/bin/troff" 		unless $TROFF   = $ENV{'TROFF'};
X
X$NROFF    = "/usr/bin/nroff";
X$UL	  = "/usr/ucb/ul";
X
X$EGREP	  = "/usr/local/bin/egrep"; 
X$FAST_GREP = 1;				# probably only true for GNU grep
X
X# end configuration section
X###########################################################################
X
X&source('getopts.pl');
X
X$apropos = $program eq 'apropos';
X$whatis  = $program eq 'whatis';
X$whereis = $program eq 'whman';
X
X&Getopts('P:M:c:bfkltwd') || &usage;
X
X&usage if $#ARGV < 0;
X
X$MANPATH = $opt_P 	if $opt_P;
X$MANPATH = $opt_M 	if $opt_M;
X$want_section = $opt_c 	if $opt_c;
X
X$whatis = 1		if $opt_f;
X$apropos = 1		if $opt_k;
X$fromfile = 1		if $opt_l;
X$whereis = 1  		if $opt_w;
X$EGREP = ''		if $opt_b;
X$debug	= 1		if $opt_d;
X
X$roff = $opt_t ? 'troff' : 'nroff';
X
X@MANPATH = split(/:/,$MANPATH);
X
Xif ($whatis) {
X    &whatis;
X} elsif ($apropos) {
X    &apropos;
X} elsif ($whereis) {
X    &whereis;
X} else {
X    &man;
X} 
X
Xexit $status;
X
X###########################################################################
Xsub genwhatis {
X    local($elt,$whatis);
X
X    for $elt (@MANPATH) {
X	$whatis = "$elt/whatis";
X	push(@whatis, $whatis);
X    } 
X} 
X
X###########################################################################
Xsub whatis {
X    local($target, %seeking, $rhs, $cmd, $page, $section, $desc);
X    &genwhatis;
X
X
X    for $target (@ARGV) { $seeking{$target} = 1; } 
X
X    for $INDEX (@whatis) {
X	unless (dbmopen(INDEX,$INDEX,0644)) {
X	    warn "Can't dbmopen $INDEX: $!";
X	    next;
X	} 
X       	for $target (@ARGV) {
X	    next unless $entry = &fetch($target,'INDEX');
X	    delete $seeking{$target};
X	    for $rhs (split(/\002/, $entry)) {
X		($cmd, $page, $section, $desc) = split(/\001/, $rhs);
X		next unless $cmd =~ /$target/;
X		printf("%-30s - %s\n", "$cmd (see $page($section))", $desc);
X	    }
X	} 
X	eval 'dbmclose(INDEX)';
X    } 
X
X    for $target (keys %seeking) {
X	print "$program: $target: not found.\n";
X	$status = 1;
X    } 
X} 
X
X###########################################################################
Xsub apropos {
X    &genwhatis;  
X
X    $query = join('|',@ARGV);
X
X    for $target (@ARGV) { 
X	$target =~ y/A-Z/a-z/; 
X	$seeking{$target} = 1; 
X    } 
X
X    if ( $EGREP && ! -x $EGREP ) {
X	warn "can't run $EGREP: $!";
X	$EGREP = '';
X    } 
X
X    if ($EGREP && ($FAST_GREP || $#ARGV > 0))  {
X	if ($status = system $EGREP, '-hi', $query, @whatis) {
X	    print STDERR "$program: @ARGV: nothing appropriate\n";
X	    $status = 1;  
X	} 
X    } else {  # perl is faster than /bin/grep
X	foreach $WHATIS (@whatis) {
X	    unless (open WHATIS) {
X		warn "can't open $WHATIS: $!";
X		next;
X	    } 
XWHATIS:	    while (<WHATIS>) {
X		next unless /$query/io;
X		($target = $+) =~ y/A-Z/a-z/;
X		delete $seeking{$target};
X		print;
X	    } 
X	    close WHATIS;
X	} 
X
X	for $target (keys %seeking) {
X	    print STDERR "$program: $target: nothing appropriate\n";
X	    $status = 1;
X	}
X    } 
X}
X
X
X###########################################################################
Xsub source {
X    local($file) = @_;
X    local($return) = 0;
X
X
X    $return = do $file;
X    die "couldn't do \"$file\": $!" unless defined $return;
X    die "couldn't parse \"$file\": $@" if $@;
X    die "couldn't run \"$file\"" unless $return;
X}
X
X###########################################################################
Xsub usage {
X    print STDERR <<USAGE;
Xusage: man -w title ...  
X   or: whman title ...
X
X       man -f title ...
X   or: whatis title ...
X
X       man -k keyword ...
X   or: apropos keyword ...
X
X       man -l filename ...
X       man [ -t ] [ -P pathname ] [[ -c ] section ] title ...
XUSAGE
X    exit 1;
X}
X
X###########################################################################
X
Xsub fetch {
X    local($key,$array) = @_;
X    local($value);
X
X    return '' unless $value = eval "\$$array".'{$key};';
X    if ($@) { chop $@; die "bad eval: $@"; }
X
X    until ($value =~ /\001/) {
X	$value = eval "\$$array".'{$value};';
X	if ($@) { chop $@; die "bad eval: $@"; }
X    } 
X    return $value;
X} 
X
X###########################################################################
Xsub whereis {
X    local($target, @files);
X
X    foreach $target (@ARGV) {
X	@files = &find_files($target);
X	if ($#files < $[) {
X	    print STDERR "$program: $target not found\n";
X	    $status = 1;
X	} else {
X	    print "$target: @files\n";
X	}
X    } 
X} 
X
X
X###########################################################################
Xsub find_files {
X    local($target) = @_;
X    local($root, $rhs);
X    local(@retlist) = ();
X    local(@tmplist) = ();
X    # globals: $vars, $called_before, %dbm
X
X    $vars = 'dbm00';
X
X    if (!$called_before++) {
X	# generate dbm names
X	for $root (@MANPATH) {
X	    $dbm{$root} = $vars++; # magic incr
X	    $string = "dbmopen($dbm{$root},\"$root/whatis\",0644);";
X	    unless (eval $string) {
X		if ($@) { 
X		    chop $@;
X		    warn "Can't eval $string: $@";
X		} else {
X		    warn "Can't eval $string: $!";
X		}
X		$status = 1;
X		next;
X	    } 
X	    $dbmopened{$root} = 1;
X	}
X    } 
X
X    for $root (@MANPATH) {
X	@tmplist = ();
X	next unless $dbmopened{$root};
X	next unless $entry = &fetch($target,$dbm{$root});
X	for $rhs (split(/\002/, $entry)) {
X	    ($cmd, $page, $section, $desc) = split(/\001/, $rhs);
X	    next unless $cmd =~ /$target/;
X	    ($stem) = $section =~ /^(.)/;
X	    push(@tmplist,  "$root/man$stem/$page.$section");
X	}
X	push(@retlist, sort bysection @tmplist);
X    }
X    return @retlist;
X} 
X
X###########################################################################
Xsub man {
X    local($target);
X    $isatty = -t STDOUT;
X
X    if (length($ARGV[0]) <= 2 && $ARGV[0] =~ /^\d/) {
X	$want_section = shift @ARGV;
X	die "But what do you want from section $want_section?\n" if $#ARGV < 0;
X    } 
X
X    while ($target = shift(@ARGV)) {
X	$target = &get_page($target) unless $fromfile;
X	do $roff($target) if $target;
X    } 
X} 
X
X###########################################################################
Xsub get_page {
X    local($target) = @_;
X    local(@places);
X
X    @places = &find_files($target);
X    if ($#places < 0) {
X	&no_entry($target);
X	return '';
X    } 
X    for ( ; $#places >= 0; shift @places) {
X	if ($want_section) {
X	    if (length($want_section) == 1) {
X		next unless $places[0] =~ /\.$want_section[^.]*$/;
X	    } else {
X		next unless $places[0] =~ /\.$want_section$/;
X	    }
X	} 
X	last;
X    } 
X    if ($#places < 0) {
X	&no_entry($target);
X	return '';
X    }
X    return $places[0];
X}
X
X###########################################################################
Xsub no_entry {
X    print STDERR "No manual entry for $_[0]";
X    print " in section $want_section of the manual" if $want_section;
X    print ".\n";
X    $status = 1;
X} 
X
X###########################################################################
Xsub bysection {
X	  (index($MANSECT,substr($a,rindex($a,'.')+1,1))
X	<
X	  index($MANSECT,substr($b,rindex($b,'.')+1,1)))
X	? -1
X	:  1;
X} 
X
X###########################################################################
Xsub troff {
X    $status = 1 if system $TROFF, '-man', $_[0];
X} 
X
X###########################################################################
Xsub nroff {
X    local($manpage) = @_;
X    local($catpage);
X    local($tmppage);
X    local($is_tmp) = 0; 
X    local($output) = '';
X    local($command);
X
X
X    if ($fromfile) {
X	$command = "$NROFF -man $manpage |";
X    } else {
X	&source('stat.pl') unless defined &Stat;
X	($catpage = $manpage) =~ s,^(.*)/man(.*)/([^/]*)$,$1/cat$2/$3,;
X	$manroot = $1;
X	$tmppage = "/tmp/$3";
X
X	@st_man = &Stat($manpage);
X	@st_cat = &Stat($catpage);
X
X	if ($st_cat[$ST_MTIME] < $st_man[$ST_MTIME]) {
X	    if ($isatty) {
X		$| = 1;
X		print "Reformating page.  Please wait ... ";
X	    }
X	    unless (&ready($catpage)) {
X		$catpage = $tmppage;
X		$is_tmp++;
X		return unless &ready($catpage);
X	    } 
X	    if (system "chdir $manroot && $NROFF -man $manpage > $catpage") {
X		print STDERR "nroff of $catpage failed\n";
X		$status = 1;
X		return;
X	    } 
X	    print "done\n" if $isatty;
X	} 
X    }
X
X    $output =  "| $PAGER"  if $isatty;
X    # $status = 1 if system "$UL $catpage $output";  
X
X    system "$command $UL $catpage $output";
X} 
X
X###########################################################################
Xsub ready {
X    local($file) = @_;
X    unless (open (CAT, "> $file")) {
X	print STDERR "$program: can't open $file: $!\n";
X	$status = 1;
X	return 0;
X    }
X    close CAT;
X    chmod 0666, $file;
X    return 1;
X}
X
X
EOFMARK
echo x makewhatis
sed -e 's/^X//' << \EOFMARK > makewhatis
X#!/usr/bin/perl
X#
X# makewhatis: perl rewrite for makewhatis
X#
X
X($program = $0) =~ s,.*/,,;
X
Xdo 'getopts.pl' || die "Cannot do getopts.pl";
X
Xdo Getopts('dP:') || &usage;
X
X&usage if $#ARGV > -1;
X
Xsub usage {
X    die "usage: $program [-d] [-P manpath]\n";
X} 
X
X$manpath = $opt_P ? $opt_P : $ENV{'MANPATH'};
X$manpath = "/usr/man" unless $manpath;
X@manpath = split(/:/,$manpath);
X
X$debug = $opt_d;
X
X$SIG{'INT'} = 'CLEANUP';
X
X$WHATIS = "whatis";
X
Xforeach $root ( @manpath ) {
X    $filecount = $entries = 0;
X    chdir $root || die "can't chdir to $root: $!";
X    print "root to $root\n" if $debug;
X
X    open (WHATIS, "> $WHATIS.$$") || die "can't open $root/$WHATIS.$$: $!";
X    unlink "$WHATIS.dir", "$WHATIS.pag";
X    dbmopen(WHATIS, $WHATIS, 0644) || die "Can't dbmopen $root/$WHATIS: $!";
X
X    foreach $mandir ( <man?> ) {
X	next if $mandir eq 'man0';
X	chdir $mandir || die "can't chdir to $root/$mandir: $!";
X	print "subdir is $mandir\n" if $debug;
X	opendir(mandir,'.') || die "can't opendir('$root/$mandir'): $!";
X
XFILE:	while ($FILE = readdir(mandir)) {
X	    next if $FILE =~ /\.(bak|OLD)$/ || $FILE =~ /^\./;
X	    open FILE || (warn "can't open $FILE: $!\n", next FILE);
X	    $filecount++;
X	    print "opened $root/$mandir/$FILE\n" if $debug;
X	    &extract_names();
X
X
X	} 
X	closedir mandir;
X	chdir $root || die "can't chdir back to $root: $!";
X    } 
X    $, = "\n";
X    print WHATIS (sort @WHATIS),'';
X    $, = '';
X    close WHATIS || warn "can't close $WHATIS.$$: $!";
X    rename ("$WHATIS.$$", $WHATIS) 
X	|| warn "can't rename $WHATIS.$$ to $WHATIS: $!";
X    dbmclose(WHATIS) || warn  "can't dbmclose $WHATIS: $!";
X    print "$program: $root: found $entries entries in $filecount files\n";
X} 
X
X
X
X
Xexit 0;
X
Xsub CLEANUP {
X    print stderr "<<INTERRUPTED>> reading $FILE\n";
X    unlink "$WHATIS.$$", "WHATIS.pag", "$WHATIS.dir";
X    exit 1;
X} 
X
Xsub getline {
X    local ($_);
X    #print "getline called\n" if $debug;
X
X    $_ = <FILE>;
X    #print "gonna loop\n" if $debug;
X    {
X        chop;
X        if (/\\$/) {
X            chop;
X	    #print "gonna continue\n" if $debug;
X            $_ .= ' ';
X            $_ .= <FILE>;
X            redo;
X        }
X    }
X    #print "gonna return\n" if $debug;
X    $_;
X}
X
Xsub extract_names {
X    local($_);
X
XLINE: while (<FILE>) {
X	next LINE unless /^\.S[hH]\s+"?NAME"?/ || /^\.NA\s?/;
X	$linecount = 0;
X	@lines = ();
X	$nameline = '';
XNAME:	while ($_ = &getline()) {
X	    last NAME if /^\.(S[hH]|SY)\s?/;  # damn MH
X	    if ( $_ eq '.br' ) {
X		push(@lines, $nameline) if $nameline;
X		$nameline = '';
X		next;
X	    } 
X	    next if /^\./;
X	    $nameline .= ' ' if $nameline;
X	    $nameline .= $_;
X	    $linecount++;
X	} 
X
X	print "READ $linecount LINES in $FILE\n" if $linecount > 1 && $debug;
X
X	push(@lines, $nameline);
X
X	for ( @lines ) {
X	    next unless ord;
X	    s/\\f([PBIR]|\(..)//g;	# kill font changes
X	    s/\\s[+-]?\d+//g;		# kill point changes
X	    s/\\&//g;		
X	    s/\\\((ru|ul)/_/g;		
X	    s/\\\((mi|hy|em)/-/g;
X	    s/\\\(..//g;
X	    s/\\//g;		   	 # kill backslashes 
X	    s/^\.\\"\s*//;
X	    if (!/\s+-+\s+/) {
X		printf STDERR "%s: %s: no dash in \"%s\"\n",
X				$program, $FILE, $_;
X		return; #  for next file
X	    } 
X	    ($cmdlist, $desc) = ( $`, $' );
X	    ($page, $section) = $FILE =~ /^(\S+)\.(\S+)$/;
X	    #($section, $subsection) = $section =~ /^(.)(.*)/;
X	    push(@WHATIS,sprintf("%-30s - %s",
X		"$cmdlist (see $page($section))", $desc));
X	    $prototype = '';
X	    foreach $cmd (split(/[\s,]+/,$cmdlist)) {
X		$WHATIS{$cmd} .= "\002" if $WHATIS{$cmd};
X		if (! $prototype) {
X		    #print "storing $cmd\n" if $debug;
X		    $WHATIS{$cmd} .= join("\001",
X			    $cmdlist, $page, $section, $desc);
X		    $prototype = $cmd;
X		    
X		} else {
X		    #print "also storing $cmd as $prototype\n" if $debug;
X		    $WHATIS{$cmd} = $prototype;
X		} 
X		$entries++;
X	    } 
X	}
X	next FILE;
X    }  
X}
EOFMARK
echo x getopts.pl
sed -e 's/^X//' << \EOFMARK > getopts.pl
X
Xsub Getopts {
X    local($argumentative) = @_;
X    local(@args,$_,$first,$rest);
X    local($errs);
X
X    $errs = 0;
X
X    @args = split( / */, $argumentative );
X    while(($_ = $ARGV[0]) =~ /^-(.)(.*)/) {
X        ($first,$rest) = ($1,$2);
X        $pos = index($argumentative,$first);
X        if($pos >= $[) {
X            if($args[$pos+1] eq ':') {
X                shift(@ARGV);
X                if($rest eq '') {
X                    $rest = shift(@ARGV);
X                }
X                eval "\$opt_$first = \$rest;";
X            }
X            else {
X                eval "\$opt_$first = 1";
X                if($rest eq '') {
X                    shift(@ARGV);
X                }
X                else {
X                    $ARGV[0] = "-$rest";
X                }
X            }
X        }
X        else {
X            print STDERR "Unknown option: $first\n";
X            $errs = 1;
X            if($rest ne '') {
X                $ARGV[0] = "-$rest";
X            }
X            else {
X                shift(@ARGV);
X            }
X        }
X    }
X    return $errs == 0;
X}
X
X1;
EOFMARK
--

    Tom Christiansen                       {uunet,uiucdcs,sun}!convex!tchrist 
    Convex Computer Corporation                            tchrist@convex.COM
		 "EMACS belongs in <sys/errno.h>: Editor too big!"