[alt.sources] lq-text Full Text Retrieval Database Part 08/13

lee@sq.sq.com (Liam R. E. Quin) (03/04/91)
: cut here --- cut here --
: To unbundle, sh this file
#! /bin/sh
: part 08
echo x - lq-text/src/lqtext/lqword.c 1>&2
sed 's/^X//' >lq-text/src/lqtext/lqword.c <<'@@@End of lq-text/src/lqtext/lqword.c'
X/* lqword.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X/* lqword -- simple program to print information about individual words.
X *
X * $Id: lqword.c,v 2.8 90/10/06 00:51:00 lee Rel1-10 $
X */
X
X#include "globals.h" /* defines and declarations for database filenames */
X
X#include <stdio.h>
X#include <sys/types.h>
X#include <malloc.h>
X#include <fcntl.h> /* for fileinfo.h */
X#include <ctype.h>
X
X#ifdef BSD
X# define USI_MAX ((unsigned int) -1)
X#else
X# include <limits.h>
X  /* for USI_MAX, the largest unsigned integer.
X   * 4.3 BSD doesn't seem to have this.  I don't know how to get this
X   * on BSD systems.
X   */
X#endif
X
X#include "fileinfo.h"
X#include "wordinfo.h"
X#include "smalldb.h"
X#include "pblock.h"
X#include "wordrules.h"
X#include "emalloc.h"
X
X/*** Declarations: ***/
X/** System calls and library routines: **/
Xextern void exit();
X
X/** System calls: **/
X
X/** Unix Library Functions: **/
Xextern char *strncpy();
X#ifndef tolower
X extern int tolower();
X#endif
X
X/** lqtext library functions: **/
Xextern char *UnFlag();
Xextern t_WordInfo *WID2WordInfo();
Xextern int TooCommon();
Xextern void cleanupdb();
Xextern void SetDefaults();
Xextern void DefaultUsage();
Xextern void DeleteWord();
X
X/** functions defined within this file: */
Xvoid PrintWordInfo(), AllWordInfo();
Xvoid Display(), ShowWordList();
Xvoid dbmmarch();
X
X/** Macros and variable definitions **/
X
X#define DISPLAY_ALL 1
X#define DISPLAY_NAME 2
X    /* These are the possible DisplayMode values -- see main() */
X
Xchar *progname = 0;
X    /* Used for error messages */
X
Xint SilentMode = 0;
X    /* Set if we were invoked with the -s option.  In this mode, we behave
X     * like grep -s, and exit with a zero exit status if one or more of
X     * the words were found in the database.
X     */
X
Xint ListMode = 0;
X    /* Set if we are to provide a terser output format suitable for use
X     * with lqshow(1L).
X     */
X
Xint AsciiTrace = 0;
X    /* If this is non-zero, we provide debugging information.  The lqtext
X     * library also uses this variable.  Setting it to values greater
X     * than 1 or 2 will generally provide large amounts of debugging
X     * information.  If the library was compiled with -UASCIITRACE,
X     * however, there will be much less diagnostic output at higher
X     * levels.
X     */
X
Xstatic char *Revision = "lqword 2.2";
X
X/** end of declarations... **/
X
X
Xint
Xmain(argc, argv)
X    int argc;
X    char *argv[];
X{
X    extern int optind, getopt();  /* For getopt(3) */
X    extern char *optarg;	  /* For getopt(3) */
X    int ch;			  /* For getopt(3) */
X    int ErrorFlag = 0;		  /* For getopt(3) */
X    int DisplayMode = 0;
X	/* DisplayMode indicates what kind of information we are to
X	 * print in response to queries.  The values understood are
X	 * the DISPLAY_* constants.  Perhaps this should be an enum.
X	 */
X
X    progname = argv[0];
X	/* I see this as a library program, so I am leaving the full
X	 * path.  lqaddfile(1L) and lqphrase(1L) set progname to be
X	 * the filename of the command, rather than the full pathname.
X	 */
X
X    SetDefaults(argc, argv);
X	/* Deal with any arguments that are understood by all lqtext
X	 * programs.
X	 */
X
X    while ((ch = getopt(argc, argv, "aAD:lsVxZz:")) != EOF) {
X	switch (ch) {
X	case 'a':
X	    DisplayMode = DISPLAY_NAME;
X	    break;
X	case 'A':
X	    DisplayMode = DISPLAY_ALL;
X	    break;
X	case 'D':
X	    DeleteWord(optarg); /* MISFEATURE */
X		/* This actually removes all entries for the given word
X		 * from the database.  You need write permission, of
X		 * course.
X		 */
X	    break;
X	case 'l':
X	    ListMode = 1;
X	    break;
X	case 's':
X	    SilentMode = 1;
X	    break;
X	case 'V':
X	    fprintf(stderr, "%s version %s\n", progname, Revision);
X	    break;
X	case 'x':
X	    ErrorFlag++;
X	    break;
X	case '?':
X	    ErrorFlag++;
X	    break;
X	case 'z':
X	case 'Z':
X	    break; /* done by SetDefaults(); */
X	}
X    }
X
X    /* Normally put call to lrqError here to give a helpful message,
X     * but not yet ready to ship the error handling package, sorry
X     */
X    if (ErrorFlag) {
X	fprintf(stderr, "%s: options are:\n", progname);
X	fputs("\
X	-D Word -- delete the named word (DANGEROUS!)\n\
X	-l	-- list mode, for use with lqshow\n\
X	-s	-- silent mode (like grep -s)\n", stderr);
X	DefaultUsage();
X	    /* DefaultUsage() prints the list of the standard options. */
X	fputs("\n\
XIn addition, if no words are given, the following are understood:\n\
X	-a	-- print all words\n\
X	-A	-- print all matches to all words\n", stderr);
X	exit(1);
X    }
X
X    if (optind >= argc) {
X	if (SilentMode) exit(1);
X	    /* if there were no words given, none of them matched.
X	     * It could be argued that this case should be an error.
X	     */
X	if (DisplayMode) {
X	    AllWordInfo(DisplayMode);
X	} else {
X	    /* In this case, there were no command-line options and no
X	     * display-mode flags, so we do the default thing.
X	     * This happens to be to print every word in the database.
X	     * This is probably bogus behaviour -- there should be a better
X	     * way of finding words that match a given pattern than using
X	     * lqword | grep
X	     * which is what this allows.
X	     */
X	    dbmmarch();
X	}
X    } else {
X	if (!SilentMode && !ListMode) {
X	    /* Print some pretty headers */
X	    printf("       WID | Where   | Total   | Word\n");
X	    puts(
X"===========|=========|=========|============================================");
X	}
X
X	while (optind < argc) {
X	    PrintWordInfo(argv[optind++]);
X	}
X    }
X    cleanupdb();
X	/* close database files.  This is particularly important if we are
X	 * updating the database -- the horrible -D option -- but should
X	 * probably be done by liblqtext itself.
X	 */
X    exit(SilentMode); /* 0 or 1 (this is a little devious) */
X#ifdef lint
X    /*NOTREACHED*/
X    return 1;
X	/* this is for versions of lint and gcc that don't understand
X	 * that exit() doesn't return -- or, if it douse, that there is
X	 * nothing that can be done about it!
X	 */
X#endif
X}
X
Xvoid
XPrintWordInfo(Word)
X    char *Word;
X{
X    extern t_WordInfo *FindWordInfoFromIndex();
X    extern long atol();
X    extern t_WID Word2WID();
X    extern char *WordRoot();
X
X    register char *p;
X    t_WordInfo *WordInfo;
X    t_WID WID;
X    t_WordInfo Root;
X
X    Root.WordPlace.Flags = 0;
X
X    /** Find the canonical form of the word, with plurals reduced to the
X     ** singular and letters folded into lower case.
X     **/
X
X    /* First, remember if the word originally started with an upper case
X     * letter:
X     */
X    if (isupper(*Word)) {
X	Root.WordPlace.Flags |= WPF_UPPERCASE;
X    }
X
X    /* now convert to lower case and measure its length at the same time: */
X    for (p = Word; *p; p++) {
X	if (isupper(*p)) *p = tolower(*p);
X    }
X
X    Root.Length = p - Word;
X    Root.Word = Word;
X
X    /* Now call WordRoot() to find the canonical form: */
X    Word = WordRoot(&Root);
X
X    /** Now see if the canonical word is too common to list: **/
X
X    if (TooCommon(&Root)) {
X	/* It is listed in the common word list, so don't bother looking
X	 * it up at all
X	 */
X	if (!SilentMode) {
X	    fprintf(stderr, "No index information for: %s (too common)\n",
X								    Word);
X	}
X	return;
X    }
X
X    /** It is not too common, so look it up: **/
X
X    if (((WID = Word2WID(Word, Root.Length)) == (t_WID) 0) ||
X	(WordInfo = WID2WordInfo(WID)) == (t_WordInfo *) 0) {
X	if (!SilentMode) {
X	    if (WID) {
X		/* In this case the word is in the database (since it has
X		 * a non-zero WID), but not in the word index.  This might
X		 * happen if the word is being deleted (or added) by someone
X		 * else at this very moment, or if the database is corrupt.
X		 */
X		fprintf(stderr, "No index information for: %s (WID %lu)\n",
X								Word, WID);
X	    } else {
X		/* In this case the word is neither listed as common nor
X		 * found in the database.  Either it was spelt differently
X		 * there or it isn't there at all.
X		 */
X		fprintf(stderr, "No index information for: %s\n", Word);
X	    }
X	}
X	return;
X    }
X    if (SilentMode && WordInfo->NumberOfWordPlaces > 0) {
X	/* We found something, so there is no point looking further --
X	 * we already know enough to exit.  If a lot of words are given,
X	 * this could be a big efficiency win.
X	 */
X	exit(0);
X    }
X
X    /** Now we have the database entry for the word, so let's print it!
X     **/
X    Display(WordInfo, DISPLAY_ALL);
X
X    /** Now return the storage used...
X     **/
X    if (WordInfo) {
X	SlayWordInfo(WordInfo);
X    }
X
X    /** All done for this word.
X     **/
X}
X
X/* Display() -- print information about a single word.
X *
X * WordInfo is the index entry to print.  Unless ListMode is set, a
X * summary line (WID, Offset, NumberOfWordPlaces and the word itself)
X * is printed first.  If Verbose is DISPLAY_ALL, or ListMode is set,
X * and the word has at least one occurrence recorded, the occurrences
X * are then listed by ShowWordList().
X */
Xvoid
XDisplay(WordInfo, Verbose)
X    t_WordInfo *WordInfo;
X    int Verbose;
X{
X    char *Buf = emalloc(WordInfo->Length + 1);
X
X    /* Words in a t_WordInfo might not be null terminated, since the
X     * storage overhead and the work of putting the nulls there might
X     * be significant...  So make a terminated copy to print from.
X     */
X    (void) strncpy(Buf, WordInfo->Word, WordInfo->Length);
X    Buf[WordInfo->Length] = '\0';
X
X    if (!ListMode) {
X	/* Print a little header for the word, unless we were asked not to.
X	 * It is the terminated copy that must be handed to %s, not
X	 * WordInfo->Word itself.
X	 */
X	printf("%10lu | %7lu | %7lu | %s\n", WordInfo->WID,
X					     WordInfo->Offset,
X					     WordInfo->NumberOfWordPlaces,
X					     Buf
X	);
X    }
X    if ((ListMode || Verbose == DISPLAY_ALL) && WordInfo->NumberOfWordPlaces) {
X	/* If  there are occurrences in the database (there might not be if
X	 *     the word has been deleted, or has only just been added),
X	 * and  we want all the matches,
X	 * then  print the list of matches in the appropriate format:
X	 */
X	ShowWordList(WordInfo);
X    }
X
X    (void) efree(Buf);
X    /* reclaim storage */
X}
X
Xvoid
XShowWordList(WordInfo)
X    t_WordInfo *WordInfo;
X{
X    extern t_pblock *Getpblock();
X    t_FileInfo *GetFileInfo();
X
X    t_FileInfo *FileInfo = (t_FileInfo *) 0;
X    t_pblock *pblock = (t_pblock *) 0;
X    t_WordPlace *PP = (t_WordPlace *) 0;
X    int Place;
X    char *LastRoot = "[internal error lqword.c 392]";
X	/* the message is in case I make a coding error!.  The number
X	 * was once the line number of the message, but it only needs to
X	 * be a distinct enough message to search for.
X	 */
X
X    if (WordInfo->WordPlacesInHere >= WordInfo->NumberOfWordPlaces) {
X	/* In this case, the match info all fits in the index, so it
X	 * does not matter if automatic pre-fetching from the overflow
X	 * file "data" happens or not (i.e. if we are using Lazy Evaluation,
X	 * it doesn't happen, but it makes no difference in this case).
X	 */
X	PP = WordInfo->WordPlaces;
X    } else if ((pblock = Getpblock(WordInfo)) != (t_pblock *) 0) {
X	PP = pblock->WordPlaces;
X	/* If Lazy Evaluation is enabled, liblqtext might not have fetched
X	 * all of the match information from the overflow database, in
X	 * which case we must do it now.
X	 */
X    }
X
X    if (PP) {
X	t_FID LastFID = USI_MAX;
X	    /* This is not a plausible FID (File IDentifier), so it
X	     * will force a call to GetFileInfo() in the loop below.
X	     */
X	unsigned int LastFlags = 256 * 2;
X	    /* Similarly, this is an impossible flag value, since the
X	     * flags are constrained to fit in a single byte.
X	     */
X
X	/* cycle through the Place... */
X	for (Place = 0; Place < WordInfo->NumberOfWordPlaces; Place++) {
X
X	    char BIF[100]; char WIB[100];
X	    register char *p;
X	    char *Bp, *Wp;
X	    long l;
X
X	    if (LastFlags != PP[Place].Flags) {
X		LastFlags = PP[Place].Flags;
X		LastRoot = UnFlag(WordInfo, LastFlags);
X		    /* UnFlag() takes a canonical (singular, lower-case)
X		     * word and a set of flags, and reverses the
X		     * transformations implied by the flags.  For example,
X		     * if WordInfo->Word is "boy" and flags contain the
X		     * Plural flag, you should get "boys" returned.
X		     * Since we don't remember whether a word was in all
X		     * caps or had only the first letter capitalised (at
X		     * the moment, anyway), the routine will return Boys
X		     * even if the input was BOYS or BoYs.
X		     * Possessives (the boy's books) may also be indicated.
X		     */
X	    }
X
X	    if (LastFID != PP[Place].FID || FileInfo == (t_FileInfo *) 0) {
X		/* The first part of the test means we don't call the
X		 * function to retrieve the file name lots of times if
X		 * there are multiple matches in the same data file. 
X		 * This turns out to be a common case.
X		 */
X
X		/* Reclaim storage */
X		if (FileInfo) {
X		    if (FileInfo->Name) {
X			(void) efree(FileInfo->Name);
X		    }
X		    (void) efree(FileInfo);
X		}
X
X		/* Find the file name from the FID.  This routine should
X		 * be called FID2FileName(), and may in fact be renamed
X		 * in the future.
X		 */
X		if ((FileInfo = GetFileInfo(LastFID = PP[Place].FID)) ==
X						(t_FileInfo *) 0) {
X		    /* No filename information available.  This sometimes
X		     * happens if you rin lqword diring an lqaddfile
X		     * session and match a word in one of the new files.
X		     * Note that if the output is for reuse, we don't
X		     * want to include references to files whose names
X		     * we don't have!
X		     */
X		    if (!ListMode) {
X			printf("%20s | %-.5lu/%-.3lu | [FID %d]\n",
X			    LastRoot,
X			    PP[Place].BlockInFile,
X			    PP[Place].WordInBlock,
X			    PP[Place].FID);
X		    }
X		    continue;
X		}
X	    }
X
X	    /* This is an inline printf, because otherwise this call
X	     * to printf takes over 20% of the execution time, and nearly
X	     * 40% for a frequent word (e.g. over 1000 places) !!
X	     */
X	    p = &BIF[sizeof(BIF) - 1];
X	    *p = '\0';
X	    if (PP[Place].BlockInFile == 0) {
X		*--p = '0';
X	    } else for (l = PP[Place].BlockInFile; l; l /= 10) {
X		*--p = "0123456789"[l % 10];
X	    }
X	    Bp = p;
X
X	    p = &WIB[sizeof(WIB) - 1];
X	    *p = '\0';
X	    {
X		register int i = PP[Place].WordInBlock;
X		if (i == 0) {
X		    *--p = '0';
X		} else for (; i; i /= 10) {
X		    *--p = "0123456789"[i % 10];
X		}
X		Wp = p;
X	    }
X
X  	    if (ListMode) {
X		while (*Bp) {
X		    putchar(*Bp);
X		    Bp++;
X		}
X		putchar(' ');
X		while (*Wp) {
X		    putchar(*Wp);
X		    Wp++;
X		}
X		putchar(' ');
X		puts(FileInfo->Name);
X  	    } else {
X		/* Well, if we are not reusing the output, maybe the speed
X		 * is not quite so critical...
X		 */
X  		printf("%20s | %5lu/%3lu F=%3u S=%3u | %s\n",
X		    LastRoot,
X		    PP[Place].BlockInFile,
X		    PP[Place].WordInBlock,
X		    PP[Place].Flags, /* XXX */
X		    PP[Place].StuffBefore,
X		    FileInfo->Name);
X	    }
X	}
X    }
X
X    if (pblock) {
X	/* If we had to go and get the matches ourselves, we had better
X	 * release the storage.
X	 * Actually we should also be freeing the FileInfo and possibly
X	 * the WordInfo as well, but the pblock is the biggest... and I
X	 * am only adding comments today, not fixing code (I hope)...
X	 * NOTDONE FIXME
X	 */
X	(void) efree(pblock);
X    }
X}
X
Xvoid
XAllWordInfo(Verbose)
X    int Verbose;
X{
X    extern char *WID2Word();
X    extern t_WID GetMaxWID();
X
X    t_WID i;
X    t_WID MaxWid = GetMaxWID();
X    t_WordInfo *WordInfo;
X    char *Name;
X
X    /* Loop over all possible WID numbers and print information
X     * for each of them.
X     */
X    for (i = (t_WID) 1; i <= MaxWid; i++) {
X	if ((Name = WID2Word(i)) != (char *) 0) {
X
X	    /* If Name is zero, that WID is unused.  There might be gaps
X	     * if a word was deleted.
X	     */
X
X	    if ((WordInfo = WID2WordInfo(i)) != (t_WordInfo *) 0) {
X		Display(WordInfo, Verbose);
X		SlayWordInfo(WordInfo);
X	    } else {
X		/* In this case the word is known, but there is no further
X		 * information about it.  In the current inplementation,
X		 * this cannot happen unless someone else is updating the
X		 * database and replacing a WID whose word had been deleted.
X		 */
X		if (!ListMode) {
X		    /* If we are in list mode, it is probably because the
X		     * output is wanted by another prpgram, so we had
X		     * better not print out this (useless) entry.
X		     */
X		    printf("%10lu | %7lu |           | ?? %s\n",
X			i, 0L, Name);
X		}
X	    }
X
X	    /* Reclaim the storage used... */
X	    (void) efree(Name);
X	} /* end if */
X    } /* for each WID */
X
X    if (!ListMode) {
X	printf("Maximum WID is %lu\n", MaxWid);
X    }
X}
X
X/* dbmmarch -- print every key in the word index, one to a line.
X *
X * The index is opened with startdb(WordIndex); each record in it has
X * the word as its key and the WID as its content.  Some words may be
X * missed if the database is being updated while we walk through it.
X * If the index cannot be opened, a message is printed and the program
X * exits with status 1.
X */
Xvoid
Xdbmmarch()
X{
X    DBM *db = startdb(WordIndex);
X    datum Key;
X
X    if (db == (DBM *) 0) {
X	/* WordIndex comes from "globals.h".  The usual reason for not
X	 * being able to open it is that $LQTEXTDIR was not set and no
X	 * -d database-dir option was given; both of those are handled
X	 * by SetDefaults(), called from main().
X	 */
X	fprintf(stderr, "Can't open database file \"%s\"\n", WordIndex);
X	exit(1);
X    }
X
X    Key = dbm_firstkey(db);
X    while (Key.dsize != 0) {
X	/* IMPORTANT: the keys are not NUL-terminated, so they cannot be
X	 * given to printf() or puts() as they are.  Write out exactly
X	 * dsize bytes instead.
X	 */
X	register char *s;
X	char *End = Key.dptr + Key.dsize;
X
X	for (s = Key.dptr; s < End; s++) {
X	    putchar(*s);
X	}
X	putchar('\n');
X
X	Key = dbm_nextkey(db);
X    }
X    enddb(db);
X}
X
X/*
X * $Log:	lqword.c,v $
X * Revision 2.8  90/10/06  00:51:00  lee
X * Prepared for first beta release.
X * 
X * Revision 2.7  90/08/29  21:45:37  lee
X * Alpha release
X * 
X * Revision 2.6  90/08/08  22:22:53  lee
X * Added heavy comments.  Cleaned up dbmmarch() and made some other
X * minor fixes.
X * 
X * Revision 2.5  90/08/08  21:06:21  lee
X * Added -x option; removed rude message about getpts bugs.
X * 
X * Revision 2.4  90/04/21  18:50:38  lee
X * fixed a serious bug in the -l mode -- now prints the entire match!
X * 
X * Revision 2.3  90/03/27  13:20:57  lee
X * now passes gcc -Wall
X * 
X * Revision 2.2  89/10/08  20:47:23  lee
X * Working version of nx-text engine.  Addfile and wordinfo work OK.
X * 
X * Revision 2.1  89/10/02  01:16:10  lee
X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
X * 
X * Revision 1.3  89/09/17  23:04:42  lee
X * Various fixes; NumberInBlock now a short...
X * 
X * Revision 1.2  89/09/16  21:18:50  lee
X * First demonstratable version.
X * 
X * Revision 1.1  89/09/07  21:06:14  lee
X * Initial revision
X * 
X */
@@@End of lq-text/src/lqtext/lqword.c
echo x - lq-text/src/lqtext/matchword.sh 1>&2
sed 's/^X//' >lq-text/src/lqtext/matchword.sh <<'@@@End of lq-text/src/lqtext/matchword.sh'
X:
X# matchword pattern [...] -- grep for words in the database
X#
X# matchword -- Copyright 1990 Liam R. Quin.  All Rights Reserved.
X# This code is NOT in the public domain.
X# See the file ../COPYRIGHT for full details.
X#
X# $Id: matchword.sh,v 1.2 90/10/06 00:51:02 lee Rel1-10 $
X#
X
X# "echo" portability test:
XN=; C='\c'; if [ x"`echo -n hello`" = x"hello" ]; then N=-n;C=; fi
Xexport N C
X
Xans=no
Xwhile [ x"$ans" != x"q" ]
Xdo
X    echo $N "Enter a word or pattern: $C"
X    read pattern
X    if [ x"$pattern" = x"q" ]
X    then
X	break
X    fi
X    WORDS=`lqword | grep "^${pattern}\$"`
X    if [ "$WORDS" = "" ]
X    then echo "(no match in the database for ${pattern})"
X    else echo `echo "$WORDS" | wc -l` words found:
X	 echo "$WORDS" | sort -d | rs | ${PAGER-more}
X	 # If you don't have rs, you could use cat instead.
X	 # PAGER could also be "pg -nse", or "less -q".
X    fi
Xdone
X
@@@End of lq-text/src/lqtext/matchword.sh
echo x - lq-text/src/lqtext/sizes.c 1>&2
sed 's/^X//' >lq-text/src/lqtext/sizes.c <<'@@@End of lq-text/src/lqtext/sizes.c'
X/* sizes.c -- Copyright 1990 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X#ifndef lint
X static char *Rcs = "$Id: sizes.c,v 1.3 90/10/06 00:51:03 lee Rel1-10 $";
X#endif
X
X#include "globals.h" /* defines and declarations for database filenames */
X
X#include <stdio.h>
X#include <sys/types.h>
X#include "fileinfo.h"
X#include "wordinfo.h"
X#include "pblock.h"
X#include "wordrules.h"
X#include "wordindex.h"
X
Xmain()
X{
X    printf("FileInfo  %u bytes\n", sizeof(t_FileInfo));
X    printf("WordInfo  %u bytes\n", sizeof(t_WordInfo));
X    printf("WordPlace %u bytes\n", sizeof(t_WordPlace));
X    printf("pblock    %u bytes\n", sizeof(t_pblock));
X}
@@@End of lq-text/src/lqtext/sizes.c
echo x - lq-text/src/lqtext/wordtable.c 1>&2
sed 's/^X//' >lq-text/src/lqtext/wordtable.c <<'@@@End of lq-text/src/lqtext/wordtable.c'
X/* wordtable.c -- Copyright 1989, 1990 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file ../COPYRIGHT for full details.
X */
X
X/* Symbol Table Interface to text retrieval database.
X * Handles both the internal and external indexes.
X *
X * This originally used a linked list.  Converting to a hash table reduced
X * the time to index comp.os.vms from nearly an hour to one and a half
X * minutes...
X *
X * Liam Quin, 1989
X */
X
X/* 
X * $Id: wordtable.c,v 2.11 91/02/20 19:07:37 lee Rel1-10 $
X */
X
X#ifndef lint
X static char *Rcs = "$Id: wordtable.c,v 2.11 91/02/20 19:07:37 lee Rel1-10 $";
X#endif
X
X#include "globals.h" /* defines and declarations for database filenames */
X
X#ifdef SYSV
Xextern int _filbuf();
X#endif
X#include <stdio.h>
X#include <malloc.h>
X#include <ctype.h>
X#include <sys/types.h>
X#include <fcntl.h> /* for O_RDWR wtc */
X#include "smalldb.h"
X#include "fileinfo.h"
X#include "wordinfo.h"
X#include "pblock.h"
X#include "wordrules.h"
X#include "emalloc.h"
X
X#define HASHSIZ 32768 /* MUST be a power of two */
X
X#ifndef MAXWORDSINCACHE
X# define MAXWORDSINCACHE  (HASHSIZ * 10)
X#endif
Xint MaxWordsInCache = MAXWORDSINCACHE;
X
Xextern int AsciiTrace;
X
X/* useful macros */
X#define NumberOfElements(array, type) (sizeof(array)/sizeof(type))
X#define STRCMP(a,b) ((*(a) > *(b)) ? 1 : ((*(a) < *(b)) ? -1 : strcmp(a,b)) )
X/* #define Hash(WordInfo) \
X *	(dbm_hash(WordInfo->Word, WordInfo->Length) % HashSize)
X */
X
X/** System calls and library functions used in this file: **/
X
X/** Lqtext calls */
Xextern unsigned int Putpblock();
Xextern void DeleteWordPlaces();
X
X/** System calls: */
X
X/** Library Functions: */
Xextern char *strncpy();
Xextern int strcmp();
Xextern void perror();
Xextern void exit();
X/**/
X
X#define enew(var, type) (var = (type *) emalloc(sizeof (type)))
X
Xextern char *progname;
Xstatic int HashSize = HASHSIZ; /* MUST be a power of two */
X
X#ifdef NEWSYM
X
X#define NPLACES 7
X/* This is small to optimise the common case -- by far the majority of
X * words are used fewer than 10 times.  In the cases where we've gone
X * wrong, well, there'll be a few thousand.
X */
X
X/* One element of the in-memory word cache.  The elements of the
X * SymbolTable array are the hash slots themselves; further elements
X * are allocated singly and chained from a slot through Next when a
X * word has more than NPLACES occurrences waiting to be written.
X */
Xtypedef struct s_HashEl {
X    char *Word;
X	/* Allocated copy of the word made by AddWord(); a null pointer
X	 * here marks an unused slot.
X	 */
X    t_WID WID;
X	/* (t_WID) -1 after SetElEmpty(); AddWord() then stores the
X	 * result of Word2WID().  DumpCache() treats zero as "not yet
X	 * in the index" and calls NewEntry() for it.
X	 */
X    int PlacesUsed;
X	/* how many of the entries in Places[] are filled in */
X    t_WordPlace Places[NPLACES];
X    struct s_HashEl *Next;
X	/* the next overflow element for the same word, or null */
X} t_HashEl;
X
Xstatic t_HashEl *SymbolTable;
X    /* the hash table itself, allocated by StartHash() */
Xstatic t_HashEl *LastEl;
X    /* one past the final slot of SymbolTable; null until StartHash()
X     * has been called, which is how AddWord() knows to call it.
X     */
Xstatic int WordsInCache = 0;
X    /* incremented by AddWord() on each call once the table exists, and
X     * set back to zero by DumpCache().
X     */
X
XStartHash()
X{
X    if (MaxWordsInCache) HashSize = MaxWordsInCache / 16;
X    SymbolTable = (t_HashEl *) emalloc(sizeof(t_HashEl) * HashSize);
X    /* Note that we only need to initialise the Word pointers... */
X    for (LastEl = SymbolTable; LastEl != &SymbolTable[HashSize]; LastEl++) {
X	LastEl->Word = (char *) 0;
X    }
X    /* ASSERT: LastEl == &SymbolTable[HashSize] */
X    MaxWordsInCache = HashSize;
X}
X
X/* SetElEmpty -- put a hash element into its initial, unused state:
X * no word, no word places and no overflow chain.
X */
XSetElEmpty(El)
X    t_HashEl *El;
X{
X    El->Next = (t_HashEl *) 0;
X    El->PlacesUsed = 0;
X    El->Word = (char *) 0;
X
X    /* The WID is deliberately set to -1 and NOT to zero, so that
X     * "haven't looked it up yet" can be told apart from "looked it up,
X     * and it is unknown".
X     */
X    El->WID = (t_WID) -1;
X}
X
Xvoid DumpCache();
X
Xvoid
XAddWord(WordInfo)
X    t_WordInfo *WordInfo;
X{
X    register t_HashEl *HashEl;
X    int Slot;
X    t_HashEl *FirstEl;
X
X    if (!WordInfo || !WordInfo->Word || !WordInfo->Word[0]) {
X	(void) fprintf(stderr, "%s: warning: Null Word in AddWord\n", progname);
X	return;
X    }
X
X    if (!LastEl) {
X	StartHash();
X    } else if (MaxWordsInCache && ++WordsInCache > MaxWordsInCache) {
X	DumpCache(1);
X    }
X
X    if (WordInfo->Word[0] == 'q') {
X	register char *xp;
X
X	for (xp = &WordInfo->Word[1]; *xp && *xp == 'x'; xp++) {
X	    /*NULLBODY*/
X	}
X	if (!*xp) {
X	    if (AsciiTrace >= 10) {
X		(void) fprintf(stderr, "Discard %d\n", WordInfo->Word);
X	    }
X	    return;
X	}
X    }
X
X    Slot = Hash(WordInfo);
X    FirstEl = HashEl = &SymbolTable[Slot];
X
X
X    for (;;) {
X	if (!HashEl->Word) {
X	    extern char *strcpy();
X	    extern t_WID Word2WID();
X
X	    if (AsciiTrace > 9) {
X		(void) fprintf(stderr, "New ", WordInfo->Word);
X	    }
X	    /* make a new element */
X	    SetElEmpty(HashEl);
X	    HashEl->Word = emalloc(WordInfo->Length + 1);
X	    (void) strcpy(HashEl->Word, WordInfo->Word);
X	    /**
X	    HashEl->WID = (t_WID) -1;
X	    **/
X	    HashEl->WID = Word2WID(HashEl->Word, WordInfo->Length);
X	    /** **/
X	    break;
X	} else if (STREQ(HashEl->Word, WordInfo->Word)) {
X	    break;
X	}
X
X	if (++HashEl == LastEl) HashEl = SymbolTable;
X
X	if (HashEl == FirstEl) {
X	    /* We need to dump the cache and start again */
X	    DumpCache(1);
X	    AddWord(WordInfo);
X	    return;
X	}
X    }
X    /* If we get here, all we need to do is add the WordPlace */
X    if (AsciiTrace > 9) {
X	(void) fprintf(stderr, "AddWord %s\n", WordInfo->Word);
X    }
X    FirstEl = HashEl;
X
X    while (HashEl->PlacesUsed >= NPLACES && HashEl->Next != (t_HashEl *) 0) {
X	HashEl = HashEl->Next;
X    }
X
X    if (HashEl->PlacesUsed >= NPLACES) {
X	t_HashEl *New;
X
X	New = (t_HashEl *) malloc(sizeof(t_HashEl));
X	SetElEmpty(New);
X
X	New->Next = FirstEl->Next;
X	FirstEl->Next = HashEl = New;
X    }
X    HashEl->Places[HashEl->PlacesUsed] = WordInfo->WordPlace; /* structure copy */
X    HashEl->PlacesUsed++;
X    return;
X}
X
Xvoid
XDumpCache(CallFree)
X    int CallFree;
X{
X    register t_HashEl *HashEl, *MeNext;
X    int Progress = 0;
X
X    for (HashEl = SymbolTable; HashEl != LastEl; HashEl++) {
X	if (HashEl->Word) {
X	    extern t_WordInfo *MakeWordInfo();
X	    unsigned len;
X	    t_WordInfo *WP;
X
X	    /* We are going to make a new index entry for the word.
X	     * There are two cases -- depending on whether the word
X	     * is already indexed or not.
X	     * In the former case we must merge the new information.
X	     * In the latter case we don't have to read the old info,
X	     * but we must make a new entry in the WID Index.
X	     */
X
X	    len = strlen(HashEl->Word);
X	    if (HashEl->WID == (t_WID) -1) {
X		HashEl->WID = Word2WID(HashEl->Word, len);
X	    }
X	    WP = MakeWordInfo(HashEl->WID, len, HashEl->Word);
X
X	    if (HashEl->WID == (t_WID) 0) {
X		NewEntry(HashEl, WP);
X	    } else {
X		UpdateEntry(HashEl, WP);
X	    }
X	    /* Reclaim storage */
X	    if (CallFree) {
X		extern void SlayWordInfo();
X		register t_HashEl *FreeMe = HashEl;
X
X		(void) SlayWordInfo(WP);
X
X		efree(HashEl->Word);
X		FreeMe->Word = (char *) 0;
X		FreeMe = FreeMe->Next; /* don't do the first one */
X		while (FreeMe) {
X		    MeNext = FreeMe->Next;
X		    (void) efree((char *) FreeMe);
X		    FreeMe = MeNext;
X		}
X	    }
X	}
X	if (AsciiTrace > 1) {
X	    if (HashEl - SymbolTable >= Progress * (HashSize / 16)) {
X		fputc(" 01234567890ABCDEFGHIJKL"[Progress], stderr);
X		++Progress;
X	    }
X	}
X    }
X    WordsInCache = 0;
X}
X
X/* NewEntry -- write out the cached occurrences of a word that is not
X * yet in the database.
X *
X * HashEl is the head of the word's chain of cached places, and WP is a
X * t_WordInfo for the word; a new WID is assigned and stored in it.
X * If the entry cannot be put into the index, a message is printed and
X * the program exits with status 1.
X *
X * The caller still owns WP and must SlayWordInfo() it afterwards.
X */
XNewEntry(HashEl, WP)
X    t_HashEl *HashEl;
X    t_WordInfo *WP;
X{
X    extern t_WID GetNextWID();
X    t_pblock *pblock;
X    long MatchCount;
X    t_HashEl *Ep;
X
X    /** Assign a new WID */
X    WP->WID = GetNextWID();
X
X    /** make a WIDIndex entry and mark it as invalid (NOTDONE) */
X
X    /* In order to do this, we must make a "pblock", a structure that
X     * reflects the physical database.  This is fairly low-level stuff
X     * for efficiency's sake...
X     */
X
X    /* count the total number of entries we're adding: */
X    for (Ep = HashEl, MatchCount = 0; Ep; Ep = Ep->Next) {
X	MatchCount += Ep->PlacesUsed;
X    }
X
X    /* allocate a pblock structure.  These are rather devious things, a
X     * structure with an array tacked onto the end.
X     */
X    pblock = (t_pblock *) emalloc(sizeof(t_pblock) +
X				MatchCount * sizeof(t_WordPlace));
X    
X    pblock->WID = WP->WID;
X    pblock->ChainStart = 0L; /* address on disk -- not there yet, so 0! */
X    pblock->NumberOfWordPlaces = WP->NumberOfWordPlaces = MatchCount;
X
X    /* fill in the WordPlaces, in chain order */
X    for (Ep = HashEl, MatchCount = 0; Ep; Ep = Ep->Next) {
X	register int i;
X
X	for (i = 0; i < Ep->PlacesUsed; i++) {
X	    pblock->WordPlaces[MatchCount++] = Ep->Places[i]; /* struct copy */
X	}
X    }
X
X    /* Now fill in enough of WP to let us use the low-level routines: */
X    WP->FID = (t_FID) 0;
X    WP->Next = (t_WordInfo *) 0;
X    WP->DataBlock = (char *) 0;
X    WP->WordPlaceStart = (char *) 0;
X    WP->WordPlaces = (t_WordPlace *) 0;
X    WP->WordPlacesInHere = 0;
X    WP->WordPlace.FID = 0;
X    WP->WordPlace.Flags = 0;
X    WP->Offset = 0;
X
X    /* First, let's make an index entry.
X     * The macro below is also used by UpdateEntry(), and relies on
X     * there being a variable called WP in scope.
X     * NOTE(review): as written it works out as
X     * WIDBLOCKSIZE - ((WP->Length + 2) / 3); the placing of the
X     * brackets suggests that (WIDBLOCKSIZE - (WP->Length + 2)) / 3 may
X     * have been meant -- confirm before changing it.
X     */
X#ifndef MaxWordPlacesInAWordBlock
X# define MaxWordPlacesInAWordBlock ((WIDBLOCKSIZE-(WP->Length+2)/3))
X#endif
X    if (pblock->NumberOfWordPlaces <= MaxWordPlacesInAWordBlock) {
X	/* presumably MkWIB() sets WP->WordPlacesInHere, which is tested
X	 * just below -- TODO confirm
X	 */
X	(void) MkWIB(WP, pblock);
X    }
X
X    /** write out the new entry */
X    if (WP->WordPlacesInHere == pblock->NumberOfWordPlaces) {
X	/* In this case it all fits into the main index */
X	if (PutWordInfoIntoIndex(WP, (unsigned long) 0L) < 0) {
X	    extern int errno;
X	    int e = errno;
X	    fprintf(stderr, "%s: Couldn't insert word \"%s\" into the index",
X				progname, WP->Word);
X	    /* fprintf() may have changed errno, so put back the value
X	     * saved from the failure before perror() reports it.
X	     */
X	    errno = e;
X	    perror("");
X	    exit(1);
X	}
X    } else {
X	(void) Putpblock(WP, pblock);
X	if (PutWordInfoIntoIndex(WP, pblock->ChainStart) < 0) {
X	    extern int errno;
X	    int e = errno;
X	    fprintf(stderr, "%s: Couldn't re-insert word \"%s\" into the index",
X				progname, WP->Word);
X	    errno = e; /* as above: report the original failure */
X	    perror("");
X	    exit(1);
X	}
X    }
X
X    /** mark it as valid (NOTDONE) */
X
X    /** reclaim storage */
X    (void) efree((char *) pblock);
X    /* the caller *must* do SlayWordInfo(WP) */
X}
X
X/* UpdateEntry -- merge the cached occurrences of a word into the entry
X * that it already has in the database.
X *
X * HashEl is the head of the word's chain of cached places, and WP is a
X * t_WordInfo holding the word's WID.  The existing entry is fetched,
X * the cached places are added to the end of its list, the old places
X * are deleted from the disk, and the merged entry is written back.
X * If the existing entry cannot be found, NewEntry() is used instead.
X * If the entry cannot be put into the index, a message is printed and
X * the program exits with status 1.
X *
X * The caller still owns WP and must SlayWordInfo() it afterwards.
X */
XUpdateEntry(HashEl, WP)
X    t_HashEl *HashEl;
X    t_WordInfo *WP;
X{
X    extern t_pblock *Getpblock();
X    extern t_WordInfo *WID2WordInfo();
X    t_pblock *pblock;
X    long MatchCount;
X    t_HashEl *Ep;
X    t_WordInfo *Wpp;
X
X    /** Mark the old entry as invalid (NOTDONE) */
X
X    /** get the old entry */
X    if ((Wpp = WID2WordInfo(WP->WID)) == (t_WordInfo *) 0) {
X	/* someone else has just deleted it! */
X	NewEntry(HashEl, WP);
X	return;
X    }
X    /* It would be best if we could append to the old entry... which is what
X     * I had in mind when I designed the disk storage stuff... but you can't.
X     */
X    /* NOTE(review): the result of Getpblock() is not checked here, but
X     * other callers test it for a null pointer -- confirm that it
X     * cannot fail at this point.
X     */
X    pblock = Getpblock(Wpp);
X
X    /** merge the old and new entries */
X
X    /* count the total number of entries we're adding: */
X    for (Ep = HashEl, MatchCount = 0; Ep; Ep = Ep->Next) {
X	MatchCount += Ep->PlacesUsed;
X    }
X
X    /* make room for the new places after the old ones */
X    pblock = (t_pblock *) erealloc((char *) pblock, sizeof(t_pblock) +
X	     (Wpp->NumberOfWordPlaces + MatchCount) * sizeof(t_WordPlace));
X
X    /* delete the old entry from disk */
X    if (Wpp->Offset) {
X	DeleteWordPlaces(Wpp->Offset, Wpp->WID);
X    }
X
X    /* fill in the WordPlaces, adding them after the existing ones */
X    for (Ep = HashEl, MatchCount = 0; Ep; Ep = Ep->Next) {
X	register int i;
X
X	for (i = 0; i < Ep->PlacesUsed; i++) {
X	    pblock->WordPlaces[pblock->NumberOfWordPlaces++] =
X					Ep->Places[i]; /* struct copy */
X	}
X    }
X
X    Wpp->Offset = 0L; /* it's invalid now... */
X    Wpp->WordPlacesInHere = 0;
X
X    /* First, let's make an index entry: */
X    /* NOTE(review): MkWIB() is given WP here, but it is Wpp whose
X     * WordPlacesInHere is tested below and Wpp that is written to the
X     * index; NewEntry() uses the same t_WordInfo for all three.
X     * Check whether Wpp was meant.
X     */
X    if (pblock->NumberOfWordPlaces <= MaxWordPlacesInAWordBlock) {
X	(void) MkWIB(WP, pblock);
X    }
X
X    /** write out the new entry */
X    if (Wpp->WordPlacesInHere == pblock->NumberOfWordPlaces) {
X	/* In this case it all fits into the main index */
X	if (PutWordInfoIntoIndex(Wpp, (unsigned long) 0L) < 0) {
X	    extern int errno;
X	    int e = errno;
X	    fprintf(stderr, "%s: Couldn't insert word \"%s\" into the index",
X				progname, Wpp->Word);
X	    /* fprintf() may have changed errno, so put back the value
X	     * saved from the failure before perror() reports it.
X	     */
X	    errno = e;
X	    perror("");
X	    exit(1);
X	}
X    } else {
X	(void) Putpblock(Wpp, pblock);
X	if (PutWordInfoIntoIndex(Wpp, pblock->ChainStart) < 0) {
X	    extern int errno;
X	    int e = errno;
X	    fprintf(stderr, "%s: Couldn't re-insert word \"%s\" into the index",
X				progname, Wpp->Word);
X	    errno = e; /* as above: report the original failure */
X	    perror("");
X	    exit(1);
X	}
X    }
X
X    /** mark it as valid (NOTDONE) */
X
X    /** reclaim storage */
X    (void) efree((char *)pblock);
X    /* the caller *must* do SlayWordInfo(WP) */
X    (void) SlayWordInfo(Wpp);
X}
X
X#else /* NEWSYM */
X/* The in-memory word cache used when NEWSYM is not defined: one chain of
X * t_WordPlaceList entries per slot, indexed by the value Hash() returns.
X * NOTE(review): Hash() reduces modulo HashSize while this array has
X * HASHSIZ slots -- confirm that HashSize can never exceed HASHSIZ.
X */
Xstatic t_WordPlaceList *SymbolTable[HASHSIZ]; /* static --> initialised to 0 */
X#endif /* NEWSYM */
X
X#ifdef __GNU__
Xinline
X#endif
X#ifndef Hash
X/* Hash -- turn a word into a hash table slot number.
X * The first WordInfo->Length bytes of WordInfo->Word are folded into an
X * unsigned long, the running value being multiplied by 65599 before each
X * byte is added; the result is returned modulo HashSize (so HashSize
X * does not have to be a power of two).
X */
Xint
XHash(WordInfo)
X    t_WordInfo *WordInfo;
X{
X    register char *Cursor = WordInfo->Word;
X    register int Remaining = WordInfo->Length;
X    register unsigned long Sum = 0;
X
X#ifdef DUFF /* the same loop, unrolled eight times with Duff's device */
X
X#define HASHC	Sum = 65599 * Sum + *Cursor++
X
X    if (Remaining > 0) {
X	register int Passes = (Remaining + 7) >> 3;
X
X	/* Jump into the middle of the unrolled loop so that the first pass
X	 * deals with the Remaining % 8 odd bytes (a full eight if none).
X	 */
X	switch (Remaining & 7) {
X	case 0:	do {
X		HASHC;	case 7:	HASHC;
X	case 6:	HASHC;	case 5:	HASHC;
X	case 4:	HASHC;	case 3:	HASHC;
X	case 2:	HASHC;	case 1:	HASHC;
X		} while (--Passes);
X	}
X
X    }
X#else /* DUFF */
X    for (; Remaining; --Remaining) {
X	Sum = 65599 * Sum + *Cursor++;
X    }
X#endif /* DUFF */
X    return Sum % HashSize;
X}
X#endif
X
Xstatic int HashOK = 0;
X
Xvoid
XInitHash()
X{
X    HashOK = 1;
X}
X
X#ifndef NEWSYM
Xstatic int WordsInCache = 0;
X
X/* FIXME: this ought to take a WordInfo and a WordPlaceList instead.
X * Using a hash table means that we can end up with really pathological
X * paging behaviour.  Nearly all of lqaddfile is resident when running
X * on a Sun.  Hence, I shall be replacing this code entirely soon with
X * something that has less memory fragmentation, perhaps by coalescing
X * list members or with a tree.
X * For now, MaxWordsInCache is a parameter that you can set to zero if
X * you want.
X *
X * Also, the cache structure should be clever enough to avoid writing
X * out the more common words if it can, so as to minimise the number
X * of data _fetches_ that have to be done.
X * You could also argue that it should be more efficient to add new data,
X * of course.  I couldn't disagree.
X *
X * The next change required is to make AddWord do a little more of the
X * work -- in particular, to call Word2WID for each new word, in an
X * attempt to make cache dumping faster.
X */
X
X/* AddWord -- remember one occurrence of a word in the in-memory cache.
X *
X * A new t_WordPlaceList entry holding a copy of WordInfo->WordPlace is
X * linked into the chain for the word's hash slot.  The chain is searched
X * for the first entry whose word does not compare greater than the new
X * one (STRCMP result <= 0) and the new entry goes in front of it; if that
X * entry holds the same word, the new entry shares its Word string rather
X * than taking a copy.
X * Words made of a `q' followed only by `x's are not stored at all.
X * When MaxWordsInCache is non-zero and more than that many entries have
X * been added, the cache is written out and freed with DumpCache(1).
X */
XAddWord(WordInfo) /* old version */
X    t_WordInfo *WordInfo;
X{
X    t_WordPlaceList **Link;	/* the link the new entry is hung on */
X    t_WordPlaceList *Follower;	/* the entry that will come after it */
X    t_WordPlaceList *Fresh;	/* the new entry itself */
X    int Comparison = 1;		/* zero only if an equal word was found */
X    int Slot;
X
X    if (!HashOK) InitHash();
X
X    /* Each of these means a serious internal error in the caller; when
X     * one happens it tends to happen for every word in the input.
X     */
X    if (!WordInfo) {
X	fprintf(stderr, "AddWord(0)\n");
X	return;
X    }
X    if (!WordInfo->Word) {
X	fprintf(stderr, "AddWord(Word=0)\n");
X	return;
X    }
X    if (!WordInfo->Word[0]) {
X	fprintf(stderr, "AddWord(Word[0]=0)\n");
X	return;
X    }
X#ifdef ASCIITRACE
X    if (AsciiTrace > 20) {
X	fprintf(stderr, "[%s.len %d]\n", WordInfo->Word, WordInfo->Length);
X    }
X#endif
X
X    Slot = Hash(WordInfo);
X
X#ifdef ASCIITRACE
X    if (AsciiTrace > 10) {
X	fprintf(stderr, "H %d %s\n", Slot, WordInfo->Word);
X    }
X#endif
X
X    /* Words of the form qxxxxx* are not indexed, so that the filters can
X     * preprocess the files without upsetting the word counts.
X     */
X    if (WordInfo->Word[0] == 'q') {
X	register char *Scan = WordInfo->Word + 1;
X
X	/* step over the run of x's that follows the q */
X	while (Scan - WordInfo->Word < WordInfo->Length && *Scan == 'x') {
X	    Scan++;
X	}
X
X	if (Scan - WordInfo->Word == WordInfo->Length) {
X	    /* nothing but x's up to the end of the word */
X#ifdef ASCIITRACE
X	    if (AsciiTrace > 10) {
X		(void) fprintf(stderr, "rejected %s (too boring)\n",
X					WordInfo->Word);
X	    }
X#endif
X	    return;
X	}
X    }
X
X    /* Find the insertion point in this slot's chain */
X    Link = &SymbolTable[Slot];
X    while (*Link) {
X	Comparison = STRCMP((*Link)->Word, WordInfo->Word);
X	if (Comparison <= 0) {
X	    break;
X	}
X	Link = &((*Link)->Next);
X    }
X
X    /* The new entry goes in front of whatever the link points at now,
X     * i.e. at the start of any group of entries for the same word.
X     */
X    Follower = *Link;
X
X    enew(*Link, t_WordPlaceList);
X    Fresh = *Link;
X    Fresh->Next = Follower;
X    Fresh->WordPlace = WordInfo->WordPlace; /* structure copy */
X    Fresh->WordPlace.FID = WordInfo->WordPlace.FID;
X
X    if (Comparison == 0 && Follower) {
X	/* The word is already saved, so we only need to link to it */
X	Fresh->Word = Follower->Word;
X    } else {
X	Fresh->Word = emalloc(WordInfo->Length + 1);
X	(void) strncpy(Fresh->Word, WordInfo->Word, (int) WordInfo->Length);
X	Fresh->Word[WordInfo->Length] = '\0';
X    }
X
X    /* A MaxWordsInCache of zero means the cache is never dumped here */
X    if (MaxWordsInCache) {
X	if (++WordsInCache > MaxWordsInCache) {
X	    void DumpCache();
X
X	    DumpCache(1);
X	    WordsInCache = 0;
X	}
X    }
X}
X
Xvoid
XDumpCache(CallFree)
X    int CallFree; /* call efree() if non-zero */
X{
X    extern int WriteWordChain();
X
X    register int Slot;
X    register t_WordPlaceList *WordPlaceList;
X    int WordsLeft = WordsInCache;
X    int EmptySlots = 0, UsedSlots = 0;
X    int Progress = 0;
X
X    if (WordsInCache == 0) return; /* save some work maybe */
X
X    if (AsciiTrace) {
X	fprintf(stderr, "Writing%s%d words\n",
X			(CallFree) ? " and freeing " : " ", WordsInCache);
X    }
X
X    for (Slot = 0; WordsLeft > 0 && Slot < HASHSIZ; Slot++) {
X
X	if (AsciiTrace > 1) {
X	    if (Slot >= Progress * (HASHSIZ / 16)) {
X		fputc(" 01234567890ABCDEFGHIJKL"[Progress], stderr);
X		++Progress;
X	    }
X	}
X	if (SymbolTable[Slot] == (t_WordPlaceList *) 0) {
X	    ++EmptySlots;
X	    continue;
X	} else {
X	    char *LastFreed = (char *) 0;
X
X	    ++UsedSlots;
X	    WordPlaceList = SymbolTable[Slot];
X	    WordsLeft -= WriteWordChain(WordPlaceList);
X
X	    if (CallFree) {
X		while (WordPlaceList) {
X		    register t_WordPlaceList *SavePointer;
X
X		    if (WordPlaceList->Word &&
X					WordPlaceList->Word != LastFreed) {
X			efree(WordPlaceList->Word);
X			LastFreed = WordPlaceList->Word;
X		    }
X
X		    SavePointer = WordPlaceList->Next;
X		    efree((char *) WordPlaceList);
X		    WordPlaceList = SavePointer;
X		}
X		SymbolTable[Slot] = (t_WordPlaceList *) 0;
X	    }
X	}
X    }
X
X    if (AsciiTrace) {
X	double d = UsedSlots;
X	d /= (EmptySlots + UsedSlots);
X	d *= 100.0;
X
X	fprintf(stderr, "%4.3f%% cache used -- %d out of (%d <= %d)\n",
X			d, UsedSlots, UsedSlots + EmptySlots, HASHSIZ);
X#ifdef MALLOCTRACE
X	mallocmap();
X#endif
X    }
X
X    if (WordsInCache != 0 && CallFree) {
X	WordsInCache = 0;
X    }
X}
X
X#endif /*!NEWSYM*/
X
X/*
X * $Log:	wordtable.c,v $
X * Revision 2.11  91/02/20  19:07:37  lee
X * The qxxx fix only worked if ASCIITRACE was defined!
X * 
X * Revision 2.10  90/10/06  00:51:05  lee
X * Prepared for first beta release.
X * 
X * Revision 2.9  90/10/05  23:44:30  lee
X * Major experimentation with new symbol table failed...
X * 
X * Revision 2.8  90/09/26  19:45:02  lee
X * Added call to mallocmap() in ifdef MALLTRACE.
X * 
X * Revision 2.7  90/09/20  18:58:25  lee
X * Added some comments, and deleted a needless test.  Reorderered a loop
X * in the (probably vain) hope of a speed-up in the face of paging...
X * 
X * Revision 2.6  90/09/19  20:25:44  lee
X * Don't index "qxxxxxxxx" words (this is a hook for filters...)
X * 
X * Revision 2.5  90/08/29  21:46:11  lee
X * Alpha release
X * 
X * Revision 2.4  90/08/09  19:17:37  lee
X * BSD lint and Saber
X * 
X * Revision 2.3  90/03/21  17:32:31  lee
X * new hashing function, masses, masses better -- the old one only ever
X * used abuot 6% of the available values!
X * 
X * Revision 2.2  89/10/08  20:47:47  lee
X * Working version of nx-text engine.  Addfile and wordinfo work OK.
X * 
X * Revision 2.1  89/10/02  01:16:22  lee
X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
X * 
X * Revision 1.3  89/09/17  23:05:15  lee
X * Various fixes; NumberInBlock now a short...
X * 
X * Revision 1.2  89/09/16  21:18:55  lee
X * First demonstratable version.
X * 
X * Revision 1.1  89/09/07  21:06:20  lee
X * Initial revision
X * 
X */
@@@End of lq-text/src/lqtext/wordtable.c
echo x - lq-text/src/menu/Makefile 1>&2
sed 's/^X//' >lq-text/src/menu/Makefile <<'@@@End of lq-text/src/menu/Makefile'
X# Makefile for simple curses-based menu interface.
X#
X# $Id: Makefile,v 1.3 90/10/06 01:28:02 lee Rel1-10 $
X
XPWD=menu
X
X# PERFORMANCE makes curses go faster
XEXTRA=-I../h -DPERFORMANCE
XOPT=-O -g
XDEFS= -DASCIITRACE -UBSD -DSYSV
XWHICHDBM=sdbm
X# change the next three lines to be the same as the lq-text definitions.
XDBMLIBS=$(LIBDIR)/libsdbm.a
XBCOPY=bcopy.o
X# DBMLIBS=-lndbm -linet # 386/ix with hbtcpip provides a good bcopy()
X
XCFLAGS= $(OPT) $(DEFS) -UBSD -DSYSV $(GCCF) -D$(WHICHDBM) $(EXTRA)
XCC=gcc
XTERMCAP=-lcurses
XRANLIB=ranlib
X
XTEXT=lqtext
XPROG=m # a simple example of using the library...
XPROGOBJS=example.o
XPROGSRC=example.c
XLIBDIR=../lib
XBINDIR=../bin
XLIAMLIB=$(LIBDIR)/liblq.a
XLQTEXTLIB=$(LIBDIR)/liblqtext.a
XMENULIB=liblqmenu.a
X
XPROGS=$(PROG) $(TEXT) # removed by make clean
X
XOBJS=menu.o error.o stringbox.o OldCurses.o
XSRCS=menu.c error.c stringbox.c OldCurses.c
X
Xall: $(TEXT) m
X
Xinstall: $(MENULIB) $(TEXT)
X	cp $(MENULIB) $(LIBDIR)/$(MENULIB)
X	cp $(TEXT) $(BINDIR)/$(TEXT)
X	strip $(BINDIR)/$(TEXT)
X
X$(TEXT): text.o $(LQTEXTLIB) $(LIAMLIB) $(MENULIB) $(BCOPY)
X	$(CC) $(CFLAGS) -o $(TEXT) text.o $(BCOPY) \
X	$(MENULIB) $(LQTEXTLIB) $(DBMLIBS) $(LIAMLIB) $(TERMCAP)
X
X$(MENULIB): $(OBJS)
X	rm -f $(MENULIB)
X	ar rv $(MENULIB) $(OBJS)
X	$(RANLIB) $(MENULIB)
X
X# The demo program is linked directly against the menu objects and the
X# bcopy() replacement, so they must be prerequisites as well; otherwise
X# "make m" on a clean tree tries to link objects that were never built.
X$(PROG): $(PROGOBJS) $(OBJS) $(BCOPY)
X	$(CC) $(CFLAGS) -o $(PROG) $(OBJS) $(PROGOBJS) $(BCOPY) $(TERMCAP)
X
X# Run lint over the menu library sources, keeping a copy of the output.
X# (The variable holding the demo program's source is PROGSRC, not PROGSRCS.)
Xlint$(PROG): $(OBJS) $(SRCS) $(PROGSRC)
X	lint $(CFLAGS) $(SRCS) $(TERMCAP) 2>&1 | tee lint$(PROG)
X
X# Tidy should leave the final executables, but otherwise remove all
X# generated files
Xtidy:
X	/bin/rm -f *.o core make.log .mk m.log
X
X# Clean should revert to a distribution state as far as possible
Xclean:
X	/bin/rm -f *.o core *.a $(PROGS) $(CHARGEN) make.log .mk m.log
X
Xtext.o: text.c
X	$(CC) $(CFLAGS) $(TEXTINC) -c text.c
X
X
Xdepend:
X	mkdep $(CFLAGS) *.c
X
X#
X# $Log:	Makefile,v $
X# Revision 1.3  90/10/06  01:28:02  lee
X# deleted mkdep output.
X# 
X# Revision 1.2  90/10/01  20:33:09  lee
X# Added BSD compatibility hooks and improved "make clean".
X# 
X# Revision 1.1  90/08/29  21:48:48  lee
X# Initial revision
X# 
X# Revision 2.1  89/08/07  13:52:22  lee
X# First fully working release; this is the basis for all
X# future development.
X# 
X# Revision 1.2  89/08/04  17:59:23  lee
X# Fully working with Basic Functionality.
X# Scrolling menubar, scrolling menus, moveable Info windows.
X# 
X# Revision 1.1  89/07/27  11:41:39  lee
X# Initial revision
X#
X 
X# DO NOT DELETE THIS LINE -- mkdep uses it.
X# DO NOT PUT ANYTHING AFTER THIS LINE, IT WILL GO AWAY.
X
X# IF YOU PUT ANYTHING HERE IT WILL GO AWAY
@@@End of lq-text/src/menu/Makefile
echo x - lq-text/src/menu/README 1>&2
sed 's/^X//' >lq-text/src/menu/README <<'@@@End of lq-text/src/menu/README'
XThis directory contains as much as necessary of my curses menu library
Xto demonstrate lq-text.  (and I just saw some more files I could remove...)
XThis is a fairly simple curses-based front end to the lq-text text retrieval
Xsoftware.
X
XIt has only been tested on System V Release 3.2, and almost certainly will
Xnot work on anything else without at least a little effort.
X
XSee the notes on porting below if you want to try...
X
XPlease do not ask me for the rest of the package.  If I get the time and the
Xnecessary facilities, I will make it available.  I am hoping to have a
Xversion which will run with no source changes under X windows as well as
Xunder Curses, but it is not trivial...  What you see here is a hacked version,
Xin order to minimise what I post.  Sorry.
X
X
XTo install:
X
X(1) You need to have lq-text already.
X    Make it and test that it works, for example by indexing the unix man pages.
X    In particular, check that lqphrase works with two-word phrases.  If it
X    doesn't, there is little point in proceeding.
X
X(2) You will need to edit Makefile in this directory to point to the directory
X    containing the lq-text source:
X    Change the definition of $(NX) as appropriate -- for example,
X	NX=../../../src/lq-text/src
X    or something.
X
X(3) make text
X
X(4) ln text lqtext (if you want)
X
X(5) try it.  If you don't have working function keys, you can use ESC
X    followed by a digit (e.g. ESC 1 is the same as F1), and when you are
X    entering phrases, ^D at the start of the line will take you back to the
X    main menu just as F1 does.
X
X    **>>> You will need to have lqshow in your path for this to work. <<<**
X    **>>> You will need to set $LQTEXTDIR, or use the -d option.      <<<**
X    See the man page for lq-text for command-line options to "text".
X
X    Try
X	from the File menu, select "new words"
X	(you can type 'x' for an explanation at any point in the menus)
X
X	type some words or phrases
X
X	select "match all" from the "All Words" menu
X
X	the numbers by the phrases indicate the number of matches;
X	you can then do "browse all" from the "All Words" menu.
X
X    To exit, press "q" from the main menu, or select "Finish" from the
X    Main Menu.
X
X    When you have typed 'x', a box will appear containing an explanation.
X    You can type 'x' at this point for an explanation of what to do with
X    the box... for example, you can move the explain-box around the screen
X    or resize it if you want.  I have no idea why you would want to do
X    this, but it can be a little fun for people who are bored... and is
X    a facility that came for free from my curses/menu package...
X
X(6) You might also like to try making "m", and running examples/vsh, which
X    is a simple shell-script.  It is not meant to be a useful shell -- just
X    a tiny demo I wrote at home in some spare time...
X
X
X(7) Now investigate internal.h and menu.h if you want to change things.
X    If you change these, go back to step (3)
X
XLee
Xsq.com
XThu Dec 14 21:06:34 EST 1989
@@@End of lq-text/src/menu/README
echo x - lq-text/src/menu/bcopy.c 1>&2
sed 's/^X//' >lq-text/src/menu/bcopy.c <<'@@@End of lq-text/src/menu/bcopy.c'
X#ifdef BCOPYTEST
X# include <stdio.h>
X#endif
X
X/* this is a simple replacement for bcopy() where the native bcopy()
X * does not handle overlapping blocks.
X * do
X *	cc -DBCOPYTEST -o bcopy bcopy.c
X * and run "./bcopy" for a simple test.  You should get three
X * identical lines of output.
X */
X
Xbcopy(src, dest, nbytes)
X    char *dest;
X    char *src;
X    int nbytes;
X{
X    /* We have to be clever about this...
X     * If src < dest then we copy from the top down
X     * otherwise, copy from the bottom up...
X     */
X    
X    register char *p, *q;
X
X    if (src < dest) {
X	for (p = &src[nbytes - 1], q = &dest[nbytes - 1]; nbytes--; q--, p--) {
X	    *q = *p;
X	}
X    } else {
X	for (p = src, q = dest; nbytes--; p++, q++) {
X	    *q = *p;
X	}
X    }
X}
X
X#ifdef BCOPYTEST
X/* Simple self-test: print a string, then print it again after moving it
X * down and back up by twelve bytes within one buffer.  The distance is
X * shorter than the string, so both copies are between overlapping areas.
X */
Xint
Xmain()
X{
X    char buffer[4096];
X    char *s = "The naked children hugged each other";
X
X    puts(s); /* first line */
X    (void) sprintf(&buffer[12], "%s", s);
X    /* bcopy() has no prototype, so the count must really be an int */
X    bcopy(&buffer[12], buffer, (int) strlen(s) + 1);
X    printf("[%s]\n", buffer); /* 2nd line */
X    bcopy(buffer, &buffer[12], (int) strlen(s) + 1);
X    printf("[%s]\n", &buffer[12]); /* 3rd line */
X    return 0; /* give the shell a defined exit status */
X}
X#endif
@@@End of lq-text/src/menu/bcopy.c
echo x - lq-text/src/menu/OldCurses.c 1>&2
sed 's/^X//' >lq-text/src/menu/OldCurses.c <<'@@@End of lq-text/src/menu/OldCurses.c'
X/* Compatibility routines for older versions of curses...
X * $Id: OldCurses.c,v 1.2 90/10/04 16:27:58 lee Rel1-10 $
X *
X */
X
X#include <curses.h>
X#include <ctype.h>
X
X#ifndef A_STANDOUT
X#include "oldcurses.h"
X
X#undef CONTROL
X#define CONTROL(c) (c ^ 64)
X
X#undef wgetch
X
Xchtype
XLqwgetch(w)
X    WINDOW *w;
X{
X    int ch = wgetch(w);
X
X    if (isprint(ch)) return ch;
X
X    switch (ch) {
X    case CONTROL('^'): return KEY_HOME;
X    case CONTROL('P'): return KEY_UP;
X    case CONTROL('N'): return KEY_DOWN;
X    case CONTROL('B'): return KEY_LEFT;
X    case CONTROL('F'): return KEY_RIGHT;
X    case CONTROL('X'): return KEY_HELP; /* Xplain.... (groan) */
X    case '\033': /* Escape */
X	(void) fprintf(stderr, "ESC\007");
X	(void) fflush(stderr);
X
X	switch (ch = wgetch(w)) {
X	case 0:		return KEY_F0;
X	case 1:		return KEY_F(1);
X	case 2:		return KEY_F(2);
X	case 3:		return KEY_F(3);
X	case 4:		return KEY_F(4);
X	case 5:		return KEY_F(5);
X	case 6:		return KEY_F(6);
X	case 7:		return KEY_F(7);
X	case 8:		return KEY_F(8);
X	case 9:		return KEY_F(9);
X	case 'a': case 'A':	return KEY_F(10);
X	case 'b': case 'B':	return KEY_F(11);
X	case 'c': case 'C':	return KEY_F(12);
X	case 'd': case 'D':	return KEY_F(13);
X	case 'e': case 'E':	return KEY_F(14);
X	case 'f': case 'F':	return KEY_F(15);
X	case 'h':	return KEY_HELP;
X	}
X	break;
X    }
X    return ch;
X}
X
Xvoid
Xbeep()
X{
X    (void) putc('\b', stderr);
X    (void) fflush(stderr);
X}
X
X/* Wrapper round the curses box() that supplies default characters.
X * oldcurses.h defines box as LqBox, so this definition is compiled under
X * the name LqBox; the #undef inside the body removes that macro again so
X * that the call at the end reaches the curses library's own box().
X * A vert or hor of zero is replaced by ACS_VLINE or ACS_HLINE.
X */
Xvoid
Xbox(win, vert, hor)
X    WINDOW *win;
X    int vert;
X    int hor;
X{
X#undef box
X    /* from here on, "box" means the real curses routine */
X    if (hor == 0) hor = ACS_HLINE;
X    if (vert == 0) vert = ACS_VLINE;
X    box(win, vert, hor);
X}
X
Xvoid
XLqattrset(win, attr)
X    WINDOW *win;
X    int attr;
X{
X    if (attr) {
X	wstandout(win);
X    } else {
X	wstandend(win);
X    }
X}
X
X/* Stand-in for wnoutrefresh() where curses does not provide it: all it
X * does is call touchwin() on the window.  No value is returned.
X */
Xwnoutrefresh(win)
X    WINDOW *win;
X{
X    touchwin(win);
X}
X#endif
X
X/* $Log:	OldCurses.c,v $
X * Revision 1.2  90/10/04  16:27:58  lee
X * SysV compat improved.
X * 
X * Revision 1.1  90/10/03  21:54:04  lee
X * Initial revision
X * 
X *
X */
@@@End of lq-text/src/menu/OldCurses.c
echo x - lq-text/src/menu/oldcurses.h 1>&2
sed 's/^X//' >lq-text/src/menu/oldcurses.h <<'@@@End of lq-text/src/menu/oldcurses.h'
X/* oldcurses.h -- compatibility with pre-System V.3 curses...
X * $Id: oldcurses.h,v 1.2 90/10/04 16:28:31 lee Rel1-10 $
X */
X
Xtypedef int chtype;
X
X#define ACS_LARROW	'>'
X#define ACS_RARROW	'<'
X#define ACS_HLINE	'='
X#define ACS_VLINE	'|'
X#define ACS_LRCORNER	'+'
X#define ACS_LLCORNER	'+'
X
X/* Line drawing: */
X#define ACS_BSSS	'+'	/* T-piece */
X#define ACS_SBSS	'+'	/* -| */
X#define ACS_SSBS	'+'	/* inverted T-piece */
X#define ACS_SSSB	'+'	/* |- */
X#define ACS_BBSS	'+'	/* top right corner */
X#define ACS_BSSB	'+'	/* bottom left corner */
X
X#define KEY_DOWN	257
X#define KEY_UP  	258
X#define KEY_LEFT	259
X#define KEY_RIGHT	260
X#define KEY_HELP	261
X#define KEY_HOME	262
X#define KEY_F0		300
X#define KEY_F(n)	(KEY_F0+n)
X
X#undef getch
X#define getch()	Lqwgetch(stdscr)
X#define wgetch	Lqwgetch
X
X#undef box
X#define box LqBox
X
X#define A_STANDOUT 1
X#undef standout
X#undef standend
X#define wattrset Lqattrset
X#define attrset(a) Lqattrset(stdscr, a)
X#define keypad(win, bool)	1 /* ignore this one please */
X
X/* $Log:	oldcurses.h,v $
X * Revision 1.2  90/10/04  16:28:31  lee
X * SysV compat improved.
X * 
X * Revision 1.1  90/10/03  21:56:32  lee
X * Initial revision
X * 
X *
X */
@@@End of lq-text/src/menu/oldcurses.h
echo end of part 08
-- 
Liam R. E. Quin,  lee@sq.com, SoftQuad Inc., Toronto, +1 (416) 963-8337