[alt.sources] lq-text Full Text Retrieval Database Part 03/13

lee@sq.sq.com (Liam R. E. Quin) (03/04/91)

: cut here --- cut here --
: To unbundle, sh this file
#! /bin/sh
: part 03
: extract the helper script that switches the build to the BSD hash package
echo x - lq-text/src/UseHash 1>&2
cat > lq-text/src/UseHash << 'barefoot_choirboy'
# Run this if you want to use the BSD hash package (ozmahash)
# Stop if the source directory is missing rather than copying from
# whatever directory we happen to be in.
cd src || exit 1
cp ozmahash/*.h h
cp ozmahash/ndbm.h h/ozmahash.h
barefoot_choirboy
: the script was written to lq-text/src, so that is the path to make executable
chmod +x lq-text/src/UseHash
echo x - lq-text/src/liblqtext/Defaults.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/Defaults.c <<'@@@End of lq-text/src/liblqtext/Defaults.c'
X/* Defaults.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X *
X * $Id: Defaults.c,v 1.7 90/10/06 00:11:37 lee Rel1-10 $
X *
X * $Log:	Defaults.c,v $
X * Revision 1.7  90/10/06  00:11:37  lee
X * Prepared for first beta release.
X * 
X * Revision 1.6  90/08/29  21:46:25  lee
X * Alpha release.
X * 
X * Revision 1.5  90/08/09  19:16:08  lee
X * *** empty log message ***
X * 
X * Revision 1.4  90/04/21  17:26:26  lee
X * now passes gcc -W (before Canada...)
X * 
X * Revision 1.3  90/03/23  17:58:57  lee
X * Integrated with globals.h and added a few more comments.
X * Also fixed a bug whereby the configuration file over-rode both
X * command-line options and environment variables!
X * 
X * Revision 1.2  90/03/20  20:52:38  lee
X * removed some globals...
X * 
X *
X */
X
X#define DefineThem /* turn externs off so we do initialisations here */
X# include "globals.h" /* defines and declarations for database filenames */
X#undef DefineThem
X#undef EXTERN
X#include <fcntl.h>
X#include <errno.h>
X#ifdef SYSV
Xextern int _filbuf(); /* this must appear before stdio.h is included... */
X#endif
X#include <stdio.h>
X#include <malloc.h>
X#include <ctype.h>
X#include "emalloc.h"
X#include <sys/types.h>
X#include "fileinfo.h"
X#include "wordinfo.h"
X#include "phrase.h"
X
X/* $Id: Defaults.c,v 1.7 90/10/06 00:11:37 lee Rel1-10 $
X *
X * This file is part of nx-text, Liam Quin's text retrieval package.
X *
X * Defaults.c -- set up filenames etc. from defaults + cmd line + env.
X *
X * -DUNDERHOME is used here, as is DEFAULTCOMMONWORDS, etc. from Makefile.
X * See comments in Makefile.
X *
X * $Log:	Defaults.c,v $
X * Revision 1.7  90/10/06  00:11:37  lee
X * Prepared for first beta release.
X * 
X * Revision 1.6  90/08/29  21:46:25  lee
X * Alpha release.
X * 
X * Revision 1.5  90/08/09  19:16:08  lee
X * *** empty log message ***
X * 
X * Revision 1.4  90/04/21  17:26:26  lee
X * now passes gcc -W (before Canada...)
X * 
X * Revision 1.3  90/03/23  17:58:57  lee
X * Integrated with globals.h and added a few more comments.
X * Also fixed a bug whereby the configuration file over-rode both
X * command-line options and environment variables!
X * 
X * Revision 1.2  90/03/20  20:52:38  lee
X * removed some globals...
X * 
X *
X */
X
X/* System and Library calls used in this function:
X *
X */
Xextern int open(), close();
Xextern void exit();
X
Xextern int atoi(), strcmp(), strlen();
X#ifndef tolower
X extern int tolower();
X#endif
Xextern char *strcpy();
Xextern void perror();
Xextern int ReadCommonWords(), IsDir();
Xint cknatstr();
Xstatic int NextChar();
Xstatic void ReadDefaultFile();
X
X
Xtypedef enum { /* records where the current value of a setting came from */
X    FW_Cmdline, /* given with a command-line option (-c or -d) */
X    FW_Envvar, /* taken from an environment variable */
X    FW_Default, /* use the default */
X    FW_File, /* from the config file */
X    FW_None /* don't use any at all */
X} t_FromWhere;
X
Xstatic t_FromWhere DirFromWhere = FW_Default;
Xstatic t_FromWhere CommonFromWhere = FW_Default;
Xstatic t_FromWhere DocFromWhere = FW_Default;
X
Xextern int MakeDocPath(); /* hand it DOCPATH... */
Xextern int AsciiTrace;
X
Xchar *mkdbm();
Xchar *joinstr3();
X
X/* should PCM_HalfCase be in globals.h??? */
Xt_PhraseCaseMatch PhraseMatchLevel = PCM_HalfCase;
X
Xvoid
XSetDefaults(argc, argv)
X    int argc;
X    char **argv;
X{
X    extern char *getenv();
X    extern char *progname;
X    char *p;
X
X    /* main() should have set progname.  If it didn't. we don't strip
X     * the leading / as this is (I hope!) a testing and not a production
X     * version... and an early test at that!
X     */
X    if (!progname || !*progname) progname = argv[0];
X
X    /* loop over arguments, looking for
X     * -d -- set directory for database
X     * -c -- common words file
X     *
X     * don't use getopts, as we'll be using that later in main(),
X     * and it doesn't like being called twice.
X     * As a result, main() should ignore the z: option.
X     */
X    while (--argc > 0) {
X	if (**++argv == '-' || **argv == '+') {
X	    char TurnOn = (**argv == '-');
X
X	    switch((*argv)[1]) {
X	    case 'm': /* precise matching */
X		argv[0][1] = 'z'; /* so it gets ignored by getopt */
X
X		if (!*(p = &argv[0][2])) {
X		    if (argc > 1) {
X			argc--; argv++;
X			p = (*argv);
X		    } else {
X			fprintf(stderr,
X			"%s: -m must be followed by a, h or p; see -x\n",
X								progname);
X			exit(1);
X		    }
X		}
X		if (p[1]) {
X		    fprintf(stderr,
X		    "%s: -m must be followed by a, h or p, not \"%s\"\n",
X								progname, p);
X		}
X
X		switch (*p) {
X		case 'p': /* precise */
X		    PhraseMatchLevel = PCM_SameCase;
X		    break;
X		case 'h': /* heuristic */
X		    PhraseMatchLevel = PCM_HalfCase;
X		    break;
X		case 'a': /* any, approxmate */
X		    PhraseMatchLevel = PCM_AnyCase;
X		    break;
X		default:
X		    fprintf(stderr,
X		    "%s: -m must be followed by \"p\", \"h\" or \"a\";\n",
X								    progname);
X		    fprintf(stderr,
X		    "use %s -xv for more explanation.\n", progname);
X		    exit(1);
X		}
X		break;
X
X	    case 'v': /* -v is the same as -t1 */
X		argv[0][1] = 'Z'; /* so it gets ignored by getopt */
X		++AsciiTrace;
X		break;
X	    case 't': /* trace level */
X		argv[0][1] = 'z'; /* so it gets ignored by getopt */
X		if (argv[0][2] != '\0') {
X		    p = &argv[0][2];
X		} else {
X		    if (argc > 1) {
X			argc--;
X			p = (*++argv);
X		    } else {
X			p = "1";
X		    }
X		}
X		if (cknatstr(p)) {
X		    AsciiTrace = atoi(p);
X		} else {
X		    fprintf(stderr, "%s: -t: \"%s\" is not a number\n",
X								    progname, p);
X		    exit(1);
X		}
X		if (AsciiTrace <= 0) AsciiTrace = 1;
X		fprintf(stderr, "%s: trace level set to %d\n",
X							    progname, AsciiTrace);
X
X		break;
X	    case 'c': /* common file */
X		if (TurnOn) {
X		    CommonFromWhere = FW_Cmdline;
X		    argv[0][1] = 'z'; /* so it gets ignored by getopt */
X		    if ((*argv)[2] != '\0') {
X			CommonWordFile = &(*argv[2]);
X		    } else {
X			if (argc > 1) {
X			    CommonWordFile = argv[1];
X			    argc--; argv++;
X			} else {
X			    fprintf(stderr,
X				"%s: -c option must be followed by a filename",
X									progname);
X			    exit(1);
X			}
X		    }
X		} else { /* Turn off, +c, may be undocumented right now */
X		    CommonFromWhere = FW_None;
X		    break;
X		}
X		break;
X	    case 'd':
X		argv[0][1] = 'z'; /* so it gets ignored by getopt */
X		DirFromWhere = FW_Cmdline;
X		if (argv[0][2] != '\0') {
X		    DatabaseDir = &argv[0][2];
X		} else {
X		    if (argc > 1) {
X			DatabaseDir = argv[1];
X			argc--; argv++;
X		    } else {
X			/* @error */
X			fprintf(stderr,
X				"%s: %cd must be followed by a directory name",
X						progname, TurnOn ? '-' : '+');
X			exit(1);
X		    }
X		}
X		break;
X	    } /* end switch */
X	} else {
X	    /* not an option, so stop looking */
X	    break;
X	}
X    } /* end while */
X
X    /* now we have parsed the command line arguments, so look for the
X     * default directory
X     */
X    if (DirFromWhere == FW_Default) {
X	char *t;
X
X	if ((t = getenv("LQTEXTDIR")) != (char *) 0) {
X	    DatabaseDir = emalloc(strlen(t) + 1);
X	    (void) strcpy(DatabaseDir, t);
X	    DirFromWhere = FW_Envvar;
X	} else {
X#ifdef UNDERHOME
X	    char *home = getenv("HOME");
X
X	    if (home) {
X		DatabaseDir = joinstr3(home, "/", UNDERHOME);
X		if (!IsDir(DatabaseDir)) {
X		    fprintf(stderr,
X			    "%s: database directory \"%s\" inaccessible.\n",
X			    progname, DatabaseDir);
X		    exit(1);
X		}
X	    } else {
X		fprintf(stderr, "%s: can't find your login directory ($HOME)\n",
X					progname);
X		exit(1);
X	    }
X#endif /* UNDERHOME*/
X	    /* in either case it's the default... */
X	    DirFromWhere = FW_Default;
X	}
X    }
X
X    if (!DatabaseDir || !*DatabaseDir) {
X	/* This can happen if there is no default, or if the user types
X	 * lqword -d ""
X	 * just to be malicious :-)
X	 */
X	fprintf(stderr,
X	"%s: You must give a database directory with -d or $LQTEXTDIR\n",
X			progname);
X	fprintf(stderr, "        use %s -xv for more details.\n", progname);
X	exit(1);
X    }
X
X    /* IsDir is in DocPath.c -- perhaps this should be, too. */
X    if (!IsDir(DatabaseDir)) {
X	char *msg = (char *) 0;
X
X	switch (DirFromWhere) {
X	case FW_Cmdline:
X	    msg = " (specified with the -d option)";
X	    break;
X	case FW_Envvar:
X	    msg = " (from $LQTEXTDIR)";
X	    break;
X	}
X	fprintf(stderr, "%s: \"%s\"%s is not a directory.\n",
X					progname, DatabaseDir, msg ? msg : " ");
X	exit(1);
X    }
X
X    /* set default filenames */
X#define IfNot(x, y) ((x) ? (x) : (y))
X
X    FileIndex = mkdbm(DatabaseDir, IfNot(FileIndex, FILEINDEX));
X    WordIndex = mkdbm(DatabaseDir, IfNot(WordIndex, WORDINDEX));
X
X    DataBase = joinstr3(DatabaseDir, "/", IfNot(DataBase, DATABASE));
X    FidFile = joinstr3(DatabaseDir, "/", IfNot(FidFile, FIDFILE));
X    WidFile = joinstr3(DatabaseDir, "/", IfNot(WidFile, WIDFILE));
X    WidIndexFile =
X	    joinstr3(DatabaseDir, "/", IfNot(WidIndexFile, WIDINDEXFILE));
X
X    ReadDefaultFile();
X
X    if (AsciiTrace) {
X	fprintf(stderr, "%s: lqtext directory \"%s\"\n",progname,DatabaseDir);
X    }
X
X    if (CommonFromWhere == FW_Default) {
X	char *c = getenv("LQCOMMON");
X
X	if (c) {
X	    CommonWordFile = emalloc(strlen(c) + 1);
X	    (void) strcpy(CommonWordFile, c);
X	    CommonFromWhere = FW_Envvar;
X	}
X    }
X
X    if (CommonFromWhere != FW_None && CommonWordFile && *CommonWordFile) {
X	extern int errno;
X	int c;
X
X	if (*CommonWordFile != '/') {
X	    CommonWordFile = joinstr3(DatabaseDir, "/", CommonWordFile);
X	}
X
X	if ((c = open(CommonWordFile, O_RDONLY, 0)) < 0) {
X	    if (CommonFromWhere != FW_Default) {
X		int e = errno;
X		char *msg = " ";
X
X		switch (CommonFromWhere) {
X		case FW_Cmdline:
X		    msg = " (from the -c option)";
X		    break;
X		case FW_Envvar:
X		    msg = " (from $COMMONWORDS)";
X		    break;
X		}
X
X		fprintf(stderr,"%s: can't read common-word file%s ",progname,msg);
X		errno = e;
X		if (errno) {
X		    perror(CommonWordFile);
X		} else {
X		    fprintf(stderr, "\"%s\"\n", CommonWordFile);
X		}
X		exit(1);
X	    }
X	    CommonWordFile = (char *) 0;
X	} else {
X	    (void) close(c); /* it's OK */
X	}
X    }
X
X    if ((p = getenv("DOCPATH")) != (char *) 0) {
X	switch (DocFromWhere) {
X	case FW_File:
X	    if (AsciiTrace > 1) {
X		fprintf(stderr, "%s: DOCPATH (%s) overrides %s (%s)\n",
X#ifdef CONFIGFILE
X				progname, p, CONFIGFILE, DocPath
X#else
X				progname, p, "README", DocPath
X#endif
X		);
X	    }
X	    efree(DocPath);
X	    /* FALL THROUGH */
X	case FW_Default:
X	default: /* ? */
X	    DocPath = emalloc((unsigned) (strlen(p) + 1));
X	    (void) strcpy(DocPath, p);
X	    DocFromWhere = FW_Envvar;
X	    break;
X	}
X    }
X
X    if (!DocPath || !*DocPath) {
X	DocPath = ".";
X    }
X
X#define SetOrNot(s) ( (s && *s) ? s : (s ? "[empty]" : "[null]" ) )
X
X    /* this is always here -- it's only checked once, and is actually
X     * rather useful.
X     */
X    if (AsciiTrace > 2) {
X	fprintf(stderr, "%s: CommonWordFile = \"%s\"\n", progname,
X		SetOrNot(CommonWordFile));
X	fprintf(stderr, "%s: DatabaseDir = \"%s\"\n", progname,
X		SetOrNot(DatabaseDir));
X	fprintf(stderr, "%s: DocPath  = \"%s\"\n", progname,
X		SetOrNot(DocPath));
X	fprintf(stderr, "%s: FileIndex = \"%s\"\n", progname,
X		SetOrNot(FileIndex));
X	fprintf(stderr, "%s: WordIndex = \"%s\"\n", progname,
X		SetOrNot(WordIndex));
X	fprintf(stderr, "%s: DataBase = \"%s\"\n", progname,
X		SetOrNot(DataBase));
X	fprintf(stderr, "%s: FidFile = \"%s\"\n", progname,
X		SetOrNot(FidFile));
X	fprintf(stderr, "%s: WidFile = \"%s\"\n", progname,
X		SetOrNot(WidFile));
X	fprintf(stderr, "%s: WidIndexFile = \"%s\"\n", progname,
X		SetOrNot(WidIndexFile));
X    }
X
X    (void) MakeDocPath(DocPath);
X    /* DocPath is no longer needed, so getenv() can be called again now */
X
X    if (CommonWordFile && *CommonWordFile) {
X	(void) ReadCommonWords(CommonWordFile);
X    }
X}
X
Xvoid
XDefaultUsage()
X{
X    fprintf(stderr, "\
X	-c file	-- ignore words that are listed in the namd file\n\
X	-d dir	-- use the lq-text database in the named directory\n\
X	-m c	-- set matching criteria -- c is \"p\", \"h\" or \"a\"\n");
X    if (AsciiTrace) {
X	fprintf(stderr, "\
X		   -m p  uses precise matching, where CaSe is significant;\n\
X		   -m h  uses heuristic matching, which is the default, and\n\
X		   -m a  uses approximate matching.\n");
X    }
X
X    fprintf(stderr, "\n\
X	-t N	-- set trace level t N (default is zero)\n\
X	-x	-- print %s explanation\n\
X	-xv	-- print %s explanation\n\
X	-V	-- print version information\n\
X	-v	-- be verbose (same as -t 1)\n",
X			AsciiTrace ? "a shorter" : "this",
X			AsciiTrace ? "this" : "a longer");
X    if (AsciiTrace) {
X	fprintf(stderr, "\
XThe current database directory is \"%s\";\n\
X%s will search the path \"%s\" for documents.\n", DatabaseDir, progname, DocPath);
X    }
X}
X
X/* This should be in smalldb.c I think */
Xchar *
Xmkdbm(root, prefix)
X    char *root; /* /tmp/lqtext */
X    char *prefix; /* wordlist, --> /tmp/lqtext.{dir,pag} for dbm */
X{
X#if DBMCREAT == 0
X    extern int errno;
X#endif
X    /* Although ndbm will create files automatically, gdbm and dbm will
X     * not, so we do that here.
X     * Also, it might take a while to get to here, so it will be a lot
X     * better if we get an error message now.
X     */
X    char *p = joinstr3(root, "/", prefix);
X
X#if DBMCREAT == 0
X    q = joinstr3(p, ".", "dir");
X    errno = 0; /* paranoia */
X
X    if ((i = open(q)) < 0 && errno == ENOENT) {
X	i = open(q, O_CREAT|O_RDWR, 0666); /* rw-rw-rw & umask */
X
X	if (i < 0) {
X	    fprintf(stderr, "%s: can't create \"%s\"\n", progname, q);
X	    (void) exit(1);
X	}
X
X	(void) close(i);
X    }
X    (void) strcpy(&q[strlen(q) - 3], "pag");
X
X    if ((i = open(q)) < 0 && errno == ENOENT) {
X	i = open(q, O_CREAT|O_RDWR, 0666); /* rw-rw-rw & umask */
X
X	if (i < 0) {
X	    fprintf(stderr, "%s: can't create \"%s\"\n", progname, q);
X	    (void) exit(1);
X	}
X
X	(void) close(i);
X    }
X
X    (void) efree(q);
X
X#endif /*DBMCREAT*/
X
X    return p; /* the prefix for dbm, not the whole path */
X}
X
X/* this belongs in string.c or something */
Xchar *
Xjoinstr3(a, b, c)
X    char *a, *b, *c;
X{
X    char *p;
X    int i = strlen(a), j = (b[0] != '\0' && b[1] == '\0') ? 1 : strlen(b);
X
X    p = emalloc(i + j + strlen(c) + 1);
X    /* ASSERT: p != 0 */
X    (void) strcpy(p, a);
X    (void) strcpy(&p[i], b);
X    (void) strcpy(&p[i + j], c);
X
X    return p;
X}
X
X#define LCNOMAP 0 /* Token -- leave case alone */
X#define LCMAP 1  /* map to lower case */
X
Xstatic int RMLine = 0;
X
Xstatic void
XReadDefaultFile()
X{
X    extern int errno;
X
X    static char *NextToken(); /* see below */
X#ifdef CONFIGFILE
X    char *ReadMe = joinstr3(DatabaseDir, "/", CONFIGFILE);
X#else
X    char *ReadMe = joinstr3(DatabaseDir, "/", "README");
X#endif
X    FILE *fp;
X    char *Token;
X
X
X    /* This is paranoid... */
X    if (!ReadMe || !*ReadMe) {
X	fprintf(stderr, "%s: Internal: %s: %d: ReadMe %s\n",
X		progname, __FILE__, __LINE__, SetOrNot(ReadMe));
X	exit(1);
X    }
X
X    errno = 0;
X    if ((fp = fopen(ReadMe, "r")) == (FILE *) 0) {
X	if (errno == EPERM) {
X	    fprintf(stderr,
X	    "%s: Warning: you don't have permission to read \"%s\"\n",
X							progname, ReadMe);
X	} else if (AsciiTrace) {
X	    int e = errno;
X
X	    fprintf(stderr, "%s: warning: can't open config file ", progname);
X	    errno = e;
X	    perror(ReadMe);
X	}
X	return;
X    }
X
X    /* Read README up to an "end" line, ignoring lines starting with # */
X
X    while ((Token = NextToken(fp, ReadMe, LCMAP)) != (char *) 0) {
X	if (STREQ(Token, "end")) goto finish;
X	if (STREQ(Token, "common")) {
X	    if (!(Token = NextToken(fp, ReadMe, LCNOMAP))) {
X		fprintf(stderr, "%s: %s %d: unexpected eof at common file\n",
X				progname, ReadMe, RMLine);
X		exit(1);
X	    } else if (CommonFromWhere == FW_Default) {
X		    CommonWordFile = emalloc((unsigned) (strlen(Token) + 1));
X		    (void) strcpy(CommonWordFile, Token);
X		    CommonFromWhere = FW_File;
X	    }
X	} else if (STREQ(Token, "path") || STREQ(Token, "docpath")) {
X	    if (!(Token = NextToken(fp, ReadMe, LCNOMAP))) {
X		fprintf(stderr, "%s: %s: %d: unexpected eof at common file\n",
X				progname, ReadMe, RMLine);
X		exit(1);
X	    } else {
X		DocPath = emalloc((unsigned) (strlen(Token) + 1));
X		(void) strcpy(DocPath, Token);
X		DocFromWhere = FW_File;
X	    }
X	} else {
X	    fprintf(stderr, "%s: \"%s\": %d: token(\"%s\") unexpected\n",
X				progname, ReadMe, RMLine, Token);
X	    exit(1);
X	}
X    } /* while */
X
Xfinish:
X    (void) fclose(fp);
X    return;
X}
X
X/* NextToken -- return the next token from the config file, or 0 at end
X * of file.
X *
X * fd:	 the open config file
X * Name: its name, used only in error messages
X * Map:	 passed on to NextChar(); non-zero asks for lower-case mapping
X *
X * A token ends at a space or newline, or at the closing quote if it began
X * with ' or ".  A backslash makes the following character part of the
X * token.  The result is in a static buffer which the next call overwrites.
X * Over-long tokens, unterminated quotes and a truncated file are fatal.
X */
Xstatic char *
XNextToken(fd, Name, Map)
X    FILE *fd;
X    char *Name;
X    int Map;
X{
X    int ch;
X    static char buf[50];
X    register char *q = buf;
X    int InQuote = 0;
X    int OriginalMap = Map;
X
X    while ((ch = NextChar(fd, Name, Map)) != EOF) {
X	switch (ch) {
X	case '"': case '\'':
X	    if (q == buf && !InQuote) InQuote = ch;
X	    else if (ch == InQuote) {
X		*q = '\0';
X		if (AsciiTrace > 10) {
X		    fprintf(stderr, "RM[%s] ", buf);
X		}
X		return buf;
X	    }
X	    Map = 0; /* no case conversion inside strings */
X	    break;
X	case ' ':
X	case '\n':
X	    if (InQuote) {
X		/* InQuote holds the quote character, so print it with %c */
X		fprintf(stderr, "%s: %s: %d: missing quote -->%c<--\n",
X						progname, Name, RMLine, InQuote);
X		exit(1);
X	    }
X	    *q = '\0';
X	    if (q > buf) return buf;
X	    /* nothing collected yet: skip the white space and start again */
X	    else return NextToken(fd, Name, OriginalMap);
X	    /*NOTREACHED*/
X	    break;
X	case '\\':
X	    if ((ch = NextChar(fd, Name, Map)) == EOF) {
X		fprintf(stderr, "%s: %s; %d: EOF after \\ unexpected!\n",
X							progname, Name, RMLine);
X		exit(1);
X	    }
X	    /* FALL THROUGH to store the escaped character */
X	default:
X	    /* always leave room for the terminating \0 */
X	    if (q >= &buf[sizeof(buf) - 1]) {
X		*q = '\0';
X		fprintf(stderr, "%s: %s: %d: token \"%s...\" is too long\n",
X						progname, Name, RMLine, buf);
X		exit(1);
X	    }
X	    *q++ = ch;
X	    break;
X	}
X    }
X    if (q > buf) {
X	fprintf(stderr, "%s: %s: %d: unexpected end of file\n",
X							progname, Name, RMLine);
X	exit(1);
X    }
X    return (char *) 0;
X}
X
X/* NextChar -- return the next significant character of the config file,
X * or EOF.
X *
X * A '#' and everything up to and including the following newline is
X * discarded (end of file inside such a comment is fatal).  RMLine is
X * incremented for every newline read.  A newline is always returned as
X * itself.  Space, tab, form feed and carriage return come back as ' '
X * when Map is zero and unchanged when it is non-zero; upper-case letters
X * are mapped to lower case only when Map is non-zero.
X *
X * NOTE(review): returning tabs unchanged only when Map is set looks
X * inverted, since NextToken() splits on ' ' and newline only -- confirm.
X */
Xstatic int
XNextChar(fd, Name, Map)
X    FILE *fd;
X    char *Name;
X    int Map;
X{
X    int c;
X
X    for (;;) {
X	c = getc(fd);
X
X	if (c == EOF) {
X	    return EOF;
X	}
X
X	if (c == '#') {
X	    /* throw the rest of the line away, newline included */
X	    while ((c = getc(fd)) != '\n') {
X		if (c == EOF) {
X		    fprintf(stderr, "%s: %s: %d: unexpected end of file\n",
X				progname, Name, RMLine);
X		    exit(1);
X		}
X	    }
X	    ++RMLine;
X	    continue;
X	}
X
X	if (c == '\n') {
X	    ++RMLine;
X	    return c;
X	}
X
X	if (c == ' ' || c == '\t' || c == '\f' || c == '\r') {
X	    return Map ? c : ' ';
X	}
X
X	if (Map && isupper(c)) {
X	    return tolower(c);
X	}
X	return c;
X    }
X}
X
Xint
Xcknatstr(str)
X    char *str;
X{
X    /* check that a string represents a positive or 0 number */
X    register char *p = str;
X
X    /* skip leading white space */
X    while (isspace(*p)) p++;
X    if (!*p) return 0;
X
X    /* allow a leading sign */
X    if (*p == '-' || *p == '+') p++;
X    if (!*p) return 0;
X
X    /* now skip digits... */
X    while (isdigit(*p)) p++;
X
X    return (p > str && *p == '\0');
X}
X
X/* you can tell I am tired by the extra end-while etc. comments.
X * wonder if it will work?
X * perhaps if I took my socks off too.
X *
X * Hmm, yeah, that worked.
X */
@@@End of lq-text/src/liblqtext/Defaults.c
echo x - lq-text/src/liblqtext/DocPath.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/DocPath.c <<'@@@End of lq-text/src/liblqtext/DocPath.c'
X/* DocPath.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X *
X * $Id: DocPath.c,v 1.5 90/10/06 00:11:53 lee Rel1-10 $
X *
X *
X * $Log:	DocPath.c,v $
X * Revision 1.5  90/10/06  00:11:53  lee
X * Prepared for first beta release.
X * 
X * Revision 1.4  90/10/05  23:43:19  lee
X * Put the debugging in isfile() within ASCIITRACE ifdefs.
X * 
X * Revision 1.3  90/08/29  21:46:32  lee
X * Alpha release.
X * 
X * Revision 1.2  90/08/09  19:16:12  lee
X * *** empty log message ***
X * 
X *
X */
X
X#include "globals.h" /* defines and declarations for database filenames */
X
X#include <sys/types.h>
X#include <sys/stat.h>
X#ifdef BSD
X# include <sys/param.h>
X# define PATH_MAX MAXPATHLEN
X#else /*not BSD*/
X# include <limits.h> /* for PATH_MAX */
X#endif
X#include <stdio.h>
X#include "emalloc.h"
X#include "fileinfo.h"
X
X/** Unix system calls: **/
Xextern int stat();
X/** C Library functions: **/
Xextern int strlen();
Xextern char *strcpy();
X/** Within this file: **/
Xextern int IsDir();
X/** **/
X
X#ifdef ASCIITRACE
Xextern int AsciiTrace;
X#endif
X
Xtypedef struct s_DocPath { /* one directory in the document search path */
X    char *DirName; /* the directory's name, a private copy */
X    struct s_DocPath *Next; /* next directory to try, or 0 at the end */
X} t_DocPath;
X
Xstatic t_DocPath *XDocPath = 0;
X
X#ifndef PATH_MAX
X# define PATH_MAX 2048
X#endif
X
Xchar *
X_FindFile(Name)
X    char *Name;
X{
X    int IsFile();
X
X    t_DocPath *p;
X    static char Buffer[PATH_MAX + 3]; /* +1 for "\0" */
X
X    if (!XDocPath) {
X#ifdef ASCIITRACE
X	if (AsciiTrace > 4) {
X	    fprintf(stderr, "FindFile(%s) --> %s\n", Name,
X					IsFile(Name) ? Name : (char *) 0);
X	}
X#endif
X	return IsFile(Name) ? Name : (char *) 0;
X    }
X
X    for (p = XDocPath; p; p = p->Next) {
X	(void) sprintf(Buffer, "%s/%s", p->DirName, Name);
X	if (IsFile(Buffer)) {
X#ifdef ASCIITRACE
X	    if (AsciiTrace > 4) {
X		fprintf(stderr, "FindFile(%s) --> %s\n", Name, Buffer);
X	    }
X#endif
X	    return Buffer;
X	}
X    }
X
X    return (char*) 0;
X}
X
Xint
XMakeDocPath(Path)
X    char *Path;
X{
X    extern char *getenv();
X
X    char *Start, *End;
X    t_DocPath **dpp;
X
X    if (XDocPath == (t_DocPath *) 0) {
X	dpp = &XDocPath;
X	*dpp = (t_DocPath *) 0;
X
X	/* For each element in DocPath, */
X	for (Start = Path; Start && *Start; Start = End) {
X	    char SaveEnd;
X
X	    /* find the end of this bit of the path */
X	    for (End = Start; *End && *End != ':'; End++)
X		;
X	    
X	    if (End == Start) break;
X
X	    SaveEnd = (*End);
X	    *End = '\0';
X
X	    /* if not a directory, delete from path */
X	    if (!IsDir(Start)) {
X		*End = SaveEnd;
X		continue;
X	    }
X
X	    /* add to the linked list */
X	    *dpp = (t_DocPath *) emalloc(sizeof(t_DocPath));
X	    (*dpp)->DirName = emalloc(strlen(Start) + 1);
X	    (void) strcpy((*dpp)->DirName, Start);
X	    dpp = &(*dpp)->Next;
X	    (*dpp) = (t_DocPath *) 0;
X	    if ((*End = SaveEnd) != '\0') {
X		End++;
X	    }
X	}
X    }
X    return 0;
X}
X
Xint
XIsDir(Dir)
X    char *Dir;
X{
X    struct stat statbuf;
X
X    if (!Dir || !*Dir) return 0;
X    if (stat(Dir, &statbuf) < 0) return 0;
X    if ((statbuf.st_mode & S_IFMT) != S_IFDIR) {
X	return 0;
X    }
X    return 1;
X}
X
Xint
XIsFile(Path)
X    char *Path;
X{
X    struct stat statbuf;
X
X#ifdef ASCIITRACE
X    if (AsciiTrace > 20) {
X	fprintf(stderr, "IsFile(%s)\n", Path);
X    }
X#endif
X    if (stat(Path, &statbuf) < 0) return 0;
X    if ((statbuf.st_mode & S_IFMT) != S_IFREG) {
X	return 0;
X    }
X#ifdef ASCIITRACE
X    if (AsciiTrace > 20) {
X	fprintf(stderr, "\t\tIsFile(%s) returns true.\n", Path);
X    }
X#endif
X    return 1;
X}
@@@End of lq-text/src/liblqtext/DocPath.c
echo x - lq-text/src/liblqtext/FileList.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/FileList.c <<'@@@End of lq-text/src/liblqtext/FileList.c'
X/* FileList.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X/*
X *
X * FileList -- operations on the list of files.  This is the Document
X * Directory part of NX-Text.
X *
X * $Id: FileList.c,v 1.8 90/10/13 02:39:05 lee Rel1-10 $
X *
X * $Log:	FileList.c,v $
X * Revision 1.8  90/10/13  02:39:05  lee
X * deleted some incorrect code.
X * 
X * Revision 1.7  90/10/13  02:21:03  lee
X * NEEDALIGN stuff
X * 
X * Revision 1.6  90/10/07  20:37:18  lee
X * changed ifdef sparc to ifdef NEEDALIGN
X * 
X * Revision 1.5  90/10/06  00:11:55  lee
X * Prepared for first beta release.
X * 
X * Revision 1.4  90/09/29  23:46:14  lee
X * very minor speedup, and changed a free() to efree().
X * 
X * Revision 1.3  90/09/20  19:11:05  lee
X * deleted unused locking code.
X * removed a sun4-specific memory leak.  Other minor changes.
X * 
X * Revision 1.2  90/08/29  21:46:33  lee
X * Alpha release.
X * 
X * Revision 1.1  90/08/09  19:16:15  lee
X * Initial revision
X * 
X * Revision 2.2  89/10/08  20:29:10  lee
X * Working version of nx-text engine.  Addfile and wordinfo work OK.
X * 
X * Revision 2.1  89/10/02  01:12:08  lee
X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
X * 
X * Revision 1.2  89/09/16  21:15:54  lee
X * First demonstratable version.
X * 
X * Revision 1.1  89/09/07  21:01:36  lee
X * Initial revision
X * 
X *
X */
X
X#include "globals.h" /* defines and declarations for database filenames */
X
X#include <stdio.h>
X#include <malloc.h>
X#include <unistd.h>
X#include <sys/types.h>
X#include <sys/stat.h>
X#include <signal.h>
X#include <errno.h>
X#include <fcntl.h>
X#include <string.h>
X
X#include "smalldb.h"
X#include "fileinfo.h"
X#include "emalloc.h"
X
X/** Unix system calls that need to be declared: **/
Xextern int stat();
Xextern int open(), close(), creat();
Xextern void exit();
Xextern int read(), write();
Xextern unsigned alarm();
X/** library functions that need to be declared: */
Xextern int lockf();
Xextern unsigned sleep();
Xextern void perror();
Xextern long atol();
X
X/** other (lqtext) functions **/
Xt_FID GetNextFID();
Xt_FileInfo *GetFileInfo();
X/** **/
X
Xt_FID
XGetMaxFID()
X{
X    extern int errno;
X
X    int fd;
X    struct stat StatBuf;
X    char Buffer[20];
X
X    /* ensure that the file is there */
X    if (stat(FidFile, &StatBuf) == -1) {
X	return 0;
X    }
X
X    if ((fd = open(FidFile, O_RDWR, 0)) < 0) {
X	fprintf(stderr, "Warning: Can't open FID file");
X	return 0;
X    }
X
X    /* Read the file */
X    if (read(fd, Buffer, (unsigned int) StatBuf.st_size) < 0) {
X	fprintf(stderr, "Can't read from \"%s\"\n", FidFile);
X	exit(1);
X    }
X
X    (void) close(fd);
X
X    Buffer[StatBuf.st_size] = '\0';
X
X    return atol(Buffer);
X}
X
X/*ARGSUSED*/
Xt_FID
XGetNextFID(Size)
X    long Size; /* to let it keep short FIDs for huge files, execpt I don't */
X{
X    extern int errno;
X    extern long atol();
X    extern long lseek();
X
X    int fd;
X    char Buffer[21];
X    struct stat StatBuf;
X    t_FID Result;
X
X    /* ensure that the file is there */
X    if (stat(FidFile, &StatBuf) == -1) {
X	fprintf(stderr, "Creating FID file \"%s\"\n", FidFile);
X	if ((fd = creat(FidFile, 02666)) < 0) {
X	    fprintf(stderr, "Can't create FID file \"%s\"\n", FidFile);
X	    exit(1);
X	}
X	(void) close(fd);
X	return GetNextFID(Size);
X
X	/*NOTREACHED*/
X    }
X
X    if ((fd = open(FidFile, O_RDWR, 0)) < 0) {
X	fprintf(stderr, "Can't open FID file");
X	perror(FidFile);
X	exit(1);
X    }
X
X    errno = 0;
X
X    /* Read the file */
X    if (read(fd, Buffer, (unsigned int) StatBuf.st_size) < 0) {
X	fprintf(stderr, "Can't read from \"%s\"\n", FidFile);
X	exit(1);
X    }
X
X    Buffer[StatBuf.st_size] = '\0';
X
X    Result = atol(Buffer);
X
X    if (Result == 0L || *Buffer == '-') {
X	Result = 1L;
X    }
X
X    (void) sprintf(Buffer, "%lu\n", Result + 1);
X
X    /* Move to the start of the file and write the now value.
X     * No need to truncate the file, because it didn't shrink!
X     */
X    (void) lseek(fd, 0, 0L);
X    (void) write(fd, Buffer, (unsigned int) strlen(Buffer));
X    (void) close(fd);
X
X    return Result;
X}
X
Xtypedef struct { /* the form in which a t_FileInfo is stored in the file index */
X    t_FID FID;
X    time_t DateLastIndexed; /* corresponds to t_FileInfo's Date */
X    int FilterType;
X    unsigned NameLength; /* strlen of the name, not counting the \0 */
X    char CurrentLocation[1]; /* the name; really NameLength + 1 bytes long */
X} t_PhysicalIndexEntry;
X
Xt_PhysicalIndexEntry *
XFileInfo2Phys(FileInfo)
X    t_FileInfo *FileInfo;
X{
X    t_PhysicalIndexEntry *PIE;
X    register int NameLength;
X
X    if (!FileInfo || !FileInfo->Name) return (t_PhysicalIndexEntry *) 0;
X
X    NameLength = strlen(FileInfo->Name);
X
X    PIE = (t_PhysicalIndexEntry *) emalloc(
X				sizeof(t_PhysicalIndexEntry) + NameLength + 1);
X
X    if (!PIE) return (t_PhysicalIndexEntry *) 0;
X
X    PIE->FID = FileInfo->FID;
X    PIE->DateLastIndexed = FileInfo->Date;
X    PIE->FilterType = FileInfo->FilterType;
X    PIE->NameLength = NameLength;
X    (void) strcpy(PIE->CurrentLocation, FileInfo->Name);
X    return PIE;
X}
X
Xt_FileInfo *
XPhys2FileInfo(PIE)
X    t_PhysicalIndexEntry *PIE;
X{
X    t_FileInfo *FileInfo;
X
X    if (!PIE || !PIE->NameLength) return (t_FileInfo *) 0;
X
X    FileInfo = (t_FileInfo *) emalloc(sizeof(t_FileInfo));
X    FileInfo->FID = PIE->FID;
X    FileInfo->Date = PIE->DateLastIndexed;
X    FileInfo->FilterType = PIE->FilterType;
X    FileInfo->Stream = (FILE *) 0;
X    if (PIE->NameLength) {
X#if 0
X	char *doc;
X#endif
X
X	FileInfo->Name = emalloc(PIE->NameLength + 1);
X	(void) strncpy(FileInfo->Name, PIE->CurrentLocation,
X						    PIE->NameLength);
X	FileInfo->Name[PIE->NameLength] = '\0';
X
X#if 0
X	/* with this in place, wordinfo spends over 40% of its time
X	 * in stat!
X	 */
X	if ((doc = FindFile(FileInfo->Name)) != (char *) 0) {
X	    /* hence, we never retrieve non-existent files */
X	    FileInfo->Name = erealloc(FileInfo->Name, strlen(doc) + 1);
X	    (void) strcpy(FileInfo->Name, doc);
X	}
X#endif
X    } else {
X	FileInfo->Name = (char *) 0;
X    }
X
X    return FileInfo;
X}
X
Xint
XSaveFileInfo(FileInfo)
X    t_FileInfo *FileInfo;
X{
X    t_PhysicalIndexEntry *PIE;
X    datum key, data;
X    DBM *db;
X    int RetVal;
X    char Buffer[20];
X
X    if (!FileInfo) return -1;
X
X    if ((PIE = FileInfo2Phys(FileInfo)) == (t_PhysicalIndexEntry *) 0) {
X	return -1;
X    }
X
X    if ((db = startdb(FileIndex)) == (DBM *) 0) {
X	return -1;
X    }
X
X    if (FileInfo->Name && *(FileInfo->Name)) {
X	/* For the reverse mapping, FileName --> FID ... store an
X	 * entry of the form ([\377]317, "hello").
X	 * This scheme simply has to go.
X	 * I favour a btree, but that may be needlessly complex.
X	 */
X	int KeyLen = strlen(FileInfo->Name);
X	key.dptr = emalloc(KeyLen + 2); /* +2: "\375" and \0 */
X	/* Note: the N= is so that a file called "123" does not cause
X	 * confusion with the reverse mapping
X	 */
X	*(key.dptr) = '\375';
X	(void) strcpy(&(key.dptr[1]), FileInfo->Name);
X	key.dsize = KeyLen + 1;
X		/* length of name + length of "\375" -- the nul at the end
X		 * is not included.
X		 */
X
X	(void) sprintf(Buffer, "%lu", FileInfo->FID);
X	data.dptr = Buffer;
X	data.dsize = strlen(Buffer);
X	(void) dbm_store(db, key, data, DBM_REPLACE);
X	(void) efree(key.dptr);
X    }
X
X    (void) sprintf(Buffer, "F%lu", FileInfo->FID);
X
X    key.dptr = Buffer;
X    key.dsize = strlen(Buffer);
X
X    data.dptr = (char *) PIE;
X    data.dsize = sizeof(t_PhysicalIndexEntry) + PIE->NameLength;
X
X    RetVal = dbm_store(db, key, data, DBM_REPLACE);
X
X    enddb(db);
X
X    return RetVal;
X}
X
Xt_FID
XName2FID(Name)
X    char *Name;
X{
X    DBM *db;
X    datum key, result;
X    extern long atol();
X
X    key.dsize = strlen(Name);
X    /* see previous routine for comments about this +2 ugliness */
X    key.dptr = emalloc(key.dsize + 2);
X    *(key.dptr) = '\375';
X    (void) strcpy(&(key.dptr[1]), Name);
X    key.dsize += 1; /* for the cookie; we don't include the \0 */
X
X    if ((db = startdb(FileIndex)) == (DBM *) 0) {
X	fprintf(stderr, "Name2FID can't get FID for %s (database \"%s\"\n", Name, FileIndex);
X	(void) efree(key.dptr);
X	return -1;
X    }
X    result = dbm_fetch(db, key);
X    enddb(db);
X
X    (void) efree(key.dptr);
X
X    return (result.dsize == 0) ? (t_FID) 0 : atol(result.dptr);
X}
X
Xt_FileInfo *
XGetFileInfo(FID)
X    t_FID FID;
X{
X    t_FileInfo *FileInfo;
X    datum key, data;
X    DBM *db;
X    char Buffer[20];
X#ifdef NEEDALIGN
X    t_PhysicalIndexEntry *PIE;
X#endif
X
X    (void) sprintf(Buffer, "F%lu", FID);
X    key.dptr = Buffer;
X    key.dsize = strlen(Buffer);
X
X    if ((db = startdb(FileIndex)) == (DBM *) 0) {
X	return (t_FileInfo *) 0;
X    }
X
X    data = dbm_fetch(db, key);
X    enddb(db);
X
X    if (data.dsize == 0) {
X	return (t_FileInfo *) 0;
X    }
X
X#ifdef NEEDALIGN
X    PIE = (t_PhysicalIndexEntry *) emalloc(data.dsize + 1);
X    (void) memcpy((char *) PIE, data.dptr, data.dsize);
X    FileInfo = Phys2FileInfo(PIE);
X    (void) efree((char *) PIE);
X#else
X
X    /* Now we have a PIE, so we need a FileInfo... */
X    FileInfo = Phys2FileInfo(/*NOSTRICT*/(t_PhysicalIndexEntry *) data.dptr);
X#endif
X
X    return FileInfo;
X}
X
Xint
Xstrcontains(ShortString, LongString)
X    char *ShortString;
X    char *LongString;
X{
X    register char *p;
X
X    int strprefix();
X
X    for (p = LongString; *p; p++) {
X	if (*p == *ShortString && strprefix(ShortString, p)) {
X	    return 1;
X	}
X    }
X    return 0;
X}
X
Xint
Xstrprefix(Prefix, String)
X    register char *Prefix;
X    register char *String;
X{
X    while (*String++ == *Prefix++)
X	if (!*Prefix) return 1;
X    return 0;
X}
@@@End of lq-text/src/liblqtext/FileList.c
echo x - lq-text/src/liblqtext/FilterType.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/FilterType.c <<'@@@End of lq-text/src/liblqtext/FilterType.c'
X/* FilterType.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X/* FilterType -- determine how to deal with a given file.
X * Part of Liam Quin's NX-Text text retrieval package.
X *
X * $Id: FilterType.c,v 1.6 90/10/06 00:11:56 lee Rel1-10 $
X *
X * $Log:	FilterType.c,v $
X * Revision 1.6  90/10/06  00:11:56  lee
X * Prepared for first beta release.
X * 
X * Revision 1.5  90/09/24  21:20:31  lee
X * changed a free() to an efree() -- the last one!
X * 
X * Revision 1.4  90/09/20  20:07:35  lee
X * fixed a tiny memory hole...
X * 
X * Revision 1.3  90/08/29  21:46:35  lee
X * Alpha release.
X * 
X * Revision 1.2  90/08/09  19:16:18  lee
X * BSD lint and fixes...
X * 
X * Revision 2.2  89/10/08  20:44:34  lee
X * Working version of nx-text engine.  Addfile and wordinfo work OK.
X * 
X *
X */
X
X#include <stdio.h>
X#include <malloc.h>
X#include "emalloc.h"
X#include <sys/types.h>
X#include <sys/stat.h>
X#include <errno.h>
X#include <fcntl.h>
X#include <string.h>
X#include <ctype.h>
X
X#include "fileinfo.h"
X#define FILTERDEF /* see filter.h */
X#include "filter.h"
X#include "wordrules.h" /* for min word length -- don't index files shorter */
X
X#define Prefix(pref,str) ((*(pref)==(*str))&&!strncmp(pref,str,strlen(pref)))
X
Xextern int open(), close();
Xextern int read();
Xextern int strcontains();
X
X/* The current filter types are:
X * FTYPE_NEWS  1
X * FTYPE_MAIL  2
X * FTYPE_CDMS  3
X * FTYPE_MOSTLYASCII 4
X * FTYPE_C_SOURCE 5
X */
X
X/* InitFilterTable might one day be called from Defaults.c....
X * At which point, it will read an ascii file that describes the
X * various filters, I suppose.
X *
X * For now, it does nothing.  It is only called once, and should return 0
X * for success or -1 for failure.
X */
Xint
XInitFilterTable()
X{
X    return 0;
X}
X
Xint
XGetFilterType(FileInfo, StatBuf)
X    t_FileInfo *FileInfo;
X    struct stat *StatBuf;
X{
X    int Type = MaxFilterType + 1;
X    char Buffer[1024];
X    int fd = (-1); /* initialised for lint */
X    int AmountRead = 0; /* initialised for lint */
X    int ch;
X    int Length;
X    FILE *fp = (FILE *) 0;
X
X    /* GetFilterType() is called to determine which input filter (if any)
X     * should be used to read a given file.
X     * This routine should know about compressed files.
X     *
X     * It currently knows about mail, news and C files.
X     * There are also hooks for CDMS files (a word-processing package).
X     *
X     * If the file should not be indexed at all (e.g. it's a core dump),
X     * we return -1.
X     */
X
X    if (!FileInfo || !FileInfo->Name || !*(FileInfo->Name)) return (-1);
X
X    if (StatBuf->st_size < MinWordLength) return (-1);
X
X    Length = strlen(FileInfo->Name);
X
X    if (FileInfo->Name[Length - 1] == 'Z' && Length > 2 &&
X					FileInfo->Name[Length - 2] == '.') {
X	char *Buf = emalloc(Length + 10);
X
X	(void) sprintf(Buf, "zcat < \"%s\"", FileInfo->Name);
X
X	fp = popen(Buf, "r");
X	(void) efree(Buf);
X	if (fp == (FILE *) 0) {
X	    return (-1);
X	}
X    }
X
X    if (fp) {
X	if ((AmountRead = fread(fp, Buffer, sizeof(Buffer))) < MinWordLength) {
X	    (void) pclose(fp);
X	    fp = (FILE *) 0; /* try again with read() */
X	}
X    }
X
X    if (!fp) {
X	if ((fd = open(FileInfo->Name, O_RDONLY, 0)) < 0) {
X	    return -1;
X	}
X	if ((AmountRead = read(fd, Buffer, sizeof(Buffer)-1)) < MinWordLength) {
X	    (void) close(fd);
X	    return -1;
X	}
X    }
X    if (fp) {
X	(void) pclose(fp);
X    } else {
X	(void) close(fd);
X    }
X
X    /* Check the magic table for CDMS: */
X    if ((unsigned char) Buffer[0] == 128 && Buffer[1] == 'M') {
X	if (AmountRead > 35) { /* size of CDMS file header */
X	    Type = FTYPE_CDMS;
X	    return (FileInfo->FilterType = Type);
X	}
X    }
X     
X    if (AmountRead < 30) {
X	register char *p = Buffer;
X
X	/* who cares if it's this small? */
X	for (; p - Buffer < AmountRead; p++) {
X	    if (!isascii(*p)) {
X		return (-1);
X	    }
X	}
X	return 0;
X    }
X
X    /* Not cdms -- try news/mail;
X     * mail files start with From;
X     * news starts with From, Path or Relay-Version
X     */
X    if (isupper(Buffer[0])) {
X	Buffer[AmountRead] = '\0';
X	AmountRead--;
X	if (Prefix("Xref: ", Buffer)) {
X	    return (FileInfo->FilterType = FTYPE_NEWS);
X	} else if (Prefix("Newsgroups: ", Buffer)) {
X	    return (FileInfo->FilterType = FTYPE_NEWS);
X	} else if (Prefix("Relay-Version: ", Buffer)) {
X	    return (FileInfo->FilterType = FTYPE_NEWS);
X	} else if (Prefix("From", Buffer)) {
X	    if (strcontains("\nPath: ", Buffer)) {
X		/* bug: should only check header, not body! */
X		return FTYPE_NEWS;
X	    } else {
X		return FTYPE_MAIL;
X	    }
X	} else if (Prefix("Path: ", Buffer)) {
X	    if (strcontains("\nNewsgroups: ", Buffer)) {
X		return FTYPE_NEWS;
X	    } else {
X		return FTYPE_MAIL;
X	    }
X	} else if (Prefix("Return-Path: ", Buffer)) {
X	    return FTYPE_MAIL; /* MH-style mail */
X	}
X    }
X
X    /* look for C, trying not to get muddled up with shell scripts */
X    ch = FileInfo->Name[Length - 1];
X
X    if ((ch == 'c' || ch == 'h') && (Length > 2) &&
X			    FileInfo->Name[Length - 2] == '.') {
X	/* We could require one of
X	 * . a comment
X	 * . a #[ ^i]*(include|define|ifn?def|if)[ ^i]+
X	 * . main[ ^i\n]*(
X	 * . a declaration -- int, char, long, unsigned, static
X	 * in the first block of the file.
X	 * Can't be bothered today.
X	 */
X	if (strcontains("#line", Buffer)) {
X	    return (-1); /* preprocessed already, index the original! */
X	    /* we ought to say why we are not indexing it! */
X	}
X
X	/* we are very predisposed to thinking of this as C... */
X	if (Prefix("#include", Buffer)		||
X		strcontains("/*", Buffer)		||
X		strcontains("#define", Buffer)	||
X		strcontains("argc", Buffer)		||
X		strcontains("()", Buffer)		||
X		strcontains("#include", Buffer)) {
X	    return FTYPE_C_SOURCE;
X	}
X    }
X
X    /* if still not done, choose between Don't Index and Ascii Filter
X     * (which simply strips non-ascii characters).
X     */
X    if (Type >= MaxFilterType) {
X	register char *p;
X	int AsciiCount = 0;
X	int OtherCount = 0;
X
X	for (p = Buffer; p - Buffer < AmountRead; p++) {
X	    if (isascii(*p)) AsciiCount++;
X	    else OtherCount++;
X	    if (!*p) {
X		/* If it has nulls in it, it isn't a normal file,
X		 * and we have no idea what to do with it!
X		 * (if we did know, it would have had a magic number,
X		 * so we wouldn't have got here)
X		 */
X		Type = (-1);
X		break;
X	    }
X	    if (Type > 0) {
X		if (AsciiCount > OtherCount * 5) {
X		    Type = (OtherCount) ? FTYPE_MOSTLYASCII : 0;
X		} else {
X		    Type = (-1); /* too much garbage */
X		}
X	    }
X	}
X    }
X
X    if (Type > MaxFilterType) Type = -1; /* don't index */
X    return Type;
X}
@@@End of lq-text/src/liblqtext/FilterType.c
echo x - lq-text/src/liblqtext/Makefile 1>&2
sed 's/^X//' >lq-text/src/liblqtext/Makefile <<'@@@End of lq-text/src/liblqtext/Makefile'
X# Makefile for LQ-Text, a full text retrieval package by Liam R. Quin
X#
X# This Makefile belongs in the src/liblqtext directory.
X# Note that most of the actual configuration is done in ../Makefile and
X# in ../h/globals.h, and not here.  This file is for representing the
X# dependencies between source components and specifying the steps
X# required to build the libraries $(DESTDIR)/$(TEXTLIB) and
X# $(DESTDIR)/$(LIAMLIB).
X#
X# $Id: Makefile,v 1.3 90/10/06 00:06:22 lee Rel1-10 $
X#
X# $Log:	Makefile,v $
X# Revision 1.3  90/10/06  00:06:22  lee
X# deleted mkdep output.
X# 
X# Revision 1.2  90/09/29  23:48:33  lee
X# does cmp on the right file now...
X# 
X# Revision 1.1  90/08/09  19:17:07  lee
X# Initial revision
X# 
X# 
X#
X
X# The name of this directory (referred to by the saber_* targets).
XPWD = liblqtext
X
X# Directory in which the finished libraries are placed.
XDESTDIR = ../lib
X
X# The two archives built here, and the program run on each one after ar.
XTEXTLIB = liblqtext.a
XLIAMLIB = liblq.a
XRANLIB = ranlib
X
X# lint, its flags, and the lint libraries it produces.
XLINT = lint
XLINTFLAGS = -a -b -c -h -x 
XTEXTLINTLIB = llib-llqtext.ln
XLIAMLINTLIB = llib-llq.ln
X
X# Include flag for the ../h directory; no rule in this file refers to it.
XEXTRA = -I../h
X
X# Default target: build both libraries and place them in $(DESTDIR).
Xall: $(DESTDIR)/$(TEXTLIB) $(DESTDIR)/$(LIAMLIB)
X
X# Building already copies the libraries into $(DESTDIR), so there is
X# nothing further to do here.
Xinstall: all
X
X# Build both lint libraries and move them into $(DESTDIR).
Xlint: $(DESTDIR)/$(TEXTLINTLIB) $(DESTDIR)/$(LIAMLINTLIB)
X	-echo The lint libraries are up to date.
X
X# These three name actions, not files.  Saying so stops a stray file
X# called all, install or lint from making them look up to date.  This
X# comes after the rules so that "all" is still the first target.
X.PHONY: all install lint
X
X# Move a freshly built lint library into $(DESTDIR).
X# The directory is created first if need be, exactly as the rules for
X# the object libraries do; without it the mv fails on a clean tree.
X$(DESTDIR)/$(TEXTLINTLIB): $(TEXTLINTLIB)
X	-test -d $(DESTDIR) || mkdir $(DESTDIR)
X	mv $(TEXTLINTLIB) $(DESTDIR)/$(TEXTLINTLIB)
X
X$(DESTDIR)/$(LIAMLINTLIB): $(LIAMLINTLIB)
X	-test -d $(DESTDIR) || mkdir $(DESTDIR)
X	mv $(LIAMLINTLIB) $(DESTDIR)/$(LIAMLINTLIB)
X
X# Extra sources and objects added to the text library lists below.
X# Both are empty here.
XNDBMCFILES =
XNDBMOFILES =
X
X## keep all of the following consistent: ###################################
X
X# Members of $(TEXTLIB), in the order they are handed to ar.
XTEXTOBJS = WordInfo.o DocPath.o Defaults.o FileList.o \
X           Phrase.o Root.o numbers.o pblock.o \
X           smalldb.o system.o FilterType.o asciitrace.o \
X           $(NDBMOFILES)
X
X# Sources handed to lint for the text lint library.
X# NOTE(review): malloc.c is listed here although malloc.o belongs to
X# LIAMOBJS rather than TEXTOBJS -- confirm that this is deliberate.
XTEXTSRC = DocPath.c Defaults.c FileList.c Phrase.c \
X          Root.c WordInfo.c malloc.c numbers.c \
X          pblock.c smalldb.c system.c FilterType.c asciitrace.c \
X          $(NDBMCFILES)
X
X# Members and sources of $(LIAMLIB).
XLIAMOBJS = malloc.o progname.o cmdname.o
XLIAMSRC = malloc.c progname.c cmdname.c
X
X## end of mutually related stuff  ##########################################
X
X# Run lint over every source of the text library.
X# NOTE(review): -Clqtext is presumably what makes lint write the file
X# named by $(TEXTLINTLIB) -- confirm against the local lint manual.
X$(TEXTLINTLIB): $(TEXTSRC)
X	$(LINT) -Clqtext $(LINTFLAGS) $(CFLAGS) $(TEXTSRC)
X
X# Every command below starts with #, so the shell treats it as a comment
X# and running this target through make does nothing.
X# NOTE(review): presumably these lines are meant to be picked up by the
X# Saber-C environment, to load the sources -- confirm.
Xsaber_src:
X	#cd $(PWD)
X	#load $(CFLAGS) $(TEXTSRC) $(LIAMSRC)
X	#cd ..
X
X# As saber_src, but naming the object files instead of the sources.
Xsaber_obj:
X	#cd $(PWD)
X	#load $(CFLAGS) $(TEXTOBJS) $(LIAMOBJS)
X	#cd ..
X
X# Run lint over every source of the support library.
X# NOTE(review): -Clq presumably names the output $(LIAMLINTLIB) -- confirm.
X$(LIAMLINTLIB): $(LIAMSRC)
X	$(LINT) -Clq $(LINTFLAGS) $(CFLAGS) $(LIAMSRC)
X
X# Put a freshly built archive into $(DESTDIR), but only when it differs
X# from the copy already there, so that an unchanged library keeps its
X# modification time.
X#
X# . the directory, and an empty placeholder for cmp to read, are created
X#   if missing (errors ignored: the cp below will report a real problem);
X# . the cmp-or-copy line is NOT prefixed with "-": if the copy fails the
X#   build stops here, before the only good archive is removed;
X# . the copy is re-indexed with $(RANLIB) because cp gives it a new
X#   modification time, which some linkers take to mean that the archive
X#   index is out of date;
X# . the local archive is then removed.
X$(DESTDIR)/$(TEXTLIB): $(TEXTLIB)
X	-test -d $(DESTDIR) || mkdir $(DESTDIR)
X	-test -f $(DESTDIR)/$(TEXTLIB) || cp /dev/null $(DESTDIR)/$(TEXTLIB)
X	cmp $(TEXTLIB) $(DESTDIR)/$(TEXTLIB) || ( cp $(TEXTLIB) $(DESTDIR) && $(RANLIB) $(DESTDIR)/$(TEXTLIB) )
X	-/bin/rm -f $(TEXTLIB)
X
X$(DESTDIR)/$(LIAMLIB): $(LIAMLIB)
X	-test -d $(DESTDIR) || mkdir $(DESTDIR)
X	-test -f $(DESTDIR)/$(LIAMLIB) || cp /dev/null $(DESTDIR)/$(LIAMLIB)
X	cmp $(LIAMLIB) $(DESTDIR)/$(LIAMLIB) || ( cp $(LIAMLIB) $(DESTDIR) && $(RANLIB) $(DESTDIR)/$(LIAMLIB) )
X	-/bin/rm -f $(LIAMLIB)
X
X# Build each archive from scratch: remove any old one, add every member
X# with ar, then run $(RANLIB) on the result.  $@ is the archive named
X# as the target of the rule.
X$(TEXTLIB): $(TEXTOBJS)
X	rm -f $@
X	ar rv $@ $(TEXTOBJS)
X	$(RANLIB) $@
X
X$(LIAMLIB): $(LIAMOBJS)
X	rm -f $@
X	ar rv $@ $(LIAMOBJS)
X	$(RANLIB) $@
X
X# Remove object files and core dumps, leaving any built libraries.
Xtidy:
X	/bin/rm -f *.o core
X
X# Remove everything tidy does, plus the archives and lint libraries
X# that this directory builds.  $(TARGETS) and $(TEST) are not set in
X# this file; they are still honoured in case they are passed in from
X# outside.  Copies already placed in $(DESTDIR) are left alone.
Xclean: tidy
X	/bin/rm -f $(TARGETS) $(TEST) $(TEXTLIB) $(LIAMLIB) $(TEXTLINTLIB) $(LIAMLINTLIB)
X
X# Regenerate the dependency list at the end of this file.
Xdepend:
X	mkdep $(CFLAGS) *.c
X
X# These name actions, not files; a stray file called tidy, clean or
X# depend must not make them look up to date.
X.PHONY: tidy clean depend
X
X# DO NOT DELETE THIS LINE -- mkdep uses it.
X# DO NOT PUT ANYTHING AFTER THIS LINE, IT WILL GO AWAY.
X
X# IF YOU PUT ANYTHING HERE IT WILL GO AWAY
@@@End of lq-text/src/liblqtext/Makefile
echo end of part 03
-- 
Liam R. E. Quin,  lee@sq.com, SoftQuad Inc., Toronto, +1 (416) 963-8337