[alt.sources] lq-text Full Text Retrieval Database Part 05/13

lee@sq.sq.com (Liam R. E. Quin) (03/04/91)

: cut here --- cut here --
: To unbundle, sh this file
#! /bin/sh
: part 05
echo x - lq-text/src/liblqtext/asciitrace.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/asciitrace.c <<'@@@End of lq-text/src/liblqtext/asciitrace.c'
X/* asciitrace.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X * This file simply declares AsciiTrace, which is used for verbose (-v)
X * output (when set to 1), and for debugging/tracing at higher levels.
X */
X
Xint AsciiTrace = 0;
X
X/* $Id: asciitrace.c,v 1.3 90/10/06 00:21:12 lee Rel1-10 $
X *
X */
@@@End of lq-text/src/liblqtext/asciitrace.c
echo x - lq-text/src/liblqtext/bcopy.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/bcopy.c <<'@@@End of lq-text/src/liblqtext/bcopy.c'
X/* bcopy.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X#ifdef BCOPYTEST
X# include <stdio.h>
X#endif
X
X/* this is a simple replacement for bcopy() where the native bcopy()
X * does not handle overlapping blocks.
X * do
X *	cc -DBCOPYTEST -o bcopy bcopy.c
X * and run "./bcopy" for a simple test.  You should get three
X * identical lines of output.
X *
X * $Id: bcopy.c,v 1.3 90/10/06 00:21:24 lee Rel1-10 $
X */
X
Xbcopy(src, dest, nbytes)
X    char *dest;
X    char *src;
X    int nbytes;
X{
X    /* We have to be clever about this...
X     * If src < dest then we copy from the top down
X     * otherwise, copy from the bottom up...
X     */
X    
X    register char *p, *q;
X
X    if (src < dest) {
X	for (p = &src[nbytes - 1], q =&dest[nbytes - 1]; nbytes--; q--, p--) {
X	    *q = *p;
X	}
X    } else {
X	for (p = src, q = dest; nbytes--; p++, q++) {
X	    *q = *p;
X	}
X    }
X}
X
X#ifdef BCOPYTEST
Xmain()
X{
X    char buffer[4096];
X    char *s = "The naked children hugged each other";
X
X    puts(s); /* first line */
X    (void) sprintf(&buffer[12], "%s", s);
X    bcopy(&buffer[12], buffer, strlen(s) + 1);
X    printf("[%s]\n", buffer); /* 2nd line */
X    bcopy(buffer, &buffer[12], strlen(s) + 1);
X    printf("[%s]\n", &buffer[12]); /* 3rd line */
X}
X#endif
X
X/* $Log:	bcopy.c,v $
X * Revision 1.3  90/10/06  00:21:24  lee
X * Prepared for first beta release.
X * 
X *
X */
@@@End of lq-text/src/liblqtext/bcopy.c
echo x - lq-text/src/liblqtext/cmdname.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/cmdname.c <<'@@@End of lq-text/src/liblqtext/cmdname.c'
X/* cmdname.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
Xchar *cmdname = (char *) 0;
X
X/* $Id: cmdname.c,v 1.2 90/10/06 00:12:08 lee Rel1-10 $
X *
X * $Log:	cmdname.c,v $
X * Revision 1.2  90/10/06  00:12:08  lee
X * Prepared for first beta release.
X * 
X * Revision 1.1  90/03/24  17:08:19  lee
X * Initial revision
X * 
X *
X */
@@@End of lq-text/src/liblqtext/cmdname.c
echo x - lq-text/src/liblqtext/malloc.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/malloc.c <<'@@@End of lq-text/src/liblqtext/malloc.c'
X/* malloc.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X/*
X * malloc.c -- wrapper routines for free/malloc, that do checking and
X * print errors.
X *
X * $Id: malloc.c,v 1.6 91/03/03 00:14:27 lee Rel1-10 $
X *
X * $Log:	malloc.c,v $
X * Revision 1.6  91/03/03  00:14:27  lee
X * Simpler interface if MALLOCTRACE undefined.
X * 
X * Revision 1.5  90/10/06  00:12:11  lee
X * Prepared for first beta release.
X * 
X * Revision 1.4  90/09/29  23:51:31  lee
X * Added MALLTRACE to detect memory leaks.
X * 
X * Revision 1.3  90/08/29  21:46:55  lee
X * Alpha release.
X * 
X * Revision 1.2  90/08/09  19:16:44  lee
X * BSD lint and fixes...
X * 
X * Revision 2.2  89/10/08  20:46:10  lee
X * Working version of nx-text engine.  Addfile and wordinfo work OK.
X * 
X *
X */
X
X#include "globals.h"
X
X#include <malloc.h>
X#include <stdio.h>
X#include <ctype.h>
X
Xextern char *progname;
X
Xextern void exit();
X
Xint _LiamIsInCurses = 0;
X    /* This should be in error.c */
X
XINLINE char *
X_emalloc(nbytes
X#ifdef MALLOCTRACE
X, FileName, Line
X#endif
X)
X    unsigned nbytes;
X#ifdef MALLOCTRACE
X    char *FileName;
X    int Line;
X#endif
X{
X    char *Result;
X
X    if ((Result = malloc(nbytes)) == (char *) 0) {
X#ifdef MALLOCTRACE
X	fprintf(stderr, "%s: %s: %d: emalloc(%u) failed.\n",
X					progname, FileName, Line, nbytes);
X#else
X	fprintf(stderr, "%s: emalloc(%u) failed.\n", progname, nbytes);
X#endif
X	exit(1);
X    }
X
X#ifdef MALLOCTRACE
X    (void) fprintf(stderr, "malloc %u > 0x%x %s %d\n", nbytes, Result, FileName, Line);
X#endif
X    return Result;
X}
X
XINLINE char *
X_ecalloc(Number, Size
X#ifdef MALLOCTRACE
X			, FileName, Line
X#endif
X)
X    unsigned Number;
X    unsigned Size;
X#ifdef MALLOCTRACE
X    char *FileName;
X    int Line;
X#endif
X{
X    char *Result;
X    extern char *calloc();
X
X    if ((Result = calloc(Number, Size)) == (char *) 0) {
X#ifdef MALLOCTRACE
X	fprintf(stderr, "%s: %s: %d: ecalloc(%u x %u) failed.\n",
X			progname, FileName, Line, Number, Size);
X#else
X	fprintf(stderr, "%s: ecalloc(%u x %u) failed.\n",
X			progname, Number, Size);
X#endif
X	exit(1);
X    }
X
X    return Result;
X}
X
XINLINE char *
X_erealloc(OldString, NewSize
X#ifdef MALLOCTRACE
X				, FileName, Line
X#endif
X)
X    char *OldString;
X    unsigned NewSize;
X#ifdef MALLOCTRACE
X    char *FileName;
X    int Line;
X#endif
X{
X    char *Result;
X
X    if ((Result = realloc(OldString, NewSize)) == (char *) 0) {
X#ifdef MALLOCTRACE
X	fprintf(stderr, "%s: %s: %d: erealloc(0x%x, %u) failed.\n",
X			    progname, FileName, Line, OldString, NewSize);
X#else
X	fprintf(stderr, "%s: erealloc(0x%x, %u) failed.\n",
X			    progname, OldString, NewSize);
X#endif
X	exit(1);
X    }
X
X#ifdef MALLOCTRACE
X    (void) fprintf(stderr, "realloc 0x%x %u > 0x%x %s %d\n", OldString, NewSize, Result, FileName, Line);
X#endif
X    return Result;
X}
X
XINLINE void
X_efree(String
X#ifdef MALLOCTRACE
X		, FileName, Line
X#endif
X)
X    char *String;
X#ifdef MALLOCTRACE
X    char *FileName;
X    int Line;
X#endif
X{
X    if (!String) {
X#ifdef MALLOCTRACE
X	(void) fprintf(stderr, "%s: %s: %d: Warning: free(0) ignored\n",
X			progname, FileName, Line);
X#else
X	(void) fprintf(stderr, "%s: Warning: free(0) ignored\n", progname);
X#endif
X	return;
X    }
X#ifdef MALLOCTRACE
X    (void) fprintf(stderr, "free 0x%x\n", String);
X#endif
X
X    (void) free(String);
X}
@@@End of lq-text/src/liblqtext/malloc.c
echo x - lq-text/src/liblqtext/numbers.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/numbers.c <<'@@@End of lq-text/src/liblqtext/numbers.c'
X/* numbers.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X/* Routines to read and write numbers in a compressed format, preserving
X * block boundaries.
X * The actual compression seems to be about 50%, as most numbers turn
X * out to fit in 16 bits.  Interestingly, there is room for another one
X * or two bits, I think, that could be used for something else, in the
X * main pblock index.  For example, it could mark whether words were
X * plural/-"ing"/"un"-/ with 2 bits.
X *
X * $Id: numbers.c,v 1.4 90/10/06 00:12:16 lee Rel1-10 $
X *
X * $Log:	numbers.c,v $
X * Revision 1.4  90/10/06  00:12:16  lee
X * Prepared for first beta release.
X * 
X * Revision 1.3  90/08/09  19:16:49  lee
X * BSD lint and fixes...
X * 
X * Revision 1.2  90/04/18  19:47:13  lee
X * More flexible (and slightly more compact) number format.
X * 
X * Revision 2.2  89/10/08  20:46:36  lee
X * Working version of nx-text engine.  Addfile and wordinfo work OK.
X * 
X * Revision 2.1  89/10/02  01:15:15  lee
X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
X * 
X * Revision 1.2  89/09/16  21:16:26  lee
X * First demonstratable version.
X * 
X * Revision 1.1  89/09/07  21:06:01  lee
X * Initial revision
X * 
X *
X */
X
X#include "globals.h"
X
X#ifdef SYSV
X	extern int _filbuf(), _flsbuf();
X#endif
X#include <stdio.h>
X#include "numbers.h"
X
X/* ReadNumber and WriteNumber take/return a long, using a compression
X * algorithm to reduce the amount of data taken.
X * The current algorithm is simply like internet addresses:
X * a 0 in the top bit followed by a 0 means it's one byte
X * a 0 followed by a 1 means it's 2 bytes
X * a 1 followed by a 0 means it's 3 bytes, and
X * a 1 followed by a 1 means it's 4 bytes.
X * A better alternative might simply use a 1 in the top bit, hence fitting
X * 7 bits into each bytes.  The advantages of considering more than
X * one number at a time and using compress-style LS packing are not clear.
X * In particular, speed of recovery is an issue too.
X *
X * The routines use (char *) pointers instead of files prefixes with an s.
X * see numbers.h for some related macros.
X *
X */
X
X
X#ifdef TESTNUMBERS
Xchar *progname;
X
Xint
Xmain(ac, av)
X    int ac;
X    char *av[];
X{
X    extern long atol();
X    FILE *f;
X    extern FILE *fopen();
X
X    progname = av[0];
X
X    while (--ac) {
X	unsigned long L = atol(*++av);
X	unsigned long L2;
X
X	f = fopen("/tmp/boy", "w");
X	printf("Write %u\n", L);
X	fWriteNumber(f, L);
X	fclose(f);
X	f = fopen("/tmp/boy", "r");
X	L2 = fReadNumber(f);
X	printf("Read %u\n", L2);
X	if (L != L2) {
X	    printf("**** ERROR **** %ld != %ld\n", L, L2);
X	}
X	fclose(f);
X    }
X    return 0;
X}
X#endif /*TESTNUMBERS*/
X
XINLINE void
XfWriteNumber(f, Number)
X    FILE *f;
X    unsigned long Number;
X{
X    /* Compressed numbers:
X     * 7 bit numbers --> single byte;
X     * 8...14 bits --> 2 bytes
X     * 15...21 bits --> 3 bytes
X     * 22..28 bits --> 4 bytes
X     * 29..32 bits --> 5 bytes
X     */
X    while (Number > 0177) {
X	putc((Number & 0177) | 0200, f);
X	Number >>= 7;
X    }
X    putc(Number & 0177, f);
X}
X
X#define PutC(ch, S)  (*((*S)++) = (char) (ch))
X
XINLINE void
XsWriteNumber(s, Number)
X    char **s;
X    unsigned long Number;
X{
X    /* Compressed numbers:
X     * 7 bit numbers --> single byte;
X     * 8...14 bits --> 2 bytes
X     * 15...21 bits --> 3 bytes
X     * 22..28 bits --> 4 bytes
X     * 29..32 bits --> 5 bytes
X     */
X    while (Number > 0177) {
X	PutC((Number & 0177) | 0200, s);
X	Number >>= 7;
X    }
X    PutC(Number & 0177, s);
X}
X
XINLINE unsigned long
XfReadNumber(f)
X    FILE *f;
X{
X    unsigned long Result = 0L;
X    int ThereIsMore;
X    int Shift = 0;
X
X    /* Read a number, 7 bits at a time, lsb first, until there is
X     * a byte without the top bit set -- that's the most significant
X     * byte, and there is no more of this number.
X     */
X    do {
X	Result |= ((ThereIsMore = getc(f)) & 0177) << Shift;
X	ThereIsMore &= 0200;
X	Shift += 7;
X    } while (ThereIsMore);
X    return Result;
X}
X
X#define GetC(S) \
X    ( (unsigned int) * (unsigned char *) ((* (unsigned char **)S)++) )
X
XINLINE unsigned long
XsReadNumber(s)
X    char **s;
X{
X    unsigned long Result = 0L;
X    int ThereIsMore;
X    int Shift = 0;
X
X    /* Read a number, 7 bits at a time, lsb first, until there is
X     * a byte without the top bit set -- that's the most significant
X     * byte, and there is no more of this number.
X     */
X    do {
X	Result |= ((ThereIsMore = GetC(s)) & 0177) << Shift;
X	ThereIsMore &= 0200;
X	Shift += 7;
X    } while (ThereIsMore);
X    return Result;
X}
@@@End of lq-text/src/liblqtext/numbers.c
echo x - lq-text/src/liblqtext/pblock.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/pblock.c <<'@@@End of lq-text/src/liblqtext/pblock.c'
X/* pblock.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X/* The physical Word Database for NX-Text...
X *
X * Interface by Liam Quin, 1989
X *
X * The main database is a mesh of linked lists, rather like tagliatelli.
X *
X * $Id: pblock.c,v 1.10 90/10/13 03:08:36 lee Rel1-10 $
X *
X * $Log:	pblock.c,v $
X * Revision 1.10  90/10/13  03:08:36  lee
X * deleted an illegal dereference!
X * 
X * Revision 1.9  90/10/06  00:12:17  lee
X * Prepared for first beta release.
X * 
X * Revision 1.8  90/09/29  23:49:05  lee
X * commented out (with #if 0) some code that called WID2WordInfo needlessly.
X * 
X * Revision 1.7  90/08/29  21:47:04  lee
X * Alpha release.
X * 
X * Revision 1.6  90/08/09  19:16:52  lee
X * BSD lint and fixes...
X * 
X * Revision 1.5  90/08/08  22:26:17  lee
X * Many major lint and gcc -Wall fixes...
X * 
X * Revision 1.4  90/03/22  14:26:54  lee
X * Fixed the test for Sorting monotonicity (?)..., which had not been
X * checking that the FIDS were the same before checking the blocks...
X * 
X * Revision 1.3  90/03/21  19:42:22  lee
X * new calls to efree();
X * removed WID from the data blocks.
X * doesn't work yet, though, sorry.
X * 
X * Revision 2.2  89/10/08  20:46:53  lee
X * Working version of nx-text engine.  Addfile and wordinfo work OK.
X * 
X * Revision 2.1  89/10/02  01:15:24  lee
X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
X * 
X * Revision 1.3  89/09/17  23:03:12  lee
X * Various fixes; NumberInBlock now a short...
X * 
X * Revision 1.2  89/09/16  21:16:30  lee
X * First demonstratable version.
X * 
X * Revision 1.1  89/09/07  21:06:05  lee
X * Initial revision
X * 
X *
X */
X
X#include "globals.h" /* defines and declarations for database filenames */
X
X#include <stdio.h> /* stderr, also for fileinfo.h */
X#include <fcntl.h>
X#include <malloc.h>
X#include <sys/types.h>
X#include "fileinfo.h" /* for wordinfo.h */
X#include "wordinfo.h"
X#include "pblock.h"
X#include "numbers.h"
X#include "wordrules.h"
X#include "emalloc.h"
X
X#ifndef STREQ
X# define STREQ(boy,girl) ((*(boy) == *(girl)) && (!strcmp((boy),(girl))))
X#endif
X
X#define new(type) ( ((type) *) emalloc(sizeof(type)) )
X
Xextern t_WordInfo *WID2WordInfo();
Xextern t_WID Word2WID();
X
X/** Unix system calls that need to be declared: **/
Xextern void exit();
Xextern int open();
Xextern int read(), write();
X
X/** C library functions that need to be declared: **/
Xextern char *memcpy();
Xextern void perror();
Xextern void qsort();
Xextern int strcmp();
Xextern char *strcpy();
Xextern int strlen();
X
X/** lqtext library functions that need to be declared: **/
Xextern int MkWIB();
Xextern void MkWIBH();
Xextern int PutWordInfoIntoIndex();
Xextern void SortWordPlaces();
Xextern t_WordInfo *MakeWordInfo();
Xextern void SlayWordInfo();
X
X/** Functions within this file that need to be declared: **/
Xvoid DeleteWordPlaces();
Xstatic void FlushBlock();
X
Xt_WordPlace *GetWordPlaces();
Xt_pblock *Getpblock();
Xunsigned short PutWordPlaces();
Xint _PutByte(), PutLong();
Xunsigned char _GetByte();
Xunsigned long GetLong();
Xunsigned long FindFreeBlock();
Xvoid FillWordPlaces();
X/** **/
X
Xextern char *progname;
X
X/* If you find this macro confusing, see "The C Programming Language",
X * Kernighan & Ritchie, 1st. Edition, for a good introduction to
X * programming in C.
X * :-( :-)
X */
X#define PutByte(Byte, WID, sp, Blockp, BlockLength, LastStart, NextBlock) \
X    ( (*(sp) - (unsigned char *) *(Blockp) >= *(BlockLength)) ? \
X       _PutByte(Byte, WID, sp, Blockp, BlockLength, LastStart, NextBlock) : \
X      ((*((*(sp))++) = (Byte)), 0))
X
X#define GetByte(WID, sp, Blockp, BlockLength, NextBlock) \
X    ( (*(sp) - (unsigned char *) *(Blockp) >= *(BlockLength)) ? \
X       _GetByte(WID, sp, Blockp, BlockLength, NextBlock) : *((*(sp))++) )
X
Xextern long lseek();
X
Xstatic int DataFile = -1;
X
X#ifdef ASCIITRACE
Xextern int AsciiTrace;
X#endif
X
Xchar *ReadBlock();
Xvoid WriteBlock();
Xunsigned long Putpblock();
X
X/* Layout of the physical index database (OUT OF DATE)
X * =====================================
X *
X * This file is the only interface to the database of FID/Offset pairs.
X *
X * The db is organised in blocks arranged in Tagliatelli format: a linked
X * list of blocks for each WID; there is a list for each WID in the Word
X * Index.  The Word Index contains the block number of the start of the
X * chain.
X * Block 0 is the start of the free list (but is never itself free!)
X *
X * block 0... Free list header; otherwise currently unused.
X * block 1... first data block:
X * +---------------------------
X * | bytes 0...3: Offset of next block in this chain
X * |	   4...7: Number of valid pairs in this block (0-->unused block)
X * |	   8..11: WID to which this block refers (for checking)
X * |	  12..15: Total number of WIDS in the chain
X * |		  (only the first block in each chain has this)
X * | The (FID, Offset) pairs follow, in compressed (Internet-style) format.
X * |
X * block 2... next data block (either the start of a new chain, or a
X * continuation of some other chain.  Or maybe unused, especially if files
X * have been deleted).
X *
X * The block header is described by t_BlockHeader.
X *
X */
X
X#include "blkheader.h"
X
X/* Look up a word in the database...
X * and return a list of all the WordPlaces where it's found
X * BUG: should be called WordInfo2pblock().
X */
Xt_pblock *
XGetpblock(WordInfo)
X    t_WordInfo *WordInfo;
X{
X    t_pblock *pblock = 0;
X    unsigned long HowManyToGet = 0L;
X    t_WordPlace *WordPlaces;
X    unsigned long CurrentPair = 0L;
X
X#if 0
X    /* This code allows one to call GetPblock with a very minimal
X     * wordinfo entry:
X     */
X    if (WordInfo->Offset == 0L) {
X	/* No database entry found yet... */
X	if (!WordInfo->WID && WordInfo->Length) {
X	    WordInfo->WID = Word2WID(WordInfo->Word, WordInfo->Length);
X	}
X	if (WordInfo->WID) {
X	    WordInfo = WID2WordInfo(WordInfo->WID);
X	}
X    }
X#endif
X
X    if (!WordInfo->NumberOfWordPlaces) {
X	return (t_pblock *) 0; /* nothing to write */
X    }
X
X    if (!WordInfo->Offset && !WordInfo->NumberOfWordPlaces) {
X	/* no pblock offset, so no pblock! */
X	return (t_pblock *) 0;
X    }
X
X    HowManyToGet = WordInfo->NumberOfWordPlaces;
X
X    pblock = (t_pblock *) emalloc( sizeof(t_pblock) +
X		    (unsigned) (HowManyToGet + 1) * sizeof(t_WordPlace));
X
X    WordPlaces = pblock->WordPlaces;
X    pblock->WID = WordInfo->WID;
X    pblock->ChainStart = WordInfo->Offset;
X    pblock->NumberOfWordPlaces = WordInfo->NumberOfWordPlaces;
X
X    /* First, the pairs in the WordInfo might suffice: */
X    if (WordInfo->WordPlacesInHere >= HowManyToGet) {
X	for (; CurrentPair < WordInfo->WordPlacesInHere; CurrentPair++) {
X	    WordPlaces[CurrentPair] = WordInfo->WordPlaces[CurrentPair];
X	}
X    }
X
X    /* If they all fitted in the WordInfo block, well, that was a big win! */
X    if (CurrentPair >= HowManyToGet) {
X	pblock->ChainStart = 0L;
X	return pblock;
X    }
X
X    /* So we need to read the entire list of WordPlaces from the database.
X     * Although we may have already done the first few, I'm going to do them
X     * all again because that ensures that the last few bytes in the
X     * WordInfo data block can get used!
X     */
X    
X    WordPlaces = GetWordPlaces(
X		WordInfo->WID,
X		WordInfo->WordPlaceStart,
X		(unsigned) WIDBLOCKSIZE -
X			    (WordInfo->WordPlaceStart - WordInfo->DataBlock),
X		WordInfo->Offset,
X		HowManyToGet);
X
X  
X  
X    if (WordPlaces == (t_WordPlace *) 0) {
X	fprintf(stderr, "%s: SNAFU: no wordplaces for WID %ld, wanted %ld\n",
X			progname, WordInfo->WID, HowManyToGet);
X	exit(1);
X    }
X
X    /* copy the result... */
X    (void) memcpy((char *) pblock->WordPlaces, (char *) WordPlaces,
X				(int) (sizeof(t_WordPlace) * HowManyToGet));
X    (void) efree((char *) WordPlaces);
X    return pblock;
X}
X
X/* This is how many WordPlaces one could write into a block in WIDFILE:
X * The Header takes up 1 for the word length, MinWordLength or more for
X * the actual word, and another 1 for the number of places.
X * (32 - 5) / 3 is 27/3 is 9, but this is rather rare in practice!
X * See the comment just before PutPairs().
X * If this is not kept up to date, space may be wasted in the database,
X * or you will slow down lqaddfile.
X */
X#define MaxWordPlacesInAWordBlock ((WIDBLOCKSIZE - (MinWordLength + 2)/3))
X
X/* This should in fact take a (t_WordPlaceList *) or something.
X * It returns the number of words written, for checking in
X * DumpCache (wordtable.c)
X */
Xint
XWriteWordChain(WordPlaceList)
X    t_WordPlaceList *WordPlaceList;
X{
X    extern t_WID GetNextWID();
X    extern t_WordInfo *FindWordInfoFromIndex();
X
X    t_WID WID;
X    int Length;
X    t_pblock *pblock = (t_pblock *) 0;
X    int NumberOfWordPlacesToAdd = 0;
X    t_WordPlaceList *WP;
X    t_WordInfo *IndexEntry = 0;
X    char *FirstWord;
X    register int i;
X    int TotalWordsWritten = 0;
X
X    if (!WordPlaceList) return 0; /* nothing to do */
X
X    /* Sanity check: */
X    if (!(WordPlaceList->Word) || !*(WordPlaceList->Word)) {
X	fprintf(stderr, "Warning: WriteWordChain() asked to write null word\n");
X	return 0;
X    }
X
X    Length = strlen(WordPlaceList->Word);
X
X    /* This is undoubtedly a bad way to do things:
X     * if IndexEntry is big, we don't want to fetch it when we could
X     * simply stuff things on the end!
X     */
X    if ((WID = Word2WID(WordPlaceList->Word, Length)) != (t_WID) 0) {
X	if ((IndexEntry = WID2WordInfo(WID)) != (t_WordInfo *) 0) {
X	    pblock = Getpblock(IndexEntry);
X	}
X    }
X
X    if (!WID) {
X	WID = GetNextWID();
X    }
X
X#ifdef ASCIITRACE
X    if (AsciiTrace >= 3) {
X	fprintf(stderr, "Entry %lu for \"%s\"", WID, WordPlaceList->Word);
X	if (IndexEntry) {
X	    fprintf(stderr, ", had %d pairs\n", IndexEntry->NumberOfWordPlaces);
X	} else {
X	    fprintf(stderr, " (new entry)\n");
X	}
X    }
X#endif
X
X    /* Count the number of entries we are going to add.
X     * There may be several different words in the chain, but they are
X     * sorted in word order.
X     */
X    FirstWord = WordPlaceList->Word;
X
X    for (WP = WordPlaceList; WP; WP = WP->Next) {
X	if (!STREQ(WP->Word, FirstWord)) break;
X	++NumberOfWordPlacesToAdd;
X	if (WP != WordPlaceList) {
X	    WP->Word = (char *) 0; /* so we don't free it; see AddWord() */
X	}
X    }
X
X    if (pblock == (t_pblock *) 0) {
X	int oldnp = (IndexEntry) ? IndexEntry->NumberOfWordPlaces : 0;
X
X	pblock = (t_pblock *) emalloc(sizeof(t_pblock) +
X		(NumberOfWordPlacesToAdd + oldnp) * sizeof(t_WordPlace));
X	pblock->NumberOfWordPlaces = 0;
X	if (IndexEntry && IndexEntry->NumberOfWordPlaces) {
X	    register t_WordPlace *W, *Q;
X	    register unsigned long boy = 0L;
X  
X	    if (IndexEntry->WordPlacesInHere < IndexEntry->NumberOfWordPlaces) {
X		FillWordPlaces(IndexEntry);
X	    }
X
X	    for (W = IndexEntry->WordPlaces, Q = pblock->WordPlaces;
X				boy < IndexEntry->NumberOfWordPlaces; boy++) {
X		*Q++ = *W++;
X	    }
X	    pblock->NumberOfWordPlaces = IndexEntry->NumberOfWordPlaces;
X	}
X    } else {
X	/* Remove the old information from disk.
X	 * This isn't as bad as it sounds, as it will be at the start
X	 * of the freelist, so when we write it out again it will be
X	 * in the buffer cache...
X	 * Although, it would be faster simply to append.
X	 */
X	if (IndexEntry->Offset) {
X	    DeleteWordPlaces(IndexEntry->Offset, IndexEntry->WID);
X	}
X	/*NOSTRICT*/
X	pblock = (t_pblock *) erealloc((char *) pblock, sizeof(t_pblock) +
X		(unsigned) (NumberOfWordPlacesToAdd +
X			pblock->NumberOfWordPlaces) * sizeof(t_WordPlace));
X    }
X
X    pblock->WID = WID;
X    pblock->ChainStart = 0L; /* certainly it is now invalid! */
X    i = pblock->NumberOfWordPlaces;
X    pblock->NumberOfWordPlaces += NumberOfWordPlacesToAdd;
X
X    for (WP = WordPlaceList; WP && NumberOfWordPlacesToAdd--; WP = WP->Next) {
X	/* As we ignore the rest of these WordInfo entries, something
X	 * has to change here for more efficiency!
X	 * Probably we should get passed a linked list of WordPlaces
X	 * instead of WordInfo structures.
X	 */
X	pblock->WordPlaces[i] = WP->WordPlace;
X	i++;
X    }
X
X    if (WP && STREQ(WP->Word, FirstWord)) {
X	fprintf(stderr, "Internal error counting pairs to add\n");
X	exit(1);
X    }
X
X    if (NumberOfWordPlacesToAdd > 0) {
X	fprintf(stderr, "I've got some pairs left over, \"%s\" line %d\n",
X			__FILE__, __LINE__ - 1);
X	exit(1);
X    }
X
X    /* We are going to need a WordInfo here... */
X    if (IndexEntry == (t_WordInfo *) 0) {
X	IndexEntry = MakeWordInfo(WID, Length, WordPlaceList->Word);
X    }
X    IndexEntry->NumberOfWordPlaces = pblock->NumberOfWordPlaces;
X
X    /* Now, see if they all fit into the WordInfo itself! */
X
X    pblock->ChainStart = IndexEntry->Offset = 0L;
X
X    if (pblock->NumberOfWordPlaces <= MaxWordPlacesInAWordBlock) {
X	(void) MkWIB(IndexEntry, pblock);
X    }
X
X    if (IndexEntry->WordPlacesInHere == pblock->NumberOfWordPlaces) { 
X	/* no pblock needed! */
X	if (PutWordInfoIntoIndex(IndexEntry, (unsigned long) 0L) < 0) {
X	    extern int errno;
X	    int e = errno;
X	    fprintf(stderr, "%s: Couldn't put \"%s\" into the ",
X						progname, IndexEntry->Word);
X	    errno = e;
X	    perror("index");
X	    exit(1);
X	}
X    } else {
X	(void) Putpblock(IndexEntry, pblock); /* (this alters *WordInfo) */
X
X	if (PutWordInfoIntoIndex(IndexEntry, pblock->ChainStart) < 0) {
X	    extern int errno;
X	    int e = errno;
X	    fprintf(stderr, "%s: Couldn't add word \"%s\" to the ",
X						progname, IndexEntry->Word);
X	    errno = e;
X	    perror("word index");
X	    exit(1);
X	}
X    }
X
X    if (pblock) {
X	efree((char *) pblock);
X	pblock = (t_pblock *) 0;
X    }
X
X    if (IndexEntry) {
X	SlayWordInfo(IndexEntry);
X    }
X
X    TotalWordsWritten += NumberOfWordPlacesToAdd;
X
X    /* Now see if there are any more words in the chain: */
X    if (WP) {
X	TotalWordsWritten += WriteWordChain(WP);
X    }
X
X    return TotalWordsWritten;
X}
X
Xstatic unsigned char pblockBuffer[BLOCKSIZE + 5];
X
X/* Write out an entire (presumably new) data entry, and
X * return a disk pointer to the start of the chain
X */
Xunsigned long
XPutpblock(WordInfo, pblock)
X    t_WordInfo *WordInfo;
X    t_pblock *pblock;
X{
X    /* Assume that we can discard the PairBlock in WordInfo --
X     * it was a pointer to a static buffer anyway.
X     */
X
X    if (WordInfo->DataBlock) {
X	WordInfo->DataBlock = (char *) 0;
X    }
X
X    WordInfo->Offset = pblock->ChainStart = FindFreeBlock(WordInfo->WID);
X    (void) MkWIBH(WordInfo, pblock);
X    PutWordPlaces(
X	    pblock->WordPlaces,
X	    WordInfo->WID,
X	    (unsigned char *) WordInfo->WordPlaceStart,
X	    (unsigned)
X	    WIDBLOCKSIZE - (WordInfo->WordPlaceStart - WordInfo->DataBlock),
X	    WordInfo->Offset,
X	    pblock->NumberOfWordPlaces);
X
X    return WordInfo->Offset;
X}
X
X/** WordPlaces are now stored as sequences, as follows:
X **  FID*2 -- 1, 2, 3 (usually) or 4 bytes				1-4
X **  (very, very occasionaly a variable-size number may be 5 bytes.)
X **   . the bottom bit in the stored number determines whether there
X **     is more than one FID to follow
X **  number of following places (only if prev. bit was 1) -- 1 byte	0-1
X ** For each following entry:-
X **   . for each of the following places:
X **     Block In File (long, 1, 2, 3 or 4 bytes, usually 1)		1-4
X **     Word In Block -- always 1 byte					1-1
X **		the bottom bit of this says if there are flags
X **     Flags -- always 1 byte, if present				0-1
X **	    Stuff Before -- 1 byte					0-1
X **	    (if there are no flags, there's no Stuff Before byte, and
X **     we use the default value of 1)
X **
X ** Hence:	each sub-place takes from 2 to 7 bytes [was: 0 to 4]
X **		each Place sequence takes from 3 [min was 2 with old format]
X **		to (4 + 1) + 255 * (2..7) bytes.
X **		In most (I guess > 7/10) cases, flags will be 0, and
X **		StuffBefore be the default of 1.
X **
X **	I am hoping, of course, that the extra information stored is
X ** worth while!
X **	It might be possible to coalesce WordInBlock and BlockInFile using
X ** delta modulation -- i.e., storing the increment from the previous.  In
X ** this case, a flag bit could mean that those two values each occupy a
X ** nibble in a single byte.  Or, I could use a single byte, like this:
X **	[a b c d e f g h]
X **	a == 1 --> (efgh) is word in block inc., (bcd is block in file inc)
X ** but I need to do some real measurements to figure out how best to save
X ** space.  It really is worth while keeping the format as simple as I can,
X ** as this speeds retrieval.
X **
X **/
X
Xunsigned short
XPutWordPlaces(WordPlaces, WID, Block, BlockLength, NextOffset, NumberToWrite)
X    t_WordPlace *WordPlaces;
X    t_WID WID;
X    unsigned char *Block;
X    unsigned BlockLength;
X    unsigned long NextOffset;
X    unsigned long NumberToWrite;
X{
X    unsigned char *q = Block;
X    unsigned long L;
X    int CurrentPlace = 0;
X    unsigned long LastStart = 0L;
X    t_FID LastFID = 0;
X    unsigned long LastBlock = 0L;
X
X    /** IMPORTANT NOTICE:  keep the definition of MaxWordPlacesInAWordBlock
X     ** up to date (it's #defined above).
X     **/
X
X    /* sort the pblock to simplify subsequent accesses */
X    if (NumberToWrite > 1) {
X	SortWordPlaces(NumberToWrite, WordPlaces);
X    }
X
X    while (CurrentPlace < 0 || CurrentPlace < NumberToWrite) {
X	unsigned short NumberOfRepeats;
X	unsigned char U;
X	t_FID FID = WordPlaces[CurrentPlace].FID;
X	int LastPlace;
X
X	/* Determine the number of Places in the same file;
X	 * note that we can write at most 255 in the same place, so
X	 * longer stretches are broken up into clumps of 255.
X	 * This is a reasonable tradeoff, I think.  The alternative would
X	 * be to write NumberOfRepeats as a long, and lose if there were
X	 * (say) between 64 (old, 127 new) and 255 of them.  This case only
X	 * occurs once in the New Testament anyway, and presumably is
X	 * generally quite rare.
X	 */
X	NumberOfRepeats = 0;
X	LastPlace = CurrentPlace;
X	while (NumberOfRepeats < 255) {
X	    if (LastPlace >= NumberToWrite) {
X		break;
X	    } else if (WordPlaces[LastPlace].FID != FID) {
X		break;
X	    }
X	    ++NumberOfRepeats;
X	    ++LastPlace;
X	}
X
X	L = (FID - LastFID) << 1;
X	LastFID = FID;
X	if (NumberOfRepeats > 1) L |= 01L;
X	if (PutLong(L, WID, &q, &Block, &BlockLength,
X						&NextOffset, &LastStart) < 0) {
X	    return CurrentPlace;
X	}
X	if (L & 01L) {
X	    if (PutByte(NumberOfRepeats, WID, &q, &Block, &BlockLength,
X						&LastStart, &NextOffset) < 0) {
X		return CurrentPlace;
X	    }
X	}
X
X	LastBlock = 0;
X
X	for (; NumberOfRepeats != 0; --NumberOfRepeats) {
X	    if (CurrentPlace > NumberToWrite) {
X		fprintf(stderr,
X		"Entry for file %lu (WID %ld) has more matches than expected\n",
X								    FID, WID);
X		exit(1);
X		/* This would represent a rather serious bug, I think! */
X	    }
X	    /* block number */
X	    if (WordPlaces[CurrentPlace].BlockInFile < LastBlock) {
X		fprintf(stderr, "Sort WID %ld failed, non-monatonic blocks\n",
X									WID);
X		exit(1); /* can't cope with this one */
X	    } else if (CurrentPlace &&
X		    (WordPlaces[CurrentPlace].FID ==
X			    WordPlaces[CurrentPlace - 1].FID) &&
X		    (WordPlaces[CurrentPlace].BlockInFile == LastBlock) &&
X		    (WordPlaces[CurrentPlace].WordInBlock <=
X			    WordPlaces[CurrentPlace - 1].WordInBlock)) {
X		fprintf(stderr,
X		    "Sort WID %ld failed, FID %ld: Blk %d: WIB %d <= %d\n",
X		    WID, FID, LastBlock, WordPlaces[CurrentPlace].WordInBlock,
X		    WordPlaces[CurrentPlace - 1].WordInBlock
X		);
X	    }
X	    L = WordPlaces[CurrentPlace].BlockInFile - LastBlock;
X	    LastBlock += L;
X
X	    if (PutLong(L, WID, &q, &Block, &BlockLength, &NextOffset,
X							    &LastStart) < 0) {
X		return CurrentPlace;
X	    }
X	    U = (WordPlaces[CurrentPlace].WordInBlock << 1);
X	    if (WordPlaces[CurrentPlace].StuffBefore != 1) {
X		WordPlaces[CurrentPlace].Flags |= WPF_HASSTUFFBEFORE;
X	    }
X	    if (WordPlaces[CurrentPlace].Flags != WPF_HASSTUFFBEFORE) {
X		U |= 01;
X	    }
X	    if (U > 255) {
X		fprintf(stderr,
X		"WID %lu: WordInBlock (0%o) from FID %lu too big\n",
X			WID, U, FID);
X		exit(1);
X	    }
X
X	    if (PutByte(U, WID, &q, &Block, &BlockLength, &LastStart,
X							&NextOffset) < 0) {
X		return CurrentPlace;
X	    }
X	    if (U & 01) {
X		if (PutByte(WordPlaces[CurrentPlace].Flags, WID, &q, &Block,
X				&BlockLength, &LastStart, &NextOffset) < 0) {
X		    return CurrentPlace;
X		}
X	    }
X
X	    /* Even if there are flags, there still might not be a separate
X	     * entry for the number of preceding skipped bytes.
X	     */
X	    if ((U & 01) &&
X			(WordPlaces[CurrentPlace].Flags & WPF_HASSTUFFBEFORE)) {
X		if (PutByte(WordPlaces[CurrentPlace].StuffBefore, WID, &q,
X			&Block, &BlockLength, &LastStart, &NextOffset) < 0) {
X		    return CurrentPlace;
X		}
X	    }
X	    ++CurrentPlace;
X	}
X	if (CurrentPlace > LastPlace) {
X	    fprintf(stderr, "oops- I went wrong and wrote %ld > %ld\n",
X						    CurrentPlace, LastPlace);
X	}
X    }
X    if (LastStart) {
X	/* NextStart had better not be non-zero, but FlushBlock will
X	 * take care of it (we have wasted a block in that case!).
X	 * LastStart is zero if we fitted it all inside the WordInfo
X	 * block, although this is currently unlikely as we don't get
X	 * called in that case!
X	 * Oh, maybe we do.
X	 */
X	FlushBlock((unsigned char *) Block, &NextOffset, &LastStart, WID);
X    }
X    return NumberToWrite;
X}
X
Xt_WordPlace *
XGetWordPlaces(WID, Block, BlockLength, NextOffset, NumberExpected)
X    t_WID WID;
X    char *Block;
X    unsigned BlockLength;
X    unsigned long NextOffset;
X    unsigned long NumberExpected;
X{
X    unsigned char *q = (unsigned char *) Block;
X    unsigned long L;
X    t_WordPlace *Places = (t_WordPlace *) 0;
X    long CurrentPlace = 0;
X    t_FID LastFID = (t_FID) 0;
X    unsigned LastBlock = 0L;
X
X#ifdef ASCIITRACE
X    if (AsciiTrace > 10) {
X	fprintf(stderr,
X		"GetWordPlaces WID %ld Blk %ld len %d next %ld No. %ld\n",
X		    WID, Block, BlockLength, NextOffset, NumberExpected);
X			
X    }
X#endif
X
X    if (Block == (char *) 0) {
X	fprintf(stderr, "GetWordPlaces Error %lu\n", WID);
X    }
X
X    /*NOSTRICT*/
X    Places = (t_WordPlace *) emalloc(sizeof(t_WordPlace) * NumberExpected);
X
X    while (CurrentPlace < NumberExpected) {
X	unsigned short NumberOfRepeats;
X	unsigned char Uchar;
X	t_FID FID;
X
X	/** First get the FID.  The bottom bit of the number stored
X	 ** actually determines whether there are multiple Places
X	 ** stored here for the same FID.
X	 **/
X	L = GetLong(WID, &q, &Block, &BlockLength, &NextOffset);
X	FID = (L >> 1) + LastFID; /* Get rid of flag bit */
X	LastFID = FID;
X	NumberOfRepeats = (L & 01L) ? 
X		GetByte(WID, &q, &Block, &BlockLength, &NextOffset) : 1;
X
X	/* Quick Sanity check */
X	switch (NumberOfRepeats) {
X	case 0L:
X	    fprintf(stderr, "Warning: no entries! for FID %lu\n", FID);
X	case 1L:
X	    if (L & 01L) {
X		fprintf(stderr, "Warning: FID %lu repeated 1 times!\n", FID);
X	    }
X	}
X
X	LastBlock = 0L;
X	if (CurrentPlace + NumberOfRepeats > NumberExpected) {
X	    fprintf(stderr,
X		"Entry for file %lu WID %ld has %lu matches, not %lu\n",
X		FID, WID, CurrentPlace + NumberOfRepeats + 1, NumberExpected);
X	    NumberOfRepeats = NumberExpected - CurrentPlace;
X	}
X	for (; NumberOfRepeats != 0; --NumberOfRepeats) {
X	    Places[CurrentPlace].FID = FID;
X	    /* block number */
X	    L = GetLong(WID, &q, &Block, &BlockLength, &NextOffset);
X	    LastBlock += L;
X	    Places[CurrentPlace].BlockInFile = LastBlock;
X	    Uchar = GetByte(WID, &q, &Block, &BlockLength, &NextOffset);
X	    Places[CurrentPlace].WordInBlock = (Uchar >> 1);
X
X	    /* Sanity check: */
X	    if (CurrentPlace > 0 && Places[CurrentPlace].FID ==
X					Places[CurrentPlace - 1].FID) {
X		if (Places[CurrentPlace - 1].BlockInFile ==
X				    Places[CurrentPlace].BlockInFile) {
X		    if ( Places[CurrentPlace - 1].WordInBlock >=
X				Places[CurrentPlace].WordInBlock) {
X			fprintf(stderr,
X    "%s: warning: %dth match for word %ld (FID %ld) WIB goes backwards!\n",
X				    progname, CurrentPlace, WID, FID);
X		    }
X		} else if (Places[CurrentPlace - 1].BlockInFile >
X				Places[CurrentPlace].BlockInFile) {
X			fprintf(stderr,
X	"%s: warning: %dth match for word %ld (FID %ld) BIF goes backwards!\n",
X				    progname, CurrentPlace, WID, FID);
X		}
X	    }
X	    /* end of sanity test */
X
X	    if (Uchar & 01) { /* use if, not ?:, for profiler */
X		Places[CurrentPlace].Flags = 
X		    GetByte(WID, &q, &Block, &BlockLength, &NextOffset);
X	    } else {
X		Places[CurrentPlace].Flags = 0;
X	    }
X
X	    /* If there are flags, there still might not be a separate
X	     * entry for the number of preceding skipped bytes.
X	     */
X	    if (Places[CurrentPlace].Flags & WPF_HASSTUFFBEFORE) {
X		Places[CurrentPlace].StuffBefore = 
X		    GetByte(WID, &q, &Block, &BlockLength, &NextOffset);
X	    } else {
X		Places[CurrentPlace].StuffBefore = 1;
X	    }
X	    ++CurrentPlace;
X	}
X    }
X    return Places;
X}
X
Xvoid
XFillWordPlaces(WordInfo)
X    t_WordInfo *WordInfo;
X{
X    WordInfo->WordPlaces = GetWordPlaces(
X	    WordInfo->WID,
X	    WordInfo->WordPlaceStart,
X	    WIDBLOCKSIZE - (WordInfo->WordPlaceStart - WordInfo->DataBlock),
X	    WordInfo->Offset,
X	    WordInfo->NumberOfWordPlaces
X    );
X}
X
Xstatic void
XFlushBlock(Block, NextOffset, LastStart, WID)
X    char *Block;
X    unsigned long *NextOffset, *LastStart;
X    t_WID WID;
X{
X    if (*LastStart && Block) {
X	/*NOSTRICT*/
X	t_BlockHeader *BH = (t_BlockHeader *) Block;
X
X	BH->NextOffset = ((*NextOffset) / BLOCKSIZE) << 1;
X	WriteBlock(*LastStart, Block);
X    }
X    *LastStart = *NextOffset = 0L;
X}
X
X/* This is simply to help keep the source lines getting too long! */
Xtypedef unsigned char *UCP;
X
Xint
X_PutByte(Byte, WID, sp, Blockp, BlockLength, LastStart, NextBlock)
X    unsigned char Byte;
X    t_WID WID;
X    unsigned char **sp;
X    unsigned char **Blockp;
X    unsigned *BlockLength;
X    unsigned long *NextBlock;
X    unsigned long *LastStart; /* for writing the linked list */
X{
X    t_BlockHeader *BH;
X
X    if (*sp - (*Blockp) >= (*BlockLength)) {
X	if (!*NextBlock && !*LastStart) return -1; /* only do the 1st block */
X	if (*NextBlock == (unsigned long) 0) {
X	    *NextBlock = FindFreeBlock(WID);
X	}
X	/* Complete the information in the previous block, if required */
X	if (*LastStart) {
X	    BH = (t_BlockHeader *) (*Blockp);
X	    BH->NextOffset = ((*NextBlock) / BLOCKSIZE) << 1;
X	    /* Write the old block */
X	    WriteBlock(*LastStart, *Blockp);
X	    *LastStart = 0L;
X	}
X	*LastStart = (*NextBlock);
X	*BlockLength = BLOCKSIZE;
X	(*NextBlock) = 0L;
X	*Blockp = pblockBuffer; /* Use static (to this file) data buffer */
X	/*NOSTRICT*/
X	BH = (t_BlockHeader *) (*Blockp);
X	(*sp) = (UCP) BH->Data;
X    }
X    **sp = Byte;
X    (*sp)++;
X    return 0;
X}
X
Xunsigned char
X_GetByte(WID, sp, Blockp, BlockLength, NextBlock)
X    t_WID WID;
X    unsigned char **sp;
X    unsigned char **Blockp;
X    unsigned long *BlockLength;
X    unsigned long *NextBlock;
X{
X    t_BlockHeader *BH;
X
X    if (*sp - (*Blockp) >= (*BlockLength)) {
X	if (*NextBlock == (unsigned long) 0) {
X	    (*Blockp) = (*sp) = (UCP) 0;
X	    fprintf(stderr, "Database Corrupt, Next is zero\n");
X	    return 0;
X	} else {
X	    (*sp) = (*Blockp) = (UCP) ReadBlock(*NextBlock);
X	}
X	/* Check the new block */
X	if ((*Blockp) == (UCP) 0) {
X	    fprintf(stderr, "Database corrupt, %lu, sigh.\n", *NextBlock);
X	    exit(1);
X	}
X	/*NOSTRICT*/
X	BH = (t_BlockHeader *) (*Blockp);
X	*NextBlock = (BH->NextOffset >> 1) * BLOCKSIZE;
X	*BlockLength = BLOCKSIZE;
X	(*sp) = (UCP) BH->Data;
X    }
X    return *((*sp)++);
X}
X
X/* PutLong -- write a long number in compressed/abbreviated form into a
X * string.  If this moves the string pointer beyond the block, write out
X * the block and start a new one.  In that case, the number written may well
X * span the gap between the blocks.  We use an overflow buffer to copy
X * the bytes (if any) that overflowed into it.
X * Then we write them at the start of the next block.
X *
X * This routine returns -1 and writes a partial number (no allocated block)
X * if *LastBlock and *NextBlock are zero.  This allows PutwOrdPlaces to be
X * called to put the WordPlaces into the WIDFILE block without writing out
X * an entire chain.
X */
Xint
XPutLong(Long, WID, sp, Blockp, BlockLength, NextBlock, LastStart)
X    unsigned long Long;
X    t_WID WID;
X    unsigned char **sp;
X    unsigned char **Blockp;
X    unsigned *BlockLength;
X    unsigned long *NextBlock;
X    unsigned long *LastStart; /* for writing the linked list */
X{
X    t_BlockHeader *BH;
X    unsigned char Buffer[sizeof(unsigned long) + 1];
X    unsigned char *Bufp = Buffer;
X    unsigned char *p;
X
X    sWriteNumber((char **) sp, Long);
X
X    if ((*sp) - (*Blockp) > (*BlockLength)) { /* gone too far! */
X	if (!*NextBlock && !*LastStart) return -1;
X	/* Save the overflow in Buffer:
X	 * the 1st 1 or more characters will fitted into the old block,
X	 * but we need them all in a lump for readnumber().
X	 * When we write the next block, we need to put the overflow
X	 * characters into the start of the next block.
X	 */
X	for (p = &(*Blockp)[*BlockLength]; p < (*sp); p++) {
X	    *Bufp++ = *p;
X	}
X	if (*NextBlock == (unsigned long) 0) {
X	    *NextBlock = FindFreeBlock(WID);
X	}
X	/* Complete the information in the previous block, if required */
X	if (*LastStart) {
X	    BH = (t_BlockHeader *) (*Blockp);
X	    BH->NextOffset = ((*NextBlock) / BLOCKSIZE) << 1;
X
X	    /* Write the old block */
X	    WriteBlock(*LastStart, *Blockp);
X	}
X	*LastStart = (*NextBlock);
X	(*NextBlock) = 0L;
X	*Blockp = pblockBuffer;
X	BH = (t_BlockHeader *) (*Blockp);
X	*BlockLength = BLOCKSIZE;
X	(*sp) = (UCP) BH->Data;
X	/* Now write the stuff from Buffer into the new block */
X	if (Bufp > Buffer) {
X	    for (p = Buffer; p < Bufp; p++) {
X		*((*sp)++) = (*p);
X	    }
X	}
X    }
X    return 0;
X}
X
X/* This is the reverse of PutLong.
X * Things are slightly complicated by the need to provide sReadNumber
X * with a contiguous copy of all of the bytes in a number that spanned
X * a gap between data blocks.
X */
Xunsigned long
XGetLong(WID, sp, Blockp, BlockLength, NextBlock)
X    t_WID WID;
X    unsigned char **sp;
X    unsigned char **Blockp;
X    unsigned *BlockLength;
X    unsigned long *NextBlock;
X{
X    unsigned char Buffer[sizeof(unsigned long) + 1];
X    long Result;
X    t_BlockHeader *BH;
X    unsigned char *NumberStart = (*sp);
X    unsigned char *p;
X
X    Result = sReadNumber(sp);
X
X    /* Now, have we fallen off the end of the block? */
X    if ((*sp) - (*Blockp) > (*BlockLength)) {
X	unsigned char *bp = Buffer;
X
X	if (*NextBlock == (unsigned long) 0) {
X	    return 0L;
X	}
X
X	/* Copy the first half of the number into the overflow buffer */
X	for (p = NumberStart; p < &(*Blockp)[*BlockLength]; p++) {
X	    *bp++ = *p;
X	}
X
X	/** Now:
X	 ** . sp is garbage, as is NumberStart, as they point at the old
X	 **   data block
X	 ** . Buffer contains the first few bytes of the number
X	 ** . we need some more bytes, but don't yet know how many, as
X	 **   this depends on the number representation
X	 **   NOTE that we must have, however, that we know that there
X	 **   are more bytes, so that we know if we need the next block.
X	 ** . bp points 1 beyond the end of the 1st half of the number.
X	 **/
X
X	(*sp) = *Blockp = (UCP) ReadBlock(*NextBlock);
X	/* Check the new block */
X	if ((*Blockp) == (UCP) 0) {
X	    fprintf(stderr, "Database corrupt, %lu, oh dear!\n", *NextBlock);
X	    exit(1);
X	}
X	BH = (t_BlockHeader *) *Blockp;
X	*NextBlock = (BH->NextOffset >> 1) * BLOCKSIZE;
X	*BlockLength = BLOCKSIZE;
X	(*sp) = (UCP) BH->Data;
X	/* Fill up the buffer from the new block */
X	for (p = bp; p - Buffer < sizeof(Buffer) - 1; p++) {
X	    *p = *(*sp)++;
X	}
X	/* read the number from the buffer */
X	(*sp) = Buffer;
X	/* Try that number again... */
X	Result = sReadNumber(sp);
X	/* Now put sp where it should be.  Part of the buffer was
X	 * from the old block...
X	 */
X	(*sp) = (UCP) BH->Data + ((*sp) - bp);
X    }
X    return Result;
X}
X
Xvoid
XWriteBlock(Block, Data)
X    unsigned long Block;
X    char *Data;
X{
X    if (DataFile < 0) {
X	if ((DataFile = open(DataBase, O_RDWR|O_CREAT, 0666)) < 0) {
X	    fprintf(stderr, "Can't open database");
X	    perror(DataBase);
X	    exit(1);
X	}
X    }
X
X    if (lseek(DataFile, (long) Block, 0) < 0) {
X	perror("lseek");
X	exit(1);
X    }
X
X    if (write(DataFile, Data, BLOCKSIZE) != BLOCKSIZE) {
X	extern int errno;
X	int e = errno;
X	fprintf(stderr,
X	"Warning: %s: E%d -- couldn't write %ld bytes at block %lu: ",
X					    DataBase, e, BLOCKSIZE, Block);
X	errno = e;
X	perror("write");
X    }
X    return;
X}
X
Xunsigned long
XFindFreeBlock(WID)
X    t_WID WID;
X{
X    char Data[BLOCKSIZE];
X    t_BlockHeader *HP;
X    unsigned long Here = 0L;
X    unsigned long There = 0L;
X
X    if (DataFile < 0) {
X	if ((DataFile = open(DataBase, O_RDWR|O_CREAT, 0666)) < 0) {
X	    fprintf(stderr, "Can't open database");
X	    perror(DataBase);
X	    exit(1);
X	}
X    }
X
X    do {
X	(void) lseek(DataFile, (long) Here, 0);
X
X	if (read(DataFile, Data, BLOCKSIZE) < BLOCKSIZE) {
X	    HP = (t_BlockHeader *) 0;
X	    There = 0L;
X	    Here = BLOCKSIZE;
X	} else {
X	    HP = (t_BlockHeader *) Data;
X	}
X
X	/* It is free if it has no pairs (or if the block has
X	 * never been written to, of course)
X	 * Freed blocks are given a NextOffset or'd with 1.
X	 */
X	if ((!HP || (HP->NextOffset & 01L)) && Here != 0L) {
X	    unsigned long NextFree = HP ? HP->NextOffset : 0L;
X
X	    if (NextFree) {
X		/* make it suitable for use with lseek() */
X		NextFree = (NextFree >> 1) * BLOCKSIZE;
X	    }
X
X	    /* Make the previous block in the chain point to the one
X	     * after this one in the free list, instead of this one.
X	     */
X
X	    if (HP) {
X		(void) lseek(DataFile, (long) There, 0);
X		(void) read(DataFile, Data, BLOCKSIZE);
X	    				/* Well, we read it a second ago!*/
X	    }
X	    HP = (t_BlockHeader *) Data;
X	    HP->NextOffset = (NextFree / BLOCKSIZE) << 1;
X	    (void) lseek(DataFile, (long) There, 0);
X	    (void) write(DataFile, Data, BLOCKSIZE);
X
X	    goto GotOne;
X	}
X
X	There = Here;
X	Here = (HP->NextOffset >> 1) * BLOCKSIZE;
X
X    } while (Here != 0L);
X
X    if ((Here = lseek(DataFile, 0L, 2)) == 0L) {
X	Here = BLOCKSIZE;
X    }
X
XGotOne:
X    /* Mark it in use */
X    HP->NextOffset = 0L;
X    (void) lseek(DataFile, (long) Here, 0);
X    (void) write(DataFile, Data, BLOCKSIZE);
X
X#ifdef ASCIITRACE
X    if (AsciiTrace > 10) {
X	fprintf(stderr, "\tFindFree --> %lu\n", (Here == 0L) ? BLOCKSIZE : Here);
X    }
X#endif
X    return (Here == 0L) ? BLOCKSIZE : Here;
X}
X
X/* Get a single disk block from the database */
Xchar *
XReadBlock(Offset)
X    unsigned long Offset;
X{
X    static char Buffer0[BLOCKSIZE];
X
X    if (DataFile < 0) {
X	if ((DataFile = open(DataBase, O_RDWR|O_CREAT, 0664)) < 0) {
X	    return (char *) 0;
X	}
X    }
X
X#ifdef ASCIITRACE
X    if (AsciiTrace > 15) {
X	fprintf(stderr, "===ReadBlock(%ld)\n", Offset);
X    }
X#endif
X
X    if (lseek(DataFile, (long) Offset, 0) < 0) {
X	return (char *) 0;
X    }
X
X    /* Now we are in the right place */
X
X    if (read(DataFile, Buffer0, BLOCKSIZE) != BLOCKSIZE) {
X	return (char *) 0;
X    }
X
X    /* Now that we have the 1st block of the data.. */
X    return Buffer0;
X}
X
Xint
XWordPlaceCompare(F1, F2)
X    t_WordPlace *F1, *F2;
X{
X    if (F1->FID != F2->FID) return F1->FID - F2->FID;
X    if (F1->BlockInFile != F2->BlockInFile) {
X	return F1->BlockInFile - F2->BlockInFile;
X    }
X    return F1->WordInBlock - F2->WordInBlock;
X}
X
Xint
XRemoveFIDFrompblock(FID, pblock)
X    t_FID FID;
X    t_pblock *pblock;
X{
X    unsigned int ThisPair;
X    int TotalWordPlacesToMove = 0;
X
X    /* Mark unwanted pairs as deleted */
X    for (ThisPair = 0; ThisPair < pblock->NumberOfWordPlaces; ThisPair++) {
X	if (pblock->WordPlaces[ThisPair].FID == FID) {
X	    pblock->WordPlaces[ThisPair].FID = 0L;
X	    ++TotalWordPlacesToMove;
X	} 
X    }
X    if (!TotalWordPlacesToMove) return 0;
X
X    if (pblock->NumberOfWordPlaces > 1) {
X	SortWordPlaces(pblock->NumberOfWordPlaces, pblock->WordPlaces);
X    }
X
X    /* shuffle up any FID=0 pairs */
X    {
X	register t_WordPlace *Src, *Dest;
X	register int n = TotalWordPlacesToMove;
X
X	Src = pblock->WordPlaces;
X	Dest = &pblock->WordPlaces[TotalWordPlacesToMove];
X
X	while (n-- > 0) {
X	    *Src++ = *Dest++;
X	}
X    }
X
X    return TotalWordPlacesToMove;
X}
X
Xvoid
XSortWordPlaces(NumberOfWordPlaces, WordPlaces)
X    unsigned long NumberOfWordPlaces;
X    t_WordPlace *WordPlaces;
X{
X    /* sort the list */
X    if (NumberOfWordPlaces > 2) {
X	qsort(WordPlaces, (int) NumberOfWordPlaces,
X		sizeof(t_WordPlace), WordPlaceCompare);
X    } else {
X	/* don't call qsort in the trivial cases... */
X	if (NumberOfWordPlaces == 2) {
X	    if (WordPlaceCompare(&WordPlaces[0], &WordPlaces[1]) > 0) {
X		t_WordPlace tmp;
X		tmp = WordPlaces[0]; /* structure copy! */
X		WordPlaces[0] = WordPlaces[1];
X		WordPlaces[1] = tmp;
X	    }
X	}
X    }
X}
X
Xvoid
XDeleteWordPlaces(FirstBlock, WID)
X    unsigned long FirstBlock;
X    t_WID WID;
X{
X    char Data[BLOCKSIZE];
X    char BlockZero[BLOCKSIZE];
X    t_BlockHeader *H;
X    t_BlockHeader *HZERO; /* for the free list*/
X    unsigned long ThisBlock;
X    unsigned long Here;
X
X    if (!FirstBlock) {
X	return;
X    }
X
X    if (DataFile < 0) {
X	if ((DataFile = open(DataBase, O_RDWR|O_CREAT, 0666)) < 0) {
X	    fprintf(stderr, "Can't open database");
X	    perror(DataBase);
X	    exit(1);
X	}
X    }
X
X    (void) lseek(DataFile, 0L, 0);
X
X    if (read(DataFile, BlockZero, BLOCKSIZE) <= 0) {
X	return; /* nothing to delete */
X    }
X
X    /* Block zero is always the head of the free list */
X    HZERO = (t_BlockHeader *) BlockZero;
X
X    Here = FirstBlock;
X
X    do {
X	if (lseek(DataFile, (long) Here, 0) < 0) {
X	    fprintf(stderr, "Aaaargh:");
X	    perror("lseek");
X	    exit(1);
X	}
X
X	if (read(DataFile, Data, BLOCKSIZE) <= 0) {
X	    return; /* it's not there to delete */
X	}
X
X	H = (t_BlockHeader *) Data;
X
X	ThisBlock = Here; /* so we can write the changed block */
X	Here = (H->NextOffset >> 1) * BLOCKSIZE;
X	if (Here == 0L) { /* next block... */
X	    H->NextOffset = HZERO->NextOffset;
X	}
X	H->NextOffset |= 01L; /* mark it as free */
X
X	/* Write out the newly freed block, having saved its Next pointer
X	 */
X	WriteBlock(ThisBlock, Data);
X    } while (Here);
X
X    /* Now make the free list point to the start of the new chain;
X     * the end of the newly added chain points to the start of the
X     * original chain.  This means that the new blocks are still in the
X     * Unix buffer cache, so this helps performance.  I hope.
X     */
X    HZERO->NextOffset = (FirstBlock / BLOCKSIZE) << 1;
X    (void) lseek(DataFile, 0L, 0);
X    (void) write(DataFile, BlockZero, BLOCKSIZE);
X
X    return;
X}
X
Xvoid
XDeletepblock(pblock)
X    t_pblock *pblock;
X{
X    char Data[BLOCKSIZE];
X    char BlockZero[BLOCKSIZE];
X    t_BlockHeader *H;
X    t_BlockHeader *HZERO; /* for the free list*/
X    unsigned long ThisBlock;
X    unsigned long Here;
X
X    if (!pblock || !pblock->ChainStart) {
X	return;
X    }
X
X    if (DataFile < 0) {
X	if ((DataFile = open(DataBase, O_RDWR|O_CREAT, 0666)) < 0) {
X	    fprintf(stderr, "Can't open database");
X	    perror(DataBase);
X	    exit(1);
X	}
X    }
X
X    if (read(DataFile, BlockZero, BLOCKSIZE) <= 0) {
X	return; /* nothing to delete */
X    }
X
X    /* Block zero is always the head of the free list */
X    HZERO = (t_BlockHeader *) BlockZero;
X
X    Here = pblock->ChainStart;
X
X    do {
X	if (lseek(DataFile, (long) Here, 0) < 0) {
X	    fprintf(stderr, "Aaaargh:");
X	    perror("lseek");
X	    exit(1);
X	}
X
X	if (read(DataFile, Data, BLOCKSIZE) <= 0) {
X	    return; /* it's not there to delete */
X	}
X
X	H = (t_BlockHeader *) Data;
X
X	ThisBlock = Here; /* so we can write the changed block later */
X	if ((Here = ((H->NextOffset >> 1) * BLOCKSIZE)) == 0L) {
X	    H->NextOffset = HZERO->NextOffset;
X	}
X	H->NextOffset |= 01L; /* mark it as free */
X
X	/* Write out the newly freed block, having saved its Next pointer
X	 */
X	WriteBlock(ThisBlock, Data);
X    } while (Here);
X
X    /* Now make the free list point to the start of the new chain;
X     * the end of the newly added chain points to the start of the
X     * original chain.  This means that the new blocks are still in the
X     * Unix buffer cache, so this helps performance.  I hope.
X     */
X    HZERO->NextOffset = (pblock->ChainStart / BLOCKSIZE) << 1;
X    (void) lseek(DataFile, 0L, 0);
X    write(DataFile, BlockZero, BLOCKSIZE);
X
X    return;
X}
@@@End of lq-text/src/liblqtext/pblock.c
echo end of part 05
-- 
Liam R. E. Quin,  lee@sq.com, SoftQuad Inc., Toronto, +1 (416) 963-8337