[news.software.b] latest dbz

zeeff@b-tech.ann-arbor.mi.us (Jon Zeeff) (11/12/89)
Here is the latest dbz.  It's faster and produces smaller files than dbm.
The INCORE speedups by Mark Moraes seem to make a big difference for
expire.  C News users need this version to handle the rfc822ized message
ids.  If you don't mind waiting, I expect that Henry Spencer will release
a version with additional improvements.  

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of shell archive."
# Contents:  dbz.c dbz.h
# Wrapped by root@b-tech on Sat Nov 11 20:22:21 1989
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f dbz.c -a "${1}" != "-c" ; then 
  echo shar: Will not over-write existing file \"dbz.c\"
else
echo shar: Extracting \"dbz.c\" \(10694 characters\)
sed "s/^X//" >dbz.c <<'END_OF_dbz.c'
X/*
X
Xdbz.c  V1.9
X
XCopyright 1988 Jon Zeeff (zeeff@b-tech.ann-arbor.mi.us)
XYou can use this code in any manner, as long as you leave my name on it
Xand don't hold me responsible for any problems with it.
X
XHacked on by gdb@ninja.UUCP (David Butler); Sun Jun  5 00:27:08 CDT 1988
X
XVarious improvments + INCORE by moraes@ai.toronto.edu (Mark Moraes)
X
XThese routines replace dbm as used by the usenet news software
X(it's not a full dbm replacement by any means).  It's fast and
Xsimple.  It contains no AT&T code.
X
XBSD sites will notice some savings in disk space.  Sys V sites without 
Xdbm will notice much faster operation.  Note that I find that dbz is 
X2-18 times faster than dbm.  
X
XNote: if you change INDEX_SIZE, you have to run mkdbm to udate the index
Xfile.
X
XThis code relies on the fact that news stores a pointer to the history 
Xfile as the dbm data.  It doesn't store another copy of the key like 
Xdbm does so it saves disk space.  Make sure that the data file exists 
Xwith the proper name.  All you can do is fetch() and store() data.  
X
XJust make news with the DBM option and link with dbz.o.
X
X*/
X
X/*
X *  Set this to the something several times larger than the maximum # of
X *  lines in a history file.  It should be a prime number. 
X */
X#define INDEX_SIZE  120011 /* Use 300007L unless you need to save space */
X
X#define SYSV			/* just to keep lint happy */
X/* #define BNEWS		/* B news needs lowercase */
X#define CNEWS			/* C News needs rfc822izing */
X
X#define BLKSIZE	1023
X
X/* 
X Define INCORE if you want to hold the index file entirely in 
X memory.  It should speed things up considerably if you are doing many 
X db operations per invocation (eg, news expire - yes, unbatching - no) 
X and shouldn't cost much for VM machines.  Note that this will work 
X even if there isn't enough memory to hold the index file - it'll just 
X revert to the slow old scheme, so it doesn't hurt to define it.  Do 
X not use this on 16 bit int machines.   Make sure that dbmclose()
X gets called.
X*/ 
X
X/* #define INCORE   /* do it on the command line, not here */
X
X/* 3rd argument to read() is unsigned in SYSV */
X#ifdef SYSV
X# define io_int	unsigned
X#else
X# define io_int int
X#endif
X
X#ifndef NULL
X#define NULL 0
X#endif
X
X#define DEBUG if (0) (void) printf	          /* for no debugging */
X/* #define DEBUG if (1) (void) printf		  /* for debugging */
X/* #define DEBUG if (debug) (void) printf        /* for "rn" type debugging */
X
X/* Note: let the optimizer remove any if(0) or if(1) code above */
X
X#include <fcntl.h>
X#include <string.h>
X#include <ctype.h>
X
Xextern long lseek();
Xextern char *malloc();
Xextern void free();
X
Xstatic long get_ptr();
Xstatic void lcase();
Xstatic int put_ptr();
Xstatic long hash();
Xstatic void crcinit();
X
X#include "dbz.h"
X
Xstatic int Data_fd;		/* points to /usr/lib/news/[n]history */
Xstatic int Index_fd = -1;	/* points to /usr/lib/news/[n]history.pag */
X#ifdef INCORE
Xstatic long *core_index = (long *) 0;
X#endif
X
Xint 
Xdbminit(name)
Xchar *name;
X{
X	char *index_file;	/* index file name */
X#ifdef INCORE
X        io_int nbytes;
X#endif
X
X	if (Index_fd >= 0) {
X		DEBUG("dbminit: dbminit aready called once\n");
X		return(-1);    /* init already called once */
X	}
X
X#define PAGEXT ".pag"
X	if ((index_file =
X	 malloc((unsigned) (1 + strlen(name) + sizeof(PAGEXT)))) == (char *)0)
X		return -1;
X	
X	(void) strcpy(index_file, name);
X	(void) strcat(index_file, PAGEXT);
X
X	/* if we don't have permission to open the pag file for read/write */
X	/* then we may never attempt a store().  If we do, it will fail */
X
X	if ((Index_fd = open(index_file, O_RDWR)) < 0 &&
X	    (Index_fd = open(index_file, O_RDONLY)) < 0 &&
X	    (Index_fd = open(index_file, O_CREAT | O_RDWR, 0644)) < 0) 
X	{
X		DEBUG("dbminit: Index_file open failed\n");
X		return(-1);
X	}
X
X#ifdef INCORE
X	nbytes = (INDEX_SIZE * sizeof(long));
X	core_index = (long *) malloc((unsigned) nbytes);
X	if (core_index != (long *) NULL) {
X		if( read(Index_fd, (char *) core_index, nbytes)
X		 == -1) {
X			DEBUG("dbminit: read failed\n");
X			return(-1);
X		}
X	}
X#endif /* INCORE */
X
X	if ((Data_fd = open(name, O_RDONLY)) < 0) {
X
X		/* The only time the data file does not exist is when */
X		/* expire is running (as "news") and it is ok to create it */
X		/* If we don't have "permission" the create will fail, too */
X
X		if (close(creat(name, 0644)) < 0 ||
X		    (Data_fd = open(name, O_RDONLY)) < 0) {
X			DEBUG("dbminit: Data_file open failed\n");
X			(void) close(Index_fd);
X			free(index_file);
X			Index_fd = -1;
X			return(-1);
X		}
X	}
X
X	crcinit();	/* initialize the crc table */
X	free(index_file);
X	DEBUG("dbminit: succeeded\n");
X	return(0);
X}
X
Xint
Xdbmclose()
X{
X	int err = 0;
X	
X	if (Index_fd >= 0) {
X#ifdef INCORE
X		if (core_index != (long *) NULL) {
X			int count;
X			io_int nbytes = INDEX_SIZE * sizeof(long);
X			
X			count = write(Index_fd, (char *) core_index, nbytes);
X			if (count != nbytes)
X				err++;
X			(void) free((char *) core_index);
X			core_index = (long *) NULL;
X		}
X#endif
X		err += close(Index_fd);
X		Index_fd = -1;
X		err += close(Data_fd);
X	}
X	DEBUG("dbmclose: %s\n", err == 0 ? "succeeded" : "failed");
X	return (err == 0 ? 0 : -1);
X}
X
X/* get an entry from the database */
X
Xdatum
Xfetch(key)
Xdatum key;
X{
X	register long index_ptr;
X	long index_size = INDEX_SIZE;
X        char buffer[BLKSIZE + 1];
X	static long data_ptr;
X	datum output;
X	int keysize;
X
X	DEBUG("fetch: (%s)\n", key.dptr);
X	output.dptr = (char *)0;
X	output.dsize = 0;
X
X	for (index_ptr = hash(key.dptr, key.dsize);
X	    --index_size && (data_ptr = get_ptr(index_ptr)) >= 0L;
X	    index_ptr = ++index_ptr % INDEX_SIZE) {
X
X		if (lseek(Data_fd, data_ptr, 0) == -1L) {
X			DEBUG("fetch: seek failed\n");
X			return output;
X		}
X		/* Key had better be less than BLKSIZE */
X		keysize = key.dsize;
X		if (keysize >= BLKSIZE) {
X			keysize = BLKSIZE;
X			DEBUG("keysize is %d - truncated to %d\n", key.dsize,
X			 BLKSIZE);
X		}
X		if (read(Data_fd, buffer, (io_int) keysize) != keysize) {
X			DEBUG("fetch: read failed\n");
X			return output;
X		}
X		
X		buffer[keysize] = '\0';
X#ifdef BNEWS
X		/* key should be lcase version of article id and no tab */
X		lcase(buffer, key.dsize);
X		if (buffer[key.dsize - 1] == '\t') 
X			buffer[key.dsize - 1] = '\0';
X#endif
X#ifdef CNEWS
X		if (buffer[keysize - 1] == '\t') 
X		   buffer[keysize - 1] = 0;      /* get rid of the tab */
X		rfc822ize(buffer);
X#endif
X
X		DEBUG("fetch: buffer (%s) looking for (%s) size = %d\n", 
X                       buffer,key.dptr,keysize);
X
X		if (strncmp(key.dptr, buffer, keysize - 1) == 0) {
X			/* we found it */
X			output.dptr = (char *)&data_ptr;
X			output.dsize = sizeof(long);
X			DEBUG("fetch: successful\n");
X			return(output);
X		}
X	}
X
X	/* we didn't find it */
X
X	DEBUG("fetch: failed\n");
X	return(output);
X}
X
X/* add an entry to the database */
Xstore(key, data)
Xdatum key;
Xdatum data;
X{
X	/* lint complains about a possible pointer alignment problem here */
X	/* it is not a problem because dptr is the first element of a */
X	/* structure and should be aligned for anything */
X
X	DEBUG("store: (%s, %ld)\n", key.dptr, *((long *)data.dptr));
X	return(put_ptr(hash(key.dptr, key.dsize), *((long *)data.dptr)));
X}
X
X/* get a data file pointer from the specified location in the index file */
X
Xstatic long
Xget_ptr(index_ptr)
Xlong index_ptr;
X{
X	long data_ptr;
X
X	DEBUG("get_ptr: (%ld)\n", index_ptr);
X
X#ifdef INCORE
X	/* If we have the index file in memory, use it */
X	if (core_index != (long *) NULL) {
X		DEBUG("get_ptr: (incore) succeeded\n");
X		return (core_index[index_ptr] - 1);
X	}
X#endif
X		
X	/* seek to where it should be */
X	if (lseek(Index_fd, (long)(index_ptr * sizeof(long)) ,0) == -1L) {
X		DEBUG("get_ptr: seek failed\n");
X		return -1L;
X	}
X
X	/* read it */
X	if (read(Index_fd, (char *)&data_ptr, sizeof(long)) != sizeof(long) ||
X	    data_ptr == 0L) {
X		DEBUG("get_ptr: failed\n");
X		return(-1L);
X	}
X
X	DEBUG("get_ptr: succeeded\n");
X	return(--data_ptr);	/* remove the offset we added in put_ptr */
X}
X
X/* put a data file pointer into the specified location in the index file */
X/* move down further if slots are full (linear probing) */
X
Xstatic
Xput_ptr(index_ptr, data_ptr)
Xregister long index_ptr;
Xlong data_ptr;
X{
X	long index_size = INDEX_SIZE;
X
X	/* find an empty slot */
X	while (--index_size && get_ptr(index_ptr) >= 0L) {
X		index_ptr = ++index_ptr % INDEX_SIZE;
X	}
X
X	if (index_size == 0L) {
X		DEBUG("put_ptr: hash table overflow - failed\n");
X		return(-1);
X	}
X
X#ifdef INCORE
X	/* If we have the index file in memory, use it */
X	if (core_index != (long *) NULL) {
X		core_index[index_ptr] = data_ptr + 1;
X		DEBUG("put_ptr: (incore) succeeded\n");
X		return 0;
X	}
X#endif
X		
X	/* seek to spot */
X	if (lseek(Index_fd, (long)(index_ptr * sizeof(long)), 0) == -1L) {
X		DEBUG("put_ptr: seek failed\n");
X		return -1;
X	}
X
X	++data_ptr;   /* add one so that we can use 0 as no pointer */
X
X	/* write in data */
X	if (write(Index_fd, (char *)&data_ptr, sizeof(long)) != sizeof(long)) {
X		DEBUG("put_ptr: write failed\n");
X		return(-1);
X	}
X
X	DEBUG("put_ptr: succeeded\n");
X	return(0);
X}
X
X#ifdef BNEWS
X
Xstatic void
Xlcase(s, n)
Xregister char *s;
Xregister int n;
X{
X	for (; n > 0; --n, ++s) {
X		if (isupper(*s)) *s = tolower(*s);
X	}
X}
X
X#endif
X
X/* This is a simplified version of the pathalias hashing function.
X * Thanks to Steve Belovin and Peter Honeyman
X *
X * hash a string into a long int.  31 bit crc (from andrew appel).
X * the crc table is computed at run time by crcinit() -- we could
X * precompute, but it takes 1 clock tick on a 750.
X *
X * This fast table calculation works only if POLY is a prime polynomial
X * in the field of integers modulo 2.  Since the coefficients of a
X * 32-bit polynomial won't fit in a 32-bit word, the high-order bit is
X * implicit.  IT MUST ALSO BE THE CASE that the coefficients of orders
X * 31 down to 25 are zero.  Happily, we have candidates, from
X * E. J.  Watson, "Primitive Polynomials (Mod 2)", Math. Comp. 16 (1962):
X *	x^32 + x^7 + x^5 + x^3 + x^2 + x^1 + x^0
X *	x^31 + x^3 + x^0
X *
X * We reverse the bits to get:
X *	111101010000000000000000000000001 but drop the last 1
X *         f   5   0   0   0   0   0   0
X *	010010000000000000000000000000001 ditto, for 31-bit crc
X *	   4   8   0   0   0   0   0   0
X */
X
X#define POLY 0x48000000L	/* 31-bit polynomial (avoids sign problems) */
X
Xstatic long CrcTable[128];
X
Xstatic void
Xcrcinit()
X{	register int i, j;
X	register long sum;
X
X	for (i = 0; i < 128; ++i) {
X		sum = 0L;
X		for (j = 7 - 1; j >= 0; --j)
X			if (i & (1 << j))
X				sum ^= POLY >> j;
X		CrcTable[i] = sum;
X	}
X	DEBUG("crcinit: done\n");
X}
X
Xstatic long
Xhash(name, size)
Xregister char *name;
Xregister int size;
X{
X	register long sum = 0L;
X
X	while (size--) {
X		sum = (sum >> 7) ^ CrcTable[(sum ^ (*name++)) & 0x7f];
X	}
X	DEBUG("hash: returns (%ld)\n", sum % INDEX_SIZE);
X	return(sum % INDEX_SIZE);
X}
END_OF_dbz.c
if test 10694 -ne `wc -c <dbz.c`; then
    echo shar: \"dbz.c\" unpacked with wrong size!
fi
# end of overwriting check
fi
if test -f dbz.h -a "${1}" != "-c" ; then 
  echo shar: Will not over-write existing file \"dbz.h\"
else
echo shar: Extracting \"dbz.h\" \(160 characters\)
sed "s/^X//" >dbz.h <<'END_OF_dbz.h'
X/* For Jon Zeeff's dbm-clone for news */
X
Xtypedef struct {
X	char	*dptr;
X	int		dsize;
X} datum;
X
Xextern datum	fetch();
Xextern int	dbminit(), dbmclose(), store();
END_OF_dbz.h
if test 160 -ne `wc -c <dbz.h`; then
    echo shar: \"dbz.h\" unpacked with wrong size!
fi
# end of overwriting check
fi
echo shar: End of shell archive.
exit 0
-- 
Branch Technology  <zeeff@b-tech.ann-arbor.mi.us>