[alt.sources] Problems and fixes for dbz.c

gdb@ninja.UUCP (David Butler) (05/31/88)

I took this code off alt.sources and found a couple of problems.  I have
sent the changes to the author but have not received any response.
               (Thank you Jon Zeeff for 99% of the work!!)

I feel the problems are severe enough to post what I have at this time.
If you are already running a early version of this code.  You will have to
rebuild your history.pag file after you add this.  Run "expire -R".

  The biggest problem that I found was the fetch of the stored data never
  gave you the correct offset in the history file!  This allowed duplicate
  articles to be received and the system would not honor sendme messages.

  Fetches that clash at the end of the hash table would not wrap around.

  Stores to a full hash table would corrupt an entry before failing.

  The code required all of the news programs to be setuid to the owner of
  the history.pag file or it had to be writable by all.  It now only
  opens the file read/write if a store is attempted.

I have cleaned up the code (my opinion :->) and fixed the above problems.
I also added DEBUG code for both news and rn styles.

For news (I'm running 2.11 p14) there is a skeleton localize.sh script
for SysV systems that makes the changes to add dbz.

For rn the changes are to run ./Configure.  Answer "n"o to run makedepend.
Edit "Makefile" and change the line:

obj4 = respond.o rn.o search.o sw.o term.o util.o

to:

obj4 = respond.o rn.o search.o sw.o term.o util.o dbz.o

Also edit bits.c and change this:

#ifdef DBM
#	include <dbm.h>
#endif DBM

to:

#define DBM
#ifdef DBM
#	include "dbz.h"
#endif DBM

Then do a "makedepend" and "make".

Good Luck

David Butler
gdb@ninja.UUCP
============================ Cut here and feed to /bin/sh ====================
: Run this shell script with "sh" not "csh"
# contents:
# news/localize.sv
# news/dbz.c
# rn/dbz.h
# rn/dbz.c
PATH=/bin:/usr/bin:
export PATH
all=FALSE
if [ $1x = -ax ]; then
	all=TRUE
fi
echo 'Extracting news/localize.sv'
sed 's/^X//' <<'//go.sysin dd *' >news/localize.sv
rm -f Makefile
cp Makefile.dst Makefile
chmod u+w Makefile
ed - Makefile  <<'EOF'
g/^#USG /s///
g/^#V7 /d
g/^#VMS /d
g/^#BSD4_[123] /d
g/#NOTVMS/s/#NOTVMS.*//
g/^#NNTP /d
g/^UUXFLAGS/s/-z/-gR/
g/^CFLAGS/s/-DUSG/-DUSG -DDBM/
g/^IOBJECTS/s/=/= dbz.o/
g/^ISRCS/s/=/= dbz.c/
g/^ROBJECTS/s/=/= dbz.o/
g/^RSRCS/s/=/= dbz.c/
g/^VOBJECTS/s/=/= dbz.o/
g/^VSRCS/s/=/= dbz.c/
g/^EXPOBJS/s/=/= dbz.o/
g/^ESRCS/s/=/= dbz.c/
w
q
EOF

echo "\n\c" >>Makefile
echo "dbz.o: dbz.c" >>Makefile
echo "\t$(CC) $(CFLAGS) -c dbz.c" >>Makefile

rm -f defs.h
cp defs.dist defs.h
chmod u+w defs.h
ed - defs.h <<'EOF'
X/#define TMAIL/s;ucb/Mail;bin/mailx;
X/#define UNAME/s;^/\* ;;
X/#define DOXREFS/s;^/\* ;;
w
q
EOF
//go.sysin dd *
made=TRUE
if [ $made = TRUE ]; then
	chmod 640 news/localize.sv
	echo -n '	'; ls -ld news/localize.sv
fi
echo 'Extracting news/dbz.c'
sed 's/^X//' <<'//go.sysin dd *' >news/dbz.c
X/*
dbz.c  V1.3

Copyright 1988 Jon Zeeff (umix!b-tech!zeeff)
You can use this code in any manner, as long as you leave my name on it
and don't hold me responsible for any problems with it.

For news:
Hacked by gdb@ninja.UUCP (David Butler); Sun May 29 11:19:06 CDT 1988

These routines replace dbm as used by the usenet news software
(it's not a full dbm replacement by any means).  It's fast and
simple.

BSD sites will notice some savings in disk space.  Sys V sites without
dbm will notice much faster operation.

Note: .pag files created by version 1.0 need to be recreated before
      using this version.

This code relies on the fact that news stores a pointer to the history
file as the dbm data.  It doesn't store another copy of the key like
dbm does so it saves disk space.  All you can do is fetch() and
store() data.

Just make news with the dbm option and link with dbz.o.
*/

X/*
   Set this to the something several times larger than the maximum # of
   lines in a history file.  It should be a prime number.
*/
#define INDEX_SIZE 99991L

X/*#include <sys/types.h>*/
X/*#include <sys/stat.h>*/
#include <fcntl.h>
#include <string.h>
#include <memory.h>
#include <ctype.h>

long lseek();

typedef struct {
	char *dptr;
	int dsize;
} datum;

static char Buffer[1024];	/* used for fetch returns */
static int Data_fd;
static char Index_file[512];	/* index file name */
static int Index_fd = -1;
static int Index_for_write = 0;	/* changed when the file is open for write */

dbminit(name)
char *name;
{
	void crcinit();

	if (Index_fd >= 0) {
#ifdef DEBUG
		printf("dbminit: Index_file already open\n");
#endif
		return(-1);    /* init already called once */
	}

	Data_fd = open(name, O_RDONLY);
	if (Data_fd < 0) {
#ifdef DEBUG
		printf("dbminit: Data_file open failed\n");
#endif
		return(-1);
	}

	strcpy(Index_file, name);
	strcat(Index_file, ".pag");
	Index_fd = open(Index_file, O_RDONLY);
	if (Index_fd < 0) {
#ifdef DEBUG
		printf("dbminit: Index_file open failed\n");
#endif
		return(-1);
	}

	crcinit();	/* initialize the crc table */
#ifdef DEBUG
	printf("dbminit: succeeded\n");
#endif
	return(0);
}

dbmclose()
{
	if (Index_fd >= 0) {
		close(Index_fd);
		Index_fd = -1;
		close(Data_fd);
	}
#ifdef DEBUG
	printf("dbmclose: succeeded\n");
#endif
	return(0);
}

X/* get an entry from the database */
datum
fetch(key)
datum key;
{
	long index_ptr;
	long index_size = INDEX_SIZE;
	long data_ptr;
	datum output;
	long hash();
	long get_ptr();
	void lcase();

#ifdef DEBUG
	printf("fetch: (%s)\n", key.dptr);
#endif
	for (index_ptr = hash(key.dptr, key.dsize);
	    index_size-- && (data_ptr = get_ptr(index_ptr)) > 0L;
	    index_ptr = ++index_ptr % INDEX_SIZE) {

		lseek(Data_fd, data_ptr, 0);
		read(Data_fd, Buffer, (unsigned)key.dsize);

		/* key should be lcase version of article id and no tab */

		(void)lcase(Buffer, key.dsize);
		if (Buffer[key.dsize - 1] == '\t') {
			Buffer[key.dsize - 1] = '\0';
		}
#ifdef DEBUG
		printf("fetch: Buffer (%s)\n", Buffer);
#endif
		if (memcmp(key.dptr, Buffer, (int)key.dsize) == 0) {
			/* we found it */
			memcpy(Buffer, (char *)&data_ptr, sizeof(long));
			output.dptr = Buffer;
			output.dsize = sizeof(long);
#ifdef DEBUG
			printf("fetch: successful\n");
#endif
			return(output);
		}
	}

	/* we didn't find it */

	output.dptr = (char *)0;
	output.dsize = 0L;
#ifdef DEBUG
	printf("fetch: failed\n");
#endif
	return(output);
}

X/* add an entry to the database */
store(key, data)
datum key;
datum data;
{
#ifdef DEBUG
	printf("store: (%s, %ld)\n", key.dptr, *((long *)data.dptr));
#endif
	return(put_ptr(hash(key.dptr, key.dsize), *((long *)data.dptr)));
}

X/* get a data file pointer from the specified location in the index file */
static long
get_ptr(index_ptr)
long index_ptr;
{
	long data_ptr;

#ifdef DEBUG
	printf("get_ptr: (%ld)\n", index_ptr);
#endif
	/* seek to where it should be */
	lseek(Index_fd, (long)(index_ptr * sizeof(long)) ,0);

	/* read it */
	if (read(Index_fd, (char *)&data_ptr, sizeof(long)) != sizeof(long) ||
	    data_ptr == 0L) {
#ifdef DEBUG
		printf("get_ptr: failed\n");
#endif
		return(-1L);
	}
#ifdef DEBUG
	printf("get_ptr: succeeded\n");
#endif
	return(--data_ptr);
}

X/* put a data file pointer into the specified location in the index file */
X/* move down further if slots are full */
static
put_ptr(index_ptr, data_ptr)
long index_ptr;
long data_ptr;
{
	long get_ptr();
	long index_size = INDEX_SIZE;
	int fd;

	/* open file for write */
	if (Index_for_write == 0) {
		if ((fd = open(Index_file, O_RDWR)) < 0) {
#ifdef DEBUG
			printf("put_ptr: can't open Index_file for write\n");
#endif
			return(-1);
		}
		close(Index_fd);
		Index_fd = fd;
		++Index_for_write;
	}

	/* find an empty slot */
	while (index_size-- && get_ptr(index_ptr) > 0L) {
		index_ptr = ++index_ptr % INDEX_SIZE;
	}

	if (index_size == 0L) {
#ifdef DEBUG
		printf("put_ptr: hash table overflow - failed\n");
#endif
		return(-1);
	}

	/* seek to spot */
	lseek(Index_fd, (long)(index_ptr * sizeof(long)), 0);

	++data_ptr;

	/* write in data */
	if (write(Index_fd, (char *)&data_ptr, sizeof(long)) != sizeof(long)) {
#ifdef DEBUG
		printf("put_ptr: write failed\n");
#endif
		return(-1);
	}

#ifdef DEBUG
	printf("put_ptr: succeeded\n");
#endif
	return(0);
}

static void
lcase(s, n)
register char *s;
register int n;
{
	for (; n > 0L; --n, ++s)
		if (isupper(*s))
			*s = _tolower(*s);
}

X/* This is a simplified version of the pathalias hashing function.
 * Thanks to Steve Belovin and Peter Honeyman
 *
 * hash a string into a long int.  31 bit crc (from andrew appel).
 * the crc table is computed at run time by crcinit() -- we could
 * precompute, but it takes 1 clock tick on a 750.
 *
 * This fast table calculation works only if POLY is a prime polynomial
 * in the field of integers modulo 2.  Since the coefficients of a
 * 32-bit polynomial won't fit in a 32-bit word, the high-order bit is
 * implicit.  IT MUST ALSO BE THE CASE that the coefficients of orders
 * 31 down to 25 are zero.  Happily, we have candidates, from
 * E. J.  Watson, "Primitive Polynomials (Mod 2)", Math. Comp. 16 (1962):
 *	x^32 + x^7 + x^5 + x^3 + x^2 + x^1 + x^0
 *	x^31 + x^3 + x^0
 *
 * We reverse the bits to get:
 *	111101010000000000000000000000001 but drop the last 1
 *         f   5   0   0   0   0   0   0
 *	010010000000000000000000000000001 ditto, for 31-bit crc
 *	   4   8   0   0   0   0   0   0
 */

X/*#define POLY32 0xf5000000	/* 32-bit polynomial */
X/*#define POLY31 0x48000000	/* 31-bit polynomial */
X/*#define POLY POLY31		/* use 31-bit to avoid sign problems */

#define POLY 0x48000000		/* 31-bit polynomial */

static long CrcTable[128];

static void
crcinit()
{	register int i, j;
	register long sum;

	for (i = 0; i < 128; ++i) {
		sum = 0;
		for (j = 7 - 1; j >= 0; --j)
			if (i & (1 << j))
				sum ^= POLY >> j;
		CrcTable[i] = sum;
	}
#ifdef DEBUG
	printf("crcinit: done\n");
#endif
}

static long
hash(name, size)
register char *name;
register int size;
{
	register long sum = 0;

	while (size--) {
		sum = (sum >> 7) ^ CrcTable[(sum ^ (*name++)) & 0x7f];
	}
#ifdef DEBUG
	printf("hash: returns (%ld)\n", (long)(sum % INDEX_SIZE));
#endif
	return((long)(sum % INDEX_SIZE));
}
//go.sysin dd *
made=TRUE
if [ $made = TRUE ]; then
	chmod 640 news/dbz.c
	echo -n '	'; ls -ld news/dbz.c
fi
echo 'Extracting rn/dbz.h'
sed 's/^X//' <<'//go.sysin dd *' >rn/dbz.h
typedef struct {
   char *dptr;
   int dsize;
} datum;
//go.sysin dd *
made=TRUE
if [ $made = TRUE ]; then
	chmod 640 rn/dbz.h
	echo -n '	'; ls -ld rn/dbz.h
fi
echo 'Extracting rn/dbz.c'
sed 's/^X//' <<'//go.sysin dd *' >rn/dbz.c
X/*
dbz.c  V1.3

Copyright 1988 Jon Zeeff (umix!b-tech!zeeff)
You can use this code in any manner, as long as you leave my name on it
and don't hold me responsible for any problems with it.

For rn:
Hacked by gdb@ninja.UUCP (David Butler); Sun May 29 11:34:20 CDT 1988

These routines replace dbm as used by the usenet news software
(it's not a full dbm replacement by any means).  It's fast and
simple.

BSD sites will notice some savings in disk space.  Sys V sites without
dbm will notice much faster operation.

Note: .pag files created by version 1.0 need to be recreated before
      using this version.

This code relies on the fact that news stores a pointer to the history
file as the dbm data.  It doesn't store another copy of the key like
dbm does so it saves disk space.  All you can do is fetch() and
store() data.

Just make news with the dbm option and link with dbz.o.
*/

X/*
   Set this to the something several times larger than the maximum # of
   lines in a history file.  It should be a prime number.
*/
#define INDEX_SIZE 99991L

#include "EXTERN.h"
#include "common.h"

long lseek();

#include "dbz.h"

static char Buffer[1024];   /* used for fetch returns */
static int Data_fd;
static char Index_file[512];	/* index file name */
static int Index_fd = -1;
static int Index_for_write = 0;	/* changed when the file is open for write */

dbminit(name)
char *name;
{
	void crcinit();

#ifdef DEBUGGING
	if (debug) {
		printf("dbminit: called with (%s)\n", name);
	}
#endif

	if (Index_fd >= 0) {
#ifdef DEBUGGING
		if (debug) {
			printf("dbminit: Index_file already open\n");
		}
#endif
		return(-1);    /* init already called once */
	}

	Data_fd = open(name, O_RDONLY);
	if (Data_fd < 0) {
#ifdef DEBUGGING
		if (debug) {
			printf("dbminit: Data_file open failed\n");
		}
#endif
		return(-1);
	}

	strcpy(Index_file, name);
	strcat(Index_file, ".pag");
	Index_fd = open(Index_file, O_RDONLY);

	if (Index_fd < 0) {
#ifdef DEBUGGING
		if (debug) {
			printf("dbminit: Index_file open failed\n");
		}
#endif
		return(-1);
	}

	crcinit();
#ifdef DEBUGGING
	if (debug) {
		printf("dbminit: succeeded\n");
	}
#endif
	return(0);
}

dbmclose()
{
	if (Index_fd >= 0) {
		close(Index_fd);
		Index_fd = -1;
		close(Data_fd);
	}
#ifdef DEBUGGING
	if (debug) {
		printf("dbmclose: succeeded\n");
	}
#endif
	return(0);
}

X/* get an entry from the database */
datum
fetch(key)
datum key;
{
	long index_ptr;
	long index_size = INDEX_SIZE;
	long data_ptr;
	datum output;
	long hash();
	long get_ptr();
	void lcase();

#ifdef DEBUGGING
	if (debug) {
		printf("fetch: (%s)\n", key.dptr);
	}
#endif
	for (index_ptr = hash(key.dptr, key.dsize);
	    index_size-- && (data_ptr = get_ptr(index_ptr)) > 0L;
	    index_ptr = ++index_ptr % INDEX_SIZE) {

		lseek(Data_fd, data_ptr, 0);
		read(Data_fd, Buffer, (unsigned)key.dsize);

		/* key should be lcase version of article id and no tab */

		(void)lcase(Buffer, key.dsize);
		if (Buffer[key.dsize - 1] == '\t') {
			Buffer[key.dsize - 1] = '\0';
		}
#ifdef DEBUGGING
		if (debug) {
			printf("fetch: Buffer (%s)\n", Buffer);
		}
#endif

		if (memcmp(key.dptr, Buffer, (int)key.dsize) == 0) {
			/* we found it */
			memcpy(Buffer, (char *)&data_ptr, sizeof(long));
			output.dptr = Buffer;
			output.dsize = sizeof(long);
#ifdef DEBUGGING
			if (debug) {
				printf("fetch: successful\n");
			}
#endif
			return(output);
		}
	}

	/* we didn't find it */

	output.dptr = (char *)0;
	output.dsize = 0L;
#ifdef DEBUGGING
	if (debug) {
		printf("fetch: failed\n");
	}
#endif
	return(output);
}

X/* add an entry to the database */
store(key, data)
datum key;
datum data;
{
#ifdef DEBUGGING
	if (debug) {
		printf("store: (%s, %ld)\n", key.dptr, *((long *)data.dptr));
	}
#endif
	return(put_ptr(hash(key.dptr, key.dsize), *((long *)data.dptr)));
}

X/* get a data file pointer from the specified location in the index file */
static long
get_ptr(index_ptr)
long index_ptr;
{
	long data_ptr;

#ifdef DEBUGGING
	if (debug) {
		printf("get_ptr: (%ld)\n", index_ptr);
	}
#endif
	/* seek to where it should be */
	lseek(Index_fd, (long)(index_ptr * sizeof(long)) ,0);

	/* read it */
	if (read(Index_fd, (char *)&data_ptr, sizeof(long)) != sizeof(long) ||
	    data_ptr == 0L) {
#ifdef DEBUGGING
		if (debug) {
			printf("get_ptr: failed\n");
		}
#endif
		return(-1L);
	}

#ifdef DEBUGGING
	if (debug) {
		printf("get_ptr: succeeded\n");
	}
#endif
	return(--data_ptr);
}

X/* put a data file pointer into the specified location in the index file */
X/* move down further if slots are full */
static
put_ptr(index_ptr, data_ptr)
long index_ptr;
long data_ptr;
{
	long get_ptr();
	long index_size = INDEX_SIZE;
	int fd;

	/* open file for write */
	if (Index_for_write == 0) {
		if ((fd = open(Index_file, O_RDWR)) < 0) {
#ifdef DEBUGGING
			if (debug) {
				printf("put_ptr: can't open Index_file for write\n");
			}
#endif
			return(-1);
		}
		close(Index_fd);
		Index_fd = fd;
		++Index_for_write;
	}

	/* find an empty slot */
	while (index_size-- && get_ptr(index_ptr) > 0L) {
		index_ptr = ++index_ptr % INDEX_SIZE;
	}

	if (index_size == 0L) {
#ifdef DEBUGGING
		if (debug) {
			printf("put_ptr: hash table overflow - failed\n");
		}
#endif
		return(-1);
	}

	/* seek to spot */
	lseek(Index_fd, (long)(index_ptr * sizeof(long)), 0);

	++data_ptr;

	/* write in data */
	if (write(Index_fd, (char *)&data_ptr, sizeof(long)) != sizeof(long)) {
#ifdef DEBUGGING
		if (debug) {
			printf("put_ptr: write failed\n");
		}
#endif
		return(-1);
	}

#ifdef DEBUGGING
	if (debug) {
		printf("put_ptr: succeeded\n");
	}
#endif
	return(0);
}

static void
lcase(s, n)
register char *s;
register int n;
{
	for (; n > 0L; --n, ++s)
		if (isupper(*s))
			*s = _tolower(*s);
}

X/* This is a simplified version of the pathalias hashing function.
 * Thanks to Steve Belovin and Peter Honeyman
 *
 * hash a string into a long int.  31 bit crc (from andrew appel).
 * the crc table is computed at run time by crcinit() -- we could
 * precompute, but it takes 1 clock tick on a 750.
 *
 * This fast table calculation works only if POLY is a prime polynomial
 * in the field of integers modulo 2.  Since the coefficients of a
 * 32-bit polynomial won't fit in a 32-bit word, the high-order bit is
 * implicit.  IT MUST ALSO BE THE CASE that the coefficients of orders
 * 31 down to 25 are zero.  Happily, we have candidates, from
 * E. J.  Watson, "Primitive Polynomials (Mod 2)", Math. Comp. 16 (1962):
 *	x^32 + x^7 + x^5 + x^3 + x^2 + x^1 + x^0
 *	x^31 + x^3 + x^0
 *
 * We reverse the bits to get:
 *	111101010000000000000000000000001 but drop the last 1
 *         f   5   0   0   0   0   0   0
 *	010010000000000000000000000000001 ditto, for 31-bit crc
 *	   4   8   0   0   0   0   0   0
 */

X/*#define POLY32 0xf5000000	/* 32-bit polynomial */
X/*#define POLY31 0x48000000	/* 31-bit polynomial */
X/*#define POLY POLY31		/* use 31-bit to avoid sign problems */

#define POLY 0x48000000		/* 31-bit polynomial */

static long CrcTable[128];

static void
crcinit()
{	register int i, j;
	register long sum;

	for (i = 0; i < 128; ++i) {
		sum = 0;
		for (j = 7 - 1; j >= 0; --j)
			if (i & (1 << j))
				sum ^= POLY >> j;
		CrcTable[i] = sum;
	}
#ifdef DEBUGGING
	if (debug) {
		printf("crcinit: done\n");
	}
#endif
}

static long
hash(name, size)
register char *name;
register int size;
{
	register long sum = 0;

	while (size--) {
		sum = (sum >> 7) ^ CrcTable[(sum ^ (*name++)) & 0x7f];
	}

#ifdef DEBUGGING
	if (debug) {
		printf("hash: returns (%ld)\n", (long)(sum % INDEX_SIZE));
	}
#endif
	return((long)(sum % INDEX_SIZE));
}
//go.sysin dd *
made=TRUE
if [ $made = TRUE ]; then
	chmod 640 rn/dbz.c
	echo -n '	'; ls -ld rn/dbz.c
fi
exit 0

gdb@ninja.UUCP (David Butler) (06/01/88)

Expire always returns a store fail.  Please add the indicated line to expire.c:

#ifdef DBM
	if (!dorebuild) {
		(void) sprintf(PAGFILE, "%s/%s", LIB, "nhistory.pag");
		(void) sprintf(DIRFILE, "%s/%s", LIB, "nhistory.dir");
		(void) close(creat(PAGFILE, 0666));
		(void) close(creat(DIRFILE, 0666));
!		(void) close(creat(NARTFILE, 0666)); /* for dbz */
		initdbm(NARTFILE);
	}
#endif