[alt.sources] Replacement for dbm

zeeff@b-tech.UUCP (Jon Zeeff) (05/16/88)

This is an updated copy of this code with some important bug fixes.  Thanks
to the people who sent me mail about them.

Hopefully, this code will allow the removal of lots of #ifdef DBM 
nonsense from the news code.  

/*

dbz.c  V1.3 

Copyright 1988 Jon Zeeff (umix!b-tech!zeeff)  
You can use this code in any manner, as long as you leave my name on it
and don't hold me responsible for any problems with it.

These routines replace dbm as used by the usenet news software 
(it's not a full dbm replacement by any means).  It's fast and 
simple.  

BSD sites will notice some savings in disk space.  Sys V sites without 
dbm will notice much faster operation.  

Note: .pag files created by version 1.0 need to be recreated before
      using this version.

This code relies on the fact that news stores a pointer to the history 
file as the dbm data.  It doesn't store another copy of the key like 
dbm does so it saves disk space.  All you can do is fetch() and 
store() data.  

Just make news with the dbm option and link with dbz.o.

*/

/* 
   Set this to the something several times larger than the maximum # of 
   lines in a history file.  It should be a prime number.
*/

#define INDEX_SIZE 99991 

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <ctype.h>

long lseek();

typedef struct {
   char *dptr;
   int dsize;
} datum;

static char buffer[1024];   /* used for fetch returns */
static char lcbuffer[1024];   /* used for lower case comparisons */
static int data_file;
static FILE *index_file=NULL;

dbminit(name)
char *name;
{
char string[1024];
FILE *fopen();

if (index_file != NULL) return -1;    /* init already called once */

data_file = open(name,O_RDWR);

strcpy(string,name);
strcat(string,".pag");
index_file = fopen(string,"r+");

if (index_file == NULL) return -1;

return 0;

}

dbmclose()
{
if (index_file) {
   fclose(index_file);
   index_file = NULL;
   close(data_file);
}
return 0;
}


datum
fetch(key)
datum key;
{                                   
long index_ptr;
long data_ptr;
datum output;
char *strchr();
long hash();
long get_ptr();
                        
for (index_ptr = hash(key.dptr,key.dsize);(data_ptr = get_ptr(index_ptr)) != -1;index_ptr= ++index_ptr % INDEX_SIZE){

    /* we got a pointer into the history file, go see if it's the right one */

    lseek(data_file,data_ptr,0);
    read(data_file,buffer,(unsigned)key.dsize);

    /* we could change the tab to a null but */
    /* we can get away without the length info since we know that a 
       lh portion of one key != any full key */
	
    /* Have to use lcase, since key is lcase version of article id */

    strncpy(lcbuffer, buffer, key.dsize-1);
    lcbuffer[key.dsize-1] = '\0';
    lcase(lcbuffer);

    if (strcmp(key.dptr, lcbuffer) == 0) {
       /* we found it */
       output.dptr = buffer;
       /* output.dsize = strchr(buffer,'\n')-buffer + 1; not used by news */
       return output;
    } 
}

/* we didn't find it */

output.dptr = NULL;
output.dsize = 0;
return output;
 
}


/* add an entry to the database */

store(key,data)
datum key;
datum data;
{   

return put_ptr(hash(key.dptr,key.dsize),*((long *)data.dptr));

}             

/* get a data file pointer from the specified location in the index file */

static long
get_ptr(index_ptr)
long index_ptr;
{
long data_ptr=0;
int count;

/* seek to where it should be */
fseek(index_file,(long)(index_ptr * sizeof(long)),0);

/* read it */
count = fread((char *)&data_ptr,sizeof(long),1,index_file);

if (count != 1 || data_ptr == 0) return -1; 

data_ptr -= sizeof(long);

return data_ptr;
}

/* put a data file pointer into the specified location in the index file */
/* move down further if slots are full */

static 
put_ptr(index_ptr,data_ptr)
long index_ptr;
long data_ptr;
{
long i = INDEX_SIZE;

/* find an empty slot */
while (i-- && get_ptr(index_ptr) != -1) index_ptr = ++index_ptr % INDEX_SIZE;

/* seek to spot */
fseek(index_file,(long)(index_ptr * sizeof(long)),0);

/* write in data */
data_ptr += sizeof(long);
fwrite((char *)&data_ptr,sizeof(long),1,index_file);

if (i > 0) return 0;
return -1;

}

/* 
   A hash function 
*/

/* This is a simplified version of the pathalias hashing function.  */
/* Thanks to Steve Belovin and Peter Honeyman */

/*
 * fold a string into a long int.  31 bit crc (from andrew appel).
 * the crc table is computed at run time by crcinit() -- we could
 * precompute, but it takes 1 clock tick on a 750.
 *
 * This fast table calculation works only if POLY is a prime polynomial
 * in the field of integers modulo 2.  Since the coefficients of a
 * 32-bit polynomial won't fit in a 32-bit word, the high-order bit is
 * implicit.  IT MUST ALSO BE THE CASE that the coefficients of orders
 * 31 down to 25 are zero.  Happily, we have candidates, from
 * E. J.  Watson, "Primitive Polynomials (Mod 2)", Math. Comp. 16 (1962):
 *	x^32 + x^7 + x^5 + x^3 + x^2 + x^1 + x^0
 *	x^31 + x^3 + x^0
 *
 * We reverse the bits to get:
 *	111101010000000000000000000000001 but drop the last 1
 *         f   5   0   0   0   0   0   0
 *	010010000000000000000000000000001 ditto, for 31-bit crc
 *	   4   8   0   0   0   0   0   0
 */

#define POLY32 0xf5000000	/* 32-bit polynomial */
#define POLY31 0x48000000	/* 31-bit polynomial */
#define POLY POLY31	/* use 31-bit to avoid sign problems */

static long CrcTable[128];

static crcinit()
{	register int i,j;
	register long sum;

	for (i = 0; i < 128; i++) {
		sum = 0;
		for (j = 7-1; j >= 0; --j)
			if (i & (1 << j))
				sum ^= POLY >> j;
		CrcTable[i] = sum;
	}
}

static long
fold(s,size)
	register char *s;
	int size;
{	register long sum = 0;

	while (size--) {
		sum = (sum >> 7) ^ CrcTable[(sum ^ (*s++)) & 0x7f];
	}

	return sum;
}

#define HASH1(n) ((n) % INDEX_SIZE);

static long
hash(name,size)
	char *name;
	int  size;
{
	register long probe;

	static int init = 1;

	if (init) {
           crcinit();
	   init = 0;
	}
        
	probe = fold(name,size);
	probe = HASH1(probe);

	return probe;
}


static int 
lcase(s)
register char *s;
{
	register char *ptr;

	for (ptr = s; *ptr; ptr++)
		if (isupper(*ptr))
			*ptr = tolower(*ptr);
}

-- 
Jon Zeeff           		Branch Technology,
uunet!umix!b-tech!zeeff  	zeeff%b-tech.uucp@umix.cc.umich.edu