chris@mimsy.umd.edu (Chris Torek) (10/19/90)
Archive-name: hash/18-Oct-90 Original-posting-by: chris@mimsy.umd.edu (Chris Torek) Original-subject: Re: hash function for text in C Reposted-by: emv@math.lsa.umich.edu (Edward Vielmetti) [Reposted from comp.lang.c. Comments on this service to emv@math.lsa.umich.edu (Edward Vielmetti).] The following implements expanding chained hash tables for strings and numbers. This is unmodified from a special-purpose program (having to do with reading a large number of files with name=id pairs and doing various operations on them). Chain lengths are not monitored; instead, the table is invariably expanded whenever it becomes 2/3 full. Entries may be neither removed nor modified. An essentially-unrelated function called `string' maintains a string pool such that string(x) == string(y) iff strcmp(x,y)==0 (the program typically stores each name several times, and often needs to test for name equality, so this helps there). : Run this shell script with "sh" not "csh" PATH=/bin:/usr/bin:/usr/ucb:/etc:$PATH export PATH all=false if [ x$1 = x-a ]; then all=true fi echo Extracting hash.h sed 's/^X//' <<'//go.sysin dd *' >hash.h X/* X * $Id: hash.h,v 1.1 90/09/24 23:58:38 chris Exp $ X * X * Hash table entries keep track of name (or id) = value pairs. X * Values are simply pointers. Hash tables themselves are named X * (for debugging). X */ X Xstruct hashtab; X X/* X * The `ins' functions return nonzero if the old value existed, X * without changing the value. X * The iterate functions calls a given function with name,value X * or id,value pairs. X */ Xstruct hashtab *ht_new(); /* given name, create new hash table */ Xchar *ht_nget(); /* given table and name, get value */ Xchar *ht_iget(); /* given table and ID, get value */ Xint ht_nins(); /* given table and name, insert new value */ Xint ht_iins(); /* given table and id, insert new value */ Xvoid ht_niterate(); /* given table and function, iterate */ Xvoid ht_iiterate(); /* given table and function, iterate */ X X/* X * Some things that ought not to be here, but are anyway. X * goodnumber() takes a number of objects and a size and returns X * a new number of objects, such that malloc(goodnumber(n,size)*size) X * calls malloc with a `good' size value (resulting in less wasted X * memory). emalloc is malloc with program-abort on out-of-memory. X * string() makes a `read-only' copy of a string, reusing the previous X * copy if any. X */ Xint goodnumber(); /* given n & sizeof, return new n */ Xchar *emalloc(); /* malloc, exit on error */ Xchar *string(); /* make an `ideal' copy of a string */ //go.sysin dd * if [ `wc -c < hash.h` != 1415 ]; then made=false echo error transmitting hash.h -- echo length should be 1415, not `wc -c < hash.h` else made=true fi if $all; then echo ' Changing owner to bin' chown bin hash.h else echo ' Original owner was bin' fi if $made; then chmod 444 hash.h echo -n ' '; ls -ld hash.h fi echo Extracting hash.c sed 's/^X//' <<'//go.sysin dd *' >hash.c X#ifndef lint Xstatic char rcsid[] = "$Id: hash.c,v 1.2 90/10/02 13:32:23 chris Exp $"; X#endif X X/* X * Hash table routines. X */ X X#include <stdio.h> X#include <stdlib.h> X#include <string.h> X#include "hash.h" X X/* X * Hash table entries keep track of name=value pairs. X * The names may be numeric IDs instead (by having a null name). X */ Xstruct hashent { X struct hashent *h_next;/* next in chain */ X int h_hash; /* hash value or ID */ X char *h_name; /* name (null if from numeric ID) */ X char *h_value; /* value */ X}; X Xstruct hashtab { X int ht_size; /* size (power of 2) */ X int ht_mask; /* == ht_size - 1 */ X int ht_used; /* number of entries used */ X int ht_lim; /* when to expand */ X struct hashent **ht_tab;/* base of table */ X char ht_name[1]; /* table name; actually larger */ X}; X Xextern char *progname; X Xchar * Xemalloc(n) X size_t n; X{ X register char *p = malloc(n); X X if (p == NULL) { X (void) fprintf(stderr, "%s: out of memory\n", progname); X exit(1); X } X return (p); X} X X/* round up to next multiple of y, where y is a power of 2 */ X#define ROUND(x, y) (((x) + (y) - 1) & ~((y) - 1)) X X/* compute a `good' number of objects to allocate via malloc */ Xint Xgoodnumber(n, s) X int n; X size_t s; X{ X X /* 16384 is a guess at a good page size for malloc */ X /* 32 is a guess at malloc's overhead */ X return ((int)((ROUND(n * s + 32, 16384) - 32) / s)); X} X X/* X * Make a new hash table. X */ Xstruct hashtab * Xht_new(name) X char *name; X{ X register struct hashtab *ht; X register struct hashent **h; X register int n; X X ht = (struct hashtab *)emalloc(sizeof *ht + strlen(name)); X ht->ht_tab = h = (struct hashent **)emalloc(128 * sizeof *h); X ht->ht_size = 128; X ht->ht_mask = 127; X for (n = 128; --n >= 0;) X *h++ = NULL; X ht->ht_used = 0; X ht->ht_lim = (128 * 2) / 3; X (void) strcpy(ht->ht_name, name); X return (ht); X} X X/* X * Expand an existing hash table. X */ Xstatic void Xht_expand(ht) X register struct hashtab *ht; X{ X register int n = ht->ht_size * 2, i; X register struct hashent *p, **h, **oldh, *q; X X h = (struct hashent **)emalloc(n * sizeof *h); X for (i = n; --i >= 0;) X *h++ = NULL; X h -= n; X oldh = ht->ht_tab; X n--; X for (i = ht->ht_size; --i >= 0;) { X for (p = *oldh++; p != NULL; p = q) { X q = p->h_next; X p->h_next = h[p->h_hash & n]; X h[p->h_hash & n] = p; X } X } X free((char *)ht->ht_tab); X ht->ht_tab = h; X ht->ht_mask = n; X ht->ht_size = ++n; X ht->ht_lim = (n * 2) / 3; X} X X/* X * Make a new hash entry. Its h_next will be NULL. X */ Xstatic struct hashent * Xnewhashent(hash, name, value) X int hash; X char *name, *value; X{ X static struct hashent *hfree; X register struct hashent *h; X register int n, nalloc; X X if ((h = hfree) != NULL) X hfree = h->h_next; X else { X nalloc = goodnumber(2, sizeof *h); /* need at least 2 */ X hfree = h = (struct hashent *)emalloc(nalloc * sizeof *h) + 1; X for (n = nalloc - 2; --n >= 0; h++) X h->h_next = h + 1; X h->h_next = NULL; X h -= nalloc - 1; X } X h->h_next = NULL; X h->h_hash = hash; X h->h_name = name; X h->h_value = value; X return (h); X} X X#define HASH(str, h, p) \ X for (p = str, h = 0; *p;) h = (h << 5) - h + *p++ X X/* X * Look up a name=value. X */ Xchar * Xht_nget(ht, name) X register struct hashtab *ht; X char *name; X{ X register char *p; X register int hash; X register struct hashent *h; X X HASH(name, hash, p); X p = name; X for (h = ht->ht_tab[hash & ht->ht_mask]; h != NULL; h = h->h_next) X if (h->h_hash == hash && h->h_name != NULL && X strcmp(h->h_name, p) == 0) X return (h->h_value); X return (NULL); X} X X/* X * Look up an ID=value. X */ Xchar * Xht_iget(ht, id) X register struct hashtab *ht; X register int id; X{ X register struct hashent *h; X X for (h = ht->ht_tab[id & ht->ht_mask]; h != NULL; h = h->h_next) X if (h->h_hash == id && h->h_name == NULL) X return (h->h_value); X return (NULL); X} X X/* X * Insert (do not clobber) a name=value. X * Return zero on success. X */ Xint Xht_nins(ht, name, value) X register struct hashtab *ht; X char *name, *value; X{ X register char *p; X register int hash; X register struct hashent *h, **hp; X X HASH(name, hash, p); X p = name; X for (hp = &ht->ht_tab[hash & ht->ht_mask]; (h = *hp) != NULL; X hp = &h->h_next) X if (h->h_hash == hash && h->h_name != NULL && X strcmp(h->h_name, p) == 0) X return (-1); X *hp = newhashent(hash, name, value); X if (++ht->ht_used > ht->ht_lim) X ht_expand(ht); X return (0); X} X X/* X * Insert (do not clobber) an ID=value. X * Return zero on success. X */ Xint Xht_iins(ht, id, value) X register struct hashtab *ht; X register int id; X char *value; X{ X register struct hashent *h, **hp; X X for (hp = &ht->ht_tab[id & ht->ht_mask]; (h = *hp) != NULL; X hp = &h->h_next) X if (h->h_hash == id && h->h_name == NULL) X return (-1); X *hp = newhashent(id, (char *)NULL, value); X if (++ht->ht_used > ht->ht_lim) X ht_expand(ht); X return (0); X} X X/* X * Stash a copy of a string away; it will never be freed. X */ Xstatic char * Xpoolstr(s) X char *s; X{ X register char *p; X register size_t l = strlen(s) + 1; X static char *poolp; X static size_t nleft; X X if (nleft < l) X poolp = emalloc(nleft = goodnumber(l, 1)); X bcopy(s, p = poolp, l); X poolp += l; X return (p); X} X X/* X * Generate a single unique copy of the given string. X */ Xchar * Xstring(s) X char *s; X{ X register char *p; X register int hash; X register struct hashent *h, **hp; X static struct hashtab *ht; X X if (ht == NULL) X ht = ht_new("strings"); X HASH(s, hash, p); X p = s; X for (hp = &ht->ht_tab[hash & ht->ht_mask]; (h = *hp) != NULL; X hp = &h->h_next) X if (h->h_hash == hash && strcmp(h->h_name, p) == 0) X return (h->h_name); X *hp = h = newhashent(hash, poolstr(s), (char *)NULL); X if (++ht->ht_used > ht->ht_lim) X ht_expand(ht); X return (h->h_name); X} X X/* X * Call fn on all the name=value pairs. X */ Xvoid Xht_niterate(ht, fn) X struct hashtab *ht; X register void (*fn)(); X{ X register struct hashent *h, **hp; X register int n; X X for (n = ht->ht_size, hp = ht->ht_tab; --n >= 0;) X for (h = *hp++; h != NULL; h = h->h_next) X if (h->h_name != NULL) X (*fn)(h->h_name, h->h_value); X} X X/* X * Call fn on all the id=value pairs. X */ Xvoid Xht_iiterate(ht, fn) X struct hashtab *ht; X register void (*fn)(); X{ X register struct hashent *h, **hp; X register int n; X X for (n = ht->ht_size, hp = ht->ht_tab; --n >= 0;) X for (h = *hp++; h != NULL; h = h->h_next) X if (h->h_name == NULL) X (*fn)(h->h_hash, h->h_value); X} //go.sysin dd * if [ `wc -c < hash.c` != 6291 ]; then made=false echo error transmitting hash.c -- echo length should be 6291, not `wc -c < hash.c` else made=true fi if $all; then echo ' Changing owner to bin' chown bin hash.c else echo ' Original owner was bin' fi if $made; then chmod 444 hash.c echo -n ' '; ls -ld hash.c fi -- In-Real-Life: Chris Torek, Univ of MD Comp Sci Dept (+1 301 405 2750) Domain: chris@cs.umd.edu Path: uunet!mimsy!chris