[net.lang.c] Adding integral bytes to foo pointers

chris@umcp-cs.UUCP (Chris Torek) (09/08/85)

>I have an application where I want to be able to save masses of objects
>most with lots of pointers in them to other objects, and then use this
>result to initialize the program the next time it runs. . . .

Here is a fairly hacky way we did this for Franz Lisp under 4.1BSD.
It has a couple of nonportable things in it (``but *I* didn't write
them,'' he protests): in particular, readint() is wrong; it assumes
32 bit integers; and it uses a free()d value in HashFree().

However, it does handle circular data structures and show how to
dump pointers to objects, then restore them.

Credit department: this idea was originated by Rehmi Post; the code
was rewritten by Craig Stanfill, Randy Trigg, and myself.

-------------------------------------------------------------------
/*
 * This file contains C code for the lisp structure dumper package. The main
 * lisp-callable functions are sdump and sscoop.  The format is 
 *	(sdump <lispobj> <filename>)
 *	(sscoop <filename>) 
 *
 * Sdump takes a pointer to an (almost) arbitrary data structure in lisp and
 * dumps the contents in binary to the file.  It handles cons nodes, atoms
 * (including value, pname, and plist), hunks, integers, and strings. It DOES
 * watch out for cycles (using a hash table of pointers) and so will preserve
 * any in the structure.  Sscoop returns the pointer that was originally
 * dumped.
 *
 * One weird feature:  if an atom is actually a flavor then its property list
 * is NOT followed.  The check for flavor works by checking the plist of the
 * atom for a property 'type' with value 'flavor'. 
 */

#include <sys/types.h>
#include <stdio.h>
#include "global.h"

/*
 * Give lisp the following to start 'er up:
 *	(cfasl 'strc.o '_init_strc 'init-strc "function")
 *	(init-strc) 
 */

extern lispval matom(), inewint(), mstr(), newdot(), newhunk();

static int MaxHash;
static FILE *dumpfile;

typedef struct Bucket {
	struct Bucket *next;
	lispval lval;
	int ival;
} bucket;

#define HashLog		9
#define HashMask	((1<<HashLog)-1)
#define HashTabSize	(1<<HashLog)
#define HashFunc(x)	(((x)>>4)&HashMask)
#define NODUMP		99
#define FLAVOR		OTHER
#define NOTSEEN		(-100)
#define readbyt()	(getc(dumpfile))
#define printbyt(b)	(putc(b,dumpfile))
#define printint(i)	(putc(i,dumpfile),putc((i)>>8,dumpfile),\
			 putc((i)>>16,dumpfile),putc((i)>>24,dumpfile))
#define printptr(p)	(printint((int)(p)))
#define MAXSTRLEN 2*STRBLEN+1

static bucket HashTable[HashTabSize];
static char locstrbuf[MAXSTRLEN];

/* clear all elements in this bucket */
static
HashFree(b)
	register bucket *b;
{

	while (b->next)
		free(b = b->next);
}

/*
 * called when sdumping - checks whether x is in the hash table - If so,
 * return 1. If not, return 0 after installing. 
 */
static
dump_seen(x)
	register lispval x;
{
	register bucket *buck1, *buck2;
	register int i;

	for (buck1 = &HashTable[HashFunc((int) x)], i = 0;
	     buck1->lval != x && buck1->next;
	     buck1 = buck1->next, i++)
		/*void*/;
	if (buck1->lval == x)
		return (1);
	if (MaxHash < i)
		MaxHash = i;
	buck2 = (bucket *) malloc(sizeof (bucket));
	buck1->next = buck2;
	buck2->next = 0;
	buck2->lval = x;
	return (0);
}

/*
 * like the above, this searches down the hash table.  The difference is that
 * this one (called when scooping) returns the bucket itself - either the
 * found bucket, or the new one just created. 
 */
static bucket *
scoop_seen(x)
	register int x;
{
	register bucket *buck1, *buck2;
	register int i;

	for (buck1 = &HashTable[HashFunc((int) x)], i = 0;
	     buck1->ival != x && buck1->next;
	     buck1 = buck1->next, i++)
		/*void*/;
	if (buck1->ival == x)
		return (buck1);
	if (MaxHash < i)
		MaxHash = i;
	buck2 = (bucket *) malloc(sizeof (bucket));
	buck1->next = buck2;
	buck2->next = 0;
	buck2->ival = x;
	buck2->lval = (lispval) NOTSEEN;
	return (buck2);
}

/*
 * 'main' lisp-callable function to do the structure dumping - checks file
 * arg and then calls dump with the first pointer. 
 */

static lispval 
Lsdump()
{
	register int i;
	char *dfile;

	chkarg(2, "sdump");

	if (TYPE(lbot[1].val) == ATOM)
		dfile = lbot[1].val->a.pname;
	else if (TYPE(lbot[1].val) == STRNG)
		dfile = (char *) lbot[1].val;
	else {
		error("Improper file argument");
		return (nil);
	}

	if ((dumpfile = fopen(dfile, "w")) == NULL) {
		perror(dfile);
		return (nil);
	}
	MaxHash = 0;
	for (i = 0; i < HashTabSize; i++) {
		if (HashTable[i].next) {
			HashFree(HashTable[i]);
			HashTable[i].next = 0;
		}
		HashTable[i].lval = 0;
	}
	printptr(lbot[0].val);
	dump(lbot[0].val);
	fclose(dumpfile);
	return (inewint(MaxHash));
}

/* the */
static
dump(lispptr)
	register lispval lispptr;
{

	if (!dump_seen(lispptr))
		switch (TYPE(lispptr)) {
		case UNBO:
			error("sdump: Can't handle this type: UNBO");
		case STRNG:
			printbyt(TYPE(lispptr));
			printstr(lispptr);
			break;
		case ATOM:
			dump_atom(lispptr);
			break;
		case INT:
			printbyt(TYPE(lispptr));
			printint(lispptr->i);
			break;
		case DTPR:
			printbyt(TYPE(lispptr));
			printptr(lispptr->d.car);
			dump(lispptr->d.car);
			printptr(lispptr->d.cdr);
			dump(lispptr->d.cdr);
			break;
		case DOUB:
			error("sdump: Can't handle this type: DOUB");
		case BCD:
			error("sdump: Can't handle this type: BCD");
		case PORT:
			error("sdump: Can't handle this type: PORT");
		case ARRAY:
			error("sdump: Can't handle this type: ARRAY");
		case OTHER:
			error("sdump: Can't handle this type: OTHER");
		case SDOT:
			error("sdump: Can't handle this type: SDOT");
		case VALUE:
			error("sdump: Can't handle this type: VALUE");
		case HUNK2:
			dump_hunk(2, lispptr);
			break;
		case HUNK4:
			dump_hunk(4, lispptr);
			break;
		case HUNK8:
			dump_hunk(8, lispptr);
			break;
		case HUNK16:
			dump_hunk(16, lispptr);
			break;
		case HUNK32:
			dump_hunk(32, lispptr);
			break;
		case HUNK64:
			dump_hunk(64, lispptr);
			break;
		case HUNK128:
			dump_hunk(128, lispptr);
			break;
		default:
			error("Unknown type: sdump");
			break;
		}
}

/* dumps an atom or a flavor - in the latter case we don't dump plist */
static
dump_atom(ptr)
	register lispval ptr;
{

	switch (atomtype(ptr)) {
	case NODUMP:
		printbyt(NODUMP);
		printstr(ptr->a.pname);
		break;
	case FLAVOR:
		printbyt(FLAVOR);
		printstr(ptr->a.pname);
		printptr(ptr->a.clb);
		if (ptr->a.clb != CNIL)
			dump(ptr->a.clb);
		break;
	default:
		printbyt(TYPE(ptr));
		printstr(ptr->a.pname);
		printptr(ptr->a.clb);
		if (ptr->a.clb != CNIL)
			dump(ptr->a.clb);
		printptr(ptr->a.plist);
		dump(ptr->a.plist);
	}
}

/* run down hunk elements (num of them) dumping */
static
dump_hunk(num, ptr)
	register int num;
	register lispval ptr;
{
	register int i;

	printbyt(TYPE(ptr));
	for (i = 0; i < num; i++) {
		printptr(ptr->h.hunk[i]);
		dump(ptr->h.hunk[i]);
	}
}

/*
 * check whether ptr has either the si:flavor (it's a flavor) or the $$NODUMP
 * property.  In the latter case we dump only the name, in the former, we
 * also dump the value - neither dumps the plist 
 */
static
atomtype(ptr)
	register lispval ptr;
{
	register lispval tmp;
	int nodump = 0;
	static beenhere;
	static lispval tmptype, tmptype1;

	if (!beenhere) {
		beenhere++;
		tmptype = matom("si:flavor");
		tmptype1 = matom("$$NODUMP");
	}
	for (tmp = ptr->a.plist; tmp != nil; tmp = tmp->d.cdr->d.cdr)
		if (tmp->d.car == tmptype)
			return (FLAVOR);
		else if (tmp->d.car == tmptype1)
			nodump++;
	return (nodump ? NODUMP : 0);
}

/* dumps a string with 0 at the end */
static
printstr(str)
	register char *str;
{

	do {
		putc(*str, dumpfile);
	} while (*str++);
}

/*
 * the lisp-callable scoop'er - checks file arg and calls scoop with the
 * first pointer in the file. 
 */
static
lispval 
Lsscoop()
{
	lispval scoop(), ptr;
	register int i;
	char *dfile;

	chkarg(1, "sscoop");

	if (TYPE(lbot[0].val) == ATOM)
		dfile = lbot[0].val->a.pname;
	else if (TYPE(lbot[0].val) == STRNG)
		dfile = (char *) lbot[0].val;
	else {
		error("Improper file argument");
		return (nil);
	}

	if ((dumpfile = fopen(dfile, "r")) == NULL) {
		perror(dfile);
		return (nil);
	}
	/* clean out hash table */
	MaxHash = 0;
	for (i = 0; i < HashTabSize; i++) {
		if (HashTable[i].next) {
			HashFree(HashTable[i]);
			HashTable[i].next = 0;
		}
		HashTable[i].lval = 0;
		HashTable[i].ival = 0;
	}
	ptr = scoop(readint());
	fclose(dumpfile);
	return (ptr);
}

/*
 * the scoop'ing workhorse - if seen before (present in hash table) then
 * return the lispval entry in the hash table - otherwise, build the lispval
 * and stick in hash table. 
 */
static
lispval 
scoop(iptr)
	int iptr;
{
	register lispval ptr1;
	register bucket *buck;
	int type, hunknum;
	register i;
	char *readstr();

	buck = scoop_seen(iptr);
	if (buck->lval != (lispval) NOTSEEN)
		return (buck->lval);
	switch (type = readbyt()) {
	case STRNG:
		return (buck->lval = mstr(readstr()));
		break;
	case NODUMP:
		return (buck->lval = matom(readstr()));
		break;
	case ATOM:
		buck->lval = ptr1 = matom(readstr());
		if ((i = readint()) != (int) CNIL)
			ptr1->a.clb = scoop(i);
		else
			ptr1->a.clb = CNIL;
		ptr1->a.plist = scoop(readint());
		return (ptr1);
		break;
	case FLAVOR:
		buck->lval = ptr1 = matom(readstr());
		if ((i = readint()) != (int) CNIL)
			ptr1->a.clb = scoop(i);
		else
			ptr1->a.clb = CNIL;
		return (ptr1);
		break;
	case INT:
		return (buck->lval = inewint(readint()));
		break;
	case DTPR:
		protect(buck->lval = ptr1 = newdot());
		ptr1->d.car = scoop(readint());
		ptr1->d.cdr = scoop(readint());
		--np;
		return (ptr1);
		break;
	case HUNK2:
	case HUNK4:
	case HUNK8:
	case HUNK16:
	case HUNK32:
	case HUNK64:
	case HUNK128:
		hunknum = type - HUNK2;
		protect(buck->lval = ptr1 = newhunk(hunknum));
		for (i = 0; i < (2 << hunknum); i++)
			ptr1->h.hunk[i] = scoop(readint());
		--np;
		return (ptr1);
		break;
	default:
		error("unknown type in scoop");
	}
}

/* reads one int as 4 bytes */
static
readint()
{
	union {
		int i;
		char c[4];
	}     u;

	u.c[0] = readbyt();
	u.c[1] = readbyt();
	u.c[2] = readbyt();
	u.c[3] = readbyt();
	return u.i;
}

/*
 * reads a string - uses locstrbuf for storage.  Size of locstrbuf is a
 * function of STRBLEN (which is defined in global.h) 
 */
static char *
readstr()
{
	register char *s = locstrbuf;

	while (*s++ = getc(dumpfile))
		/*void*/;
	return (locstrbuf);
}

/* initializer for this package - should call after doing cfasl */
lispval 
init_strc()
{

	mfun("sdump", Lsdump, lambda);
	mfun("sscoop", Lsscoop, lambda);
}
-- 
In-Real-Life: Chris Torek, Univ of MD Comp Sci Dept (+1 301 454 4251)
UUCP:	seismo!umcp-cs!chris
CSNet:	chris@umcp-cs		ARPA:	chris@maryland

gww%aphasia.uucp@BRL.ARPA (George Williams) (09/08/85)

> >Since 'p+n', where p is a pointer and n is an integer, is equivalent to
> >adding n*sizeof(whatever p points to), the safe and portable way of adding
> >an integer to a pointer treated as an integer is
> >	 (char *)p + n
>
> This will break down on machines which are bit or word oriented, rather than
> byte oriented.  The real question is why do you need to do this in the first
> place?

I can give one reason as to why this is desirable: Suppose one wants to
implement the PL/I area construct in c (For those who don't know PL/I
the area type defines a large space into which other types can be put,
and then encourages the use of offsets rather than pointers.  This is
very useful if you want to be able to save a symbol table (or some such)
on disk in one program, and then read it in in another without having to
chase down and fixing up pointers).

I have an application where I want to be able to save masses of objects
most with lots of pointers in them to other objects, and then use this
result to initialize the program the next time it runs.  Obviously I don't
want to do much processing when I initialize.  So what I have done is not
to store pointers, but to store offsets from the base of the current object;
thus if I save the appropriate part of memory (with no processing) and then
read it in to a different part of memory (again no processing) I have valid
offsets.

Now I need some standard form to store my offsets in, I can't use
sizeof(current object) because not all objects are the same type, and even
if they were I can't guarantee that they will be aligned properly (how much
overhead does malloc use on your machine?). So I use byte offsets, and when-
ever I want what an offset points to I have to do something like this:
    next = (ENTRY *) (((char *) ent) + ent->sym_next);
I know this will cause problems (or at least be slow) on things like dec10s
and dgs, but I can't think of a better way.

Does anyone have a better way of saving large chunks of processed data?

			    George Williams
			    decvax!cit-vax!aphasia!gww


You must come to know me; not so much now, because now
I'm excited, but I have got at least three virtues.
How many have you got?