[net.sources] Dissociated Press

john@x.UUCP (John Woods) (10/05/84)

[ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ >-  ]
Here is a program I wrote after seeing the Puree of Netnews article.  This
program takes the standard input (or a named input file) and copies words
from the input to the output until it randomly decides to skip forward,
looking for a match in the input for the current context that it has (the
default context is one word).

I tried posting this before, but it seemed to get lost.  Here it is again, and
slightly improved, anyway:
-----(dp.c)------CUT-ON-THE-DOTTED-LINE----------------------------------
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * One token of input as produced by rdword(): a run of word characters
 * plus the non-word characters that follow it.  Code throughout the file
 * handles these through the pointer type `word`.
 */
struct word {
	char *alpha;		/* the word text (heap copy, NUL-terminated) */
	char *punc;		/* text between this word and the next (heap copy) */
	int acceptable;		/* 1 if every character of alpha was lower case, else 0 */
};

typedef struct word *word;

/* Number of consecutive words that make up the matching context (-n, 1..31). */
int nwords = 1;
/* Sum of the `acceptable` flags of the words currently held in context[];
 * kept up to date by shft(). */
int acceptcount = 0;
/* getword() tries a slip when rnd(1000) is greater than this (-s, 1..999). */
int slipfactor = 900;
/* Set by parseargs() when the input position cannot be queried; nextword()
 * then does not rewind the input when it runs out. */
int isapipe = 0;
/* Stream the words are read from: the named file, or stdin. */
FILE *infile;

/*
 * Entry point: parse the command line, then copy tokens from the input to
 * the standard output until getword() returns NULL.
 * Returns 0 when the loop ends normally.
 */
int main(argc,argv) int argc; char *argv[];
{
	word w;
	/* getword() is defined later in the file and returns a pointer; declare
	 * it here so the call is not treated as returning int. */
	word getword();

	parseargs(--argc,++argv);
	setbuf(stdout,NULL);	/* write each token as soon as it is produced */

	/*
	 * Patch the C library's character table so that isalpha()/islower()
	 * also accept some punctuation: '!' is marked upper case, '_' and the
	 * apostrophe lower case.
	 * NOTE(review): _ctype, _U and _L are private to older C libraries;
	 * this does not build against libraries that lack them.
	 */
	_ctype[ '!' + 1] |= _U;
	_ctype[ '_' + 1] |= _L;
	_ctype[ '\'' + 1] = (_L);

	while ((w = getword()) != NULL)
		putword(w);
	return 0;
}

/*
 * Write one token to standard output: its word text immediately followed
 * by the punctuation/whitespace that trailed it in the input.
 */
putword(w) word w;
{
	char *piece[2];
	int k;

	piece[0] = w->alpha;
	piece[1] = w->punc;
	for (k = 0; k < 2; k++)
		fputs(piece[k],stdout);
}

/*
 * Nonzero if c belongs to the word part of a token: a letter, or one of
 * '!', '_' and the apostrophe.  Spelled out here so the result does not
 * depend on the C library's character table having been modified.
 */
static int wordch(c) int c;
{
	return isalpha(c) || c == '!' || c == '_' || c == '\'';
}

/*
 * Nonzero if c keeps a word "acceptable": a lower-case letter, '_' or the
 * apostrophe.  Upper-case letters and '!' make a word unacceptable.
 */
static int plainch(c) int c;
{
	return islower(c) || c == '_' || c == '\'';
}

/*
 * Read the next token from infile: a (possibly empty) run of word
 * characters followed by the run of non-word characters after it.
 *
 * Returns a freshly allocated word, or 0 when the input is already at end
 * of file.  The word and its two strings are obtained from xalloc().
 *
 * Both runs are collected in fixed 512-byte buffers.  A run that would not
 * fit is cut at the buffer limit and the remainder is returned by the
 * following call(s), so no input text is dropped and the buffers are never
 * overrun.
 */
word rdword() {
	char abuf[512], pbuf[512];
	int c, a, p;
	word w;
	int isok = 1;

	a = 0;
	while ((c = getc(infile)) != EOF && wordch(c)) {
		if (a == (int)sizeof(abuf) - 1) {
			/* buffer full: hand this character back, the rest of
			 * the run becomes the next token */
			ungetc(c,infile);
			break;
		}
		if (!plainch(c)) isok = 0;
		abuf[a++] = c;
	}
	if (c == EOF && a == 0)
		return 0;
	abuf[a] = 0;

	/* c is now EOF, the first non-word character, or a pushed-back word
	 * character (only when the word buffer filled up). */
	p = 0;
	if (c != EOF && !wordch(c)) {
		pbuf[p++] = c;
		while (p < (int)sizeof(pbuf) - 1
		    && (c = getc(infile)) != EOF && !wordch(c))
			pbuf[p++] = c;
		/* a word character ended the run: it starts the next token */
		if (c != EOF && wordch(c)) ungetc(c,infile);
	}
	pbuf[p] = 0;

	w = xalloc ( sizeof (*w));
	strcpy( w->alpha = xalloc(strlen(abuf)+1), abuf);
	strcpy( w->punc  = xalloc(strlen(pbuf)+1), pbuf);
	w->acceptable = isok;
	return w;
}

/*
 * context:  the last nwords tokens handed out by getword(), oldest first.
 * slippage: the window of nwords tokens that slip() slides over the input
 *           and compares against context.
 * Only the first nwords entries of each array are used.
 */
word context[32], slippage[32];

/*
 * Slide the nwords-entry window b one place: free the oldest token b[0]
 * (if any), move the others down, and store w in the last slot.
 *
 * When b is the global context window, acceptcount is adjusted for the
 * token leaving and the token entering.  w may be NULL (end of input); it
 * is then stored as-is and contributes nothing to acceptcount.
 *
 * Returns w.
 */
word shft(w,b) word w, *b;
{
	int i;
	if (b[0]) {
		if (b == context)
			acceptcount -= b[0]->acceptable;
		free(b[0]->alpha);
		free(b[0]->punc);
		free(b[0]);
	}
	/* guard against NULL: nextword() yields NULL at end of piped input */
	if (b == context && w != NULL)
		acceptcount += w->acceptable;
	for (i = 0; i < nwords - 1; i++)
		b[i] = b[i+1];
	b[i] = w;
	return w;
}

/*
 * Fetch the next token from the input.  When the input is exhausted and it
 * is not a pipe, rewind to the start and read again, so a regular file is
 * treated as endless.  Returns NULL only if no token could be read.
 */
word nextword() {
	word tok = rdword();

	if (tok != NULL || isapipe)
		return tok;

	/* ran off the end of a seekable input: start over */
	fseek(infile,0,0);
	return rdword();
}

/*
 * Produce the next token to print.  Once the context window is full and
 * every word in it is acceptable, occasionally (when rnd(1000) exceeds
 * slipfactor) call slip() to jump elsewhere in the input first.  The token
 * read is shifted into context and returned; NULL means end of input.
 */
word getword() {
	int primed = context[0] != NULL && acceptcount == nwords;

	if (primed && rnd(1000) > slipfactor)
		slip();
	return shft(nextword(), context);
}

/* Return the next rand() value reduced modulo n. */
rnd(n) {
	int r;

	r = rand();
	r %= n;
	return r;
}

/*
 * "Words not equal": compare the word text of two tokens, ignoring their
 * punctuation.  Returns the strcmp() result, i.e. zero when the texts are
 * identical and nonzero otherwise.
 */
wneq(w1,w2) word w1, w2;
{
	char *lhs, *rhs;

	lhs = w1->alpha;
	rhs = w2->alpha;
	return strcmp(lhs,rhs);
}

slip() {
	int i, j;
	long fp;

	fp = ftell(infile);
	/* load up context buffer */
	for (i = 0; i < nwords; i++)
	{	if ((slippage[i] = nextword()) == NULL)
			return;	/* premature EOF */
	}
	j = 0;
	while (++j < 2000 && notalleq()) {
		shft(nextword(), slippage);
	}
	if (j == 2000)
		fseek(infile,fp,0);
}

/*
 * Compare the context and slippage windows slot by slot.
 * Returns 1 as soon as a pair of words differs, 0 if all nwords match.
 */
notalleq() {
	int slot = 0;

	while (slot < nwords) {
		if (wneq(context[slot],slippage[slot]) != 0)
			return 1;
		slot++;
	}
	return 0;
}

/* Print the message s and a newline on stderr, then exit with status 1. */
bad(s) char *s;
{
	fputs(s,stderr);
	putc('\n',stderr);
	exit(1);
}


/*
 * Convert the whole of s to an int.  Returns 1 and stores the value in
 * *out on success; returns 0 if s is empty, has trailing non-digits, or
 * does not fit in an int.
 */
static int numarg(s,out) char *s; int *out;
{
	char *end;
	long v;

	v = strtol(s,&end,10);
	if (end == s || *end != '\0' || v != (long)(int)v)
		return 0;
	*out = (int)v;
	return 1;
}

/*
 * Process the command line (program name already stripped):
 *
 *	-n N	words of context, 1..31		-> nwords
 *	-s N	slip factor, 1..999		-> slipfactor
 *	-r N	random seed (default: derived from pid and time)
 *	-h, -	print usage and exit
 *
 * The first non-option argument, if any, is opened as the input file;
 * otherwise input is stdin.  Also sets isapipe when the input's position
 * cannot be queried, and seeds rand().  Any invalid argument is reported
 * and the program exits.
 */
parseargs(ac,av) char **av;
{
	/* mix pid and time; a plain pid % time would just be the pid */
	int seed = (int)(getpid() ^ (long)time(NULL));
	int val;

	while (ac > 0 && **av == '-') {
		if (av[0][1] == 'h' || av[0][1] == 0) /* help */
		{
			fprintf(stderr,"usage: dp [-n nwords] [-s slipfactor] [-r randomseed] [ inputfile ]\n");
			exit(0);
		}
		if (-- ac <= 0)
			bad("numeric argument needed");
		av++;		/* av now points at the option's value */
		switch(av[-1][1]) {
		case 'n':
			if (!numarg(*av,&val) || val <= 0 || val > 31)
				bad("bad -n value");
			nwords = val;
			break;
		case 's':
			if (!numarg(*av,&val) || val <= 0 || val >= 1000)
				bad("bad -s value");
			slipfactor = val;
			break;
		case 'r':
			if (!numarg(*av,&val))
				bad("bad -r value");
			seed = val;
			break;
		default:
			bad("unknown option");
		}
		ac--, av++;	/* step past the value */
	}
	if (ac) {
		if ((infile = fopen(*av,"r")) == NULL) {
			perror(*av);
			exit(1);
		}
	} else {
		infile = stdin;
	}
	/* an input whose position cannot be reported cannot be rewound */
	if (ftell(infile) < 0)
		isapipe = 1;
	srand((unsigned)seed);
}

/*
 * Allocate n bytes with malloc(); on failure print a message on stderr and
 * exit with status 2, so callers never see a null result.
 * NOTE(review): the function has no declared return type, so the pointer
 * is passed back through int -- confirm this is safe on the target.
 */
xalloc(n) {
	char *mem;

	mem = malloc(n);
	if (mem == NULL) {
		fprintf(stderr,"no memory!!!!!!\n");
		exit(2);
	}
	return mem;
}
-- 
John Woods, Charles River Data Systems, Framingham MA, (617) 626-1114
...!decvax!frog!john, ...!mit-eddie!jfw, jfw%mit-ccc@MIT-XX.ARPA

I have absolutely nothing clever to say in this signature.