[net.sources] subs: a statistical text reconstructor

kay@flame.UUCP (Kay Dekker) (12/02/84)

Friends,
	here is the sources for subs (see articles in net.crypt).
There's no makefile, no manual page, just the C source.
BTW, could we have a posting of 'shar' sometime? some of us have only
recently arrived on the net, and it might help despatching larger (and
more serious) items of software.
					{Enjoy, }+
						Kay.
Cut between the lines & compile. Do not spold, fend, mindle or butylate.
------------------------------------------------------------------------------
#include <stdio.h>
#include <math.h>
#include <vadvise.h>
#define EOS '\0'

typedef struct tree_struct
 { 
    struct tree_struct *along, *down;
    char ch;
    int count;
 } *tree;

tree root = NULL;
int n, exited;
char *string, *sp, *malloc (), save;

main (argc, argv) int argc; char **argv;
 {
    int i, fd = open ("/dev/tty", 0);
    char buf [BUFSIZ], *bufp = buf, *s;
    tree f;
    
    vadvise (VA_ANOM);
    if (argc != 2 || (n = atoi (argv[1])) < 1)
	fprintf (stderr, "usage: subs n  (n > 0)\n"), exit (1);
    
    for (i = 1; i < n && !feof (stdin); i++)
	*bufp++ = nextchar ();
    
    while (!feof (stdin))
     {
	if (bufp == buf + BUFSIZ - 1)
	 for (bufp = buf, i = 1; i < n; i++)
	    *bufp = bufp [BUFSIZ - n], ++bufp;
	*bufp++ = nextchar ();
	*bufp   = EOS;
	insert (bufp - n);
     }
fprintf (stderr, "tree generated\n");
    string = (char *)malloc (n+1);

    sp = s = string;
    f = root;
    srand (getpid ());
    exited = 0;
        
    for (i = 1; i < n; i++)
     {
	tree next ();

	f = next (f);
	*s++ = save;
	putchar (save);
     }
    *s = EOS;

    while (!exited)
     {
	int i;
	tree follow (), p;
	
	p = follow (string);
	if (exited)
	 break; 
	next (p);
	string [n - 1] = save;
	putchar (save);	
	
	for (i = 0; i < n; i++)
	 string[i] = string[i+1];
     }
 }

int
nextchar ()
 {
    int c = getchar ();
    return c;
 }

char *string;

tree
ins (t) tree t;
 {
    tree troot = t;
    char *strcpy ();
    
    if (*string == NULL)
	return troot;
    if (t == NULL)
     {
	tree new = (tree)malloc (sizeof (struct tree_struct));
	
	new->down = new->along = NULL;
	new->ch = 0;
	new->count = 0;
	return ins (new);
     }
    while (t->along != NULL && t->along->ch < *string)
	t = t->along;
    if (t->along == NULL || t->along->ch != *string)
     {
	tree new = (tree)malloc (sizeof (struct tree_struct));
	
	new->along = t->along; t->along = new;
	new->ch = *string++;
	new->count = 1;
	new->down = ins (NULL);
	t = new;
     }
    else
	t = t->along, ++string, ++(t->count);
    t->down = ins (t->down);
    return troot;
 }

insert (s) char *s;
 {
    string = s;
    root = ins (root);
 }

tabulate (t) tree t;
 {
    if (t == NULL)
	return;
    if (t->down == NULL)
     {
	if (t->count != 0)
	 {
	    *sp = EOS; 
	    printf ("%6d\t%s%c\n", t->count, string, t->ch);
	 }
     }
    else
     {
         *sp++ = t->ch;
	tabulate (t->down);
	--sp;
     }
    if (t->along != NULL)
	tabulate (t->along);
 }

tree
next (tr) tree tr;
 {
    int sum = 0, r;
    tree t;

    for (t = tr -> along; t != NULL; t = t -> along)
     sum += t->count;
    r = interval (sum);

    sum = 0;
    
    for (t = tr -> along; t != NULL; t = t -> along)
     if ((sum += t -> count), sum >= r)
      break;
    save = t -> ch;

    return t -> down;
 }

int
interval (n) int n;
 {
    return ((rand () % 37) % n + 1);
 }

tree
follow (s) char *s;
 {
    int i;
    tree t = root;
    
    for (i = 1; i < n; i++, s++)
     {
	if (t == NULL)
	 {
	    exited = 1; return NULL;
	 }

	for (; t->ch < *s; t = t->along)
	 if (t->along == NULL)
	  {
	    exited = 1; return NULL;
	  }

	if (t->ch != *s)
	 {
	    exited = 1; return NULL;
	 }
	t = t->down;
     }
    return t;
 }
-------------------------------------------------------------------------------- 
"But you TOLD me to type 'rm * .o', and I DID, and it said 'rm: .o nonexistent', and ...
			... mcvax!ukc!flame!ubu!kay