[comp.lang.c] Stack-threaded recursion plus setjmp

eric@snark.thyrsus.com (Eric S. Raymond) (10/27/90)

Eric Tiedemann pointed out that the algorithm in my stdemo posting
could be speeded up and the code simplified through use of setjmp().
As this introduces a lovely symmetry between the code's use of data
and its flow of control, I fell justified to in posting the new version.

/*
 * stdemo.c -- demonstrate application of stack-threaded recursion
 *
 * This is a lightly hacked version of the program I posted in USENET
 * article <1YDn0r#3DmCNn7pxmsK5Mf68M18Dt56=eric@snark.thyrsus.com> on
 * 25 Oct 90 00:27:23 GMT. Thanks to Eric Tiedemann (est@ollie.nyu.edu)
 * for pointing out that *control* should have been stack-threaded too!
 *
 * An implementation problem with the state-quintuple format used for the
 * transition-table compiler is that we need to apply the `code-generator'
 * function to all glob patterns implied by an RE of the form
 *
 *  <ss><ss><ss><ss><ss>
 *
 * where <ss> can be a digit or a digit set bracketed by [, ]; but there
 * could be up to MAXSTATES to the 5th power of these, which is too many to
 * generate into a scratch array or file and then crunch through.
 *
 * We cope with this using stack-threaded recursion, an idea cheerfully
 * stolen from some obscure EMACS internals. We crunch through the RE, using
 * recursion to build a pointer tree with attached values that's actually
 * stored on the auto-variables stack! Maximum stack storage used is O(5);
 *
 * Whenever a call to the recursive parser terminates, the function to be
 * applied gets fed the current `branch' of the in-stack tree. Note the use
 * of setjmp() on error to pop right back up to the line reader!
 *
 * To demonstrate this, simply compile and run it, typing in REs on each line:
 *
 * $ stdemo
 * 12345
 * 1, 2, 3, 4, 5
 *
 * [012]78[34]2
 * 0, 7, 8, 3, 2
 * 0, 7, 8, 4, 2
 * 1, 7, 8, 3, 2
 * 1, 7, 8, 4, 2
 * 2, 7, 8, 3, 2
 * 2, 7, 8, 4, 2
 *
 * 12
 * Wrong number of states.
 * $
 *
 * Read on, and be enlightened...
 *                                              Eric S. Raymond
 *                                              (eric@snark.thyrsus.com)
 */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <setjmp.h>

#define is_state(c)     isdigit(c)      /* is char a valid state indicator? */
#define stoi(c)         ((c) - '0')     /* state character to int */

#define RULESIZE        5               /* size of the tuples */

#define throw(catcher)  longjmp(catcher, 1)

struct stackthread
{
    int                 value;
    struct stackthread  *parent;
};

static jmp_buf
    noclose,    /* missing close */
    badchar,    /* invalid character */
    wrongnum;   /* wrong number of elements */

static void parse_recurse(parent, buf, ecount)
struct stackthread      *parent;
char                    *buf;
int                     ecount;
{
    struct stackthread  st;

    st.parent = parent;

    if (is_state(buf[0]))
    {
        /* 
         * Sequential cons of next state onto the tip of the branch
         * passed on.
         */
        st.value = stoi(*buf);
        parse_recurse(&st, buf + 1, ecount + 1);
    }
    else if (buf[0] == '[')
    {
        char    *ep = strchr(buf, ']');

        if (ep == (char *)NULL)
	    throw(noclose);

        /*
         * Wild-carding time. We iterate through the set; for each member,
         * we start a recurse from just past the *end* of the set.
         */
        while (is_state(*++buf))
        {
            int err;

            st.value = stoi(*buf);
            parse_recurse(&st, ep + 1, ecount + 1);
        }
    }
    else if (buf[0] != '\n')
        throw(badchar);
    else if (ecount != RULESIZE)
        throw(wrongnum);
    else
    {
        /* plug your function in here */
        printf("%d, %d, %d, %d, %d\n",
               parent->parent->parent->parent->parent->value,
               parent->parent->parent->parent->value,
               parent->parent->parent->value,
               parent->parent->value,
               parent->value);
    }
}

char *read_rules(fp)
FILE    *fp;
{
    char                buf[80];
    struct stackthread  top;

    if (setjmp(noclose))
	return("Missing ] character.\n");
    else if (setjmp(badchar))
	return("Invalid character.\n");
    else if (setjmp(wrongnum))
	return("Wrong number of states.\n");

    /*
     * Note: for applications where you don't know the length of the branch
     * in advance, you can initialize the parent field of top to NULL and
     * use that in the `hook' function to detect end-of-list.
     */

    while (fgets(buf, sizeof(buf), fp) != (char *)NULL)
    {
        int     status;

        if (buf[0] == '\n' || buf[0] == '\0') continue;
	parse_recurse(&top, buf, 0);
        (void) putchar('\n');
    }
}

main()
{
    char	*errmsg;

    if ((errmsg = read_rules(stdin)) != (char *)NULL)
	(void) printf(errmsg);
}

/* stdemo.c ends here */
-- 
      Eric S. Raymond = ...!uunet!snark!eric  (mad mastermind of TMN-Netnews)