[net.sources] Trivial parser code

fouts@AMES-NAS.ARPA (Marty) (12/27/85)

     I just got the latest 'dragon book', which is considerablly different
from the original, and in chapter 2 it has this trivial parser in C, so I
typed it in and it worked.

     Figuring that others who get the book (which I recommend doing, if
you're at all interested in compilers) would want the same code, I bundled
it up, and here it is:

     (Interesting that the book carries a 1986 copyright?)

----- CUT HERE And unpack with sh -----

#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	README
#	emitter.c
#	error.c
#	global.h
#	init.c
#	lexer.c
#	main.c
#	makefile
#	parser.c
#	symbol.c
# This archive created: Fri Dec 27 09:47:34 1985
export PATH; PATH=/bin:$PATH
echo shar: extracting "'README'" '(511 characters)'
sed 's/^	X//' << \SHAR_EOF > 'README'
     Trans is a simple parser for arithmatic expressions. It is from Aho, A.
V., R Sethi, and J. D. Ullman [1986] "Compilers Principles, Techniques, and
Tools"; 73-78.  For a complete description, see the book.

     Trans accepts simple expressions involving identifies and integer
constants and produces 'reverse polish' results.  Expressions are terminated
by a semicolon. It terminates on any error, or on end of file from standard
input.

     Sample valid input:

     a + b; CXY - d; (a * b) * (c - d);

SHAR_EOF
echo shar: extracting "'emitter.c'" '(540 characters)'
sed 's/^	X//' << \SHAR_EOF > 'emitter.c'
/*
 * Aho, A. V., R Sethi, and J. D. Ullman [1986]
 * Compilers Principles, Techniques, and Tools
 * pages 73 - 78
 */

#include "global.h"

emit(t, tval)		/* generates output */
    int t, tval;
{
    switch(t) {
	case '+': case '-': case '*': case '/':
	    printf("%c\n", t); break;
	case DIV:
	    printf("DIV\n"); break;
	case MOD:
	    printf("MOD\n"); break;
	case NUM:
	    printf("%d\n", tval); break;
	case ID:
	    printf("%s\n", symtable[tval].lexptr); break;
	default:
	     printf("token %d, tokenval %d\n", t, tval);
    }
}
SHAR_EOF
echo shar: extracting "'error.c'" '(297 characters)'
sed 's/^	X//' << \SHAR_EOF > 'error.c'
/*
 * Aho, A. V., R Sethi, and J. D. Ullman [1986]
 * Compilers Principles, Techniques, and Tools
 * pages 73 - 78
 */

#include "global.h"

error(m)		/* generates all error messages */
    char *m;
{
    fprintf(stderr, "line %d: %s\n", lineno, m);
    exit(1);		/* unsuccessful termination */
}
SHAR_EOF
echo shar: extracting "'global.h'" '(501 characters)'
sed 's/^	X//' << \SHAR_EOF > 'global.h'
/*
 * Aho, A. V., R Sethi, and J. D. Ullman [1986]
 * Compilers Principles, Techniques, and Tools
 * pages 73 - 78
 */

#include <stdio.h>
#include <ctype.h>

#define BSIZE 128		/* buffer size */
#define NONE -1
#define EOS '\0'
#define NUM 256
#define DIV 257
#define MOD 258
#define ID 259
#define DONE 260

int tokenval;			/* value of token attribute */
int lineno;

struct entry {			/* form of symbol table entry */
    char *lexptr;
    int token;
};

struct entry symtable[];	/* symbol table */
SHAR_EOF
echo shar: extracting "'init.c'" '(351 characters)'
sed 's/^	X//' << \SHAR_EOF > 'init.c'
/*
 * Aho, A. V., R Sethi, and J. D. Ullman [1986]
 * Compilers Principles, Techniques, and Tools
 * pages 73 - 78
 */

#include "global.h"

struct entry keywords[] = {
    "div", DIV,
    "mod", MOD,
    0, 0
};

init()			/* loads keywords into symtable */
{
    struct entry *p;
    for (p = keywords; p->token; p++)
	insert(p->lexptr, p->token);
}
SHAR_EOF
echo shar: extracting "'lexer.c'" '(1022 characters)'
sed 's/^	X//' << \SHAR_EOF > 'lexer.c'
/*
 * Aho, A. V., R Sethi, and J. D. Ullman [1986]
 * Compilers Principles, Techniques, and Tools
 * pages 73 - 78
 */

#include "global.h"

char lexbuf[BSIZE];
int lineno = 1;
int tokenval = NONE;

int lexan()			/* lexical analyzer */
{
    int t;
    while (1) {
	t = getchar();
	if (t == ' ' || t == '\t')
	    ;			/* strip out white space */
	else if (t == '\n')
	    lineno = lineno + 1;
	else if (isdigit(t)) {	/* t is a digit */
	    ungetc(t, stdin);
	    scanf("%d", &tokenval);
	    return NUM;
	}
	else if (isalpha(t)) {	/* t is a letter */
	    int p, b = 0;
	    while (isalnum(t)) { /* t is alphanumeric */
	        lexbuf[b] = t;
		t = getchar();
		b = b + 1;
		if (b >= BSIZE)
		    error("compiler error");
	    }
	    lexbuf[b] = EOS;
	    if (t != EOF)
	        ungetc(t, stdin);
	    p = lookup(lexbuf);
	    if (p == 0)
	        p = insert(lexbuf, ID);
	    tokenval = p;
	    return symtable[p].token;
	}
	else if (t == EOF)
	    return DONE;
	else {
	    tokenval = NONE;
	    return t;
	}
    }
}

SHAR_EOF
echo shar: extracting "'main.c'" '(220 characters)'
sed 's/^	X//' << \SHAR_EOF > 'main.c'
/*
 * Aho, A. V., R Sethi, and J. D. Ullman [1986]
 * Compilers Principles, Techniques, and Tools
 * pages 73 - 78
 */

#include "global.h"

main()
{
    init();
    parse();
    exit(0);		/* successful termination */
}
SHAR_EOF
echo shar: extracting "'makefile'" '(338 characters)'
sed 's/^	X//' << \SHAR_EOF > 'makefile'
SRCS		= lexer.c parser.c emitter.c symbol.c init.c error.c main.c
OBJS		= lexer.o parser.o emitter.o symbol.o init.o error.o main.o
HDRS		= global.h /usr/include/stdio.h /usr/include/ctype.h
PROG		= trans

all:		$(PROG)

trans:		$(HDRS) $(OBJS)
		$(CC) $(CFLAGS) -o $(PROG) $(OBJS)

clean:
		-rm $(OBJS) $(PROG)

lint:
		lint -lc $(SRCS)
SHAR_EOF
echo shar: extracting "'parser.c'" '(1108 characters)'
sed 's/^	X//' << \SHAR_EOF > 'parser.c'
/*
 * Aho, A. V., R Sethi, and J. D. Ullman [1986]
 * Compilers Principles, Techniques, and Tools
 * pages 73 - 78
 */

#include "global.h"

int lookahead;

parse()			/* parses and translates expression list */
{
    lookahead = lexan();
    while (lookahead != DONE) {
	expr(); match(';');
    }
}

expr()
{
    int t;
    term();
    while (1)
        switch (lookahead) {
	    case '+': case '-':
	        t = lookahead;
		match(lookahead); term(); emit(t,NONE);
		continue;
	    default:
	        return;
	}
}

term()
{
    int t;
    factor();
    while (1)
        switch(lookahead) {
	    case '*': case '/': case DIV: case MOD:
	        t = lookahead;
		match(lookahead); factor(); emit(t,NONE);
		continue;
	    default:
	        return;
	}
}

factor()
{
    switch(lookahead) {
	case '(':
	    match('('); expr(); match(')'); break;
	case NUM:
	    emit(NUM, tokenval); match(NUM); break;
	case ID:
	    emit(ID, tokenval); match(ID); break;
	default:
	    error("syntax error");
    }
}

match(t)
    int t;
{
    if (lookahead == t)
        lookahead = lexan();
    else error("syntax error");
}
SHAR_EOF
echo shar: extracting "'symbol.c'" '(1123 characters)'
sed 's/^	X//' << \SHAR_EOF > 'symbol.c'
/*
 * Aho, A. V., R Sethi, and J. D. Ullman [1986]
 * Compilers Principles, Techniques, and Tools
 * pages 73 - 78
 */

#include "global.h"

#define STRMAX 999		/* size of lexemes array */
#define SYMMAX 100		/* size of symtable */

char lexemes[STRMAX];
int lastchar = -1;		/* last used position in lexemes */
struct entry symtable[SYMMAX];
int lastentry = 0;		/* last used position in symtable */

int lookup(s)			/* returns position of entry for s */
    char s[];
{
    int p;
    for (p = lastentry; p > 0; p = p - 1)
        if (strcmp(symtable[p].lexptr, s) == 0)
	    return p;
    return 0;
}

int insert(s, tok)		/* returns position of entry for s */
    char s[];
    int tok;
{
    int len;
    len = strlen(s);		/* strlen computes length of s */
    if (lastentry + 1 >= SYMMAX)
        error("symbol table full");
    if (lastchar + len + 1 >= STRMAX)
	error("lexemes array full");
    lastentry = lastentry + 1;
    symtable[lastentry].token = tok;
    symtable[lastentry].lexptr = &lexemes[lastchar + 1];
    lastchar = lastchar + len + 1;
    strcpy(symtable[lastentry].lexptr, s);
    return lastentry;
}
SHAR_EOF
#	End of shell archive
exit 0