fouts@AMES-NAS.ARPA (Marty) (12/27/85)
I just got the latest 'dragon book', which is considerablly different from the original, and in chapter 2 it has this trivial parser in C, so I typed it in and it worked. Figuring that others who get the book (which I recommend doing, if you're at all interested in compilers) would want the same code, I bundled it up, and here it is: (Interesting that the book carries a 1986 copyright?) ----- CUT HERE And unpack with sh ----- #! /bin/sh # This is a shell archive, meaning: # 1. Remove everything above the #! /bin/sh line. # 2. Save the resulting text in a file. # 3. Execute the file with /bin/sh (not csh) to create the files: # README # emitter.c # error.c # global.h # init.c # lexer.c # main.c # makefile # parser.c # symbol.c # This archive created: Fri Dec 27 09:47:34 1985 export PATH; PATH=/bin:$PATH echo shar: extracting "'README'" '(511 characters)' sed 's/^ X//' << \SHAR_EOF > 'README' Trans is a simple parser for arithmatic expressions. It is from Aho, A. V., R Sethi, and J. D. Ullman [1986] "Compilers Principles, Techniques, and Tools"; 73-78. For a complete description, see the book. Trans accepts simple expressions involving identifies and integer constants and produces 'reverse polish' results. Expressions are terminated by a semicolon. It terminates on any error, or on end of file from standard input. Sample valid input: a + b; CXY - d; (a * b) * (c - d); SHAR_EOF echo shar: extracting "'emitter.c'" '(540 characters)' sed 's/^ X//' << \SHAR_EOF > 'emitter.c' /* * Aho, A. V., R Sethi, and J. D. Ullman [1986] * Compilers Principles, Techniques, and Tools * pages 73 - 78 */ #include "global.h" emit(t, tval) /* generates output */ int t, tval; { switch(t) { case '+': case '-': case '*': case '/': printf("%c\n", t); break; case DIV: printf("DIV\n"); break; case MOD: printf("MOD\n"); break; case NUM: printf("%d\n", tval); break; case ID: printf("%s\n", symtable[tval].lexptr); break; default: printf("token %d, tokenval %d\n", t, tval); } } SHAR_EOF echo shar: extracting "'error.c'" '(297 characters)' sed 's/^ X//' << \SHAR_EOF > 'error.c' /* * Aho, A. V., R Sethi, and J. D. Ullman [1986] * Compilers Principles, Techniques, and Tools * pages 73 - 78 */ #include "global.h" error(m) /* generates all error messages */ char *m; { fprintf(stderr, "line %d: %s\n", lineno, m); exit(1); /* unsuccessful termination */ } SHAR_EOF echo shar: extracting "'global.h'" '(501 characters)' sed 's/^ X//' << \SHAR_EOF > 'global.h' /* * Aho, A. V., R Sethi, and J. D. Ullman [1986] * Compilers Principles, Techniques, and Tools * pages 73 - 78 */ #include <stdio.h> #include <ctype.h> #define BSIZE 128 /* buffer size */ #define NONE -1 #define EOS '\0' #define NUM 256 #define DIV 257 #define MOD 258 #define ID 259 #define DONE 260 int tokenval; /* value of token attribute */ int lineno; struct entry { /* form of symbol table entry */ char *lexptr; int token; }; struct entry symtable[]; /* symbol table */ SHAR_EOF echo shar: extracting "'init.c'" '(351 characters)' sed 's/^ X//' << \SHAR_EOF > 'init.c' /* * Aho, A. V., R Sethi, and J. D. Ullman [1986] * Compilers Principles, Techniques, and Tools * pages 73 - 78 */ #include "global.h" struct entry keywords[] = { "div", DIV, "mod", MOD, 0, 0 }; init() /* loads keywords into symtable */ { struct entry *p; for (p = keywords; p->token; p++) insert(p->lexptr, p->token); } SHAR_EOF echo shar: extracting "'lexer.c'" '(1022 characters)' sed 's/^ X//' << \SHAR_EOF > 'lexer.c' /* * Aho, A. V., R Sethi, and J. D. Ullman [1986] * Compilers Principles, Techniques, and Tools * pages 73 - 78 */ #include "global.h" char lexbuf[BSIZE]; int lineno = 1; int tokenval = NONE; int lexan() /* lexical analyzer */ { int t; while (1) { t = getchar(); if (t == ' ' || t == '\t') ; /* strip out white space */ else if (t == '\n') lineno = lineno + 1; else if (isdigit(t)) { /* t is a digit */ ungetc(t, stdin); scanf("%d", &tokenval); return NUM; } else if (isalpha(t)) { /* t is a letter */ int p, b = 0; while (isalnum(t)) { /* t is alphanumeric */ lexbuf[b] = t; t = getchar(); b = b + 1; if (b >= BSIZE) error("compiler error"); } lexbuf[b] = EOS; if (t != EOF) ungetc(t, stdin); p = lookup(lexbuf); if (p == 0) p = insert(lexbuf, ID); tokenval = p; return symtable[p].token; } else if (t == EOF) return DONE; else { tokenval = NONE; return t; } } } SHAR_EOF echo shar: extracting "'main.c'" '(220 characters)' sed 's/^ X//' << \SHAR_EOF > 'main.c' /* * Aho, A. V., R Sethi, and J. D. Ullman [1986] * Compilers Principles, Techniques, and Tools * pages 73 - 78 */ #include "global.h" main() { init(); parse(); exit(0); /* successful termination */ } SHAR_EOF echo shar: extracting "'makefile'" '(338 characters)' sed 's/^ X//' << \SHAR_EOF > 'makefile' SRCS = lexer.c parser.c emitter.c symbol.c init.c error.c main.c OBJS = lexer.o parser.o emitter.o symbol.o init.o error.o main.o HDRS = global.h /usr/include/stdio.h /usr/include/ctype.h PROG = trans all: $(PROG) trans: $(HDRS) $(OBJS) $(CC) $(CFLAGS) -o $(PROG) $(OBJS) clean: -rm $(OBJS) $(PROG) lint: lint -lc $(SRCS) SHAR_EOF echo shar: extracting "'parser.c'" '(1108 characters)' sed 's/^ X//' << \SHAR_EOF > 'parser.c' /* * Aho, A. V., R Sethi, and J. D. Ullman [1986] * Compilers Principles, Techniques, and Tools * pages 73 - 78 */ #include "global.h" int lookahead; parse() /* parses and translates expression list */ { lookahead = lexan(); while (lookahead != DONE) { expr(); match(';'); } } expr() { int t; term(); while (1) switch (lookahead) { case '+': case '-': t = lookahead; match(lookahead); term(); emit(t,NONE); continue; default: return; } } term() { int t; factor(); while (1) switch(lookahead) { case '*': case '/': case DIV: case MOD: t = lookahead; match(lookahead); factor(); emit(t,NONE); continue; default: return; } } factor() { switch(lookahead) { case '(': match('('); expr(); match(')'); break; case NUM: emit(NUM, tokenval); match(NUM); break; case ID: emit(ID, tokenval); match(ID); break; default: error("syntax error"); } } match(t) int t; { if (lookahead == t) lookahead = lexan(); else error("syntax error"); } SHAR_EOF echo shar: extracting "'symbol.c'" '(1123 characters)' sed 's/^ X//' << \SHAR_EOF > 'symbol.c' /* * Aho, A. V., R Sethi, and J. D. Ullman [1986] * Compilers Principles, Techniques, and Tools * pages 73 - 78 */ #include "global.h" #define STRMAX 999 /* size of lexemes array */ #define SYMMAX 100 /* size of symtable */ char lexemes[STRMAX]; int lastchar = -1; /* last used position in lexemes */ struct entry symtable[SYMMAX]; int lastentry = 0; /* last used position in symtable */ int lookup(s) /* returns position of entry for s */ char s[]; { int p; for (p = lastentry; p > 0; p = p - 1) if (strcmp(symtable[p].lexptr, s) == 0) return p; return 0; } int insert(s, tok) /* returns position of entry for s */ char s[]; int tok; { int len; len = strlen(s); /* strlen computes length of s */ if (lastentry + 1 >= SYMMAX) error("symbol table full"); if (lastchar + len + 1 >= STRMAX) error("lexemes array full"); lastentry = lastentry + 1; symtable[lastentry].token = tok; symtable[lastentry].lexptr = &lexemes[lastchar + 1]; lastchar = lastchar + len + 1; strcpy(symtable[lastentry].lexptr, s); return lastentry; } SHAR_EOF # End of shell archive exit 0