david@ssc-vax.UUCP (David Norris) (12/07/83)
An earlier article, containing my revised grammar for MIL-STD Ada, generated some interest. Rather than send the C source of the lexical analyzer via mail, I am posting it so all may benefit. There is much room for improvement, both in the grammar (error recovery) and the scanner. Comments or improvements greatly appreciated. -- Dave Norris -- ..!uw-beaver!ssc-vax!david ----------------------------------------------------------------------------- #include <stdio.h> #include <ctype.h> #include "y.tab.c" char id[20]; /* identifier from yylex */ int inum; /* integer from yylex */ double rnum; /* real number from yylex */ int i; /* numeric value of character in number */ int base; /* base of based number */ int sleng; /* string length */ #define MAXLINE 132 int ch; /* last character read from source program */ char line[MAXLINE]; /* current line of source text */ int cc; /* character counter */ int lc; /* program location counter */ int ll ; /* length of current line */ int endoffile; /* end of file indicator */ struct key { char *keyword; int keyvalue; } keywordtab[] = { "ABORT", ABORT, "ABS", ABS, "ACCEPT", ACCEPT, "ACCESS", ACCESS, "ALL", ALL, "AND", AND, "ARRAY", ARRAY, "AT", AT, "BEGIN", BEGIN, "BODY", BODY, "CASE", CASE, "CONSTANT", CONSTANT, "DECLARE", DECLARE, "DELAY", DELAY, "DELTA", DELTA, "DIGITS", DIGITS, "DO", DO, "ELSE", ELSE, "ELSIF", ELSIF, "END", END, "ENTRY", ENTRY, "EXCEPTION", EXCEPTION, "EXIT", EXIT, "FOR", FOR, "FUNCTION", FUNCTION, "GENERIC", GENERIC, "GOTO", GOTO, "IF", IF, "IN", IN, "IS", IS, "LIMITED", LIMITED, "LOOP", LOOP, "MOD", MOD, "NEW", NEW, "NOT", NOT, "NULL", NULL, "OF", OF, "OR", OR, "OTHERS", OTHERS, "OUT", OUT, "PACKAGE", PACKAGE, "PRAGMA", PRAGMA, "PRIVATE", PRIVATE, "PROCEDURE", PROCEDURE, "RAISE", RAISE, "RANGE", RANGE, "RECORD", RECORD, "REM", REM, "RENAMES", RENAMES, "RETURN", RETURN, "REVERSE", REVERSE, "SELECT", SELECT, "SEPARATE", SEPARATE, "SUBTYPE", SUBTYPE, "TASK", TASK, "TERMINATE", TERMINATE, "THEN", THEN, "TYPE", TYPE, "USE", USE, "WHEN", WHEN, "WHILE", WHILE, "WITH", WITH, "XOR", XOR }; #define NKEYS (sizeof(keywordtab) / sizeof(struct key)) main() { lc = 0; /* reset line count */ ll = 0; /* reset line length */ cc = 0; /* reset character count */ ch = ' '; /* reset ch (fool yylex into getting first token */ endoffile = 0; /* reset end of file indicator */ printf("Ada compiler\n\n"); if (yyparse() == 0) printf("\n%d syntax error(s)",yynerrs); else printf("\ncompilation aborted.\n"); } yyerror(s) char *s; { int i; printf("**-=> "); for (i = 0; i < cc; i++) printf(" "); printf("^ "); printf("%s\n",s); } binary(word) char *word; { int low,high,mid,cond; low = 0; high = NKEYS - 1; while (low <= high) { mid = (low+high) / 2; if ((cond = strcmp(word,keywordtab[mid].keyword)) < 0) high = mid - 1; else if (cond > 0) low = mid + 1; else return(keywordtab[mid].keyvalue); } return(-1); } nextch() { int lim; if (cc == ll) { cc = -1; ll = 0; lim = MAXLINE; while (--lim > 0 && (ch=getchar())!=EOF && ch!='\n') line[ll++] = ch; if (ch == EOF) return EOF; if (ch == '\n') line[ll++] = ch; line[ll] = '\0'; printf("%5d : %s",++lc,line); } ch = line[++cc]; } /* compute value of character ch using base. return true if */ /* value is acceptable in the given number base. */ inbase() { if (isdigit(ch)) i = ch - '0'; else if (toupper(ch) >= 'A' && toupper(ch) <= 'F') i = ch - 'A' + 10; else return(0); if (i < base) return(1); else return(0); } /* return integer value of the string of input digits. */ getinteger() { int num; num = 0; while (inbase()) { num = num * base + i; nextch(); if (ch == '_') { nextch(); if (inbase() == 0) yyerror("extended digit expected"); } } return(num); } getfraction() { double num; double divi; divi = 1.0 / base; while (inbase()) { num = num + i * divi; divi = divi / base; nextch(); if (ch == '_') { nextch(); if (inbase() == 0) yyerror("extended digit expected"); } } return(num); } yylex() { int k; char based_ch; /* character denoting based literal; either # or : */ int exponent; /* integer exponent of numeric literal */ int sign; /* sign of numeric literal */ /* skip white space */ while (ch == ' ' || ch == '\n' || ch == '\t' || ch == '\0') nextch(); /* check for alpha */ if (isalpha(ch)) { k = 0; while (isalpha(ch) || isdigit(ch)) { if (isalpha(ch)) ch = toupper(ch); id[k++] = ch; if (ch == '_') { nextch(); if (isalpha(ch) || isdigit(ch)) id[k++] = '_'; else yyerror("letter or digit expected"); } else nextch(); } id[k] = '\0'; k = binary(id); if (k == -1) return(IDENTIFIER); else return(k); } /* check for number */ else if (isdigit(ch)) { base = 10; inum = getinteger(); if (ch == '#' || ch == ':') { /* process based number */ based_ch = ch; nextch(); base = inum; inum = getinteger(); if (ch == based_ch) nextch(); else if (ch == '.') { rnum = inum + getfraction(); if (ch == based_ch) nextch(); else yyerror("mismatched # or : in based number"); } } else if (ch == '.') { /* process real number */ nextch(); if (ch == '.') --cc; else rnum = inum + getfraction(); } if (ch == 'E' || ch == 'e') { /* process exponent */ nextch(); if (ch == '+') nextch(); else if (ch == '-') { sign = -1; nextch(); } exponent = getinteger(); } return(NUMERIC_LITERAL); } else { switch(ch) { case '"' : while(1) { nextch(); if (ch == '"') { nextch(); if (ch != '"') return(STRING_LITERAL); } } case '\'' : nextch(); if (isalpha(ch) || ch == '(') { nextch(); if (ch == '\'') { nextch(); return(CHARACTER_LITERAL); } else { --cc; return(QUOTE); } } else { nextch(); if (ch != '\'') yyerror("quote expected"); else nextch(); return(CHARACTER_LITERAL); } case ':' : nextch(); if (ch == '=') { nextch(); return(REPLACEMENT); } else return(COLON); case '<' : nextch(); if (ch == '<') { nextch(); return(LEFT_LABEL); } else if (ch == '=') { nextch(); return(LESS_EQUAL); } else if (ch == '>') { nextch(); return(BOX); } else return(LESS_THAN); case '>' : nextch(); if (ch == '>') { nextch(); return(RIGHT_LABEL); } else if (ch == '=') { nextch(); return(GREATER_EQUAL); } else return(GREATER_THAN); case '.' : nextch(); if (ch == '.') { nextch(); return(ELLIPSIS); } else return(PERIOD); case '-' : nextch(); if (ch == '-') { cc = ll; ch = ' '; return(yylex()); } else return(MINUS); case '*' : nextch(); if (ch == '*') { nextch(); return(DOUBLE_STAR); } else return(SPLAT); case '=' : nextch(); if (ch == '>') { nextch(); return(ARROW); } else return(EQUAL_TO); case '/' : nextch(); if (ch == '=') { nextch(); return(NOT_EQUAL_TO); } else return(SLASH); case '+' : nextch(); return(PLUS); case '|' : nextch(); return(BAR); case '&' : nextch(); return(AMPERSAND); case ';' : nextch(); return(SEMICOLON); case ',' : nextch(); return(COMMA); case '(' : nextch(); return(LEFT_PAREN); case ')' : nextch(); return(RIGHT_PAREN); case EOF : if (endoffile == 0) { endoffile = 1; return(EOF); } else { yyerror("unexpected end of file"); exit(); } default : yyerror("invalid character"); return(yylex()); } } }