[net.sources] Scanner for Ada parser

david@ssc-vax.UUCP (David Norris) (12/07/83)
	An earlier article, containing my revised grammar for MIL-STD Ada,
generated some interest.  Rather than send the C source of the lexical
analyzer via mail, I am posting it so all may benefit.  There is much room for
improvement, both in the grammar (error recovery) and the scanner.  Comments
or improvements greatly appreciated.

	-- Dave Norris
	-- ..!uw-beaver!ssc-vax!david

-----------------------------------------------------------------------------
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "y.tab.c"

char id[20];	/* identifier from yylex */

int inum;	/* integer from yylex */
double rnum;	/* real number from yylex */
int i;		/* numeric value of character in number */
int base;	/* base of based number */

int sleng;	/* string length */

#define MAXLINE 132
int ch;			/* last character read from source program */
char line[MAXLINE];	/* current line of source text */

int cc;		/* character counter */
int lc;		/* program location counter */
int ll ;	/* length of current line */
int endoffile;	/* end of file indicator */

struct key {
	char *keyword;
	int keyvalue;
} keywordtab[] = {
	"ABORT",	ABORT,
	"ABS",		ABS,
	"ACCEPT",	ACCEPT,
	"ACCESS",	ACCESS,
	"ALL",		ALL,
	"AND",		AND,
	"ARRAY",	ARRAY,
	"AT",		AT,
	"BEGIN",	BEGIN,
	"BODY",		BODY,
	"CASE",		CASE,
	"CONSTANT",	CONSTANT,
	"DECLARE",	DECLARE,
	"DELAY",	DELAY,
	"DELTA",	DELTA,
	"DIGITS",	DIGITS,
	"DO",		DO,
	"ELSE",		ELSE,
	"ELSIF",	ELSIF,
	"END",		END,
	"ENTRY",	ENTRY,
	"EXCEPTION",	EXCEPTION,
	"EXIT",		EXIT,
	"FOR",		FOR,
	"FUNCTION",	FUNCTION,
	"GENERIC",	GENERIC,
	"GOTO",		GOTO,
	"IF",		IF,
	"IN",		IN,
	"IS",		IS,
	"LIMITED",	LIMITED,
	"LOOP",		LOOP,
	"MOD",		MOD,
	"NEW",		NEW,
	"NOT",		NOT,
	"NULL",		NULL,
	"OF",		OF,
	"OR",		OR,
	"OTHERS",	OTHERS,
	"OUT",		OUT,
	"PACKAGE",	PACKAGE,
	"PRAGMA",	PRAGMA,
	"PRIVATE",	PRIVATE,
	"PROCEDURE",	PROCEDURE,
	"RAISE",	RAISE,
	"RANGE",	RANGE,
	"RECORD",	RECORD,
	"REM",		REM,
	"RENAMES",	RENAMES,
	"RETURN",	RETURN,
	"REVERSE",	REVERSE,
	"SELECT",	SELECT,
	"SEPARATE",	SEPARATE,
	"SUBTYPE",	SUBTYPE,
	"TASK",		TASK,
	"TERMINATE",	TERMINATE,
	"THEN",		THEN,
	"TYPE",		TYPE,
	"USE",		USE,
	"WHEN",		WHEN,
	"WHILE",	WHILE,
	"WITH",		WITH,
	"XOR",		XOR
};

#define NKEYS (sizeof(keywordtab) / sizeof(struct key))

main()
{
	lc = 0;		/* reset line count */
	ll = 0;		/* reset line length */
	cc = 0;		/* reset character count */
	ch = ' ';	/* reset ch (fool yylex into getting first token */
	endoffile = 0;	/* reset end of file indicator */

	printf("Ada compiler\n\n");
	if (yyparse() == 0)
		printf("\n%d syntax error(s)",yynerrs);
	else
		printf("\ncompilation aborted.\n");
}

yyerror(s) char *s;
{
	int i;

	printf("**-=>  ");
	for (i = 0; i < cc; i++) printf(" ");
	printf("^ ");
	printf("%s\n",s);
}

binary(word)
char *word;
{
	int low,high,mid,cond;

	low = 0;
	high = NKEYS - 1;
	while (low <= high) {
		mid = (low+high) / 2;
		if ((cond = strcmp(word,keywordtab[mid].keyword)) < 0)
			high = mid - 1;
		else if (cond > 0)
			low = mid + 1;
		else
			return(keywordtab[mid].keyvalue);
	}
	return(-1);
}

nextch()
{
	int lim;

	if (cc == ll) {
		cc = -1;
		ll = 0;
		lim = MAXLINE;
		while (--lim > 0 && (ch=getchar())!=EOF && ch!='\n')
			line[ll++] = ch;
		if (ch == EOF)
			return EOF;
		if (ch == '\n')
			line[ll++] = ch;
		line[ll] = '\0';
		printf("%5d : %s",++lc,line);
	}
	ch = line[++cc];
}

/* compute value of character ch using base.  return true if */
/* value is acceptable in the given number base.             */
inbase()
{
	if (isdigit(ch))
		i = ch - '0';
	else if (toupper(ch) >= 'A' && toupper(ch) <= 'F')
		i = ch - 'A' + 10;
	else
		return(0);
	
	if (i < base)
		return(1);
	else
		return(0);
}

/* return integer value of the string of input digits. */
getinteger()
{
	int num;

	num = 0;
	while (inbase()) {
		num = num * base + i;
		nextch();
		if (ch == '_') {
			nextch();
			if (inbase() == 0)
				yyerror("extended digit expected");
		}
	}
	return(num);
}

getfraction()
{
	double num;
	double divi;

	divi = 1.0 / base;
	while (inbase()) {
		num = num + i * divi;
		divi = divi / base;
		nextch();
		if (ch == '_') {
			nextch();
			if (inbase() == 0)
				yyerror("extended digit expected");
		}
	}
	return(num);
}

yylex()
{
	int k;
	char based_ch;	/* character denoting based literal; either # or : */
	int exponent;	/* integer exponent of numeric literal */
	int sign;	/* sign of numeric literal */

	/* skip white space */
	while (ch == ' ' || ch == '\n' || ch == '\t' || ch == '\0')
		nextch();
	
	/* check for alpha */
	if (isalpha(ch)) {
		k = 0;
		while (isalpha(ch) || isdigit(ch)) {
			if (isalpha(ch)) ch = toupper(ch);
			id[k++] = ch;
			if (ch == '_') {
				nextch();
				if (isalpha(ch) || isdigit(ch))
					id[k++] = '_';
				else
					yyerror("letter or digit expected");
			} else
				nextch();
		}
		id[k] = '\0';
		k = binary(id);
		if (k == -1)
			return(IDENTIFIER);
		else
			return(k);
	}

	/* check for number */
	else if (isdigit(ch)) {
		base = 10;
		inum = getinteger();
		if (ch == '#' || ch == ':') {
			/* process based number */
			based_ch = ch;
			nextch();
			base = inum;
			inum = getinteger();
			if (ch == based_ch) nextch();
			else if (ch == '.') {
				rnum = inum + getfraction();
				if (ch == based_ch) nextch();
				else
					yyerror("mismatched # or : in based number");
			}
		}
		else if (ch == '.') {
			/* process real number */
			nextch();
			if (ch == '.')
				--cc;
			else
				rnum = inum + getfraction();
		}

		if (ch == 'E' || ch == 'e') {
			/* process exponent */
			nextch();
			if (ch == '+') 
				nextch();
			else if (ch == '-') {
				sign = -1;
				nextch();
			}
			exponent = getinteger();
		}
		return(NUMERIC_LITERAL);
	}

	else {
		switch(ch) {
			case '"' :
				while(1) {
					nextch();
					if (ch == '"') {
						nextch();
						if (ch != '"')
							return(STRING_LITERAL);
					}
				}

			case '\'' :
				nextch();
				if (isalpha(ch) || ch == '(') {
					nextch();
					if (ch == '\'') {
						nextch();
						return(CHARACTER_LITERAL);
					} else {
						--cc;
						return(QUOTE);
					}
				} else {
					nextch();
					if (ch != '\'')
						yyerror("quote expected");
					else
						nextch();
					return(CHARACTER_LITERAL);
				}

			case ':' :
				nextch();
				if (ch == '=') {
					nextch();
					return(REPLACEMENT);
				}
				else
					return(COLON);

			case '<' :
				nextch();
				if (ch == '<') {
					nextch();
					return(LEFT_LABEL);
				}
				else if (ch == '=') {
					nextch();
					return(LESS_EQUAL);
				}
				else if (ch == '>') {
					nextch();
					return(BOX);
				}
				else
					return(LESS_THAN);
	

			case '>' :
				nextch();
				if (ch == '>') {
					nextch();
					return(RIGHT_LABEL);
				}
				else if (ch == '=') {
					nextch();
					return(GREATER_EQUAL);
				}
				else
					return(GREATER_THAN);

			case '.' :
				nextch();
				if (ch == '.') {
					nextch();
					return(ELLIPSIS);
				}
				else
					return(PERIOD);

			case '-' :
				nextch();
				if (ch == '-') {
					cc = ll;
					ch = ' ';
					return(yylex());
				}
				else
					return(MINUS);

			case '*' :
				nextch();
				if (ch == '*') {
					nextch();
					return(DOUBLE_STAR);
				}
				else
					return(SPLAT);

			case '=' :
				nextch();
				if (ch == '>') {
					nextch();
					return(ARROW);
				}
				else
					return(EQUAL_TO);

			case '/' :
				nextch();
				if (ch == '=') {
					nextch();
					return(NOT_EQUAL_TO);
				}
				else
					return(SLASH);

			case '+' :
				nextch();
				return(PLUS);

			case '|' :
				nextch();
				return(BAR);

			case '&' :
				nextch();
				return(AMPERSAND);

			case ';' :
				nextch();
				return(SEMICOLON);

			case ',' :
				nextch();
				return(COMMA);

			case '(' :
				nextch();
				return(LEFT_PAREN);

			case ')' :
				nextch();
				return(RIGHT_PAREN);

			case EOF :
				if (endoffile == 0) {
					endoffile = 1;
					return(EOF);
				}
				else {
					yyerror("unexpected end of file");
					exit();
				}

			default :
				yyerror("invalid character");
				return(yylex());
		}
	}
}