sigma@usl.UUCP (Spyridon Triantafyllopoulos) (01/16/85)
Finally, I got together (with the help of many net-people, whom I wish to
thank), some examples and documentation on yacc and lex. For additional
documentation, the book The UNIX Programming Environment by Kernighan and
Pike has a chapter on them, chapter 8. There is also some material on
lexical analysers and lex in Aho's book Principles of Compiler Design (the
one with the dragon on the cover).

Additional examples (if you have BSD 4.2) can be found in the source code
from Berkeley. I found a Pascal grammar, and grammars for awk, grep, and
other amenities.

The archive below contains the examples received (the small ones), plus
the UNIX programmer manual examples.

Enjoy them!!!!
--
Spiros

Spiros Triantafyllopoulos  <> USENET {ut-sally, akgua}!usl!sigma
Computer Science Dept, USL <> CSNet  TriantafyllopoulosS%usl@csnet-relay.ARPA
"This file contains no opinions whatsoever"

-------------- c u t h e r e -------------- a n d h e r e -------------
!<arch>
lex1 472255039 4 32 100644 465 `
%%
[ \t]+$ ;
#####################################################
%%
[ \t]+$ ;
[ \t]+ printf(" ");
#####################################################
%%
    int k;
[0-9]+ {
    sscanf(yytext,"%d",&k);
    if (k % 7 == 0)
        printf("%d",k + 3);
    else
        printf("%d",k);
    }
yacc1 472255160 4 32 100644 869 `
%token abc
%%
A : abc { printf("hello abc\n"); }
%%
int yya = 1;

/* Hands the parser exactly one `abc` token, then exits the program on the
   next call.  The token is returned by its generated name rather than by
   a hard-coded number, so it stays correct whatever number the parser
   generator assigns to `abc`. */
yylex ()
{
    extern int yya;

    if( yya++ == 1 )
        return(abc);
    else
        exit(0) ;
}
#####################################################
%token month31 1
%token February 2
%token month30 3
%token firstdays 4
%token twonine 5
%token middledays 6
%token lateday 7
%token lastday 8
%token year 9
%token blank 10
%token comma 11
%start date
%%
date:month blank day comma blank year {printf("\nok");}
month:month31|month30|February ;
day:firstdays|twonine|middledays|lateday|lastday ;
#####################################################
%token DING 1 DONG 2 DELL 3
%%
rhyme : sound place ;
sound : DING DONG ;
place : DELL ;
####################################################
%right '='
%left '+' '-'
%left '*' '/'
%token V 257
%%
E : E '=' E
  | E '+' E
  | E '-' E
  | E '*' E
  | E '/' E
  | V
  ;
yacc2 472351254 4 32 100644 2322 `
/*
   A Simple Example

   This example gives the Yacc specification for a small desk
   calculator; the desk calculator has 26 registers, labeled a
   through z, and accepts arithmetic expressions made up of the
   operators +, -, *, /, % (mod operator), & (bitwise and),
   | (bitwise or), and assignment.  If an expression at the top
   level is an assignment, the value is not printed; otherwise it
   is.  As in C, an integer that begins with 0 (zero) is assumed to
   be octal; otherwise, it is assumed to be decimal.

   As an example of a Yacc specification, the desk calculator does
   a reasonable job of showing how precedences and ambiguities are
   used, and demonstrating simple error recovery.  The major
   oversimplifications are that the lexical analysis is much
   simpler than for most applications, and the output is produced
   immediately, line by line.  Note the way that decimal and octal
   integers are read in by the grammar rules; this job is probably
   better done by the lexical analyzer.
*/

%{
# include <stdio.h>
# include <ctype.h>

int regs[26];   /* the registers a..z, indexed 0..25 */
int base;       /* radix of the number being read: 8 or 10 */
%}

%start list

%token DIGIT LETTER

%left '|'
%left '&'
%left '+' '-'
%left '*' '/' '%'
%left UMINUS /* supplies precedence for unary minus */

%% /* beginning of rules section */

list : /* empty */
     | list stat '\n'
     | list error '\n'
            { yyerrok; }
     ;

stat : expr
            { printf( "%d\n", $1 ); }
     | LETTER '=' expr
            { regs[$1] = $3; }
     ;

expr : '(' expr ')'
            { $$ = $2; }
     | expr '+' expr
            { $$ = $1 + $3; }
     | expr '-' expr
            { $$ = $1 - $3; }
     | expr '*' expr
            { $$ = $1 * $3; }
     | expr '/' expr
            { $$ = $1 / $3; }
     | expr '%' expr
            { $$ = $1 % $3; }
     | expr '&' expr
            { $$ = $1 & $3; }
     | expr '|' expr
            { $$ = $1 | $3; }
     | '-' expr %prec UMINUS
            { $$ = -$2; }
     | LETTER
            { $$ = regs[$1]; }
     | number
     ;

number : DIGIT
            { $$ = $1; base = ($1==0) ? 8 : 10; }
       | number DIGIT
            { $$ = base * $1 + $2; }
       ;

%% /* start of programs */

/* Lexical analysis routine.
   Skips blanks, then returns LETTER for a lower case letter with
   yylval = 0 through 25, DIGIT for a digit with yylval = 0 through 9;
   every other character (including newline) is returned as itself. */
yylex()
{
    int c;

    while( (c=getchar()) == ' ' ) { /* skip blanks */ }

    /* c is now nonblank */

    if( islower( c ) ) {
        yylval = c - 'a';
        return ( LETTER );
    }
    if( isdigit( c ) ) {
        yylval = c - '0';
        return( DIGIT );
    }
    return( c );
}

yyerror( )
{ /* error handle routine */
    printf("error here..\n");
}

main()
{
    return(yyparse());
}
yacc3 472255563 4 32 100644 4409 `
%{
# include <stdio.h>
# include <ctype.h>

/* A closed interval [lo, hi] of doubles. */
typedef struct interval {
    double lo, hi;
} INTERVAL;

INTERVAL vmul(), vdiv();

double atof();

double dreg[ 26 ];      /* scalar registers, named a..z */
INTERVAL vreg[ 26 ];    /* interval registers, named A..Z */
%}

%start lines

%union {
    int ival;
    double dval;
    INTERVAL vval;
}

%token <ival> DREG VREG /* indices into dreg, vreg arrays */

%token <dval> CONST /* floating point constant */

%type <dval> dexp /* expression */

%type <vval> vexp /* interval expression */

/* precedence information about the operators */

%left '+' '-'
%left '*' '/'
%left UMINUS /* precedence for unary minus */

%%

lines : /* empty */
      | lines line
      ;

line : dexp '\n'
            { printf( "%15.8f\n", $1 ); }
     | vexp '\n'
            { printf( "(%15.8f , %15.8f )\n", $1.lo, $1.hi ); }
     | DREG '=' dexp '\n'
            { dreg[$1] = $3; }
     | VREG '=' vexp '\n'
            { vreg[$1] = $3; }
     | error '\n'
            { yyerrok; }
     ;

dexp : CONST
     | DREG
            { $$ = dreg[$1]; }
     | dexp '+' dexp
            { $$ = $1 + $3; }
     | dexp '-' dexp
            { $$ = $1 - $3; }
     | dexp '*' dexp
            { $$ = $1 * $3; }
     | dexp '/' dexp
            { $$ = $1 / $3; }
     | '-' dexp %prec UMINUS
            { $$ = - $2; }
     | '(' dexp ')'
            { $$ = $2; }
     ;

vexp : dexp
            { $$.hi = $$.lo = $1; }
     | '(' dexp ',' dexp ')'
            {
                $$.lo = $2;
                $$.hi = $4;
                if( $$.lo > $$.hi ){
                    printf( "interval out of order\n" );
                    YYERROR;
                }
            }
     | VREG
            { $$ = vreg[$1]; }
     | vexp '+' vexp
            { $$.hi = $1.hi + $3.hi;
              $$.lo = $1.lo + $3.lo; }
     | dexp '+' vexp
            { $$.hi = $1 + $3.hi;
              $$.lo = $1 + $3.lo; }
     | vexp '-' vexp
            { $$.hi = $1.hi - $3.lo;
              $$.lo = $1.lo - $3.hi; }
     | dexp '-' vexp
            { $$.hi = $1 - $3.lo;
              $$.lo = $1 - $3.hi; }
     | vexp '*' vexp
            { $$ = vmul( $1.lo, $1.hi, $3 ); }
     | dexp '*' vexp
            { $$ = vmul( $1, $1, $3 ); }
     | vexp '/' vexp
            { if( dcheck( $3 ) ) YYERROR;
              $$ = vdiv( $1.lo, $1.hi, $3 ); }
     | dexp '/' vexp
            { if( dcheck( $3 ) ) YYERROR;
              $$ = vdiv( $1, $1, $3 ); }
     | '-' vexp %prec UMINUS
            { $$.hi = -$2.lo; $$.lo = -$2.hi; }
     | '(' vexp ')'
            { $$ = $2; }
     ;

%%

# define BSZ 50 /* buffer size for floating point numbers */

/* lexical analysis */

/* Skips blanks, then returns VREG for an upper case letter and DREG for a
   lower case letter (yylval.ival = 0 through 25), CONST for a floating
   point constant (yylval.dval = its value); any other character is
   returned as itself. */
yylex(){
    register c;

    while( (c=getchar()) == ' ' ){ /* skip over blanks */ }

    if( isupper( c ) ){
        yylval.ival = c - 'A';
        return( VREG );
    }
    if( islower( c ) ){
        yylval.ival = c - 'a';
        return( DREG );
    }

    if( isdigit( c ) || c=='.' ){
        /* gobble up digits, points, exponents */

        char buf[BSZ+1], *cp = buf;
        int dot = 0, exp = 0;

        for( ; (cp-buf)<BSZ ; ++cp,c=getchar() ){

            *cp = c;
            if( isdigit( c ) ) continue;
            if( c == '.' ){
                if( dot++ || exp ) return( '.' ); /* will cause syntax error */
                continue;
            }

            if( c == 'e' ){
                if( exp++ ) return( 'e' ); /* will cause syntax error */
                continue;
            }

            /* end of number */
            break;
        }
        *cp = '\0';
        if( (cp-buf) >= BSZ ) printf( "constant too long: truncated\n" );
        else ungetc( c, stdin ); /* push back last char read */
        yylval.dval = atof( buf );
        return( CONST );
    }
    return( c );
}

INTERVAL hilo( a, b, c, d ) double a, b, c, d; {
    /* returns the smallest interval containing a, b, c, and d */
    /* used by *, / routines */
    INTERVAL v;

    if( a>b ) { v.hi = a; v.lo = b; }
    else { v.hi = b; v.lo = a; }

    if( c>d ) {
        if( c>v.hi ) v.hi = c;
        if( d<v.lo ) v.lo = d;
    }
    else {
        if( d>v.hi ) v.hi = d;
        if( c<v.lo ) v.lo = c;
    }
    return( v );
}

/* product of the interval [a, b] and the interval v */
INTERVAL vmul( a, b, v ) double a, b; INTERVAL v; {
    return( hilo( a*v.hi, a*v.lo, b*v.hi, b*v.lo ) );
}

/* returns 1 (after printing a message) if v contains zero, else 0 */
dcheck( v ) INTERVAL v; {
    if( v.hi >= 0. && v.lo <= 0. ){
        printf( "divisor interval contains 0.\n" );
        return( 1 );
    }
    return( 0 );
}

/* quotient of the interval [a, b] by the interval v; callers run
   dcheck( v ) first so that v does not contain zero */
INTERVAL vdiv( a, b, v ) double a, b; INTERVAL v; {
    return( hilo( a/v.hi, a/v.lo, b/v.hi, b/v.lo ) );
}
yacc4 472967049 4 32 100644 1333 `
/* Here is a SIMPLE yacc program that evaluates logical expressions.
   The operators are:
        ~       not
        &       logical and
        |          "    or
        >          "    implication (if -> then, onlyif)
        =          "    equivalence (if and only if)
   these are evaluated by '('s or in the order shown: not, and, or, >, =
   examples are:
        ((t&f)>F)|[f=~T]
        (t&f)
        ([f]|~T|T|~[F>~T])
        T
        F&~f
   get it? */
%{
%}
%token BOOLEAN
/* One precedence level per operator, lowest first, so that the binding
   order is the documented one: not, and, or, >, = (tightest to loosest). */
%left '='
%left '>'
%left '|'
%left '&'
%left '~'
%%
list:
    | list '\n'
    | list expr '\n'
        { printf(" %c\n",$2 ? 'T': 'F'); }
    | list expr ','
        { printf(" %c,",$2 ? 'T': 'F'); }
    ;
expr: BOOLEAN
        { $$ = $1; }
    | '~' expr
        { $$ = (!$2); }
    | expr '&' expr
        { $$ = ($1 && $3); }
    | expr '|' expr
        { $$ = ($1 || $3); }
    | expr '>' expr
        { $$ = ((!$1) || $3); }
    | expr '=' expr
        { $$ = ($1 == $3); }
    | '(' expr ')'
        { $$ = $2; }
    | '[' expr ']'
        { $$ = $2; }
    ;
%%
#include <stdio.h>

main( )
{
    yyparse( );
}

yyerror()
{
    printf("syntax error\n");
}

/* Skips blanks and tabs; returns 0 at end of input, BOOLEAN for one of
   t, T, f, F (yylval = 1 for true, 0 for false), and any other character
   as itself. */
yylex()
{
    int c;

    while((c = getchar()) == ' ' || c == '\t');
    if (c == EOF) return(0);
    /*accept t, f, T, F */
    if ((c == 't') || (c == 'T') || (c == 'f') || (c == 'F'))
    {
        yylval = ((c == 't') || (c == 'T'));
        return (BOOLEAN);
    }
    return (c);
}
-------- c u t h e r e a l s o ------------------ have fun -------------

Software Disclaimer: This terminal assumes no responsibility for the
correctness of the above stuff. Legal actions can be redirected to my
lawyer at /dev/null or any other suitable environment. Copyright rights
that might have been violated have my sympathy.
--
--
Spiros