smith_3@h-sc1.UUCP (joel smith) (05/03/85)
The problem of reading in a roman numeral is a rather complex one. Namely, not all digits can be repeated 3 times and only certain prefix combinations are allowed: CM, CD, XC, XL, IX, IV. Therefore the number 999 is not "IM" as it is tempting to write it. 999 is actually: "CMXCIX". A bit long but syntactically correct. Below is a YACC grammar that creates a program to read roman numerals. For those unfamiliar with YACC, it creates a parser for a specified input grammar. In order to create the program, you must be on a UNIX system with YACC. If so, type "yacc <filename><return>". When this returns type "cc y.tab.c<return>". Joel Smith Harvard University ------------------------cut here-------------------------------- /* glossary of non-terminals: rnumber a roman number (0-3999) less_thou 0-999 five_hund 0-899 hund 0-399 less_hund 0-99 fifty 0-89 ten 0-39 less_ten 0-9 five 0-8 one 0-3 */ %{ #include <stdio.h> #include <ctype.h> #define TRUE 1 #define FALSE 0 %} %start rnumber %% /* thousands */ rnumber : 'M' less_thou {$$ = $2 + 1000;} | 'M''M' less_thou {$$ = $3 + 2000;} | 'M''M''M' less_thou {$$ = $4 + 3000;} | less_thou ; less_thou : 'C''M' less_hund {$$ = $3 + 900;} | five_hund ; /* five hundreds */ five_hund : hund | 'D' hund {$$ = $2 + 500;} | 'C''D' less_hund {$$ = $3 + 400;} ; /* hundreds */ hund : 'C' less_hund {$$ = $2 + 100;} | 'C''C' less_hund {$$ = $3 + 200;} | 'C''C''C' less_hund {$$ = $4 + 300;} | less_hund ; less_hund : 'X''C' less_ten {$$ = $3 + 90;} | fifty ; /* fifty */ fifty : ten | 'L' ten {$$ = $2 + 50;} | 'X''L' less_ten {$$ = $3 + 40;} ; /* tens */ ten : 'X' less_ten {$$ = $2 + 10;} | 'X''X' less_ten {$$ = $3 + 20;} | 'X''X''X' less_ten {$$ = $4 + 30;} | less_ten ; less_ten : 'I''X' {$$ = 9;} | five ; /* fives */ five : one | 'V' one {$$ = $2 + 5;} | 'I''V' {$$ = 4;} ; /* ones */ one : 'I' {$$ = 1;} | 'I''I' {$$ = 2;} | 'I''I''I' {$$ = 3;} | 'J' {$$ = 1;} | 'I''J' {$$ = 2;} | 'I''I''J' {$$ = 3;} | {$$ = 0;} ; %% yylex() { int next; char error[25]; next = getchar(); if (next == EOF) /* check for end */ exit(0); if (next == '\n') return(0); /* end token character */ if (isalpha(next)) /* convert to upper case */ if (islower(next)) next = toupper(next); if (is_roman(next) == FALSE) { sprintf(error,"Illegal Character: %c",next); yyerror(error); } else return(next); } yyerror(s) char *s; { fprintf(stderr,"%s\n",s); while (getchar() != '\n'); } is_roman(s) int s; { if (index("MDCLXVIJ",s) == NULL) return(FALSE); else return(TRUE); } main() { printf("Roman to decimal Converter\n"); while (TRUE) { printf("--> "); if (yyparse() == 0) printf("%d\n", yyval); } }