[net.sources] roman numbers to decimal

smith_3@h-sc1.UUCP (joel smith) (05/03/85)

	The problem of reading in a roman numeral is a rather complex one.

Namely, only the digits I, X, C and M may be repeated (at most 3 times each),

and only certain subtractive prefix combinations are allowed: CM, CD, XC, XL, IX, IV.

Therefore the number 999 is not "IM" as it is tempting to write it.

999 is actually: "CMXCIX".  A bit long but syntactically correct.

Below is a YACC grammar that creates a program to read roman numerals.

For those unfamiliar with YACC, it creates a parser for a specified input

grammar.  In order to create the program, you must be on a UNIX system with

YACC.  If so, type "yacc <filename><return>".  When that command finishes, type

"cc y.tab.c<return>" to compile the generated parser.


						Joel Smith
						Harvard University

------------------------cut here--------------------------------
/*	glossary of non-terminals:

	rnumber		a roman number	(0-3999)
	less_thou	0-999
	five_hund	0-899
	hund		0-399
	less_hund	0-99
	fifty		0-89
	ten		0-39
	less_ten	0-9
	five		0-8
	one		0-3
*/
%{
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TRUE 1
#define FALSE 0

%}
%start rnumber

%%
/* thousands */
/*
 * rnumber is the start symbol: a complete roman numeral.
 * One, two or three leading 'M's contribute 1000, 2000 or 3000 and the
 * remainder is parsed as less_thou and added in.  The last alternative
 * (no leading 'M') has no action, so yacc's default action $$ = $1 passes
 * the less_thou value through unchanged.
 */
rnumber :	  'M' less_thou		{$$ = $2 + 1000;}
		| 'M''M' less_thou	{$$ = $3 + 2000;}
		| 'M''M''M' less_thou	{$$ = $4 + 3000;}
		| less_thou
		;

/*
 * less_thou: the part of the numeral below one thousand.
 * The subtractive pair "CM" contributes 900 and may only be followed by a
 * less_hund; anything else is handled by five_hund (default $$ = $1).
 */
less_thou :	  'C''M' less_hund	{$$ = $3 + 900;}
		| five_hund
		;

/* five hundreds */
/*
 * five_hund: a plain hund (default $$ = $1), or 'D' (500) followed by a
 * hund, or the subtractive pair "CD" (400) followed by a less_hund.
 */
five_hund :	  hund
		| 'D' hund		{$$ = $2 + 500;}
		| 'C''D' less_hund	{$$ = $3 + 400;}
		;

/* hundreds */
/*
 * hund: one, two or three 'C's (100, 200 or 300) followed by a less_hund,
 * or a less_hund on its own (default $$ = $1).
 */
hund :		  'C' less_hund		{$$ = $2 + 100;}
		| 'C''C' less_hund	{$$ = $3 + 200;}
		| 'C''C''C' less_hund	{$$ = $4 + 300;}
		| less_hund
		;

/*
 * less_hund: the part of the numeral below one hundred.
 * The subtractive pair "XC" contributes 90 and may only be followed by a
 * less_ten; anything else is handled by fifty (default $$ = $1).
 */
less_hund :	  'X''C' less_ten	{$$ = $3 + 90;}
		| fifty
		;

/* fifty */
/*
 * fifty: a plain ten (default $$ = $1), or 'L' (50) followed by a ten,
 * or the subtractive pair "XL" (40) followed by a less_ten.
 */
fifty :		  ten
		| 'L' ten		{$$ = $2 + 50;}
		| 'X''L' less_ten	{$$ = $3 + 40;}
		;

/* tens */
/*
 * ten: one, two or three 'X's (10, 20 or 30) followed by a less_ten,
 * or a less_ten on its own (default $$ = $1).
 */
ten :		  'X' less_ten		{$$ = $2 + 10;}
		| 'X''X' less_ten	{$$ = $3 + 20;}
		| 'X''X''X' less_ten	{$$ = $4 + 30;}
		| less_ten
		;

/*
 * less_ten: the units part of the numeral.
 * The subtractive pair "IX" is 9; anything else is handled by five
 * (default $$ = $1).
 */
less_ten :	  'I''X'		{$$ = 9;}
		| five
		;

/* fives */
/*
 * five: a plain one (default $$ = $1), or 'V' (5) followed by a one,
 * or the subtractive pair "IV" (4).
 */
five :		  one
		| 'V' one		{$$ = $2 + 5;}
		| 'I''V'		{$$ = 4;}
		;

/* ones */
/*
 * one: up to three unit strokes.
 * 'J' is accepted in place of the final 'I' and counts as 1, so "IJ" is 2
 * and "IIJ" is 3.  The empty alternative yields 0; since every other
 * non-terminal ultimately ends in a one, this is what makes each
 * lower-order part of a numeral optional.
 */
one :		  'I'			{$$ = 1;}
		| 'I''I'		{$$ = 2;}
		| 'I''I''I'		{$$ = 3;}
		| 'J'			{$$ = 1;}
		| 'I''J'		{$$ = 2;}
		| 'I''I''J'		{$$ = 3;}
		|			{$$ = 0;}
		;
%%

int is_roman(int s);	/* defined further down in this file */

/*
 * yylex -- lexical analyser, called by yyparse() once per token.
 *
 * Reads one character from stdin and returns it, folded to upper case, as
 * the token value (the grammar uses character literals as its tokens).
 *
 *   - A newline ends the current numeral: 0, yacc's end marker, is returned.
 *   - End of file terminates the whole program with exit status 0.
 *   - A character that is not a roman digit is reported on stderr and a
 *     token unknown to the grammar is returned.  That makes yyparse() raise
 *     a syntax error and call yyerror(), which discards the rest of the
 *     line exactly once.  (Previously this path fell off the end of the
 *     function and handed an indeterminate token to the parser.)
 */
int yylex(void)
{
	enum { BAD_TOKEN = '?' };	/* any character the grammar never uses */
	int next = getchar();

	if (next == EOF)	/* check for end */
		exit(0);

	if (next == '\n')
		return 0;	/* end token character */

	/* getchar() yields an unsigned char value here, so toupper() is safe */
	next = toupper(next);

	/* NUL is tested explicitly so it can never be mistaken for the end marker */
	if (next == '\0' || !is_roman(next)) {
		fprintf(stderr, "Illegal Character: %c\n", next);
		return BAD_TOKEN;
	}
	return next;
}

/*
 * yyerror -- error reporter, called by yyparse() on a syntax error.
 *
 * Prints the message s followed by a newline on stderr, then discards the
 * remainder of the current input line so the next parse starts on a fresh
 * line.  The skip also stops at end of file; without that check the loop
 * would spin forever when input ends before a newline.
 *
 * Always returns 0.
 */
int yyerror(char *s)
{
	int c;

	fprintf(stderr, "%s\n", s);
	do {
		c = getchar();
	} while (c != '\n' && c != EOF);
	return 0;
}

/*
 * is_roman -- test whether s is a character accepted as a roman digit.
 *
 * Returns 1 (TRUE) when s is one of M, D, C, L, X, V, I or J (upper case
 * only), and 0 (FALSE) otherwise.
 *
 * NUL is rejected explicitly: a character search would otherwise match the
 * terminator of the digit string.  Standard strchr() replaces the legacy
 * BSD index().
 */
int is_roman(int s)
{
	if (s == '\0')
		return 0;
	return strchr("MDCLXVIJ", s) != NULL;
}

/*
 * main -- interactive driver.
 *
 * Prints a banner, then loops forever: show a prompt, parse one line with
 * yyparse(), and print the decimal value when the parse succeeds (yyparse()
 * returns 0).  The loop has no exit of its own; the program ends when
 * yylex() sees end of file and calls exit(0).
 *
 * The result is read from yyval, the parser's own value variable defined in
 * the yacc-generated code.
 * NOTE(review): this presumes yyval is a global that still holds the value
 * of the final rnumber reduction after yyparse() returns; some yacc
 * implementations keep yyval local to yyparse() -- confirm against the yacc
 * in use.
 */
int main(void)
{
	printf("Roman to decimal Converter\n");
	for (;;) {
		printf("--> ");
		/* the prompt has no newline, so push it out before blocking on input */
		fflush(stdout);
		if (yyparse() == 0)
			printf("%d\n", yyval);
	}
}