[net.sources] English to Phoneme translation

wasser_1@viking.DEC (John A. Wasser) (03/28/85)

	Here are the programs you need to translate English text to
	phonemes.  The translation rules are from a government study
	and are probably in the public domain.  The rest of the code
	was written by me and is now in the public domain.  Have fun!

		-John A. Wasser

p.s.  If you don't like the way it works, you have my permission to
      fix it without complaining to me first.

Work address:
ARPAnet:	WASSER%VIKING.DEC@decwrl.ARPA
Usenet:		{allegra,Shasta,decvax}!decwrl!dec-rhea!dec-viking!wasser
USPS:		Digital Equipment Corp.
		Mail stop: LJO2/E4
		30 Porter Rd
		Littleton, MA  01460

-------------------------------------------------------------------------------
Programmers information:

	PARSE.C is the main program.

		PARSE [infile [outfile]]

	It breaks the input file up into words and translates them
	individually.  If a word is made of digits, the digits
	are said individually and a terminating "." is treated as a
	decimal point.  Words containing both letters and digits
	will produce unexpected results.  This program is a small
	example and can easily be improved upon.


	ENGLISH.C contains the English to Phoneme rules.

	PHONEME.C contains the translation procedure for each word.

-------------------------------------------------------------------------------
ENGLISH.C
-------------------------------------------------------------------------------
/*
**	English to Phoneme rules.
**
**	Derived from: 
**
**	     AUTOMATIC TRANSLATION OF ENGLISH TEXT TO PHONETICS
**	            BY MEANS OF LETTER-TO-SOUND RULES
**
**			NRL Report 7948
**
**		      January 21st, 1976
**	    Naval Research Laboratory, Washington, D.C.
**
**
**	Published by the National Technical Information Service as
**	document "AD/A021 929".
**
**
**
**	The Phoneme codes:
**
**		IY	bEEt		IH	bIt
**		EY	gAte		EH	gEt
**		AE	fAt		AA	fAther
**		AO	lAWn		OW	lOne
**		UH	fUll		UW	fOOl
**		ER	mURdER		AX	About
**		AH	bUt		AY	hIde
**		AW	hOW		OY	tOY
**	
**		p	Pack		b	Back
**		t	Time		d	Dime
**		k	Coat		g	Goat
**		f	Fault		v	Vault
**		TH	eTHer		DH	eiTHer
**		s	Sue		z	Zoo
**		SH	leaSH		ZH	leiSure
**		HH	How		m	suM
**		n	suN		NG	suNG
**		l	Laugh		w	Wear
**		y	Young		r	Rate
**		CH	CHar		j	Jar
**		WH	WHere
**
**
**	Rules are made up of four parts:
**	
**		The left context.
**		The text to match.
**		The right context.
**		The phonemes to substitute for the matched text.
**
**	Procedure:
**
**		Separate each block of letters (apostrophes included) 
**		and add a space on each side.  For each unmatched 
**		letter in the word, look through the rules where the 
**		text to match starts with the letter in the word.  If 
**		the text to match is found and the right and left 
**		context patterns also match, output the phonemes for 
**		that rule and skip to the next unmatched letter.
**
**
**	Special Context Symbols:
**
**		#	One or more vowels
**		:	Zero or more consonants
**		^	One consonant.
**		.	One of B, D, V, G, J, L, M, N, R, W or Z (voiced 
**			consonants)
**		%	One of ER, E, ES, ED, ING, ELY (a suffix)
**			(Found in right context only)
**		+	One of E, I or Y (a "front" vowel)
**
*/


/*
** Context definitions.
** Words are stored with one blank before and one after them, so a
** context of a single blank means "the edge of the word".
*/
static char Anything[] = "";	/* No context requirement */
static char Nothing[] = " ";	/* Context is beginning or end of word */

/* Phoneme definitions */
static char Pause[] = " ";	/* Short silence */
static char Silent[] = "";	/* No phonemes */

/* Positions of the four strings inside a Rule */
#define LEFT_PART	0
#define MATCH_PART	1
#define RIGHT_PART	2
#define OUT_PART	3

typedef char *Rule[4];	/* Rule is an array of 4 character pointers */

/*
**	0 = Punctuation
**
**	Rules for blanks and punctuation marks; this is entry 0 of Rules[].
**	The first rule that fits is used, so the possessive "'S" rules
**	must stay ahead of the plain apostrophe rule.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule punct_rules[] =
	{
	{Anything,	" ",		"'",		Silent	},
	{Anything,	" ",		Anything,	Pause	},
	{Anything,	"-",		Anything,	Silent	},
	{".",		"'S",		Anything,	"z"	},
	{"#:.E",	"'S",		Anything,	"z"	},
	{"#",		"'S",		Anything,	"z"	},
	{Anything,	"'",		Anything,	Silent	},
	{Anything,	",",		Anything,	Pause	},
	{Anything,	".",		Anything,	Pause	},
	{Anything,	"?",		Anything,	Pause	},
	{Anything,	"!",		Anything,	Pause	},
	};

/*
**	Rules for text beginning with the letter A.
**
**	The first rule that fits is used, so specific rules come before
**	general ones.  The context-free "A" rule must stay last: it is
**	the fallback, and the table has no other end marker.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule A_rules[] =
	{
	{Anything,	"A",		Nothing,	"AX"	},
	{Nothing,	"ARE",		Nothing,	"AAr"	},
	{Nothing,	"AR",		"O",		"AXr"	},
	{Anything,	"AR",		"#",		"EHr"	},
	{"^",		"AS",		"#",		"EYs"	},
	{Anything,	"A",		"WA",		"AX"	},
	{Anything,	"AW",		Anything,	"AO"	},
	{" :",		"ANY",		Anything,	"EHnIY"	},
	{Anything,	"A",		"^+#",		"EY"	},
	{"#:",		"ALLY",		Anything,	"AXlIY"	},
	{Nothing,	"AL",		"#",		"AXl"	},
	{Anything,	"AGAIN",	Anything,	"AXgEHn"},
	{"#:",		"AG",		"E",		"IHj"	},
	{Anything,	"A",		"^+:#",		"AE"	},
	{" :",		"A",		"^+ ",		"EY"	},
	{Anything,	"A",		"^%",		"EY"	},
	{Nothing,	"ARR",		Anything,	"AXr"	},
	{Anything,	"ARR",		Anything,	"AEr"	},
	{" :",		"AR",		Nothing,	"AAr"	},
	{Anything,	"AR",		Nothing,	"ER"	},
	{Anything,	"AR",		Anything,	"AAr"	},
	{Anything,	"AIR",		Anything,	"EHr"	},
	{Anything,	"AI",		Anything,	"EY"	},
	{Anything,	"AY",		Anything,	"EY"	},
	{Anything,	"AU",		Anything,	"AO"	},
	{"#:",		"AL",		Nothing,	"AXl"	},
	{"#:",		"ALS",		Nothing,	"AXlz"	},
	{Anything,	"ALK",		Anything,	"AOk"	},
	{Anything,	"AL",		"^",		"AOl"	},
	{" :",		"ABLE",		Anything,	"EYbAXl"},
	{Anything,	"ABLE",		Anything,	"AXbAXl"},
	{Anything,	"ANG",		"+",		"EYnj"	},
	{Anything,	"A",		Anything,	"AE"	},
 	};

/*
**	Rules for text beginning with the letter B.
**
**	The first rule that fits is used; the context-free "B" rule
**	must stay last as the fallback that ends every search.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule B_rules[] =
	{
	{Nothing,	"BE",		"^#",		"bIH"	},
	{Anything,	"BEING",	Anything,	"bIYIHNG"},
	{Nothing,	"BOTH",		Nothing,	"bOWTH"	},
	{Nothing,	"BUS",		"#",		"bIHz"	},
	{Anything,	"BUIL",		Anything,	"bIHl"	},
	{Anything,	"B",		Anything,	"b"	},
	};

/*
**	Rules for text beginning with the letter C.
**
**	The first rule that fits is used; the context-free "C" rule
**	must stay last as the fallback that ends every search.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule C_rules[] =
	{
	{Nothing,	"CH",		"^",		"k"	},
	{"^E",		"CH",		Anything,	"k"	},
	{Anything,	"CH",		Anything,	"CH"	},
	{" S",		"CI",		"#",		"sAY"	},
	{Anything,	"CI",		"A",		"SH"	},
	{Anything,	"CI",		"O",		"SH"	},
	{Anything,	"CI",		"EN",		"SH"	},
	{Anything,	"C",		"+",		"s"	},
	{Anything,	"CK",		Anything,	"k"	},
	{Anything,	"COM",		"%",		"kAHm"	},
	{Anything,	"C",		Anything,	"k"	},
	};

/*
**	Rules for text beginning with the letter D.
**
**	The first rule that fits is used; the context-free "D" rule
**	must stay last as the fallback that ends every search.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule D_rules[] =
	{
	{"#:",		"DED",		Nothing,	"dIHd"	},
	{".E",		"D",		Nothing,	"d"	},
	{"#^:E",	"D",		Nothing,	"t"	},
	{Nothing,	"DE",		"^#",		"dIH"	},
	{Nothing,	"DO",		Nothing,	"dUW"	},
	{Nothing,	"DOES",		Anything,	"dAHz"	},
	{Nothing,	"DOING",	Anything,	"dUWIHNG"},
	{Nothing,	"DOW",		Anything,	"dAW"	},
	{Anything,	"DU",		"A",		"jUW"	},
	{Anything,	"D",		Anything,	"d"	},
	};

/*
**	Rules for text beginning with the letter E.
**
**	The first rule that fits is used, so specific rules come before
**	general ones.  The context-free "E" rule must stay last: it is
**	the fallback, and the table has no other end marker.
**
**	Output strings use only the documented phoneme codes: lower
**	case for one-letter consonants, upper case for two-letter codes
**	(so "you" is written "yUW").
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule E_rules[] =
	{
	{"#:",		"E",		Nothing,	Silent	},
	{"'^:",		"E",		Nothing,	Silent	},
	{" :",		"E",		Nothing,	"IY"	},
	{"#",		"ED",		Nothing,	"d"	},
	{"#:",		"E",		"D ",		Silent	},
	{Anything,	"EV",		"ER",		"EHv"	},
	{Anything,	"E",		"^%",		"IY"	},
	{Anything,	"ERI",		"#",		"IYrIY"	},
	{Anything,	"ERI",		Anything,	"EHrIH"	},
	{"#:",		"ER",		"#",		"ER"	},
	{Anything,	"ER",		"#",		"EHr"	},
	{Anything,	"ER",		Anything,	"ER"	},
	{Nothing,	"EVEN",		Anything,	"IYvEHn"},
	{"#:",		"E",		"W",		Silent	},
	{"T",		"EW",		Anything,	"UW"	},
	{"S",		"EW",		Anything,	"UW"	},
	{"R",		"EW",		Anything,	"UW"	},
	{"D",		"EW",		Anything,	"UW"	},
	{"L",		"EW",		Anything,	"UW"	},
	{"Z",		"EW",		Anything,	"UW"	},
	{"N",		"EW",		Anything,	"UW"	},
	{"J",		"EW",		Anything,	"UW"	},
	{"TH",		"EW",		Anything,	"UW"	},
	{"CH",		"EW",		Anything,	"UW"	},
	{"SH",		"EW",		Anything,	"UW"	},
	{Anything,	"EW",		Anything,	"yUW"	},
	{Anything,	"E",		"O",		"IY"	},
	{"#:S",		"ES",		Nothing,	"IHz"	},
	{"#:C",		"ES",		Nothing,	"IHz"	},
	{"#:G",		"ES",		Nothing,	"IHz"	},
	{"#:Z",		"ES",		Nothing,	"IHz"	},
	{"#:X",		"ES",		Nothing,	"IHz"	},
	{"#:J",		"ES",		Nothing,	"IHz"	},
	{"#:CH",	"ES",		Nothing,	"IHz"	},
	{"#:SH",	"ES",		Nothing,	"IHz"	},
	{"#:",		"E",		"S ",		Silent	},
	{"#:",		"ELY",		Nothing,	"lIY"	},
	{"#:",		"EMENT",	Anything,	"mEHnt"	},
	{Anything,	"EFUL",		Anything,	"fUHl"	},
	{Anything,	"EE",		Anything,	"IY"	},
	{Anything,	"EARN",		Anything,	"ERn"	},
	{Nothing,	"EAR",		"^",		"ER"	},
	{Anything,	"EAD",		Anything,	"EHd"	},
	{"#:",		"EA",		Nothing,	"IYAX"	},
	{Anything,	"EA",		"SU",		"EH"	},
	{Anything,	"EA",		Anything,	"IY"	},
	{Anything,	"EIGH",		Anything,	"EY"	},
	{Anything,	"EI",		Anything,	"IY"	},
	{Nothing,	"EYE",		Anything,	"AY"	},
	{Anything,	"EY",		Anything,	"IY"	},
	{Anything,	"EU",		Anything,	"yUW"	},
	{Anything,	"E",		Anything,	"EH"	},
	};

/*
**	Rules for text beginning with the letter F.
**
**	The first rule that fits is used; the context-free "F" rule
**	must stay last as the fallback that ends every search.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule F_rules[] =
	{
	{Anything,	"FUL",		Anything,	"fUHl"	},
	{Anything,	"F",		Anything,	"f"	},
	};

/*
**	Rules for text beginning with the letter G.
**
**	The first rule that fits is used; the context-free "G" rule
**	must stay last as the fallback that ends every search.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule G_rules[] =
	{
	{Anything,	"GIV",		Anything,	"gIHv"	},
	{Nothing,	"G",		"I^",		"g"	},
	{Anything,	"GE",		"T",		"gEH"	},
	{"SU",		"GGES",		Anything,	"gjEHs"	},
	{Anything,	"GG",		Anything,	"g"	},
	{" B#",		"G",		Anything,	"g"	},
	{Anything,	"G",		"+",		"j"	},
	{Anything,	"GREAT",	Anything,	"grEYt"	},
	{"#",		"GH",		Anything,	Silent	},
	{Anything,	"G",		Anything,	"g"	},
	};

/*
**	Rules for text beginning with the letter H.
**
**	The first rule that fits is used; the context-free "H" rule
**	(silent H) must stay last as the fallback that ends every search.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule H_rules[] =
	{
	{Nothing,	"HAV",		Anything,	"hAEv"	},
	{Nothing,	"HERE",		Anything,	"hIYr"	},
	{Nothing,	"HOUR",		Anything,	"AWER"	},
	{Anything,	"HOW",		Anything,	"hAW"	},
	{Anything,	"H",		"#",		"h"	},
	{Anything,	"H",		Anything,	Silent	},
	};

/*
**	Rules for text beginning with the letter I.
**
**	The first rule that fits is used, so specific rules come before
**	general ones.  The context-free "I" rule must stay last: it is
**	the fallback, and the table has no other end marker.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule I_rules[] =
	{
	{Nothing,	"IN",		Anything,	"IHn"	},
	{Nothing,	"I",		Nothing,	"AY"	},
	{Anything,	"IN",		"D",		"AYn"	},
	{Anything,	"IER",		Anything,	"IYER"	},
	{"#:R",		"IED",		Anything,	"IYd"	},
	{Anything,	"IED",		Nothing,	"AYd"	},
	{Anything,	"IEN",		Anything,	"IYEHn"	},
	{Anything,	"IE",		"T",		"AYEH"	},
	{" :",		"I",		"%",		"AY"	},
	{Anything,	"I",		"%",		"IY"	},
	{Anything,	"IE",		Anything,	"IY"	},
	{Anything,	"I",		"^+:#",		"IH"	},
	{Anything,	"IR",		"#",		"AYr"	},
	{Anything,	"IZ",		"%",		"AYz"	},
	{Anything,	"IS",		"%",		"AYz"	},
	{Anything,	"I",		"D%",		"AY"	},
	{"+^",		"I",		"^+",		"IH"	},
	{Anything,	"I",		"T%",		"AY"	},
	{"#^:",		"I",		"^+",		"IH"	},
	{Anything,	"I",		"^+",		"AY"	},
	{Anything,	"IR",		Anything,	"ER"	},
	{Anything,	"IGH",		Anything,	"AY"	},
	{Anything,	"ILD",		Anything,	"AYld"	},
	{Anything,	"IGN",		Nothing,	"AYn"	},
	{Anything,	"IGN",		"^",		"AYn"	},
	{Anything,	"IGN",		"%",		"AYn"	},
	{Anything,	"IQUE",		Anything,	"IYk"	},
	{Anything,	"I",		Anything,	"IH"	},
	};

/*
**	Rules for text beginning with the letter J.
**
**	A single context-free rule: J always produces "j".
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule J_rules[] =
	{
	{Anything,	"J",		Anything,	"j"	},
	};

/*
**	Rules for text beginning with the letter K.
**
**	The first rule that fits is used; the context-free "K" rule
**	must stay last as the fallback that ends every search.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule K_rules[] =
	{
	{Nothing,	"K",		"N",		Silent	},
	{Anything,	"K",		Anything,	"k"	},
	};

/*
**	Rules for text beginning with the letter L.
**
**	The first rule that fits is used; the context-free "L" rule
**	must stay last as the fallback that ends every search.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule L_rules[] =
	{
	{Anything,	"LO",		"C#",		"lOW"	},
	{"L",		"L",		Anything,	Silent	},
	{"#^:",		"L",		"%",		"AXl"	},
	{Anything,	"LEAD",		Anything,	"lIYd"	},
	{Anything,	"L",		Anything,	"l"	},
	};

/*
**	Rules for text beginning with the letter M.
**
**	The first rule that fits is used; the context-free "M" rule
**	must stay last as the fallback that ends every search.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule M_rules[] =
	{
	{Anything,	"MOV",		Anything,	"mUWv"	},
	{Anything,	"M",		Anything,	"m"	},
	};

/*
**	Rules for text beginning with the letter N.
**
**	The first rule that fits is used; the context-free "N" rule
**	must stay last as the fallback that ends every search.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule N_rules[] =
	{
	{"E",		"NG",		"+",		"nj"	},
	{Anything,	"NG",		"R",		"NGg"	},
	{Anything,	"NG",		"#",		"NGg"	},
	{Anything,	"NGL",		"%",		"NGgAXl"},
	{Anything,	"NG",		Anything,	"NG"	},
	{Anything,	"NK",		Anything,	"NGk"	},
	{Nothing,	"NOW",		Nothing,	"nAW"	},
	{Anything,	"N",		Anything,	"n"	},
	};

/*
**	Rules for text beginning with the letter O.
**
**	The first rule that fits is used, so specific rules come before
**	general ones.  The context-free "O" rule must stay last: it is
**	the fallback, and the table has no other end marker.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule O_rules[] =
	{
	{Anything,	"OF",		Nothing,	"AXv"	},
	{Anything,	"OROUGH",	Anything,	"EROW"	},
	{"#:",		"OR",		Nothing,	"ER"	},
	{"#:",		"ORS",		Nothing,	"ERz"	},
	{Anything,	"OR",		Anything,	"AOr"	},
	{Nothing,	"ONE",		Anything,	"wAHn"	},
	{Anything,	"OW",		Anything,	"OW"	},
	{Nothing,	"OVER",		Anything,	"OWvER"	},
	{Anything,	"OV",		Anything,	"AHv"	},
	{Anything,	"O",		"^%",		"OW"	},
	{Anything,	"O",		"^EN",		"OW"	},
	{Anything,	"O",		"^I#",		"OW"	},
	{Anything,	"OL",		"D",		"OWl"	},
	{Anything,	"OUGHT",	Anything,	"AOt"	},
	{Anything,	"OUGH",		Anything,	"AHf"	},
	{Nothing,	"OU",		Anything,	"AW"	},
	{"H",		"OU",		"S#",		"AW"	},
	{Anything,	"OUS",		Anything,	"AXs"	},
	{Anything,	"OUR",		Anything,	"AOr"	},
	{Anything,	"OULD",		Anything,	"UHd"	},
	{"^",		"OU",		"^L",		"AH"	},
	{Anything,	"OUP",		Anything,	"UWp"	},
	{Anything,	"OU",		Anything,	"AW"	},
	{Anything,	"OY",		Anything,	"OY"	},
	{Anything,	"OING",		Anything,	"OWIHNG"},
	{Anything,	"OI",		Anything,	"OY"	},
	{Anything,	"OOR",		Anything,	"AOr"	},
	{Anything,	"OOK",		Anything,	"UHk"	},
	{Anything,	"OOD",		Anything,	"UHd"	},
	{Anything,	"OO",		Anything,	"UW"	},
	{Anything,	"O",		"E",		"OW"	},
	{Anything,	"O",		Nothing,	"OW"	},
	{Anything,	"OA",		Anything,	"OW"	},
	{Nothing,	"ONLY",		Anything,	"OWnlIY"},
	{Nothing,	"ONCE",		Anything,	"wAHns"	},
	{Anything,	"ON'T",		Anything,	"OWnt"	},
	{"C",		"O",		"N",		"AA"	},
	{Anything,	"O",		"NG",		"AO"	},
	{"^:",		"O",		"N",		"AH"	},
	{"I",		"ON",		Anything,	"AXn"	},
	{"#:",		"ON",		Nothing,	"AXn"	},
	{"#^",		"ON",		Anything,	"AXn"	},
	{Anything,	"O",		"ST ",		"OW"	},
	{Anything,	"OF",		"^",		"AOf"	},
	{Anything,	"OTHER",	Anything,	"AHDHER"},
	{Anything,	"OSS",		Nothing,	"AOs"	},
	{"#^:",		"OM",		Anything,	"AHm"	},
	{Anything,	"O",		Anything,	"AA"	},
	};

/*
**	Rules for text beginning with the letter P.
**
**	The first rule that fits is used; the context-free "P" rule
**	must stay last as the fallback that ends every search.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule P_rules[] =
	{
	{Anything,	"PH",		Anything,	"f"	},
	{Anything,	"PEOP",		Anything,	"pIYp"	},
	{Anything,	"POW",		Anything,	"pAW"	},
	{Anything,	"PUT",		Nothing,	"pUHt"	},
	{Anything,	"P",		Anything,	"p"	},
	};

/*
**	Rules for text beginning with the letter Q.
**
**	The first rule that fits is used; the context-free "Q" rule
**	must stay last as the fallback that ends every search.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule Q_rules[] =
	{
	{Anything,	"QUAR",		Anything,	"kwAOr"	},
	{Anything,	"QU",		Anything,	"kw"	},
	{Anything,	"Q",		Anything,	"k"	},
	};

/*
**	Rules for text beginning with the letter R.
**
**	The first rule that fits is used; the context-free "R" rule
**	must stay last as the fallback that ends every search.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule R_rules[] =
	{
	{Nothing,	"RE",		"^#",		"rIY"	},
	{Anything,	"R",		Anything,	"r"	},
	};

/*
**	Rules for text beginning with the letter S.
**
**	The first rule that fits is used, so specific rules come before
**	general ones.  The context-free "S" rule must stay last: it is
**	the fallback, and the table has no other end marker.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule S_rules[] =
	{
	{Anything,	"SH",		Anything,	"SH"	},
	{"#",		"SION",		Anything,	"ZHAXn"	},
	{Anything,	"SOME",		Anything,	"sAHm"	},
	{"#",		"SUR",		"#",		"ZHER"	},
	{Anything,	"SUR",		"#",		"SHER"	},
	{"#",		"SU",		"#",		"ZHUW"	},
	{"#",		"SSU",		"#",		"SHUW"	},
	{"#",		"SED",		Nothing,	"zd"	},
	{"#",		"S",		"#",		"z"	},
	{Anything,	"SAID",		Anything,	"sEHd"	},
	{"^",		"SION",		Anything,	"SHAXn"	},
	{Anything,	"S",		"S",		Silent	},
	{".",		"S",		Nothing,	"z"	},
	{"#:.E",	"S",		Nothing,	"z"	},
	{"#^:##",	"S",		Nothing,	"z"	},
	{"#^:#",	"S",		Nothing,	"s"	},
	{"U",		"S",		Nothing,	"s"	},
	{" :#",		"S",		Nothing,	"z"	},
	{Nothing,	"SCH",		Anything,	"sk"	},
	{Anything,	"S",		"C+",		Silent	},
	{"#",		"SM",		Anything,	"zm"	},
	{"#",		"SN",		"'",		"zAXn"	},
	{Anything,	"S",		Anything,	"s"	},
	};

/*
**	Rules for text beginning with the letter T.
**
**	The first rule that fits is used, so the whole-word "TH..."
**	rules come before the general "TH" rule.  The context-free "T"
**	rule must stay last: it is the fallback, and the table has no
**	other end marker.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule T_rules[] =
	{
	{Nothing,	"THE",		Nothing,	"DHAX"	},
	{Anything,	"TO",		Nothing,	"tUW"	},
	{Anything,	"THAT",		Nothing,	"DHAEt"	},
	{Nothing,	"THIS",		Nothing,	"DHIHs"	},
	{Nothing,	"THEY",		Anything,	"DHEY"	},
	{Nothing,	"THERE",	Anything,	"DHEHr"	},
	{Anything,	"THER",		Anything,	"DHER"	},
	{Anything,	"THEIR",	Anything,	"DHEHr"	},
	{Nothing,	"THAN",		Nothing,	"DHAEn"	},
	{Nothing,	"THEM",		Nothing,	"DHEHm"	},
	{Anything,	"THESE",	Nothing,	"DHIYz"	},
	{Nothing,	"THEN",		Anything,	"DHEHn"	},
	{Anything,	"THROUGH",	Anything,	"THrUW"	},
	{Anything,	"THOSE",	Anything,	"DHOWz"	},
	{Anything,	"THOUGH",	Nothing,	"DHOW"	},
	{Nothing,	"THUS",		Anything,	"DHAHs"	},
	{Anything,	"TH",		Anything,	"TH"	},
	{"#:",		"TED",		Nothing,	"tIHd"	},
	{"S",		"TI",		"#N",		"CH"	},
	{Anything,	"TI",		"O",		"SH"	},
	{Anything,	"TI",		"A",		"SH"	},
	{Anything,	"TIEN",		Anything,	"SHAXn"	},
	{Anything,	"TUR",		"#",		"CHER"	},
	{Anything,	"TU",		"A",		"CHUW"	},
	{Nothing,	"TWO",		Anything,	"tUW"	},
	{Anything,	"T",		Anything,	"t"	},
	};

/*
**	Rules for text beginning with the letter U.
**
**	The first rule that fits is used, so specific rules come before
**	general ones.  The context-free "U" rule must stay last: it is
**	the fallback, and the table has no other end marker.
**
**	MATCH_PART is compared letter for letter, so context symbols
**	such as '^' belong only in LEFT_PART or RIGHT_PART.  Output
**	strings use only the documented phoneme codes (so "you" is
**	written "yUW").
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule U_rules[] =
	{
	{Nothing,	"UN",		"I",		"yUWn"	},
	{Nothing,	"UN",		Anything,	"AHn"	},
	{Nothing,	"UPON",		Anything,	"AXpAOn"},
	{"T",		"UR",		"#",		"UHr"	},
	{"S",		"UR",		"#",		"UHr"	},
	{"R",		"UR",		"#",		"UHr"	},
	{"D",		"UR",		"#",		"UHr"	},
	{"L",		"UR",		"#",		"UHr"	},
	{"Z",		"UR",		"#",		"UHr"	},
	{"N",		"UR",		"#",		"UHr"	},
	{"J",		"UR",		"#",		"UHr"	},
	{"TH",		"UR",		"#",		"UHr"	},
	{"CH",		"UR",		"#",		"UHr"	},
	{"SH",		"UR",		"#",		"UHr"	},
	{Anything,	"UR",		"#",		"yUHr"	},
	{Anything,	"UR",		Anything,	"ER"	},
	{Anything,	"U",		"^ ",		"AH"	},
	{Anything,	"U",		"^^",		"AH"	},
	{Anything,	"UY",		Anything,	"AY"	},
	{" G",		"U",		"#",		Silent	},
	{"G",		"U",		"%",		Silent	},
	{"G",		"U",		"#",		"w"	},
	{"#N",		"U",		Anything,	"yUW"	},
	{"T",		"U",		Anything,	"UW"	},
	{"S",		"U",		Anything,	"UW"	},
	{"R",		"U",		Anything,	"UW"	},
	{"D",		"U",		Anything,	"UW"	},
	{"L",		"U",		Anything,	"UW"	},
	{"Z",		"U",		Anything,	"UW"	},
	{"N",		"U",		Anything,	"UW"	},
	{"J",		"U",		Anything,	"UW"	},
	{"TH",		"U",		Anything,	"UW"	},
	{"CH",		"U",		Anything,	"UW"	},
	{"SH",		"U",		Anything,	"UW"	},
	{Anything,	"U",		Anything,	"yUW"	},
	};

/*
**	Rules for text beginning with the letter V.
**
**	The first rule that fits is used; the context-free "V" rule
**	must stay last as the fallback that ends every search.
**
**	Output strings use only the documented phoneme codes, so
**	"view" is v + y + UW.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule V_rules[] =
	{
	{Anything,	"VIEW",		Anything,	"vyUW"	},
	{Anything,	"V",		Anything,	"v"	},
	};

/*
**	Rules for text beginning with the letter W.
**
**	The first rule that fits is used, so the whole-word and "WH..."
**	rules come before the general "WH" rule.  The context-free "W"
**	rule must stay last: it is the fallback, and the table has no
**	other end marker.
**
**	"WERE" at the start of a word is wER; "WHERE" is WHEHr.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule W_rules[] =
	{
	{Nothing,	"WERE",		Anything,	"wER"	},
	{Anything,	"WA",		"S",		"wAA"	},
	{Anything,	"WA",		"T",		"wAA"	},
	{Anything,	"WHERE",	Anything,	"WHEHr"	},
	{Anything,	"WHAT",		Anything,	"WHAAt"	},
	{Anything,	"WHOL",		Anything,	"hOWl"	},
	{Anything,	"WHO",		Anything,	"hUW"	},
	{Anything,	"WH",		Anything,	"WH"	},
	{Anything,	"WAR",		Anything,	"wAOr"	},
	{Anything,	"WOR",		"^",		"wER"	},
	{Anything,	"WR",		Anything,	"r"	},
	{Anything,	"W",		Anything,	"w"	},
	};

/*
**	Rules for text beginning with the letter X.
**
**	A single context-free rule: X always produces "ks".
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule X_rules[] =
	{
	{Anything,	"X",		Anything,	"ks"	},
	};

/*
**	Rules for text beginning with the letter Y.
**
**	The first rule that fits is used; the context-free "Y" rule
**	must stay last as the fallback that ends every search.
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule Y_rules[] =
	{
	{Anything,	"YOUNG",	Anything,	"yAHNG"	},
	{Nothing,	"YOU",		Anything,	"yUW"	},
	{Nothing,	"YES",		Anything,	"yEHs"	},
	{Nothing,	"Y",		Anything,	"y"	},
	{"#^:",		"Y",		Nothing,	"IY"	},
	{"#^:",		"Y",		"I",		"IY"	},
	{" :",		"Y",		Nothing,	"AY"	},
	{" :",		"Y",		"#",		"AY"	},
	{" :",		"Y",		"^+:#",		"IH"	},
	{" :",		"Y",		"^#",		"AY"	},
	{Anything,	"Y",		Anything,	"IH"	},
	};

/*
**	Rules for text beginning with the letter Z.
**
**	A single context-free rule: Z always produces "z".
**
**	LEFT_PART	MATCH_PART	RIGHT_PART	OUT_PART
*/
static Rule Z_rules[] =
	{
	{Anything,	"Z",		Anything,	"z"	},
	};

/*
**	Master index of the rule tables.
**
**	Entry 0 holds the punctuation rules; entries 1 to 26 hold the
**	rules for A to Z, so a letter's table is Rules[letter - 'A' + 1].
*/
Rule *Rules[] =
	{
	punct_rules,	/*  0: blanks and punctuation */
	A_rules,	/*  1 */
	B_rules,	/*  2 */
	C_rules,	/*  3 */
	D_rules,	/*  4 */
	E_rules,	/*  5 */
	F_rules,	/*  6 */
	G_rules,	/*  7 */
	H_rules,	/*  8 */
	I_rules,	/*  9 */
	J_rules,	/* 10 */
	K_rules,	/* 11 */
	L_rules,	/* 12 */
	M_rules,	/* 13 */
	N_rules,	/* 14 */
	O_rules,	/* 15 */
	P_rules,	/* 16 */
	Q_rules,	/* 17 */
	R_rules,	/* 18 */
	S_rules,	/* 19 */
	T_rules,	/* 20 */
	U_rules,	/* 21 */
	V_rules,	/* 22 */
	W_rules,	/* 23 */
	X_rules,	/* 24 */
	Y_rules,	/* 25 */
	Z_rules		/* 26 */
	};

-------------------------------------------------------------------------------
PARSE.C
-------------------------------------------------------------------------------

#include <stdio.h>
#include <ctype.h>

/* Bit flags recording which kinds of characters were seen in a word */
#define UPPER 1
#define LOWER 2
#define NUMBER 4
#define SPECIAL 8

/* Size of the word buffer, including both blanks and the final null */
#define MAX_WORD_LENGTH 128

FILE *In_file;	/* Text to be translated; set up by main() */
FILE *Out_file;	/* Where the phonemes are written; set up by main() */

/*
**	Program entry point.
**
**	Takes up to two optional arguments: the input file name and
**	the output file name.  Standard input and standard output are
**	used for whichever is not given.  Once the files are open the
**	whole input is translated.
**
**	Returns 0 on success and 1 if there are too many arguments or
**	a file cannot be opened.
*/
int main(argc, argv)
	int argc;
	char *argv[];
	{
	if (argc > 3)
		{
		fputs("Usage: PHONEME [infile [outfile]]\n", stderr);
		return 1;
		}

	In_file = stdin;
	if (argc > 1)
		{
		In_file = fopen(argv[1], "r");
		if (In_file == NULL)
			{
			fputs("Error: Cannot open input file.\n", stderr);
			return 1;
			}
		}

	Out_file = stdout;
	if (argc > 2)
		{
		Out_file = fopen(argv[2], "w");
		if (Out_file == NULL)
			{
			fputs("Error: Cannot create output file.\n", stderr);
			return 1;
			}
		}

	xlate_file();

	/* Returning from main flushes and closes the open files */
	return 0;
	}

/*
**	Read In_file one word at a time and pass each word to mash_word().
**
**	A word ends at white space, at end of file, or at '.', '?' or
**	'!'; that ending character is passed along as the terminator.
**	Letters are converted to upper case and the word is stored with
**	one blank before it and one after it.
**
**	A word too long for the buffer is split: the part that fits is
**	translated with a blank terminator and the rest is read as the
**	next word, so no input character is lost.
*/
xlate_file()
	{
	char buffer[MAX_WORD_LENGTH];	/* Storage for word */
	int count;		/* number of characters in word */
	int type;		/* types of characters in word */

	int chr;
	int terminator;		/* Character after word */

	do	/* All of the words in the file */
		{
		type = 0;	/* Flags for types of characters */
		count = 0;
		chr = ' ';
		terminator = ' ';	/* Used when a full buffer ends the word */

		buffer[count++] = ' ';	/* Initial blank */

		/*
		** All of the characters in the word.  Stop reading while
		** two slots remain for the closing blank and the null.
		*/
		while (count < MAX_WORD_LENGTH - 2)
			{
			chr = getc(In_file);

			/* Check for end of word */
			if (isspace(chr) || chr == EOF || chr == '.' ||
			    chr == '?' || chr == '!')
				{
				terminator = chr;
				break;
				}

			buffer[count++] = toupper(chr);

			if (isupper(chr))
				type |= UPPER;
			else
			if (islower(chr) || chr == '\'')
				type |= LOWER;
			else
			if (isdigit(chr))
				type |= NUMBER;
			else
				type |= SPECIAL;
			}

		buffer[count++] = ' ';	/* Terminating blank */
		buffer[count] = '\0';	/* Terminating null */

		/* Figure out what it is */
		mash_word(buffer, type, terminator);
		}
	while (chr != EOF);
	}

/*
**	Translate one word according to the kinds of characters in it.
**
**	word		the word, with a blank before and after it
**	type		OR of the character-type flags seen in the word
**	terminator	the character that ended the word
**
**	A word made only of digits is spoken digit by digit, followed
**	by "POINT" when it was ended by a '.'.  Any other word is
**	handed to xlate_word() unchanged.
*/
mash_word(word, type, terminator)
	char *word;
	int type;
	int terminator;
	{
	static char *digit_name[] =
		{
		" ZERO ", " ONE ", " TWO ", " THREE ", " FOUR ",
		" FIVE ", " SIX ", " SEVEN ", " EIGHT ", " NINE "
		};
	char *digit;

	if (type == NUMBER)
		{
		/* Start after the leading blank; stop at the trailing one */
		for (digit = word + 1; *digit != ' '; digit++)
			{
			if (*digit >= '0' && *digit <= '9')
				xlate_word(digit_name[*digit - '0']);
			}

		if (terminator == '.')
			xlate_word(" POINT ");
		}
	else
		xlate_word(word);
	}

-------------------------------------------------------------------------------
PHONEME.C
-------------------------------------------------------------------------------
#include <stdio.h>
#include <ctype.h>
/*
**	English to Phoneme translation.
**
**	Rules are made up of four parts:
**	
**		The left context.
**		The text to match.
**		The right context.
**		The phonemes to substitute for the matched text.
**
**	Procedure:
**
**		Separate each block of letters (apostrophes included) 
**		and add a space on each side.  For each unmatched 
**		letter in the word, look through the rules where the 
**		text to match starts with the letter in the word.  If 
**		the text to match is found and the right and left 
**		context patterns also match, output the phonemes for 
**		that rule and skip to the next unmatched letter.
**
**
**	Special Context Symbols:
**
**		#	One or more vowels
**		:	Zero or more consonants
**		^	One consonant.
**		.	One of B, D, V, G, J, L, M, N, R, W or Z (voiced 
**			consonants)
**		%	One of ER, E, ES, ED, ING, ELY (a suffix)
**			(Right context only)
**		+	One of E, I or Y (a "front" vowel)
*/

/* The four pointers are, in order: left context, match text, right context, output */
typedef char *Rule[4];	/* A rule is four character pointers */

/* Indexed by 0 for punctuation, or by letter - 'A' + 1 for A to Z */
extern Rule *Rules[];	/* An array of pointers to rules */

/* The program's input and output streams; find_rule() writes phonemes to Out_file */
extern FILE *In_file, *Out_file;

/*
**	Return 1 if chr is one of the upper-case vowels A, E, I, O, U
**	and 0 for anything else.  Y is not counted as a vowel.
*/
int isvowel(chr)
	char chr;
	{
	switch (chr)
		{
	case 'A':
	case 'E':
	case 'I':
	case 'O':
	case 'U':
		return 1;

	default:
		return 0;
		}
	}

/*
**	Return 1 if chr is an upper-case letter that is not a vowel,
**	and 0 for anything else.
*/
int isconsonant(chr)
	char chr;
	{
	/*
	** The <ctype.h> tests are only defined for unsigned char values
	** and EOF, so convert first in case plain char is signed.
	*/
	return (isupper((unsigned char) chr) && !isvowel(chr));
	}

/*
**	Translate one word to phonemes.
**
**	word	upper-case text with one blank before it and one after it
**
**	Starting after the leading blank, the rule table for the
**	current character is searched and the search resumes at the
**	first character the chosen rule did not consume.  Letters use
**	their own table; blank, apostrophe and the punctuation marks
**	that have rules use the punctuation table.  Any other character
**	is reported on stderr and skipped, because no table has a rule
**	that could consume it.
*/
xlate_word(word)
	char word[];
	{
	int index;	/* Current position in word */
	int type;	/* First letter of match part */

	index = 1;	/* Skip the initial blank */
	do
		{
		if (isupper((unsigned char) word[index]))
			type = word[index] - 'A' + 1;
		else
			switch (word[index])
				{
			case ' ':
			case '\'':
			case '-':
			case ',':
			case '.':
			case '?':
			case '!':
				type = 0;
				break;

			default:
				fprintf(stderr,"Bad character in word: '%c'\n", word[index]);
				index++;	/* Step over it; no rule can */
				continue;
				}

		index = find_rule(word, index, Rules[type]);
		}
	while (word[index] != '\0');
	}

/*
**	Find the first rule that applies at word[index] and write its
**	phonemes to Out_file.
**
**	word	the word being translated (blank on each side)
**	index	position of the first untranslated character
**	rules	the rule table to search, in priority order
**
**	A rule applies when its match text equals the word starting at
**	index and both of its contexts fit.  Returns the position of the
**	first character after the matched text.
**
**	The table carries no end marker, so it must contain a rule that
**	applies; otherwise the search continues past the table.
*/
find_rule(word, index, rules)
	char word[];
	int index;
	Rule *rules;
	{
	char **parts;	/* The four strings of the rule being tried */
	char *match;
	int next;	/* Position just past the text matched so far */

	for (;; rules++)
		{
		parts = *rules;

		/* Compare the match text with the word, letter for letter */
		match = parts[1];
		next = index;
		while (*match != '\0' && *match == word[next])
			{
			match++;
			next++;
			}

		/* Left context is tested first, then right context */
		if (*match == '\0'
		    && leftmatch(parts[0], &word[index-1])
		    && rightmatch(parts[2], &word[next]))
			{
			fputs(parts[3], Out_file);
			return next;
			}
		}
	}


/*
**	Test whether the text to the left of a match fits a rule's
**	left context.
**
**	pattern	the left context of the rule; empty fits anything
**	context	the character of the word just before the matched text
**
**	The pattern is compared from its last symbol back to its first
**	while the text is followed leftwards, away from the match.
**	Letters, apostrophe and blank must match exactly; '#', ':',
**	'^', '.' and '+' are context symbols.  '%' is not allowed in a
**	left context and is reported on stderr.
**
**	Returns 1 if the context fits and 0 if it does not.
*/
leftmatch(pattern, context)
	char *pattern;	/* first char of pattern to match in text */
	char *context;	/* last char of text to be matched */
	{
	char *pat;
	char *text;

	if (*pattern == '\0')	/* null string matches any context */
		return 1;

	/* point just past the last character in pattern string */
	for (pat = pattern; *pat != '\0'; pat++)
		;

	text = context;

	/*
	** Step back before each test, so that every symbol is checked,
	** the first one in the pattern included.
	*/
	while (pat != pattern)
		{
		pat--;

		/* First check for simple text or space */
		if (isalpha((unsigned char) *pat) || *pat == '\'' || *pat == ' ')
			{
			if (*pat != *text)
				return 0;

			/*
			** A blank is the left edge of the word.  Nothing lies
			** beyond it, so the pattern must end here as well.
			*/
			if (*pat == ' ')
				return pat == pattern;

			text--;
			continue;
			}

		switch (*pat)
			{
		case '#':	/* One or more vowels */
			if (!isvowel(*text))
				return 0;

			text--;

			while (isvowel(*text))
				text--;
			break;

		case ':':	/* Zero or more consonants */
			while (isconsonant(*text))
				text--;
			break;

		case '^':	/* One consonant */
			if (!isconsonant(*text))
				return 0;
			text--;
			break;

		case '.':	/* B, D, V, G, J, L, M, N, R, W, Z */
			if (*text != 'B' && *text != 'D' && *text != 'V'
			   && *text != 'G' && *text != 'J' && *text != 'L'
			   && *text != 'M' && *text != 'N' && *text != 'R'
			   && *text != 'W' && *text != 'Z')
				return 0;
			text--;
			break;

		case '+':	/* E, I or Y (front vowel) */
			if (*text != 'E' && *text != 'I' && *text != 'Y')
				return 0;
			text--;
			break;

		case '%':	/* Suffix symbol: right context only */
		default:
			fprintf(stderr, "Bad char in left rule: '%c'\n", *pat);
			return 0;
			}
		}

	return 1;
	}


/*
**	Test whether the text to the right of a match fits a rule's
**	right context.
**
**	pattern	the right context of the rule; empty fits anything
**	context	the character of the word just after the matched text
**
**	The pattern is compared left to right against the text.
**	Letters, apostrophe and blank must match exactly; '#', ':',
**	'^', '.', '+' and '%' are context symbols.  Any other symbol
**	is reported on stderr.
**
**	Returns 1 if the context fits and 0 if it does not.
*/
rightmatch(pattern, context)
	char *pattern;	/* first char of pattern to match in text */
	char *context;	/* first char of text to be matched */
	{
	char *pat;
	char *text;

	if (*pattern == '\0')	/* null string matches any context */
		return 1;

	text = context;

	for (pat = pattern; *pat != '\0'; pat++)
		{
		/* First check for simple text or space */
		if (isalpha((unsigned char) *pat) || *pat == '\'' || *pat == ' ')
			{
			if (*pat != *text)
				return 0;
			text++;
			continue;
			}

		switch (*pat)
			{
		case '#':	/* One or more vowels */
			if (!isvowel(*text))
				return 0;

			text++;

			while (isvowel(*text))
				text++;
			break;

		case ':':	/* Zero or more consonants */
			while (isconsonant(*text))
				text++;
			break;

		case '^':	/* One consonant */
			if (!isconsonant(*text))
				return 0;
			text++;
			break;

		case '.':	/* B, D, V, G, J, L, M, N, R, W, Z */
			if (*text != 'B' && *text != 'D' && *text != 'V'
			   && *text != 'G' && *text != 'J' && *text != 'L'
			   && *text != 'M' && *text != 'N' && *text != 'R'
			   && *text != 'W' && *text != 'Z')
				return 0;
			text++;
			break;

		case '+':	/* E, I or Y (front vowel) */
			if (*text != 'E' && *text != 'I' && *text != 'Y')
				return 0;
			text++;
			break;

		case '%':	/* ER, E, ES, ED, ING, ELY (a suffix) */
			if (text[0] == 'E')
				{
				text++;
				if (text[0] == 'L' && text[1] == 'Y')
					text += 2;	/* ELY */
				else
				if (text[0] == 'R' || text[0] == 'S'
				   || text[0] == 'D')
					text++;		/* ER, ES, ED */
				/* Otherwise only the E is taken; an L is not gobbled */
				}
			else
			if (text[0] == 'I' && text[1] == 'N' && text[2] == 'G')
				text += 3;		/* ING */
			else
				return 0;
			break;

		default:
			fprintf(stderr, "Bad char in right rule:'%c'\n", *pat);
			return 0;
			}
		}

	return 1;
	}
-------------------------------------------------------------------------------
End of Source Files
-------------------------------------------------------------------------------