/*
 *	recite - english text speech synthesizer
 *	Copyright (C) 1993 Peter Miller.
 *	All rights reserved.
 *
 *	This program is free software; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License as published by
 *	the Free Software Foundation; either version 1, or (at your option)
 *	any later version.
 *
 *	This program is distributed in the hope that it will be useful,
 *	but WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *	GNU General Public License for more details.
 *
 *	You should have received a copy of the GNU General Public License
 *	along with this program; if not, write to the Free Software
 *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * MANIFEST: functions to scan translation table
 */

#include <stdio.h>
#include <ctype.h>
#include <string.h>

#define FALSE (0)
#define TRUE (!0)

#include <english.h>
#include <phoneme.h>
#include <rules.h>
#include <trace.h>

/*
 *	English to Phoneme translation.
 *
 *	Rules are made up of four parts:
 *	
 *		The left context.
 *		The text to match.
 *		The right context.
 *		The phonemes to substitute for the matched text.
 *
 *	Procedure:
 *
 *		Separate each block of letters (apostrophes included)
 *		and add a space on each side.  For each unmatched 
 *		letter in the word, look through the rules where the 
 *		text to match starts with the letter in the word.  If 
 *		the text to match is found and the right and left 
 *		context patterns also match, output the phonemes for 
 *		that rule and skip to the next unmatched letter.
 *
 *
 *	Special Context Symbols:
 *
 *		#	One or more vowels
 *		:	Zero or more consonants
 *		^	One consonant.
 *		.	One of B, D, V, G, J, L, M, N, R, W or Z (voiced 
 *			consonants)
 *		%	One of ER, E, ES, ED, ING, ELY (a suffix)
 *			(Right context only)
 *		+	One of E, I or Y (a "front" vowel)
 *		@	one of T S R D L Z N J TH CH SH (left only)
 *		&	one of S C G Z X J CH SH (left only)
 *		?	one or more digits (left and right)
 */


static int isvowel _((int));

/*
 * Test whether a character is one of the upper-case vowels A, E, I, O
 * or U.  Lower-case letters and 'Y' are not treated as vowels.
 *
 * Returns 1 for a vowel and 0 for anything else.
 */
static int
isvowel(chr)
	int	chr;
{
	switch (chr)
	{
	case 'A':
	case 'E':
	case 'I':
	case 'O':
	case 'U':
		return 1;

	default:
		return 0;
	}
}


static int isconsonant _((int));

/*
 * Test whether a character is a consonant, i.e. an upper-case letter
 * that isvowel() rejects.
 *
 * Returns non-zero for a consonant and 0 for anything else.
 */
static int
isconsonant(chr)
	int	chr;
{
	/*
	 * The callers pass plain char values, which are negative for
	 * high-bit characters where char is signed.  The <ctype.h>
	 * functions are only defined for unsigned char values and EOF,
	 * so narrow the value before classifying it.
	 */
	return (isupper((unsigned char)chr) && !isvowel(chr));
}


static int leftmatch _((char *pattern, char *context));

/*
 * Test whether the text to the left of a candidate match satisfies a
 * rule's left-context pattern.
 *
 * The pattern is consumed from its last character back to its first
 * while the text is consumed leftwards from `context', so the characters
 * nearest the match are compared first.  Letters, apostrophes and blanks
 * in the pattern stand for themselves; the other symbols are
 *
 *	#	one or more vowels
 *	:	zero or more consonants
 *	^	one consonant
 *	?	one or more digits
 *	.	one of B D V G J L M N R W Z
 *	+	one of E I Y
 *	@	one of T S R D L Z N J TH CH SH
 *	&	one of S C G Z X J CH SH
 *
 * Returns TRUE when the whole pattern is satisfied (an empty pattern
 * always is) and FALSE otherwise.  Any other pattern character,
 * including the right-only '%', is reported through error() and fails
 * the match.
 *
 * The text is not bounds-checked here; the scan relies on meeting a
 * character that fails the current symbol.  NOTE(review): this presumes
 * the word begins with the blank that xlate_word() skips - confirm
 * against the callers.
 */
static int
leftmatch(pattern, context)
	char	*pattern;	/* left-context pattern of the rule */
	char	*context;	/* text character just left of the matched text */
{
	char	*pat;
	char	*text;

	text = context;

	/*
	 * Step backwards from the pattern's terminator.  Decrementing at
	 * the top of the loop means no pointer before `pattern' is ever
	 * formed, and an empty pattern falls straight through to TRUE.
	 */
	pat = pattern + strlen(pattern);
	while (pat > pattern)
	{
		--pat;

		/* literal letters, apostrophes and blanks match themselves */
		if (isalpha((unsigned char)*pat) || *pat == '\'' || *pat == ' ')
		{
			if (*pat != *text)
				return FALSE;
			text--;
			continue;
		}

		switch (*pat)
		{
		case '#':	/* One or more vowels */
			if (!isvowel(*text))
				return FALSE;
			do
				text--;
			while (isvowel(*text));
			break;

		case ':':	/* Zero or more consonants */
			while (isconsonant(*text))
				text--;
			break;

		case '^':	/* One consonant */
			if (!isconsonant(*text))
				return FALSE;
			text--;
			break;

		case '?':	/* One or more digits */
			if (!isdigit((unsigned char)*text))
				return FALSE;
			do
				text--;
			while (isdigit((unsigned char)*text));
			break;

		case '.':	/* B, D, V, G, J, L, M, N, R, W, Z */
			/*
			 * strchr() also finds the set's terminating NUL,
			 * so a NUL in the text must be excluded by hand.
			 */
			if (*text == '\0' || !strchr("BDVGJLMNRWZ", *text))
				return FALSE;
			text--;
			break;

		case '+':	/* E, I or Y (front vowel) */
			if (*text != 'E' && *text != 'I' && *text != 'Y')
				return FALSE;
			text--;
			break;

		case '@':	/* T S R D L Z N J TH CH SH */
			if (*text != '\0' && strchr("TSRDLZNJ", *text))
			{
				text--;
				break;
			}
			/*
			 * TH, CH, SH.  Whether the digraph matches depends
			 * only on the text, not on how much pattern remains,
			 * so a context of just "@" accepts it too.  The
			 * character left of an 'H' is readable provided the
			 * word carries its leading blank.
			 */
			if
			(
				*text == 'H'
			&&
				(
					text[-1] == 'T'
				||
					text[-1] == 'S'
				||
					text[-1] == 'C'
				)
			)
			{
				text -= 2;
				break;
			}
			return FALSE;

		case '&':	/* S C G Z X J CH SH */
			if (*text != '\0' && strchr("SCGZXJ", *text))
			{
				text--;
				break;
			}
			/* CH, SH: same reasoning as the '@' digraphs */
			if
			(
				*text == 'H'
			&&
				(text[-1] == 'S' || text[-1] == 'C')
			)
			{
				text -= 2;
				break;
			}
			return FALSE;

		case '%':	/* suffix symbol is valid in right contexts only */
		default:
			error("Bad char in left rule: '%c'", *pat);
			return FALSE;
		}
	}
	return TRUE;
}


static int rightmatch _((char *pattern, char *context));

/*
 * Test whether the text to the right of a candidate match satisfies a
 * rule's right-context pattern.
 *
 * Pattern and text are both consumed left to right, starting at
 * `pattern' and `context' respectively.  Letters, apostrophes and
 * blanks in the pattern stand for themselves; the other symbols are
 *
 *	#	one or more vowels
 *	?	one or more digits
 *	:	zero or more consonants
 *	^	one consonant
 *	.	one of B D V G J L M N R W Z
 *	+	one of E I Y
 *	%	one of the suffixes ER, E, ES, ED, ING, ELY
 *
 * Returns TRUE when the whole pattern is satisfied (an empty pattern
 * always is) and FALSE otherwise.  Any other pattern character is
 * reported through error() and fails the match.
 *
 * The scan does not run past the end of the text: every symbol either
 * fails or stops consuming when it meets the terminating NUL.
 */
static int
rightmatch(pattern, context)
	char *pattern;	/* right-context pattern of the rule */
	char *context;	/* first text character after the matched text */
{
	char *pat;
	char *text;

	text = context;

	for (pat = pattern; *pat != '\0'; pat++)
	{
		/* literal letters, apostrophes and blanks match themselves */
		if (isalpha((unsigned char)*pat) || *pat == '\'' || *pat == ' ')
		{
			if (*pat != *text)
				return FALSE;
			text++;
			continue;
		}

		switch (*pat)
		{
		case '#':	/* One or more vowels */
			if (!isvowel(*text))
				return FALSE;
			do
				text++;
			while (isvowel(*text));
			break;

		case '?':	/* One or more digits */
			if (!isdigit((unsigned char)*text))
				return FALSE;
			do
				text++;
			while (isdigit((unsigned char)*text));
			break;

		case ':':	/* Zero or more consonants */
			while (isconsonant(*text))
				text++;
			break;

		case '^':	/* One consonant */
			if (!isconsonant(*text))
				return FALSE;
			text++;
			break;

		case '.':	/* B, D, V, G, J, L, M, N, R, W, Z */
			if (*text != 'B' && *text != 'D' && *text != 'V'
			   && *text != 'G' && *text != 'J' && *text != 'L'
			   && *text != 'M' && *text != 'N' && *text != 'R'
			   && *text != 'W' && *text != 'Z')
				return FALSE;
			text++;
			break;

		case '+':	/* E, I or Y (front vowel) */
			if (*text != 'E' && *text != 'I' && *text != 'Y')
				return FALSE;
			text++;
			break;

		case '%':	/* ER, E, ES, ED, ING, ELY (a suffix) */
			if (*text == 'E')
			{
				text++;
				if (*text == 'L')
				{
					/*
					 * Take the L only when it completes
					 * ELY; otherwise the suffix is the
					 * bare E and the L stays unconsumed.
					 */
					if (text[1] == 'Y')
						text += 2;
				}
				else if (*text == 'R' || *text == 'S'
				   || *text == 'D')
					text++;
				break;
			}
			/* short-circuit keeps each read at or before the NUL */
			if (*text == 'I' && text[1] == 'N' && text[2] == 'G')
			{
				text += 3;
				break;
			}
			return FALSE;

		default:
			error("Bad char in right rule:'%c'", *pat);
			return FALSE;
		}
	}

	return TRUE;
}


static int find_rule _((char *word, int index, rule_t *rules));

/*
 * Find and apply the first rule in `rules' that matches `word' at
 * position `index'.
 *
 * The table is walked in order until an entry with a null `middle' is
 * reached.  A rule applies when its middle text occurs at word[index]
 * and leftmatch() and rightmatch() accept the text on either side of
 * it.  The output of the first such rule is passed to
 * english_outstring().
 *
 * Returns the index just past the matched text.  When no rule applies,
 * the failure is reported through error() and index + 1 is returned so
 * that the caller skips the offending character.
 */
static int
find_rule(word, index, rules)
	char	*word;
	int	index;
	rule_t	*rules;
{
	rule_t	*candidate;
	char	*key, *lhs, *rhs;
	int	end;
	int	next;

	trace(("find_rule(word = \"%s\", index = %d)\n{\n"/*}*/, word, index));
	next = index;
	for (candidate = rules; ; candidate++)
	{
		key = candidate->middle;
		if (!key)
		{
			/* ran off the end of the table: give up on this char */
			error("no rule for \"%s\" at %d", word, index);
			next = index + 1;
			break;
		}

		/* the rule's middle text must appear verbatim at word[index] */
		end = index;
		while (*key && *key == word[end])
		{
			key++;
			end++;
		}
		if (*key)
			continue;	/* mismatch before the middle text ran out */

		trace(("Word: \"%.*s/%s\"\n", index, word, word + index));
		trace
		((
			"Trying: \"%s/%s/%s\" -> \"%s\"\n",
			candidate->left,
			candidate->middle,
			candidate->right,
			candidate->output
		));

		/* then both contexts must agree with the surrounding text */
		lhs = candidate->left;
		if (!leftmatch(lhs, &word[index - 1]))
			continue;
		trace(("leftmatch(\"%s\", \"%c\") succeded!\n",
			lhs, word[index-1]));

		rhs = candidate->right;
		if (!rightmatch(rhs, &word[end]))
			continue;
		trace(("rightmatch(\"%s\", \"%s\") succeded!\n",
			rhs, &word[end]));

		trace(("Success:\n"));
		english_outstring(candidate->output);
		next = end;
		break;
	}
	trace(("return %d;\n", next));
	trace((/*{*/"}\n"));
	return next;
}


/*
 * Translate one word by applying rules along its length.
 *
 * Scanning starts at word[1]; word[0] is expected to be the leading
 * blank.  At each position the rule table is selected by the current
 * character: english_rules[1] to english_rules[26] for 'A' to 'Z', and
 * english_rules[0] for anything else.  find_rule() applies a rule and
 * returns the position at which to resume.
 *
 * An empty word, or one holding nothing after its first character, is
 * left alone: the terminator is tested before every rule lookup, so
 * find_rule() is never asked to match at the NUL and the scan cannot
 * be stepped past the end of the string.
 */
void
xlate_word(word)
	char	*word;
{
	int	index;	/* Current position in word */
	int	type;	/* Rule table selected by the current character */

	trace(("xlate_word(\"%s\")\n{\n"/*}*/, word));
	if (word[0] != '\0')
	{
		index = 1;	/* Skip the initial blank */
		while (word[index] != '\0')
		{
			/*
			 * Explicit range test instead of isupper(): it is
			 * safe for negative char values, ignores the
			 * locale, and keeps `type' within 0..26.
			 */
			if (word[index] >= 'A' && word[index] <= 'Z')
				type = word[index] - 'A' + 1;
			else
				type = 0;

			index = find_rule(word, index, english_rules[type]);
		}
	}
	trace((/*{*/"}\n"));
}
