/*
 * chartrans.c
 * Copyright (C) 1999,2000 A.J. van Os
 *
 * Description:
 * Translate characters to ISO-8859-1
 */

#include <ctype.h>
#include <limits.h>
#include "antiword.h"

/*
 * iTranslateCharacters - Translate characters to ISO-8859-1
 *
 * Translate Macintosh characters, Window characters and some Unicode
 * characters into ISO-8859-1 (aka Latin1).
 *
 * returns the translated character or EOF if the input character should
 *	be ignored
 */
int
iTranslateCharacters(int iChar, int iFileOffset, BOOL bMacFile)
{
	if (bMacFile) {
		/* Translate special Macintosh characters */
		switch (iChar) {
		case MAC_CAPTAL_A_DIAERESIS:
			return OUR_CAPTAL_A_DIAERESIS;
		case MAC_CAPTAL_A_RING_ABOVE:
			return OUR_CAPTAL_A_RING_ABOVE;
		case MAC_CAPTAL_C_CEDILLA:
			return OUR_CAPTAL_C_CEDILLA;
		case MAC_CAPTAL_O_ACUTE_ACCENT:
			return OUR_CAPTAL_O_ACUTE_ACCENT;
		case MAC_CAPTAL_N_TILDE:
			return OUR_CAPTAL_N_TILDE;
		case MAC_CAPTAL_O_DIAERESIS:
			return OUR_CAPTAL_O_DIAERESIS;
		case MAC_CAPTAL_U_DIAERESIS:
			return OUR_CAPTAL_U_DIAERESIS;
		case MAC_SMALL_A_ACUTE_ACCENT:
			return OUR_SMALL_A_ACUTE_ACCENT;
		case MAC_SMALL_A_GRAVE_ACCENT:
			return OUR_SMALL_A_GRAVE_ACCENT;
		case MAC_SMALL_A_CIRCUMFLEX_ACCENT:
			return OUR_SMALL_A_CIRCUMFLEX_ACCENT;
		case MAC_SMALL_A_DIAERESIS:
			return OUR_SMALL_A_DIAERESIS;
		case MAC_SMALL_A_TILDE:
			return OUR_SMALL_A_TILDE;
		case MAC_SMALL_A_RING_ABOVE:
			return OUR_SMALL_A_RING_ABOVE;
		case MAC_SMALL_C_CEDILLA:
			return OUR_SMALL_C_CEDILLA;
		case MAC_SMALL_E_ACUTE_ACCENT:
			return OUR_SMALL_E_ACUTE_ACCENT;
		case MAC_SMALL_E_GRAVE_ACCENT:
			return OUR_SMALL_E_GRAVE_ACCENT;
		case MAC_SMALL_E_CIRCUMFLEX_ACCENT:
			return OUR_SMALL_E_CIRCUMFLEX_ACCENT;
		case MAC_SMALL_E_DIAERESIS:
			return OUR_SMALL_E_DIAERESIS;
		case MAC_SMALL_I_ACUTE_ACCENT:
			return OUR_SMALL_I_ACUTE_ACCENT;
		case MAC_SMALL_I_GRAVE_ACCENT:
			return OUR_SMALL_I_GRAVE_ACCENT;
		case MAC_SMALL_I_CIRCUMFLEX_ACCENT:
			return OUR_SMALL_I_CIRCUMFLEX_ACCENT;
		case MAC_SMALL_I_DIAERESIS:
			return OUR_SMALL_I_DIAERESIS;
		case MAC_SMALL_N_TILDE:
			return OUR_SMALL_N_TILDE;
		case MAC_SMALL_O_ACUTE_ACCENT:
			return OUR_SMALL_O_ACUTE_ACCENT;
		case MAC_SMALL_O_GRAVE_ACCENT:
			return OUR_SMALL_O_GRAVE_ACCENT;
		case MAC_SMALL_O_CIRCUMFLEX_ACCENT:
			return OUR_SMALL_O_CIRCUMFLEX_ACCENT;
		case MAC_SMALL_O_DIAERESIS:
			return OUR_SMALL_O_DIAERESIS;
		case MAC_SMALL_O_TILDE:
			return OUR_SMALL_O_TILDE;
		case MAC_SMALL_U_ACUTE_ACCENT:
			return OUR_SMALL_U_ACUTE_ACCENT;
		case MAC_SMALL_U_GRAVE_ACCENT:
			return OUR_SMALL_U_GRAVE_ACCENT;
		case MAC_SMALL_U_CIRCUMFLEX_ACCENT:
			return OUR_SMALL_U_CIRCUMFLEX_ACCENT;
		case MAC_SMALL_U_DIAERESIS:
			return OUR_SMALL_U_DIAERESIS;
		case MAC_SMALL_SHARP_S:
			return OUR_SMALL_SHARP_S;
		case MAC_LEFT_DOUBLE_QMARK:
			return OUR_LEFT_DOUBLE_QMARK;
		case MAC_RIGHT_DOUBLE_QMARK:
			return OUR_RIGHT_DOUBLE_QMARK;
		case MAC_EN_DASH:
			return OUR_EN_DASH;
		case MAC_EM_DASH:
			return OUR_EM_DASH;
		case MAC_OPENING_DOUBLE_QUOTE:
			return OUR_OPENING_DOUBLE_QUOTE;
		case MAC_CLOSING_DOUBLE_QUOTE:
			return OUR_CLOSING_DOUBLE_QUOTE;
		case MAC_LEFT_SINGLE_QUOTE:
			return OUR_LEFT_SINGLE_QUOTE;
		case MAC_RIGHT_SINGLE_QUOTE:
			return OUR_RIGHT_SINGLE_QUOTE;
		default:
			break;
		}
	}

	/* Translate characters to ISO-8859-1 */
	switch (iChar) {
	case IGNORE_CHAR:
	case ANNOTATION:
	case FRAME:
	case WORD_SOFT_HYPHEN:
	case UNICODE_HYPHENATION_POINT:
		return EOF;
	case PICTURE:
	case TABLE_SEPARATOR:
	case TAB:
	case HARD_RETURN:
	case FORM_FEED:
	case PAR_END:
	case COLUMN_FEED:
		return iChar;
	case FOOTNOTE_OR_ENDNOTE:
		NO_DBG_HEX(iFileOffset);
		switch (eGetNotetype(iFileOffset)) {
		case notetype_is_footnote:
			return FOOTNOTE_CHAR;
		case notetype_is_endnote:
			return ENDNOTE_CHAR;
		default:
			return UNKNOWN_NOTE_CHAR;
		}
	case WORD_UNBREAKABLE_JOIN:
	case UNICODE_NON_BREAKING_HYPHEN:
		return OUR_UNBREAKABLE_JOIN;
	case WORD_EURO_SIGN:
	case UNICODE_EURO_SIGN:
		return OUR_EURO_SIGN;
	case WORD_CEDILLA:
		return OUR_CEDILLA;
	case WORD_DUTCH_GUILDER_SIGN:
		return OUR_DUTCH_GUILDER_SIGN;
	case WORD_LOW_DOUBLE_QUOTE:
	case UNICODE_LOW_DOUBLE_QUOTE:
		return OUR_LOW_DOUBLE_QUOTE;
	case WORD_ELLIPSIS:
	case UNICODE_ELLIPSIS:
		return OUR_ELLIPSIS;
	case WORD_DAGGER:
	case UNICODE_DAGGER:
		return OUR_DAGGER;
	case WORD_DOUBLE_DAGGER:
	case UNICODE_DOUBLE_DAGGER:
		return OUR_DOUBLE_DAGGER;
	case WORD_NON_SPACING_CIRCUMFLEX_ACCENT:
		return OUR_NON_SPACING_CIRCUMFLEX_ACCENT;
	case WORD_PER_MILLE_SIGN:
	case UNICODE_PER_MILLE_SIGN:
		return OUR_PER_MILLE_SIGN;
	case WORD_LEFT_SINGLE_QMARK:
	case UNICODE_LEFT_SINGLE_QMARK:
		return OUR_LEFT_SINGLE_QMARK;
	case WORD_CAPITAL_LIGATURE_OE:
		return OUR_CAPITAL_LIGATURE_OE;
	case WORD_LEFT_SINGLE_QUOTE:
	case UNICODE_LEFT_SINGLE_QUOTE:
	case UNICODE_LEFT_SINGLE_QUOTE_ALT:
		return OUR_LEFT_SINGLE_QUOTE;
	case WORD_RIGHT_SINGLE_QUOTE:
	case UNICODE_RIGHT_SINGLE_QUOTE:
	case UNICODE_RIGHT_SINGLE_QUOTE_ALT:
		return OUR_RIGHT_SINGLE_QUOTE;
	case WORD_OPENING_DOUBLE_QUOTE:
	case UNICODE_OPENING_DOUBLE_QUOTE:
		return OUR_OPENING_DOUBLE_QUOTE;
	case WORD_CLOSING_DOUBLE_QUOTE:
	case UNICODE_CLOSING_DOUBLE_QUOTE:
		return OUR_CLOSING_DOUBLE_QUOTE;
	case WORD_BULLET:
	case UNICODE_BULLET:
	case UNICODE_BULLET_ALT:
		return OUR_BULLET;
	case WORD_EM_DASH:
	case UNICODE_EM_DASH:
	case UNICODE_HORIZONTAL_BAR:
		return OUR_EM_DASH;
	case WORD_EN_DASH:
	case UNICODE_EN_DASH:
	case UNICODE_FIGURE_DASH:
		return OUR_EN_DASH;
	case WORD_NON_SPACING_TILDE:
		return OUR_NON_SPACING_TILDE;
	case WORD_TRADEMARK:
	case UNICODE_TRADEMARK:
		return OUR_TRADEMARK;
	case WORD_RIGHT_SINGLE_QMARK:
	case UNICODE_RIGHT_SINGLE_QMARK:
		return OUR_RIGHT_SINGLE_QMARK;
	case WORD_SMALL_LIGATURE_OE:
		return OUR_SMALL_LIGATURE_OE;
	case UNICODE_CAPITAL_W_CIRCUMFLEX_ACCENT:
		return OUR_CAPITAL_W_CIRCUMFLEX_ACCENT;
	case UNICODE_SMALL_W_CIRCUMFLEX_ACCENT:
		return OUR_SMALL_W_CIRCUMFLEX_ACCENT;
	case UNICODE_CAPITAL_Y_CIRCUMFLEX_ACCENT:
		return OUR_CAPITAL_Y_CIRCUMFLEX_ACCENT;
	case UNICODE_SMALL_Y_CIRCUMFLEX_ACCENT:
		return OUR_SMALL_Y_CIRCUMFLEX_ACCENT;
	case UNICODE_HYPHEN:
		return OUR_HYPHEN;
	case UNICODE_DOUBLE_VERTICAL_LINE:
		return OUR_DOUBLE_VERTICAL_LINE;
	case UNICODE_DOUBLE_LOW_LINE:
		return OUR_DOUBLE_LOW_LINE;
	case UNICODE_FRACTION_SLASH:
		return OUR_FRACTION_SLASH;
	case UNICODE_WHITE_SMILING_FACE:
		return OUR_WHITE_SMILING_FACE;
	case UNICODE_BLACK_SMILING_FACE:
		return OUR_BLACK_SMILING_FACE;
	case UNICODE_DIAMOND:
		return OUR_DIAMOND;
	case UNICODE_COPYRIGHT:
		return OUR_COPYRIGHT;
	default:
		DBG_HEX_C(iChar >= 0x80 && iChar <= 0x9f, iFileOffset);
		DBG_HEX_C(iChar >= 0x80 && iChar <= 0x9f, iChar);
		DBG_HEX_C(iChar < 0x20 || iChar > 0xff, iFileOffset);
		DBG_HEX_C(iChar < 0x20 || iChar > 0xff, iChar);
		if (iChar < 0x20) {
			/* A control character slipped through */
			return EOF;
		}
		if (iChar > 0xff) {
			/* Untranslated Unicode character */
			return '?';
		}
		return iChar;
	}
} /* end of iTranslateCharacters */

/*
 * iToUpper - convert a letter to upper case
 *
 * This function converts an ISO-8859-1 letter to upper case. Unlike
 * toupper(3) this function is independent from the settings of locale.
 * This comes in handy for people who have to read Word documents written
 * in more than one language, or documents that contain more than one
 * language.
 *
 * iChar	the character to convert
 *
 * returns the converted letter, or iChar if the conversion was not possible.
 */
int
iToUpper(int iChar)
{
	if (iChar >= 'a' && iChar <= 'z') {
		/*
		 * US ASCII lower case letter
		 * toupper(3) is avoided on purpose: its result follows the
		 * current locale (a Turkish locale does not map 'i' to 'I')
		 */
		return iChar - ('a' - 'A');
	}
	if (iChar >= 0xe0 && iChar <= 0xfe && iChar != 0xf7) {
		/*
		 * Lower case accented characters; the upper case form is
		 * 0x20 lower in ISO-8859-1
		 * 0xf7 is Division sign; 0xd7 is Multiplication sign
		 * 0xff is y with diaeresis; 0xdf is Sharp s
		 * (neither has an upper case form in ISO-8859-1)
		 */
		return iChar & ~0x20;
	}
	/* Not a lower case letter (this includes EOF and values > 0xff) */
	return iChar;
} /* end of iToUpper */
