/*
 * findtext.c
 * Copyright (C) 1998,1999 A.J. van Os
 *
 * Description:
 * Find the blocks that contain the text of MS Word files
 */

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "antiword.h"


/*
 * bAddBlocks - add the blocks of one run of text to the text block list
 *
 * iFirstOffset	offset of the first byte of the text, counted from the
 *		start of the chain of blocks that begins at iStartBlock
 * iTotalLength	length of the text in characters
 * bUsesUnicode	TRUE: one character is two bytes, FALSE: one byte
 * iStartBlock	index of the first block of the chain
 * aiBBD	the Big Block Depot; aiBBD[i] is the block that follows i
 * iBBDLen	number of entries in aiBBD
 *
 * Returns TRUE if successful, FALSE if not (invalid arguments, the chain
 * ends before all the text is covered or a block cannot be added)
 */
BOOL
bAddBlocks(int iFirstOffset, int iTotalLength, BOOL bUsesUnicode,
	int iStartBlock, const int *aiBBD, int iBBDLen)
{
	text_block_type	tTextBlock;
	int	iIndex, iTextOffset, iOffset, iToGo;

	fail(iFirstOffset < 0 || iTotalLength < 0 || iStartBlock < 0);
	fail(aiBBD == NULL || iBBDLen < 0);

	NO_DBG_HEX(iFirstOffset);
	NO_DBG_DEC(iTotalLength);

	/*
	 * The offset and the length come from the file, so they are checked
	 * at run time as well.
	 */
	if (iTotalLength <= 0) {
		/* Nothing to add; a negative length can never be satisfied */
		return iTotalLength == 0;
	}
	if (iFirstOffset < 0) {
		return FALSE;
	}

	if (bUsesUnicode) {
		/* One character equals two bytes */
		NO_DBG_MSG("Uses Unicode");
		if (iTotalLength > INT_MAX / 2) {
			/* The number of bytes does not fit in an int */
			return FALSE;
		}
		iToGo = iTotalLength * 2;
	} else {
		/* One character equals one byte */
		NO_DBG_MSG("Uses ASCII");
		iToGo = iTotalLength;
	}
	if (iToGo > INT_MAX - iFirstOffset) {
		/* The running text offset would overflow */
		return FALSE;
	}

	iTextOffset = iFirstOffset;
	iOffset = iFirstOffset;
	for (iIndex = iStartBlock;
	     iIndex != END_OF_CHAIN && iToGo > 0;
	     iIndex = aiBBD[iIndex]) {
		if (iIndex < 0 || iIndex >= iBBDLen) {
			werr(1, "The Big Block Depot is corrupt");
			/*
			 * Should werr return, stop here instead of using
			 * the invalid index on aiBBD.
			 */
			return FALSE;
		}
		if (iOffset >= BIG_BLOCK_SIZE) {
			/* The text starts in a later block of the chain */
			iOffset -= BIG_BLOCK_SIZE;
			continue;
		}
		/* The "+ 1" skips one block at the start of the file */
		tTextBlock.iFileOffset =
				(iIndex + 1) * BIG_BLOCK_SIZE + iOffset;
		tTextBlock.iTextOffset = iTextOffset;
		tTextBlock.iLength = min(BIG_BLOCK_SIZE - iOffset, iToGo);
		tTextBlock.bUsesUnicode = bUsesUnicode;
		/* All following blocks are used from their first byte */
		iOffset = 0;
		if (!bAdd2TextBlockList(&tTextBlock)) {
			DBG_HEX(tTextBlock.iFileOffset);
			DBG_HEX(tTextBlock.iTextOffset);
			DBG_DEC(tTextBlock.iLength);
			DBG_DEC(tTextBlock.bUsesUnicode);
			return FALSE;
		}
		iTextOffset += tTextBlock.iLength;
		iToGo -= tTextBlock.iLength;
	}
	/* Bytes left over means the chain ended too early */
	return iToGo == 0;
} /* end of bAddBlocks */

/*
 * eGet6DocumentText - make a list of the text blocks of Word 6/7 files
 *
 * Code for "fast saved" files.
 *
 * The start and the length of the text information are taken from the
 * offsets 0x160 and 0x164 of the header. The information is a sequence of
 * entries that start with a type byte; the pieces of the entry of type 2
 * are added to the text block list with bAddBlocks.
 *
 * Returns text_success when the information has been processed and
 * text_failure when it can't be read, is malformed or when a piece can't
 * be added
 */
text_info_enum
eGet6DocumentText(FILE *pFile, BOOL bUsesUnicode, int iStartBlock,
	const int *aiBBD, int iBBDLen, const unsigned char *aucHeader)
{
	unsigned char	*aucBuffer;
	int	iIndex, iOffset;
	int	iBeginTextInfo, iTextInfoLen;
	int	iOff, iType, iLen, iPieces, iTotLength;

	DBG_MSG("eGet6DocumentText");

	fail(pFile == NULL);
	fail(aiBBD == NULL || iBBDLen < 0);
	fail(aucHeader == NULL);

	iBeginTextInfo = (int)ulGetLong(0x160, aucHeader);
	iTextInfoLen = (int)ulGetLong(0x164, aucHeader);
	DBG_HEX(iBeginTextInfo);
	DBG_DEC(iTextInfoLen);
	if (iBeginTextInfo < 0 || iTextInfoLen < 0) {
		/* Corrupt header; don't use these values as a size */
		return text_failure;
	}

	aucBuffer = xmalloc(iTextInfoLen);
	if (!bReadBuffer(pFile, iStartBlock,
			aiBBD, iBBDLen, BIG_BLOCK_SIZE,
			aucBuffer, iBeginTextInfo, iTextInfoLen)) {
		aucBuffer = xfree(aucBuffer);
		return text_failure;
	}

	iOff = 0;
	while (iOff < iTextInfoLen) {
		iType = (int)ucGetByte(iOff, aucBuffer);
		iOff++;
		if (iType == 0) {
			/* Skip the single byte that follows */
			iOff++;
			continue;
		}
		if (iType == 1) {
			if (iTextInfoLen - iOff < 2) {
				/* Truncated entry, nothing left to parse */
				break;
			}
			/* Skip the length word and the data */
			iLen = (int)usGetWord(iOff, aucBuffer);
			iOff += iLen + 2;
			continue;
		}
		if (iType != 2) {
			werr(0, "Unknown type of 'fastsaved' format");
			aucBuffer = xfree(aucBuffer);
			return text_failure;
		}
		/* Type 2 */
		if (iTextInfoLen - iOff < 4) {
			/* No room for the length field */
			aucBuffer = xfree(aucBuffer);
			return text_failure;
		}
		/*
		 * The length field takes four bytes; read all of them
		 * instead of only the lower word.
		 */
		iLen = (int)ulGetLong(iOff, aucBuffer);
		NO_DBG_DEC(iLen);
		iOff += 4;
		if (iLen < 4 || iLen > iTextInfoLen - iOff) {
			/* The table must fit in what has been read */
			DBG_DEC(iLen);
			aucBuffer = xfree(aucBuffer);
			return text_failure;
		}
		/*
		 * The table holds (iPieces + 1) positions of four bytes,
		 * followed by iPieces descriptors of eight bytes.
		 */
		iPieces = (iLen - 4) / 12;
		DBG_DEC(iPieces);
		for (iIndex = 0; iIndex < iPieces; iIndex++) {
			/* The offset is at byte 2 of the descriptor */
			iOffset = (int)ulGetLong(
				iOff + (iPieces + 1) * 4 + iIndex * 8 + 2,
				aucBuffer);
			/* Length: difference of consecutive positions */
			iTotLength = (int)ulGetLong(
						iOff + (iIndex + 1) * 4,
						aucBuffer) -
					(int)ulGetLong(
						iOff + iIndex * 4,
						aucBuffer);
			if (!bAddBlocks(iOffset, iTotLength, bUsesUnicode,
					iStartBlock,
					aiBBD, iBBDLen)) {
				aucBuffer = xfree(aucBuffer);
				return text_failure;
			}
		}
		/* Nothing after the type 2 entry is looked at */
		break;
	}
	aucBuffer = xfree(aucBuffer);
	return text_success;
} /* end of eGet6DocumentText */

/*
 * eGet8DocumentText - make a list of the text blocks of Word 8/97 files
 *
 * The start and the length of the text information are taken from the
 * offsets 0x1a2 and 0x1a6 of the header. The information is read from
 * the 1Table or the 0Table stream, depending on bit 9 of the word at
 * offset 0x0a of the header. The pieces of the entry of type 2 are added
 * to the text block list with bAddBlocks.
 *
 * Returns text_success when the information has been processed,
 * text_no_information when it can't be read and text_failure when there
 * is no table stream, the information is malformed or a piece can't be
 * added
 */
text_info_enum
eGet8DocumentText(FILE *pFile, const pps_info_type *pPPS,
	const int *aiBBD, int iBBDLen, const int *aiSBD, int iSBDLen,
	const unsigned char *aucHeader)
{
	const int	*aiBlockDepot;
	unsigned char	*aucBuffer;
	int	iTableStartBlock, iTableSize, iBlockDepotLen, iBlockSize;
	int	iIndex, iOffset;
	int	iBeginTextInfo, iTextInfoLen;
	int	iOff, iType, iLen, iPieces, iTotLength;
	BOOL	bUsesUnicode;
	unsigned short	usDocStatus;

	DBG_MSG("eGet8DocumentText");

	fail(pFile == NULL || pPPS == NULL);
	fail(aiBBD == NULL || iBBDLen < 0);
	fail(aucHeader == NULL);

	iBeginTextInfo = (int)ulGetLong(0x1a2, aucHeader);
	iTextInfoLen = (int)ulGetLong(0x1a6, aucHeader);
	DBG_HEX(iBeginTextInfo);
	DBG_DEC(iTextInfoLen);

	/* Use 0Table or 1Table? */
	usDocStatus = usGetWord(0x0a, aucHeader);
	if (usDocStatus & BIT(9)) {
		iTableStartBlock = pPPS->t1Table.iSb;
		iTableSize = pPPS->t1Table.iSize;
	} else {
		iTableStartBlock = pPPS->t0Table.iSb;
		iTableSize = pPPS->t0Table.iSize;
	}
	DBG_DEC(iTableStartBlock);
	if (iTableStartBlock < 0) {
		DBG_DEC(iTableStartBlock);
		return text_failure;
	}
	DBG_HEX(iTableSize);
	if (iTableSize < MIN_SIZE_FOR_BBD_USE) {
		/* Use the Small Block Depot */
		aiBlockDepot = aiSBD;
		iBlockDepotLen = iSBDLen;
		iBlockSize = SMALL_BLOCK_SIZE;
	} else {
		/* Use the Big Block Depot */
		aiBlockDepot = aiBBD;
		iBlockDepotLen = iBBDLen;
		iBlockSize = BIG_BLOCK_SIZE;
	}
	if (iBeginTextInfo < 0 || iTextInfoLen < 0) {
		/* Corrupt header; don't use these values as a size */
		return text_failure;
	}
	aucBuffer = xmalloc(iTextInfoLen);
	if (!bReadBuffer(pFile, iTableStartBlock,
			aiBlockDepot, iBlockDepotLen, iBlockSize,
			aucBuffer, iBeginTextInfo, iTextInfoLen)) {
		aucBuffer = xfree(aucBuffer);
		return text_no_information;
	}
	DBG_PRINT_BLOCK(aucBuffer, iTextInfoLen);

	iOff = 0;
	while (iOff < iTextInfoLen) {
		iType = (int)ucGetByte(iOff, aucBuffer);
		iOff++;
		if (iType == 0) {
			/* Skip the single byte that follows */
			iOff++;
			continue;
		}
		if (iType == 1) {
			if (iTextInfoLen - iOff < 2) {
				/* Truncated entry, nothing left to parse */
				break;
			}
			/* Skip the length word and the data */
			iLen = (int)usGetWord(iOff, aucBuffer);
			iOff += iLen + 2;
			continue;
		}
		if (iType != 2) {
			werr(0, "Unknown type of 'fastsaved' format");
			aucBuffer = xfree(aucBuffer);
			return text_failure;
		}
		/* Type 2 */
		if (iTextInfoLen - iOff < 4) {
			/* No room for the length field */
			aucBuffer = xfree(aucBuffer);
			return text_failure;
		}
		iLen = (int)ulGetLong(iOff, aucBuffer);
		NO_DBG_DEC(iLen);
		iOff += 4;
		if (iLen < 4 || iLen > iTextInfoLen - iOff) {
			/* The table must fit in what has been read */
			DBG_DEC(iLen);
			aucBuffer = xfree(aucBuffer);
			return text_failure;
		}
		/*
		 * The table holds (iPieces + 1) positions of four bytes,
		 * followed by iPieces descriptors of eight bytes.
		 */
		iPieces = (iLen - 4) / 12;
		DBG_DEC(iPieces);
		for (iIndex = 0; iIndex < iPieces; iIndex++) {
			/* The offset is at byte 2 of the descriptor */
			iOffset = (int)ulGetLong(
				iOff + (iPieces + 1) * 4 + iIndex * 8 + 2,
				aucBuffer);
			if ((iOffset & BIT(30)) == 0) {
				bUsesUnicode = TRUE;
			} else {
				/*
				 * Bit 30 marks text of one byte per
				 * character; the remaining value is twice
				 * the offset.
				 */
				bUsesUnicode = FALSE;
				iOffset &= ~BIT(30);
				iOffset /= 2;
			}
			/* Length: difference of consecutive positions */
			iTotLength = (int)ulGetLong(
						iOff + (iIndex + 1) * 4,
						aucBuffer) -
					(int)ulGetLong(
						iOff + iIndex * 4,
						aucBuffer);
			/* The text is in the WordDocument stream */
			if (!bAddBlocks(iOffset, iTotLength, bUsesUnicode,
					pPPS->tWordDocument.iSb,
					aiBBD, iBBDLen)) {
				aucBuffer = xfree(aucBuffer);
				return text_failure;
			}
		}
		/* Nothing after the type 2 entry is looked at */
		break;
	}
	aucBuffer = xfree(aucBuffer);
	return text_success;
} /* end of eGet8DocumentText */
