/* Copyright (C) 2006 G.P. Halkes
   Licensed under the Open Software License version 2.0 */

#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <errno.h>
#include <ctype.h>

#include "definitions.h"

/* States of the character scanner in readFile. */
typedef enum {
	/* Initial state: no input character has been processed yet. */
	NONE,
	/* The previous character was whitespace or a delimiter; a (possibly
	   zero-length) whitespace run is open. */
	WHITESPACE,
	/* The previous character was part of a word that is still open. */
	WORD
} MatchState;


/* Returned by main as the process exit status. It starts at 0 and is not
   modified by the code visible here; presumably the diff code updates it
   when differences are found -- TODO confirm. */
int differences = 0;
/* Word counts used for the report printed when option.statistics is set.
   main stores the word totals of both inputs here; the other fields are
   only read in the code visible here. */
Statistics statistics;

/** Report an unrecoverable error on stderr and terminate the program.
	@param fmt A printf-style format string for the message. See fprintf(3).
	@param ... The values referenced by @a fmt.

	This function does not return: the process exits with status 2.
*/
void fatal(const char *fmt, ...) {
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	exit(2);
}

/** Emit one character of a token, escaping it when transliteration is on.
	@param file The token file to append to.
	@param c The character to emit.

	With option.transliterate set, a newline is written as the two characters
	backslash-n and a backslash is doubled. When built with NO_MINUS_A,
	non-printable characters are additionally written as a backslash followed
	by their upper-case hexadecimal value. Everything else is written verbatim.
*/
static void writeTokenChar(FILE *file, int c) {
	/* Without transliteration every character is stored as-is. */
	if (!option.transliterate) {
		putc(c, file);
		return;
	}

	switch (c) {
		case '\n':
			fputs("\\n", file);
			return;
		case '\\':
			fputs("\\\\", file);
			return;
		default:
			break;
	}
#ifdef NO_MINUS_A
	if (!isprint(c)) {
		fprintf(file, "\\%X", c);
		return;
	}
#endif
	putc(c, file);
}

/** Read a file and separate whitespace from the rest.
	@param file The @a InputFile to read.
	@return The number of "words" in @a file. Every delimiter character counts
		as a word of its own.

	The input is split into two temporary files, whose handles are stored in
	the @a InputFile structure:
	- the token file receives every word and every delimiter, each terminated
	  by a newline (characters are written through writeTokenChar, which
	  escapes them when option.transliterate is set);
	- the whitespace file receives the whitespace runs, each terminated by
	  option.whitespaceDelimiter. A run may be zero-length, for example
	  between a word and a directly following delimiter.

	If @a file->name is NULL no file is opened and @a file->input is used as it
	is; it is presumably set up by the caller (e.g. to stdin) -- TODO confirm.
	Failure to open the input or to create a temporary file is fatal. The input
	stream is closed before returning, and both temporary files are flushed and
	rewound so they can be read from the start.
*/
static int readFile(InputFile *file) {
	MatchState state = NONE;
	int c, wordCount = 0;

	if (file->name != NULL && (file->input = fopen(file->name, "r")) == NULL)
		fatal(_("Can't open file %s: %s\n"), file->name, strerror(errno));

	if ((file->tokens = tempFile()) == NULL)
		fatal(_("Could not create temporary file: %s\n"), strerror(errno));

	if ((file->whitespace = tempFile()) == NULL)
		fatal(_("Could not create temporary file: %s\n"), strerror(errno));

	while ((c = getc(file->input)) != EOF) {
		switch (state) {
			case NONE:
				/* First character of the input. */
				if (TEST_BIT(option.whitespace, c)) {
					putc(c, file->whitespace->file);
					state = WHITESPACE;
					break;
				}
				/* The input starts with a token: record a zero-length leading
				   whitespace so that every token has whitespace before it. */
				putc(option.whitespaceDelimiter, file->whitespace->file);
				writeTokenChar(file->tokens->file, c);
				if (TEST_BIT(option.delimiters, c)) {
					/* A delimiter is a complete word by itself, so it has to be
					   counted here just as in the WHITESPACE and WORD states.
					   Otherwise the total is one short compared to the number
					   of lines in the token file. */
					wordCount++;
					putc('\n', file->tokens->file);
					state = WHITESPACE;
				} else {
					state = WORD;
				}
				break;
			case WORD:
				if (TEST_BIT(option.whitespace, c)) {
					/* Found the end of a "word". Go to whitespace mode. */
					wordCount++;
					putc('\n', file->tokens->file);
					putc(c, file->whitespace->file);
					state = WHITESPACE;
				} else if (TEST_BIT(option.delimiters, c)) {
					/* Found a delimiter. Finish the current word, add a zero length whitespace
					   to the whitespace file, add the delimiter as a word, and go into
					   whitespace mode. */
					wordCount += 2;
					putc('\n', file->tokens->file);
					writeTokenChar(file->tokens->file, c);
					putc('\n', file->tokens->file);
					putc(option.whitespaceDelimiter, file->whitespace->file);
					state = WHITESPACE;
				} else {
					writeTokenChar(file->tokens->file, c);
				}
				break;
			case WHITESPACE:
				if (TEST_BIT(option.whitespace, c)) {
					putc(c, file->whitespace->file);
				} else if (TEST_BIT(option.delimiters, c)) {
					/* Found a delimiter. Finish the current whitespace, and add the delimiter
					   as a word. Then start new whitespace. */
					wordCount++;
					writeTokenChar(file->tokens->file, c);
					putc('\n', file->tokens->file);
					putc(option.whitespaceDelimiter, file->whitespace->file);
				} else {
					/* Found the start of a word. Finish the whitespace, and go into
					   word mode. */
					writeTokenChar(file->tokens->file, c);
					putc(option.whitespaceDelimiter, file->whitespace->file);
					state = WORD;
				}
				break;
			default:
				PANIC();
		}
	}
	/* Make sure there is whitespace to end the output with. This may
	   be zero-length. */
	putc(option.whitespaceDelimiter, file->whitespace->file);
	/* Make sure the word is terminated, or otherwise diff will add
	   extra output. */
	if (state == WORD) {
		wordCount++;
		putc('\n', file->tokens->file);
	}
	/* Close the input, and make sure the output is in the filesystem.
	   Then rewind so we can start reading from the start. */
	fclose(file->input);
	fflush(file->whitespace->file);
	rewind(file->whitespace->file);
	fflush(file->tokens->file);
	rewind(file->tokens->file);

	return wordCount;
}

/** Compute a truncated integer percentage that is safe for an empty total.
	@param part The number of words in one category.
	@param total The total number of words.
	@return @a part as a percentage of @a total, or 0 if @a total is 0.
*/
static int percentOf(int part, int total) {
	/* An input without any words has a total of 0; dividing by it would be
	   undefined behaviour (typically a crash with SIGFPE). */
	if (total == 0)
		return 0;
	return (part * 100) / total;
}

/** Main.
	@param argc The number of command line arguments.
	@param argv The command line arguments.
	@return The value of the global @a differences.

	Parses the command line, splits both input files into tokens and
	whitespace, runs the diff, and prints the word statistics if
	option.statistics is set.
*/
int main(int argc, char *argv[]) {
#ifdef USE_GETTEXT
	setlocale(LC_ALL, "");
	bindtextdomain("dwdiff", LOCALEDIR);
	textdomain("dwdiff");
#endif

	parseCmdLine(argc, argv);

	statistics.oldTotal = readFile(&option.oldFile);
	statistics.newTotal = readFile(&option.newFile);

	doDiff();

	if (option.statistics) {
		/* Words that are neither deleted/inserted nor changed are common to both files. */
		int common = statistics.oldTotal - statistics.deleted - statistics.oldChanged;
		printf(_("old: %d words  %d %d%% common  %d %d%% deleted  %d %d%% changed\n"), statistics.oldTotal,
			common, percentOf(common, statistics.oldTotal),
			statistics.deleted, percentOf(statistics.deleted, statistics.oldTotal),
			statistics.oldChanged, percentOf(statistics.oldChanged, statistics.oldTotal));
		common = statistics.newTotal - statistics.added - statistics.newChanged;
		printf(_("new: %d words  %d %d%% common  %d %d%% inserted  %d %d%% changed\n"), statistics.newTotal,
			common, percentOf(common, statistics.newTotal),
			statistics.added, percentOf(statistics.added, statistics.newTotal),
			statistics.newChanged, percentOf(statistics.newChanged, statistics.newTotal));
	}
	return differences;
}
