/*
    BFilter - a web proxy which removes banner ads
    Copyright (C) 2002-2005  Joseph Artsimovich <joseph_a@mail.ru>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include <iostream>
#include <fstream>
#include <sstream>
#include "lexgen.h"
#include "NfaChar.h"
#include "NfaCharClass.h"
#include "NfaNegatedCharClass.h"
#include "NfaConcatenation.h"
#include "NfaClosure.h"
#include "NfaPositiveClosure.h"
#include "NfaOption.h"
#include "NfaString.h"
#include "NfaInsensitiveString.h"
#include "NfaUnion.h"
#include "NfaEpsilon.h"

#include "XmlLexerDefinitions.h"

/**
 * Entry point of the XML lexer generator.
 *
 * Describes the XML tokens as NFA fragments, registers them with LexGen as
 * rules grouped by scanner state (the XmlLexerDefinitions constants), and
 * writes the generated lexer to the given header and implementation files.
 *
 * Expected arguments:
 *   argv[1]  name of the generated lexer class
 *   argv[2]  path of the generated header
 *   argv[3]  path of the generated implementation file
 *   argv[4]  name of the definitions class
 *   argv[5]  header of the definitions class
 *   argv[6]  name of the subclass passed to LexGen
 *   argv[7]  header of that subclass
 *
 * Returns 0 on success, 1 on a usage error, when an output file cannot be
 * opened, or when writing the generated code fails.
 */
int main(int argc, char** argv)
{
	if (argc < 8) {
		std::cerr << "Usage: makelexer <OutputClass> <out.h> <out.cpp> "
			"<Definitions> <definitions.h> <Subclass> <subclass.h>" << std::endl;
		return 1;
	}
	const char* out_class = argv[1];
	const char* out_header = argv[2];
	const char* out_impl = argv[3];
	const char* def_class = argv[4];
	const char* def_header = argv[5];
	const char* subclass = argv[6];
	const char* subclass_header = argv[7];

	// Abort right away if an output file cannot be opened: going on would
	// silently generate nothing and still report success to the build system.
	std::ofstream header_strm(out_header);
	if (header_strm.fail()) {
		std::cerr << "Failed opening " << out_header << " for writing" << std::endl;
		return 1;
	}

	std::ofstream impl_strm(out_impl);
	if (impl_strm.fail()) {
		std::cerr << "Failed opening " << out_impl << " for writing" << std::endl;
		return 1;
	}

	LexGen lexgen(out_class, subclass);
	lexgen.trackLineCol(true);

	// Building blocks shared by the rules below.
	// NOTE(review): a default-constructed negated class presumably matches
	// any character (nothing is excluded) - confirm in NfaNegatedCharClass.
	NfaNegatedCharClass any;
	NfaEpsilon epsilon;
	NfaCharClass space(" \t\r\n");
	NfaNegatedCharClass not_space(" \t\r\n");
	NfaNegatedCharClass not_lbracket("<");

	// [A-Za-z]
	NfaCharClass alpha('A', 'Z');
	alpha.addChars('a', 'z');

	// [A-Za-z_:]
	NfaCharClass name_first(alpha);
	name_first.addChars("_:");

	// [A-Za-z_:0-9.-]
	NfaCharClass name_char(name_first);
	name_char.addChars('0', '9');
	name_char.addChars("-.");

	// <\?[A-Za-z_:].*\?>
	NfaConcatenation proc_inst(NfaString("<?"), name_first);
	proc_inst.addComponent(NfaClosure(any));
	proc_inst.addComponent(NfaString("?>"));

	// <!--.*-->
	NfaConcatenation comment(NfaString("<!--"));
	comment.addComponent(NfaClosure(any));
	comment.addComponent(NfaString("-->"));

	// <!\[CDATA\[.*\]\]>
	NfaConcatenation cdata(NfaString("<![CDATA["));
	cdata.addComponent(NfaClosure(any));
	cdata.addComponent(NfaString("]]>"));

	// <!DOCTYPE\s+[^>\[]*(\[.*\]\s*)?>
	NfaConcatenation doctype(NfaString("<!DOCTYPE"));
	doctype.addComponent(NfaPositiveClosure(space));
	doctype.addComponent(NfaClosure(NfaNegatedCharClass(">[")));
	doctype.addComponent(NfaOption(
		NfaConcatenation(NfaChar('['), NfaClosure(any))
		.addComponent(NfaChar(']')).addComponent(NfaClosure(space))
	));
	doctype.addComponent(NfaChar('>'));


	typedef XmlLexerDefinitions Defs;

	// --- INITIAL: between tags ---
	// NOTE(review): the four-argument addRule() form presumably takes the
	// second NFA as a trailing context that is matched but not included in
	// the token (see CONSUME_TRAILER() below) - confirm in lexgen.h.
	lexgen.addRule(Defs::INITIAL, NfaPositiveClosure(not_lbracket),
		"obj->processTextNode(tok_begin, tok_end);"
	);
	lexgen.addRule(Defs::INITIAL, comment,
		"obj->processCommentNode(tok_begin, tok_end);"
	).setLazy();
	lexgen.addRule(Defs::INITIAL, cdata,
		"obj->processCDataNode(tok_begin, tok_end);"
	).setLazy();
	lexgen.addRule(Defs::INITIAL, doctype,
		"BEGIN(INITIAL);\n\t"
		"obj->processDocTypeNode(tok_begin, tok_end);"
	).setLazy();
	lexgen.addRule(Defs::INITIAL, proc_inst,
		"BEGIN(INITIAL);\n\t"
		"obj->processProcInstNode(tok_begin, tok_end);"
	).setLazy();
	lexgen.addRule(Defs::INITIAL, NfaString("</"),
		"BEGIN(CLOSING_TAG_NAME);"
	).setLazy();
	lexgen.addRule(Defs::INITIAL, NfaChar('<'), name_first,
		"BEGIN(TAG_NAME);"
	).setLazy();
	// Fallback: any other single character is reported as a parse error.
	lexgen.addRule(Defs::INITIAL, any,
		"ABORT();\n\t"
		"obj->processParseError(obj->tokenPosition(tok_begin));"
	);


	// --- CLOSING_TAG_NAME: right after "</" ---
	lexgen.addRule(Defs::CLOSING_TAG_NAME,
		NfaConcatenation(name_first, NfaClosure(name_char)),
		NfaConcatenation(NfaClosure(space), NfaChar('>')),
		"obj->processClosingTag(tok_begin, tok_end);\n\t"
		"CONSUME_TRAILER(); BEGIN(INITIAL);"
	).setLazy();
	lexgen.addRule(Defs::CLOSING_TAG_NAME, epsilon,
		"ABORT();\n\t"
		"obj->processParseError(obj->tokenPosition(tok_begin));"
	);


	// --- TAG_NAME: right after '<' of an opening tag ---
	lexgen.addRule(Defs::TAG_NAME, NfaClosure(name_char),
		"BEGIN(INSIDE_TAG);\n\t"
		"obj->processOpeningTagName(tok_begin, tok_end);"
	);
	lexgen.addRule(Defs::TAG_NAME, epsilon,
		"ABORT();\n\t"
		"obj->processParseError(obj->tokenPosition(tok_begin));"
	);


	// --- INSIDE_TAG: after the tag name or after an attribute value ---
	lexgen.addRule(Defs::INSIDE_TAG, NfaPositiveClosure(space), name_first,
		"BEGIN(ATTR_NAME);"
	);
	lexgen.addRule(Defs::INSIDE_TAG,
		NfaConcatenation(NfaClosure(space), NfaChar('>')),
		"BEGIN(INITIAL);\n\t"
		"obj->processOpeningTag(false);"
	);
	lexgen.addRule(Defs::INSIDE_TAG,
		NfaConcatenation(NfaClosure(space), NfaString("/>")),
		"BEGIN(INITIAL);\n\t"
		"obj->processOpeningTag(true);"
	);
	lexgen.addRule(Defs::INSIDE_TAG, epsilon,
		"ABORT();\n\t"
		"obj->processParseError(obj->tokenPosition(tok_begin));"
	);


	// --- ATTR_NAME: at the first character of an attribute name ---
	lexgen.addRule(Defs::ATTR_NAME, NfaClosure(name_char),
		"BEGIN(ATTR_EQ);\n\t"
		"obj->processAttrName(tok_begin, tok_end);"
	);


	// --- ATTR_EQ: \s*=\s* between attribute name and value ---
	lexgen.addRule(Defs::ATTR_EQ,
		NfaConcatenation(
			NfaClosure(space), NfaChar('=')
		).addComponent(NfaClosure(space)),
		"BEGIN(ATTR_VALUE);"
	);
	lexgen.addRule(Defs::ATTR_EQ, epsilon,
		"ABORT();\n\t"
		"obj->processParseError(obj->tokenPosition(tok_begin));"
	);


	// --- ATTR_VALUE: "[^"]*" or '[^']*' ---
	lexgen.addRule(Defs::ATTR_VALUE,
		NfaConcatenation(
			NfaChar('"'), NfaClosure(NfaNegatedCharClass("\""))
		).addComponent(NfaChar('"')),
		"BEGIN(INSIDE_TAG);\n\t"
		"obj->processAttrValueWithQuotes(tok_begin, tok_end);"
	);
	lexgen.addRule(Defs::ATTR_VALUE,
		NfaConcatenation(
			NfaChar('\''), NfaClosure(NfaNegatedCharClass("'"))
		).addComponent(NfaChar('\'')),
		"BEGIN(INSIDE_TAG);\n\t"
		"obj->processAttrValueWithQuotes(tok_begin, tok_end);"
	);
	lexgen.addRule(Defs::ATTR_VALUE, epsilon,
		"ABORT();\n\t"
		"obj->processParseError(obj->tokenPosition(tok_begin));"
	);


	lexgen.writeLexer(header_strm, impl_strm, def_class, def_header, subclass_header);

	// Flush explicitly so that write errors (e.g. a full disk) are detected
	// here instead of being lost when the streams are destroyed.
	header_strm.flush();
	impl_strm.flush();
	if (header_strm.fail()) {
		std::cerr << "Failed writing " << out_header << std::endl;
		return 1;
	}
	if (impl_strm.fail()) {
		std::cerr << "Failed writing " << out_impl << std::endl;
		return 1;
	}
	return 0;
}

