
// This file is part of the pdr/pdx project.
// Copyright (C) 2010 Torsten Mueller, Bern, Switzerland
//
// This program is free software: you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation, either version 2 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

#include "../libpdrx/common.h"

using namespace std;
using namespace boost;
using namespace boost::posix_time;
using namespace boost::gregorian;
using namespace boost::program_options;
using namespace boost::filesystem;

#include "../libpdrx/datatypes.h"
#include "../libpdrx/xception.h"
#include "../libpdrx/conversions.h"
#include "../libpdrx/config.h"
#include "db.h"
#include "in_impl.h"

#include <stdexcept>

#include <Poco/Net/QuotedPrintableDecoder.h>
#include <Poco/Base64Decoder.h>

using namespace Poco;
using namespace Poco::Net;

//=== InputImpl (abstract base class) ======================================
// Constructs the base part of an input implementation and keeps a copy of
// the option key. FileInputImpl::Do uses this key as the prefix of its
// configuration options ("<key>.filename", "<key>.keep"); an empty key
// makes it take its filenames from a vector option instead.
InputImpl::InputImpl (const string& option_key)
	: m_option_key(option_key)
{
}

// Reads everything a decoder stream delivers, whitespace included.
static string ReadAllDecoded (istream& decoder)
{
	string decoded;
	char c;
	while (decoder.get(c))
		decoded += c;
	return decoded;
}

// Converts a mail subject with coded parts into readable text.
//
// subject	the subject as found in the mail header
// returns	the subject with all coded parts decoded, every coded
//		part whose charset is not utf-8 is converted by
//		Decode(), text outside of coded parts is copied
//		unchanged
string InputImpl::ConvertMailSubject (const string& subject)
{
	// a coded mail subject can be a complex thing, it can have a shape
	// like this
	//
	//	coded ascii coded coded coded ascii coded
	//
	// "coded" means an expression like this
	//
	//	=?charset?Q?text?=
	//	=?charset?B?text?=
	//
	// for every of these expressions we must first decode the stuff
	// into human readable text and then convert its character set
	// into utf-8

	string result;
	{
		// neither charset nor text may run across a '?', otherwise
		// a single greedy match would swallow several coded parts
		// at once; the encoding letter is case insensitive
		static const regex rx_coded_part("=\\?([^?]*)\\?([QqBb])\\?([^?]+)\\?=");

		string s(subject);
		while (!s.empty())
		{
			smatch mr;
			if (!regex_search(s, mr, rx_coded_part, match_any))
			{
				// no (more) coded parts, the rest is plain text
				result += s;
				break;
			}

			// plain text in front of the coded part
			result.append(s, 0, mr.position());

			// take copies of all sub matches, they refer to s
			// which is going to be changed
			string charset(mr[1]);
			const string letter(mr[2]);
			string text(mr[3]);

			// consume the plain text in front of the coded part
			// together with the coded part itself
			s.erase(0, mr.position() + mr.length());

			// decode, the whole decoder output is read because
			// the decoded text can contain spaces
			switch (letter[0])
			{
				case 'B':
				case 'b':
				{
					stringstream ss(text);
					Base64Decoder decoder(ss);
					text = ReadAllDecoded(decoder);
					break;
				}
				default: // Q or q
				{
					// if a coded text contains underlines
					// these underlines mean spaces, real
					// underlines are coded
					for (size_t i = 0; i < text.length(); i++)
					{
						if (text[i] == '_')
							text[i] = ' ';
					}

					stringstream ss(text);
					QuotedPrintableDecoder decoder(ss);
					text = ReadAllDecoded(decoder);
					break;
				}
			}

			if (charset.empty())
				charset = "us-ascii";
			else
				to_lower(charset);

			if (charset != "utf-8")
				text = Decode(text, SpecificEncoding(charset));

			// damned, some guys even code spaces!
			if (text == " ")
			{
				trim_right(result);
				trim_left(s);
			}

			result += text;
		}
	}

	return result;
}

// Parses one input expression and adds the values found in it to a
// collection of database items.
//
// expr		the expression, it can contain a comment (introduced by
//		';' or '#'), a date and/or time specification, ratios,
//		numeric values and quoted strings
// timestamp	the timestamp to be used for the items, date and time
//		specifications in the expression override it
// verbose	if true the normalized expression is printed
// collections	the names of all known collections
// items	receives the parsed items, it is changed only if the
//		whole expression could be parsed
// returns	the timestamp that has been used for the items
// throws	Xception on an illegal timestamp, an unknown collection
//		or unrecognized characters in the expression
//
// NOTE(review): comments, dates, times and numbers are searched before
// quoted strings are extracted, so a string value containing such
// patterns looks likely to be misparsed - to be confirmed
ptime InputImpl::Parse (const string& expr, const ptime& timestamp, bool verbose, const set<string>& collections, Database::CollectionsItems& items) throw (Xception)
{
	string line(expr);

	// the blanks around RX_COLLECTION_NAME are needed, since C++11 a
	// macro name glued to a string literal is taken as a literal
	// suffix and is not expanded
	static const regex rx_comment("[;#](.*)$");						// #... ;...
	static const regex rx_datetime("([0-9]{1,4})-([0-9]{1,2})-([0-9]{1,2})-([0-9]{1,2})[:]([0-9]{1,2})(?:[:]([0-9]{1,2}))?"); // 2009-02-27-15:27[:59]
	static const regex rx_time("([0-9]{1,2})[:]([0-9]{1,2})(?:[:]([0-9]{1,2}))?");		// 15:27[:59]
	static const regex rx_date("([0-9]{1,4})-([0-9]{1,2})-([0-9]{1,2})");			// 2009-02-27
	static const regex rx_string("\"((?:[^\"]|(?:\"\"))*)\"(" RX_COLLECTION_NAME ")");	// "..."RX_COLLECTION_NAME
	static const regex rx_ratio("([0-9]+(?:\\.[0-9]+)?)([/%])([0-9]+(?:\\.[0-9]+)?)");	// double[/%]double
	static const regex rx_numeric("([0-9]+(?:\\.[0-9]+)?)(" RX_COLLECTION_NAME ")?");	// doubleRX_COLLECTION_NAME

	smatch mr;

	// try to find a comment
	string comment;
	if (regex_search(line, mr, rx_comment, boost::match_not_dot_newline))
	{
		comment = mr[1];
		trim(comment);
		line.erase(mr.position(), mr.length());
	}

	// if the expression contains date and/or time components, adapt the
	// timestamp variable
	ptime t(timestamp);
	bool legal = true;
	try
	{
		if (regex_search(line, mr, rx_datetime, boost::match_not_dot_newline))
		{
			// date and time found
			const int year = lexical_cast<int>(mr[1]);
			const int month = lexical_cast<int>(mr[2]);
			const int day = lexical_cast<int>(mr[3]);
			const int hour = lexical_cast<int>(mr[4]);
			const int minute = lexical_cast<int>(mr[5]);
			const int second = (mr[6].length() > 0) ? lexical_cast<int>(mr[6]) : 0;

			// time_duration accepts values like 84:97 and
			// silently moves the timestamp into another day,
			// so the range has to be checked here
			legal = hour < 24 && minute < 60 && second < 60;

			t = ptime(date(year, month, day), time_duration(hour, minute, second, 0));
			line.erase(mr.position(), mr.length());
		}
		else if (regex_search(line, mr, rx_time, boost::match_not_dot_newline))
		{
			// time found
			{
				const int hour = lexical_cast<int>(mr[1]);
				const int minute = lexical_cast<int>(mr[2]);
				const int second = (mr[3].length() > 0) ? lexical_cast<int>(mr[3]) : 0;

				legal = hour < 24 && minute < 60 && second < 60;

				t -= t.time_of_day();
				t += time_duration(hour, minute, second, 0);
				line.erase(mr.position(), mr.length());
			}

			if (regex_search(line, mr, rx_date, boost::match_not_dot_newline))
			{
				// date found
				const int year = lexical_cast<int>(mr[1]);
				const int month = lexical_cast<int>(mr[2]);
				const int day = lexical_cast<int>(mr[3]);
				t = ptime(date(year, month, day), t.time_of_day());
				line.erase(mr.position(), mr.length());
			}
		}
	}
	catch (const std::out_of_range&)
	{
		// the date constructor refuses an impossible year, month
		// or day with an exception derived from std::out_of_range
		legal = false;
	}
	if (!legal || t == not_a_date_time)
	{
		// date is syntactical correct but contains an invalid
		// point in history, 2010-25-39-84:97 or so
		THROW(format("illegal timestamp in expression: %s") % expr);
	}

	if (verbose)
	{
		// we want to have possible date and time specifications
		// from the original expression only once on a line, so
		// we have to format the output a bit
		string s(line);
		trim(s);
		if (!comment.empty())
			s += string(" ; ") + comment;
		s = (format("    [%s] %s") % lexical_cast<string>(t) % s).str();
		encoded::cout << s << endl;
	}

	// now build a data vector for a database insert, the items are
	// collected in a temporary container first so that a faulty
	// expression leaves the caller's container untouched
	Database::CollectionsItems temp;
	if (!comment.empty())
		temp.insert(Database::CollectionsItems::value_type("#", Database::CollectionItem(t, comment)));

	// ratios, the collection name is the operator itself
	while (regex_search(line, mr, rx_ratio, boost::match_not_dot_newline))
	{
		string name(mr[2]);
		if (collections.find(name) == collections.end())
			THROW(format("unknown collection: %s") % name);

		temp.insert(Database::CollectionsItems::value_type(name, Database::CollectionItem(t, Ratio(lexical_cast<double>(mr[1]), lexical_cast<double>(mr[3])))));
		line.erase(mr.position(), mr.length());
	}

	// numeric values, a value without a name goes to collection "*"
	while (regex_search(line, mr, rx_numeric, boost::match_not_dot_newline))
	{
		string name(mr[2]);
		if (name.empty())
			name = '*';
		if (collections.find(name) == collections.end())
			THROW(format("unknown collection: %s") % name);

		temp.insert(Database::CollectionsItems::value_type(name, Database::CollectionItem(t, lexical_cast<double>(mr[1]))));
		line.erase(mr.position(), mr.length());
	}

	// quoted strings, a doubled quote stands for a single one
	while (regex_search(line, mr, rx_string, boost::match_not_dot_newline))
	{
		string name(mr[2]);
		if (collections.find(name) == collections.end())
			THROW(format("unknown collection: %s") % name);

		string value(mr[1]);
		replace_all(value, "\"\"", "\"");

		temp.insert(Database::CollectionsItems::value_type(name, Database::CollectionItem(t, value)));
		line.erase(mr.position(), mr.length());
	}

	// last check
	trim(line);
	if (!line.empty())
		THROW(format("expression contains unrecognized characters: %s") % line);

	// everything went fine, take over the items
	foreach(const Database::CollectionsItems::value_type& item, temp)
	{
		items.insert(item);
	}

	return t;
}

//=== FileInputImpl (base class) ===========================================
// Constructs a file based input implementation. The option key is passed
// on to InputImpl, the vector key is stored; FileInputImpl::Do reads its
// filenames from the vector option of that name if the option key is
// empty and prints the vector key in its verbose message.
FileInputImpl::FileInputImpl (const string& option_key, const string& vector_key)
	: InputImpl(option_key)
	, m_vector_key(vector_key)
{
}

void FileInputImpl::Do (const Config& config, Database& database) const throw (Xception)
{
	// get configuration data
	vector<string> filenames;
	bool keep = false;
	{
		if (m_option_key.empty())
		{
			// get all filenames from command line
			filenames = config.GetVectorOption(m_vector_key);
			keep = true;
		}
		else
		{
			// get the configured filename from config file
			const string& f = config.GetStringOption(m_option_key + ".filename");
			if (f.empty())
				THROW(format("missing specification in configuration file: %s.filename") % m_option_key);

			// check if there are wild cards, if so iterate
			// over the files and fill the filenames vector
			if (f.find('*') != string::npos || f.find('?') != string::npos)
			{
				string filemask(path(f).stem().string());
				{
					string extension(path(f).extension().string());
					if (extension != ".")
						filemask += extension;
					replace_all(filemask, ".", "\\.");
					replace_all(filemask, "*", ".*");
					replace_all(filemask, "?", ".");
				}
				const path& dir = path(f).parent_path();
				if (is_directory(dir))
				{
					regex rx(filemask);
					for (directory_iterator I(dir); I != directory_iterator(); I++)
					{
						const path& p = (*I).path();
						if (regex_match(p.filename().string(), rx) && is_regular_file((*I).status()))
							filenames.push_back(p.string());
					}
				}
				else
					THROW(format("invalid path specification in configuration file: %s.filename") % m_option_key);
			}
			else
				filenames.push_back(f);

			keep = config.GetBoolOption(m_option_key + ".keep");
		}
	}

	// process file by file
	foreach (const string& filename, filenames)
	{
		try
		{
			// get collections items
			Database::CollectionsItems items;
			{
				ifstream ifs(filename.c_str(), ios::in);
				if (!ifs.good())
					THROW(format("file not found: %s") % filename);

				bool verbose = config.GetBoolOption("verbose");
				if (verbose)
					encoded::cout << "parsing " << m_vector_key << " file " << filename << endl;

				ProcessFile(config, database, ifs, items);
			}

			// add the contents of the file in a single
			// transaction to the database
			database.AddCollectionsItems(items);

			// cleanup
			if (!keep)
				remove(filename.c_str());
		}
		catch (const Xception& x)
		{
			encoded::cerr << x.Message(Xception::complete) << endl;
		}
	}
}
