/*
** Modular Logfile Analyzer
** Copyright 2000 Jan Kneschke <jan@kneschke.de>
**
** Homepage: http://www.modlogan.org
**

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version, and provided that the above
    copyright and permission notice is included with all distributed
    copies of this or derived software.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA

**
** $Id: plugin_config.c,v 1.41 2003/05/15 13:04:04 miham Exp $
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>

#include "mlocale.h"
#include "mplugins.h"
#include "mrecord.h"
#include "mdatatypes.h"
#include "misc.h"

#include "plugin_config.h"

/*
 * Plugin entry point: allocate and initialise the per-plugin configuration.
 *
 * ext_conf  global configuration; its plugin_conf member receives the newly
 *           allocated config_input on success.
 *
 * Returns 0 on success, -1 if the version string of the host program does
 * not match the VERSION this plugin was built with, or if the configuration
 * structure cannot be allocated. On failure ext_conf->plugin_conf is left
 * untouched.
 */
int mplugins_input_clf_dlinit(mconfig *ext_conf) {
	config_input *conf = NULL;
	int i;

	if (0 != strcmp(ext_conf->version, VERSION)) {
		M_DEBUG2(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "version string doesn't match: (mla) %s != (plugin) %s\n", ext_conf->version, VERSION);
		return -1;
	}

	/* zero-filled, so every member that is not set below starts out as 0 */
	conf = calloc(1, sizeof(*conf));
	if (conf == NULL) {
		M_DEBUG0(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "out of memory\n");
		return -1;
	}

	conf->match_useragent = mlist_init();
	conf->record_list = mlist_init();
	conf->inputfilename = NULL;
	conf->format = NULL;
	conf->dont_strip_hostname = 0;

	/* will be replaced by setdefaults if we have a real inputfilename */
	conf->buf = buffer_init();

	/* start with an empty user-agent cache */
	for (i = 0; i < UA_CACHE_SIZE; i++) {
		conf->ua_cache[i].timestamp = 0;
		conf->ua_cache[i].key = NULL;
		conf->ua_cache[i].ua_os = NULL;
	}

	/* no regular expression has been compiled yet */
	conf->match_clf = NULL;
	conf->match_clf_squid = NULL;
	conf->match_clf_extended = NULL;
	conf->match_clf_extra = NULL;
	conf->match_clf_squid_extra = NULL;
	conf->match_clf_extended_extra = NULL;

	ext_conf->plugin_conf = conf;

	return 0;
}

/*
 * Plugin exit point: release everything owned by the plugin configuration
 * and detach it from ext_conf.
 *
 * Safe to call when no configuration is attached (plugin_conf == NULL), in
 * which case nothing is done.
 *
 * Always returns 0.
 */
int mplugins_input_clf_dlclose(mconfig *ext_conf) {
	config_input *conf = ext_conf->plugin_conf;
	int i;

	if (conf == NULL) {
		return 0;
	}

	free(conf->inputfilename);

	mclose(&(conf->inputfile));

	mlist_free(conf->match_useragent);
	mlist_free(conf->record_list);

	/* the study data belongs to match_clf, release it together with it */
	if (conf->match_clf_extra) pcre_free(conf->match_clf_extra);
	if (conf->match_clf) pcre_free(conf->match_clf);

	buffer_free(conf->buf);
	free(conf->format);

	for (i = 0; i < UA_CACHE_SIZE; i++) {
		free(conf->ua_cache[i].key);
		free(conf->ua_cache[i].ua_os);
	}

	free(conf);
	ext_conf->plugin_conf = NULL;

	return 0;
}

/*
 * Read this plugin's options from one section of a configuration file.
 *
 * The option table below binds each recognised key to the config_input
 * member that receives its value; the table is handed to
 * mconfig_parse_section() and that function's result is returned unchanged.
 *
 * ext_conf  global configuration (plugin_conf must already be set up)
 * filename  configuration file to read
 * section   name of the section inside that file
 */
int mplugins_input_clf_parse_config(mconfig *ext_conf, const char *filename, const char *section) {
	config_input *plugin = ext_conf->plugin_conf;

	const mconfig_values options[] = {
		{
			"match_useragent",
			M_CONFIG_TYPE_SUBSTITUTE, M_CONFIG_VALUE_APPEND,
			&plugin->match_useragent
		},
		{
			"inputfile",
			M_CONFIG_TYPE_STRING, M_CONFIG_VALUE_OVERWRITE,
			&plugin->inputfilename
		},
		{
			"format",
			M_CONFIG_TYPE_STRING, M_CONFIG_VALUE_OVERWRITE,
			&plugin->format
		},
		{
			"dont_strip_hostname",
			M_CONFIG_TYPE_INT, M_CONFIG_VALUE_OVERWRITE,
			&plugin->dont_strip_hostname
		},

		/* end-of-table marker */
		{ NULL, M_CONFIG_TYPE_INT, 0, NULL }
	};

	return mconfig_parse_section(ext_conf, filename, section, options);
}

/*
 * One entry of the format-directive table.
 *
 * Both strings are read-only: the entries are initialised from string
 * literals and are only ever compared or copied, never modified.
 */
typedef struct {
	const char	*field;	/* directive as written in the format string, e.g. "%h" */
	int	id;	/* field identifier associated with the directive */
	const char	*match;	/* regular-expression fragment that captures the field */
} clf_field_def;

/*
 * Table of the format directives understood by the format parser.
 *
 * Each entry maps a directive (as it appears in the configured format
 * string) to an M_CLF_FIELD_* identifier and to the regular-expression
 * fragment that is inserted into the generated pattern for that directive.
 * The table is terminated by an entry whose field is NULL.
 *
 * The list inside the table definition is the original overview of the
 * Apache LogFormat directives and their status. It is not fully in sync with
 * the table, and the table is what the code actually uses:
 *   - %T has an entry below although the list marks it as not handled
 *   - %c has an entry below but is missing from the list
 *   - %m is listed without a status and has no entry below
 *   - %l is marked "known, but not handled"; it has an entry so that it can
 *     be matched (NOTE(review): presumably the captured value is ignored
 *     later - confirm in the record parser)
 *   - %v and %V both map to M_CLF_FIELD_SERVER_IP
 */
static const clf_field_def def[] =
/*
  %a	Remote IP-address		- handled
  %A	Local IP-address		- not handled
  %b	Bytes sent, excluding HTTP headers. - handled
  %B	Bytes sent, excluding HTTP headers. - not handled
  %{FOOBAR}e   				- not handled
  	The contents of the environment
  	variable FOOBAR
  %f	Filename			- not handled
  %h	Remote host			- handled
  %H	The request protocol		- not handled
  %{FOOBAR}i				- partly handled
  	The contents of Foobar: header	  (only User-Agent and Referer
  	line(s) in the request sent to	   have entries in the table)
  	the server
  %l	Remote logname 			- known, but not handled
  	(from identd, if supplied)
  %m	The request method
  %{Foobar}n
  	The contents of note "Foobar"	- not handled
  	from another module.
  %{Foobar}o
  	The contents of Foobar: header	- not handled
  	line(s) in the reply.
  %p	The canonical Port of the 	- handled
  	server serving the request
  %P	The process ID of the child 	- not handled
  	that serviced the request.
  %q	The query string (prepended 	- not handled
  	with a ? if a query string
  	exists otherwise an empty
  	string)
  %r	First line of request		- handled
  %s	Status.  For requests that got  - only '%>s' is handled
  	internally redirected, this is
  	the status of the *original*
  	request --- %...>s for the last.
  %t	Time, in common log format time - handled
  	format (standard english format)
  %{format}t				- not handled
  	The time, in the form given by
  	format, which should be in
  	strftime(3) format.
  	(potentially localised)
  %T	The time taken to serve the 	- not handled (see note above)
  	request, in seconds.
  %u	Remote user (from auth; may be  - handled
  	bogus if return status (%s)
  	is 401)
  %U	The URL path requested.		- handled
  %v	The canonical ServerName of the - handled
  	server serving the request.
  %V	The server name according to	- handled
  	the UseCanonicalName setting.
*/
{ { "%h",		M_CLF_FIELD_REQ_HOST,	"(.+?)"},
	{ "%l",		M_CLF_FIELD_AUTH_USERNAME, "(.+?)"},
	{ "%u",		M_CLF_FIELD_USERNAME,	"(.+?)"},
	{ "%t", 	M_CLF_FIELD_TIMESTAMP,	"\\[(.+?)\\]"},
	{ "%r",	        M_CLF_FIELD_REQUEST,	"(.+?)"},
	{ "%>s",	M_CLF_FIELD_STATUS,	"([0-9]{1,3})"},
	{ "%b",		M_CLF_FIELD_BYTES_SEND,	"([-0-9]+)"},
	{ "%{User-Agent}i",	M_CLF_FIELD_USER_AGENT, "(.*?)"},
	{ "%{Referer}i",	M_CLF_FIELD_REFERRER,	"(.*?)"},
	{ "%T",		M_CLF_FIELD_DURATION,	"([-0-9]+)"},
	{ "%v",		M_CLF_FIELD_SERVER_IP,	"(.+)"},
	{ "%V",		M_CLF_FIELD_SERVER_IP,	"((?i)[a-z0-9][-.a-z0-9]+[a-z0-9]\\.[a-z0-9]{2,4})"},
	{ "%p",		M_CLF_FIELD_SERVER_PORT, "([-0-9]+)"},
	{ "%a",		M_CLF_FIELD_REMOTE_IP,	"([.0-9]+)"},
	{ "%c",		M_CLF_FIELD_CONNECTION_STATUS,	"([-+X])"},

	/* end-of-table marker */
	{ NULL, 0, NULL}
};


/*
 * Append src to the NUL-terminated string held in dst.
 *
 * dst_size is the total capacity of dst in bytes. Returns 0 on success and
 * -1 (after reporting the problem on stderr) if the result would not fit;
 * dst is left unchanged in that case.
 */
static int clf_regex_append(char *dst, size_t dst_size, const char *src) {
	size_t used = strlen(dst);
	size_t len = strlen(src);

	if (len >= dst_size - used) {
		fprintf(stderr, "format is too long: generated regex does not fit into %lu bytes\n",
			(unsigned long) dst_size);
		return -1;
	}

	memcpy(dst + used, src, len + 1);

	return 0;
}

/*
 * Store c at buf[*len] and advance *len, always keeping one byte in reserve
 * for the terminating NUL.
 *
 * Returns 0 on success and -1 (after reporting the problem on stderr) if
 * the buffer is full.
 */
static int clf_token_putc(char *buf, size_t buf_size, int *len, char c) {
	if ((size_t) *len + 1 >= buf_size) {
		fprintf(stderr, "format element is too long (max. %lu characters)\n",
			(unsigned long) (buf_size - 1));
		return -1;
	}

	buf[(*len)++] = c;

	return 0;
}

/*
 * Translate a format string into a regular expression and a list of field
 * identifiers.
 *
 * The format consists of directives (%x, %>x or %{name}x) and literal text.
 * Every directive is looked up in def[]; its regex fragment is appended to
 * the pattern and its identifier is stored in conf->trans_fields[] in order
 * of appearance. Unknown directives are reported, stored as
 * M_CLF_FIELD_UNSET and matched by a generic fragment. Literal text is
 * copied into the pattern with '.', '(', ')', '[' and ']' escaped; any other
 * character is passed through unchanged.
 *
 * On success the compiled pattern is stored in conf->match_clf and its study
 * data in conf->match_clf_extra.
 *
 * ext_conf   global configuration; plugin_conf must point to a config_input
 * logformat  NUL-terminated format string
 *
 * Returns 0 on success, -1 if
 *   - the format contains a character that is not allowed inside a
 *     directive, or ends in the middle of a directive,
 *   - the format has more directives than M_CLF_MAX_FIELDS,
 *   - a literal run, a directive or the generated pattern exceeds the
 *     internal buffers,
 *   - the generated pattern cannot be compiled or studied.
 */
int parse_clf_field_info(mconfig *ext_conf, const char *logformat) {
	config_input *conf = ext_conf->plugin_conf;
	const char *s;
	const char *errptr;
	int erroffset = 0;
	enum {KEY, NOKEY} state = NOKEY;
	enum {KEY_PLAIN, KEY_BRACES} sub_state = KEY_PLAIN;

	char buf[255] = "";		/* current literal run or directive */
	char regex_buf[1024] = "^";	/* pattern built so far */
	int buf_i = 0, i = 0, pos = 0;


	/* %[a-z] is a key
	 * %{.*}[a-z] is a key
	 * the rest are character that are static
	 */

	for (s = logformat; *s; s++) {
		switch (state) {
		case NOKEY:
			if (*s == '%') {
				/* flush the literal text collected so far */
				buf[buf_i] = '\0';
				if (clf_regex_append(regex_buf, sizeof(regex_buf), buf)) {
					return -1;
				}

				/* start the key */
				state = KEY;
				buf_i = 0;
				buf[buf_i++] = *s;
			} else {
				if (*s == '.' ||
				    *s == '(' || *s == ')' ||
				    *s == '[' || *s == ']') {
					if (clf_token_putc(buf, sizeof(buf), &buf_i, '\\')) {
						return -1;
					}
				}
				if (clf_token_putc(buf, sizeof(buf), &buf_i, *s)) {
					return -1;
				}
			}
			break;
		case KEY:
			switch (sub_state) {
			case KEY_PLAIN:
				if ((*s >= 'a' && *s <= 'z') ||
				    (*s >= 'A' && *s <= 'Z')) {
					/* finish key */

					if (clf_token_putc(buf, sizeof(buf), &buf_i, *s)) {
						return -1;
					}
					buf[buf_i] = '\0';

					/* the directive has to match a table entry completely,
					 * not just be a prefix of it */
					for (i = 0; def[i].field != NULL; i++) {
						if (strcmp(def[i].field, buf) == 0) {
							break;
						}
					}

					/* known or not, the directive occupies one slot */
					if (pos >= M_CLF_MAX_FIELDS) {
						fprintf(stderr, "pos >= M_CLF_MAX_FIELDS\n");

						return -1;
					}

					if (def[i].field) {
						/* set field type */
						conf->trans_fields[pos++] = def[i].id;

						if (clf_regex_append(regex_buf, sizeof(regex_buf), def[i].match)) {
							return -1;
						}
					} else {
						conf->trans_fields[pos++] = M_CLF_FIELD_UNSET;

						if (clf_regex_append(regex_buf, sizeof(regex_buf), "([-_:0-9a-zA-Z]+)")) {
							return -1;
						}

						M_DEBUG1(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
							 "Unknown fieldtype: %s\n", buf);
					}

					/* set the new non-key buf */

					buf_i = 0;

					state = NOKEY;
				} else if (*s == '>' || *s == '{') {
					if (clf_token_putc(buf, sizeof(buf), &buf_i, *s)) {
						return -1;
					}
					if (*s == '{') {
						sub_state = KEY_BRACES;
					}
				} else {
					fprintf(stderr, "character not allowed outside of {...}: %c\n", *s);
					return -1;
				}
				break;
			case KEY_BRACES:
				if ((*s >= 'a' && *s <= 'z') ||
				    (*s >= 'A' && *s <= 'Z') ||
				    (*s >= '0' && *s <= '9') ||
				    *s == '_' || *s == '-' || *s == '}') {
					if (clf_token_putc(buf, sizeof(buf), &buf_i, *s)) {
						return -1;
					}
					if (*s == '}') {
						sub_state = KEY_PLAIN;
					}
				} else {
					fprintf(stderr, "character not allowed between {...}: %c\n", *s);
					return -1;
				}
				break;
			}
			break;
		}
	}

	buf[buf_i] = '\0';

	if (state == KEY) {
		/* the format ended before the directive was complete */
		fprintf(stderr, "incomplete key at the end of the format: %s\n", buf);
		return -1;
	}

	/* trailing literal text and the end-of-line anchor */
	if (clf_regex_append(regex_buf, sizeof(regex_buf), buf) ||
	    clf_regex_append(regex_buf, sizeof(regex_buf), "$")) {
		return -1;
	}

	M_DEBUG1(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_VERBOSE,
		"match = %s\n", regex_buf);

	if ((conf->match_clf = pcre_compile(regex_buf,
		0, &errptr, &erroffset, NULL)) == NULL) {

		M_DEBUG1(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "regexp compilation error at %s\n", errptr);

		return -1;
	}

	/* pcre_study() reports failure through errptr, a NULL result alone
	 * only means that there is no study data */
	conf->match_clf_extra = pcre_study(conf->match_clf, 0, &errptr);
	if (errptr != NULL) {
		M_DEBUG1(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
			 "regexp studying error at %s\n", errptr);
		return -1;
	}
	return 0;
}


/*
 * Finish the plugin setup after the configuration has been read.
 *
 * Opens the input: the configured input file, or standard input when no
 * file name is set or the name is "-". Then prepares the line-matching
 * regular expression: the built-in pattern when no format is configured,
 * otherwise the pattern generated from the format by parse_clf_field_info().
 *
 * Returns 0 on success, -1 if the input cannot be opened or the pattern
 * cannot be set up.
 */
int mplugins_input_clf_set_defaults(mconfig *ext_conf) {
	config_input *conf = ext_conf->plugin_conf;

	if (conf->inputfilename && strcmp(conf->inputfilename, "-") != 0) {
		if (mopen(&(conf->inputfile), conf->inputfilename)) {
			M_DEBUG2(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "%s: %s\n", conf->inputfilename, strerror(errno));
			return -1;
		}
		M_DEBUG1(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_VERBOSE,
			 "(clf) using %s as inputfile\n", conf->inputfilename);
	} else {
		/* stdin */
		if (mopen(&(conf->inputfile), NULL)) {
			/* inputfilename may be NULL in this branch, so it must not
			 * be handed to %s */
			M_DEBUG2(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "%s: %s\n", "(stdin)", strerror(errno));
			return -1;
		}

		M_DEBUG0(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_VERBOSE,
			 "(clf) using (stdin) as inputfile\n");
	}

	if (!conf->format) {
		/* no format configured: use the built-in pattern */
		const char *errptr;
		int erroffset = 0;
		if ((conf->match_clf = pcre_compile(
			"^(.*?) (.*?) (.*?) \\[(.*?)\\] \"(.*?)\" ([-0-9]{1,3}) ([-0-9]+)( \"(.*?)\" \"(.*?)\"| ([A-Z:_]+?)|)\\s*$",
			0, &errptr, &erroffset, NULL)) == NULL) {

			M_DEBUG1(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS,
				 "regexp compilation error at %s\n", errptr);
			return -1;
		}
	} else {
		if (parse_clf_field_info(ext_conf, conf->format)) {
			return -1;
		}
	}

	/* dont_strip_hostname needs no default here: an unset value is already 0 */

	return 0;
}

/*
 * Fill in the plugin's function table.
 *
 * func  table to populate; the life-cycle, configuration and record-reading
 *       slots are pointed at this plugin's functions, the remaining slots
 *       are set to NULL.
 *
 * Always returns 0.
 */
int mplugins_init(mplugin *func) {
	/* slots this plugin does not implement */
	func->insert_record = NULL;
	func->gen_report = NULL;
	func->gen_history = NULL;

	/* loading and unloading */
	func->dlinit = mplugins_input_clf_dlinit;
	func->dlclose = mplugins_input_clf_dlclose;

	/* configuration */
	func->parse_config = mplugins_input_clf_parse_config;
	func->set_defaults = mplugins_input_clf_set_defaults;

	/* reading records */
	func->get_next_record = mplugins_input_clf_get_next_record;

	return 0;
}
