/*
** Modular Logfile Analyzer
** Copyright 2000 Jan Kneschke <jan@kneschke.de>
**
** Homepage: http://www.modlogan.org
**

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version, and provided that the above
    copyright and permission notice is included with all distributed
    copies of this or derived software.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA

**
** $Id: parse.c,v 1.14 2003/04/18 18:40:40 ostborn Exp $
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <ctype.h>
#include <errno.h>

#include "mlocale.h"
#include "mplugins.h"
#include "mrecord.h"
#include "mdatatypes.h"
#include "misc.h"

#include "plugin_config.h"

/*
 * English three-letter month abbreviations in calendar order, so that the
 * array index of a name is directly usable as a struct tm 'tm_mon' value.
 * The list is NULL-terminated.
 */
const char *short_month[] = {
	"Jan", "Feb", "Mar", "Apr",
	"May", "Jun", "Jul", "Aug",
	"Sep", "Oct", "Nov", "Dec",
	NULL
};

/*
 * Test a string against the configured operating-system patterns
 * (conf->match_os).
 *
 * Leading blanks of str are skipped before matching.
 *
 * Returns 1 as soon as strmatch() returns non-zero for one of the patterns,
 * 0 if str is NULL, the pattern list is empty, or no pattern matched.
 */
int find_os (mconfig *ext_conf, char *str) {
	config_input *conf = ext_conf->plugin_conf;
	mlist *node = conf->match_os;
	int len;

	if (str == NULL || node == NULL) {
		return 0;
	}

	/* skip leading blanks */
	for (; *str == ' '; str++)
		;

	len = strlen(str);

	while (node != NULL) {
		mdata *entry = node->data;

		/* list nodes without data are simply skipped */
		if (entry != NULL &&
		    strmatch(entry->data.match.match, NULL, str, len)) {
			return 1;
		}

		node = node->next;
	}

	return 0;
}

/*
 * Test a string against the configured user-agent patterns
 * (conf->match_ua).
 *
 * Leading blanks of str are skipped before matching.
 *
 * Returns 1 as soon as strmatch() returns non-zero for one of the patterns,
 * 0 if str is NULL, the pattern list is empty, or no pattern matched.
 */
int find_ua (mconfig *ext_conf, char *str) {
	config_input *conf = ext_conf->plugin_conf;
	mlist *node = conf->match_ua;
	int len;

	if (str == NULL || node == NULL) {
		return 0;
	}

	/* skip leading blanks */
	for (; *str == ' '; str++)
		;

	len = strlen(str);

	while (node != NULL) {
		mdata *entry = node->data;

		/* list nodes without data are simply skipped */
		if (entry != NULL &&
		    strmatch(entry->data.match.match, NULL, str, len)) {
			return 1;
		}

		node = node->next;
	}

	return 0;
}

/*
 * Convert the textual timestamp of a log line into record->timestamp.
 *
 * str is matched against conf->match_timestamp and the capture groups are
 * read as:
 *   1 = day of month      2 = month name (one of short_month[])
 *   3 = year (1900 is subtracted for struct tm)
 *   4 = hour              5 = minute              6 = second
 *
 * The broken-down time is handed to mktime(); all other struct tm members
 * are zero.
 *
 * Returns 0 on success, -1 if the string does not match, a capture group
 * cannot be extracted (missing, or longer than the local buffer) or the
 * month name is not listed in short_month[].
 */
int parse_timestamp(mconfig *ext_conf, const char *str, mlogrec *record) {
/* parenthesised, otherwise '3 * N' expands to '3 * 20 + 1' */
#define N (20 + 1)
	int ovector[3 * N], n, i;
	char buf[10];
	long val[7];
	struct tm tm;
	config_input *conf = ext_conf->plugin_conf;

	if ((n = pcre_exec(conf->match_timestamp, conf->match_timestamp_extra, str, strlen(str), 0, 0, ovector, 3 * N)) < 0) {
		if (n == PCRE_ERROR_NOMATCH) {
			fprintf(stderr, "%s.%d: string doesn't match: %s\n", __FILE__, __LINE__, str);
		} else {
			fprintf(stderr, "%s.%d: execution error while matching: %d\n", __FILE__, __LINE__, n);
		}
		return -1;
	}

	/* everything has matched, take the different pieces */
	for (i = 1; i <= 6; i++) {
		/* a failed copy leaves buf untouched, so never parse it blindly */
		if (pcre_copy_substring(str, ovector, n, i, buf, sizeof(buf)) < 0) {
			fprintf(stderr, "%s.%d: can't extract timestamp field %d from: %s\n", __FILE__, __LINE__, i, str);
			return -1;
		}

		if (i == 2) {
			/* month name -> index into short_month[] */
			int m;

			val[i] = -1;
			for (m = 0; short_month[m]; m++) {
				if (!strcmp(buf, short_month[m])) {
					val[i] = m;
					break;
				}
			}

			if (val[i] < 0) {
				fprintf(stderr, "%s.%d: unknown month '%s' in: %s\n", __FILE__, __LINE__, buf, str);
				return -1;
			}
		} else {
			val[i] = strtol(buf, NULL, 10);
		}
	}

	memset(&tm, 0, sizeof(struct tm));

	tm.tm_mday = (int)val[1];
	tm.tm_mon  = (int)val[2];
	tm.tm_year = (int)val[3] - 1900;
	tm.tm_hour = (int)val[4];
	tm.tm_min  = (int)val[5];
	tm.tm_sec  = (int)val[6];

	record->timestamp = mktime (&tm);

	return 0;
#undef  N
}

/*
 * Walk the ';'-separated details of a user-agent string that follow the
 * opening '(' and store the recognised pieces in record.
 *
 * start points just behind the '('.  Every detail is NUL-terminated in
 * place, stripped of leading blanks and handed to find_ua() (only if
 * want_ua is set) and find_os().  The scan ends at the closing ')'.
 *
 * Returns 0 on success, -1 if the string ends before a ')' was seen.
 */
static int scan_useragent_details(mconfig *ext_conf, char *start, int want_ua, mlogrec_web_extclf *record) {
	char *tok = start, *end = start;
	int finished = 0;

	while (!finished) {
		while (*end && !(*end == ';' || *end == ')')) end++;

		if (!*end) {
			return -1;
		} else if (*end == ')') {
			finished = 1;
		}

		while (*tok == ' ') tok++;

		*end = '\0';

		/* NOTE(review): these tests look at the pointers, not at the
		 * buffer contents - confirm that the record really starts out
		 * with NULL req_useragent / req_useros members. */
		if (want_ua && !record->req_useragent && find_ua(ext_conf, tok)) {
			buffer_strcpy(record->req_useragent, tok);
		} else if (!record->req_useros && find_os(ext_conf, tok)) {
			buffer_strcpy(record->req_useros, tok);
		}

		tok = ++end;
	}

	return 0;
}

/*
 * Split a user-agent string into user agent and operating system and store
 * both in record.
 *
 * Three shapes are handled:
 *  - no '(' at all:            the whole string is the user agent
 *  - "... (compatible; ...)":  user agent and OS are both looked up among
 *                              the ';'-separated details in parentheses
 *  - "name (...)":             everything before the '(' is the user agent,
 *                              the OS is looked up among the details
 *
 * NOTE: although str is declared const it is passed to urlescape() and
 * NUL-terminated in place while it is split.
 *
 * Returns 0 on success, -1 if memory for the diagnostic copy cannot be
 * allocated or the parenthesised part is not closed by ')'.
 */
int parse_useragent(mconfig *ext_conf,const char *str, mlogrec_web_extclf *record) {
	char *ua = (char *)str, *paren, *buf_copy;
	int ret = 0;

	/* keep an unmodified copy for the error message below */
	buf_copy = malloc(strlen(str)+1);
	if (buf_copy == NULL) {
		fprintf(stderr, "%s.%d: out of memory\n", __FILE__, __LINE__);
		return -1;
	}
	strcpy(buf_copy, str);

	str = urlescape((char *)str);

	if ((paren = strchr(ua, '(')) != NULL) {
		/* must be evaluated before the '(' is overwritten below */
		int want_ua = (strstr(paren, "compatible") != NULL);

		if (!want_ua) {
			/* the part in front of the '(' is the user agent */
			*paren = '\0';
			buffer_strcpy(record->req_useragent, ua);
		}

		if (scan_useragent_details(ext_conf, paren + 1, want_ua, record) != 0) {
			if (ext_conf->debug_level > 0)
				fprintf(stderr, "%s: '%s'\n", _("the 'Useragent' field of the logfile is incorrect"),buf_copy);
			ret = -1;
		}
	} else {
		buffer_strcpy(record->req_useragent, str);
	}

	free(buf_copy);

	return ret;
}

/*
 * Split the request field of a log line into its parts.
 *
 * str is matched against conf->match_url; the capture groups are stored as
 *   1 -> record->req_method
 *   2 -> record->req_url
 *   4 -> record->req_getvars   (only if present and not empty)
 *   6 -> record->req_protocol  (only if present)
 *
 * Returns  0 on success,
 *         -2 if the request is just "-" (nothing to parse, not an error),
 *         -1 if the string does not match, fewer than two groups were
 *            captured or the substrings cannot be extracted.
 */
int parse_url(mconfig *ext_conf,const char *str, mlogrec_web *record) {
/* parenthesised, otherwise '3 * N' expands to '3 * 20 + 1' */
#define N (20 + 1)
	int ovector[3 * N], n;
#ifdef DEBUG_INPUT
	int i;
#endif
	config_input *conf = ext_conf->plugin_conf;
	const char **list;

	if (strcmp("-", str) == 0) {
		/* a request of '-' carries no information; report it with a
		 * dedicated code so that the caller can stay silent about it */
		return -2;
	}

	if ((n = pcre_exec(conf->match_url, conf->match_url_extra, str, strlen(str), 0, 0, ovector, 3 * N)) < 0) {
		if (n == PCRE_ERROR_NOMATCH) {
			fprintf(stderr, "%s.%d: string doesn't match: %s\n", __FILE__, __LINE__, str);
		} else {
			fprintf(stderr, "%s.%d: execution error while matching: %d\n", __FILE__, __LINE__, n);
		}
		return -1;
	}

	if (n < 3) {
		fprintf(stderr, "%s.%d: Matched fields below minimum: %d\n", __FILE__, __LINE__, n);
		return -1;
	}

	/* everything has matched, take the different pieces */
	if (pcre_get_substring_list(str, ovector, n, &list) != 0) {
		fprintf(stderr, "%s.%d: can't extract the matched substrings\n", __FILE__, __LINE__);
		return -1;
	}

	buffer_strcpy(record->req_method, (char *)list[1]);
	buffer_strcpy(record->req_url, (char *)list[2]);

	/* list[] holds n entries (0 .. n-1) followed by a NULL terminator,
	 * so index k is only a real substring if n > k */
	if (n > 4 && list[4][0] != '\0') {
		buffer_strcpy(record->req_getvars, (char *)list[4]);
	}

	if (n > 6) {
		buffer_strcpy(record->req_protocol, (char *)list[6]);
	}
#ifdef DEBUG_INPUT
	for (i = 0; i < n ; i++) {
		printf("--> %d: %s\n", i, list[i]);
	}

	fprintf(stderr, "%s.%d: %s, %s, %s, %s\n", __FILE__, __LINE__, record->req_method, record->req_url, record->req_getvars, record->req_protocol);
#endif
	free(list);

	return 0;
#undef  N
}


/*
 * Split a referrer into URL and GET variables.
 *
 * str is matched against conf->match_referrer; the capture groups are
 * stored as
 *   1 -> record->ref_url
 *   3 -> record->ref_getvars   (only if present)
 *
 * Returns 0 on success, -1 if the string does not match, no group was
 * captured or the substrings cannot be extracted.
 */
int parse_referrer(mconfig *ext_conf,const char *str, mlogrec_web_extclf *record) {
/* parenthesised, otherwise '3 * N' expands to '3 * 20 + 1' */
#define N (20 + 1)
	int ovector[3 * N], n;
	config_input *conf = ext_conf->plugin_conf;
	const char **list;

	if ((n = pcre_exec(conf->match_referrer, conf->match_referrer_extra, str, strlen(str), 0, 0, ovector, 3 * N)) < 0) {
		if (n == PCRE_ERROR_NOMATCH) {
			fprintf(stderr, "%s.%d: string doesn't match: %s\n", __FILE__, __LINE__, str);
		} else {
			fprintf(stderr, "%s.%d: execution error while matching: %d\n", __FILE__, __LINE__, n);
		}
		return -1;
	}

	if (n < 2) {
		fprintf(stderr, "%s.%d: Matched fields below minimum: %d\n", __FILE__, __LINE__, n);
		return -1;
	}

	/* everything has matched, take the different pieces; on failure the
	 * list pointer is not set and must not be touched */
	if (pcre_get_substring_list(str, ovector, n, &list) != 0) {
		fprintf(stderr, "%s.%d: can't extract the matched substrings\n", __FILE__, __LINE__);
		return -1;
	}

	buffer_strcpy(record->ref_url, (char *)list[1]);

	/* list[3] is a real substring only if more than three were returned */
	if (n > 3) {
		buffer_strcpy(record->ref_getvars, (char *)list[3]);
	}
#ifdef DEBUG_INPUT
	fprintf(stderr, "%s.%d: %s, %s\n", __FILE__, __LINE__, record->ref_url, record->ref_getvars);
#endif
	free(list);

	return 0;
#undef  N
}

/*
 * Identifiers of the log fields this plugin knows about.  Every entry of the
 * def[] table carries one of them in its 'id' member, and parse_record_pcre()
 * switches on that id to decide how a captured value is stored.
 */
#define M_NETSCAPE_FIELD_TIMESTAMP		1
#define M_NETSCAPE_FIELD_CLIENT_IP		2
#define M_NETSCAPE_FIELD_USERNAME		3
#define M_NETSCAPE_FIELD_AUTH_USERNAME		4
#define M_NETSCAPE_FIELD_REQUEST		5
#define M_NETSCAPE_FIELD_STATUS			6
#define M_NETSCAPE_FIELD_BYTES_SENT		7

/*
 * Describes one token that may appear in a "format=" header line.
 * The member order matters: the def[] table is initialised positionally.
 */
typedef struct {
	char	*field;	/* the token exactly as it is compared against the format line */
	int	id;	/* one of the M_NETSCAPE_FIELD_* constants */
	char	*match;	/* regex fragment appended to the record pattern for this field */
} netscape_field_def;

const netscape_field_def def[] =
	{ { "%Ses->client.ip%",		M_NETSCAPE_FIELD_CLIENT_IP,	"(.+?)"},
	  { "-", 			M_NETSCAPE_FIELD_AUTH_USERNAME,	"(-)"},
	  { "%Req->vars.auth-user%", 	M_NETSCAPE_FIELD_USERNAME,"(.+?)"},
	  { "[%SYSDATE%]",		M_NETSCAPE_FIELD_TIMESTAMP,	"\\[(.+?)\\]"},
	  { "\"%Req->reqpb.clf-request%\"",		M_NETSCAPE_FIELD_REQUEST,	"\"(.+?)\""},
	  { "%Req->srvhdrs.clf-status%",	M_NETSCAPE_FIELD_STATUS, "([0-9]+)"},
	  { "%Req->srvhdrs.content-length%",	M_NETSCAPE_FIELD_BYTES_SENT,"([0-9]+|-)"},

	  { NULL, 0, NULL}
};

int parse_netscape_field_info(mconfig *ext_conf, const char *_buffer) {
	config_input *conf = ext_conf->plugin_conf;
	char *buf, *pa, *pe;
	int pos = 0, i;
	buffer *match_buf;
	const char *errptr;
	int erroffset = 0;

	if (_buffer == NULL) return -1;

	buf = strdup(_buffer);

	for (pa = buf; (pe = strchr(pa, ' ')) != NULL; pa = pe + 1) {
		*pe = '\0';

		for (i = 0; def[i].field != NULL; i++) {
			if (strcmp(def[i].field, pa) == 0) {
				break;
			}
		}

		if (def[i].field != NULL) {
			if (pos >= M_NETSCAPE_MAX_FIELDS) return -1;

			conf->trans_fields[pos++] = i;
		} else {
			fprintf(stderr, "%s.%d: Unknown fieldtype: %s\n", __FILE__, __LINE__, pa);
			free(buf);
			return -1;
		}
	}

	/* don't forget the last param */
	if (*pa) {
		for (i = 0; def[i].field != NULL; i++) {
			if (strcmp(def[i].field, pa) == 0) {
				break;
			}
		}

		if (def[i].field != NULL) {
			if (pos >= M_NETSCAPE_MAX_FIELDS) return -1;
			conf->trans_fields[pos++] = i;
		} else {
			fprintf(stderr, "%s.%d: Unknown fieldtype: %s\n", __FILE__, __LINE__, pa);
			free(buf);
			return -1;
		}
	}

	free(buf);

	match_buf = buffer_init();

	for (i = 0; i < pos; i++) {
		if (match_buf->used == 0) {
			buffer_strcat(match_buf, "^");
		} else {
			buffer_strcat(match_buf, " ");
		}
		buffer_strcat(match_buf, def[conf->trans_fields[i]].match);
	}

	buffer_strcat(match_buf, "$");

	if ((conf->match_clf = pcre_compile(match_buf->ptr,
		0, &errptr, &erroffset, NULL)) == NULL) {

		fprintf(stderr, "%s.%d: rexexp compilation error at %s\n", __FILE__, __LINE__, errptr);
		buffer_free(match_buf);
		return -1;
	}
	buffer_free(match_buf);

	conf->match_clf_extra = pcre_study(conf->match_clf, 0, &errptr);
	if (errptr != NULL) {
		fprintf(stderr, "%s.%d: rexexp studying error at %s\n", __FILE__, __LINE__, errptr);
		return -1;
	}

	return 0;
}

/*
 * Parse one line of the logfile into record.
 *
 * A line starting with "format=" is handed to parse_netscape_field_info()
 * to (re)build the record regex.  Every other line is matched against
 * conf->match_clf and the captured values are stored according to the field
 * ids recorded in conf->trans_fields[].
 *
 * returns:
 *   M_RECORD_NO_ERROR   - the record was parsed
 *   M_RECORD_IGNORED    - the line was a "format=" header
 *   M_RECORD_CORRUPT    - the line, its timestamp or its request doesn't match
 *   M_RECORD_HARD_ERROR - no usable format, no web record, regex failure
 */
int parse_record_pcre(mconfig *ext_conf, mlogrec *record, buffer *b) {
/* parenthesised, otherwise '3 * N' expands to '3 * 30 + 1' */
#define N (30 + 1)
	const char **list;
	int ovector[3 * N], n, i;
	config_input *conf = ext_conf->plugin_conf;
	mlogrec_web *recweb = NULL;
	mlogrec_web_extclf *recext = NULL;

	/* remove the carriage return; the length check keeps the index
	 * inside the buffer for very short lines */
	if (b->used > 1 && b->ptr[b->used - 1-1] == '\r') {
		b->ptr[b->used - 1-1] = '\0';
		b->used--;
	}

	if (strncmp("format=", b->ptr, 7) == 0) {
		if (parse_netscape_field_info(ext_conf, b->ptr+7) != 0) {
			fprintf(stderr, "%s.%d: failed to parse 'format' header\n", __FILE__, __LINE__);
			return M_RECORD_HARD_ERROR;
		} else {
			return M_RECORD_IGNORED;
		}
	}

	/* without a parsed format header there is no regex to match against */
	if (conf->match_clf == NULL) return M_RECORD_HARD_ERROR;

	if (record->ext_type != M_RECORD_TYPE_WEB) {
		if (record->ext_type != M_RECORD_TYPE_UNSET) {
			mrecord_free_ext(record);
		}

		record->ext_type = M_RECORD_TYPE_WEB;
		record->ext = mrecord_init_web();
	}

	recweb = record->ext;

	if (recweb == NULL) return M_RECORD_HARD_ERROR;

	recext = mrecord_init_web_extclf();

	recweb->ext_type = M_RECORD_TYPE_WEB_EXTCLF;
	recweb->ext = recext;

/* parse a CLF record */
	if ((n = pcre_exec(conf->match_clf, conf->match_clf_extra, b->ptr, b->used - 1, 0, 0, ovector, 3 * N)) < 0) {
		if (n == PCRE_ERROR_NOMATCH) {
			fprintf(stderr, "%s.%d: string doesn't match: %s\n", __FILE__, __LINE__, b->ptr);
			return M_RECORD_CORRUPT;
		} else {
			fprintf(stderr, "%s.%d: execution error while matching: %d\n", __FILE__, __LINE__, n);
			return M_RECORD_HARD_ERROR;
		}
	}

	/* pcre_exec() returned 0: ovector was too small for all captures */
	if (n == 0) {
		fprintf(stderr, "%s.%d: !REPORT ME! N is too low -> %d\n", __FILE__, __LINE__, N);
		return M_RECORD_HARD_ERROR;
	}

	/* on failure the list pointer is not set and must not be touched */
	if (pcre_get_substring_list(b->ptr, ovector, n, &list) != 0) {
		fprintf(stderr, "%s.%d: can't extract the matched substrings\n", __FILE__, __LINE__);
		return M_RECORD_HARD_ERROR;
	}

	/* list[0] is the whole match, list[i+1] belongs to trans_fields[i] */
	for (i = 0; i < n-1; i++) {
		switch (def[conf->trans_fields[i]].id) {
		case M_NETSCAPE_FIELD_TIMESTAMP:
			if (parse_timestamp(ext_conf, (char *)list[i+1], record)) {
				free(list);
				return M_RECORD_CORRUPT;
			}
			break;
		case M_NETSCAPE_FIELD_CLIENT_IP:
			buffer_strcpy(recweb->req_host_ip, (char *)list[i+1]);
			break;
		case M_NETSCAPE_FIELD_USERNAME:
			buffer_strcpy(recweb->req_user, (char *)list[i+1]);
			break;
		case M_NETSCAPE_FIELD_STATUS:
			recweb->req_status = strtol(list[i+1], NULL,10);
			break;
		case M_NETSCAPE_FIELD_BYTES_SENT:
			recweb->xfersize = strtod(list[i+1], NULL);
			break;
		case M_NETSCAPE_FIELD_REQUEST:
			/* only -1 is an error; -2 (a request of "-") is accepted */
			if (parse_url(ext_conf, list[i+1], recweb) == -1) {
				free(list);
				return M_RECORD_CORRUPT;
			}
			break;
		/* no mapping */
		case M_NETSCAPE_FIELD_AUTH_USERNAME:
			break;
		case 255:
			if (ext_conf->debug_level > 2)
				fprintf(stderr, "the field '%s' is known, but not supported yet.\n",def[conf->trans_fields[i]].field);
			break;
		default:
			fprintf(stderr, "the field '%s' is unknown\n", def[conf->trans_fields[i]].field);
			break;
		}
	}

	free(list);

	return M_RECORD_NO_ERROR;
#undef  N
}

/*
 * Read the next line of the input file into conf->buf and parse it.
 *
 * Returns M_RECORD_HARD_ERROR if record is NULL, M_RECORD_EOF if mgets()
 * delivers no further line, otherwise the result of parse_record_pcre().
 * For a corrupt record the offending line is written to the debug output.
 */
int mplugins_input_netscape_get_next_record(mconfig *ext_conf, mlogrec *record) {
	config_input *conf = ext_conf->plugin_conf;
	int status;

	if (!record) {
		return M_RECORD_HARD_ERROR;
	}

	/* fetch the next line */
	if (mgets(&(conf->inputfile), conf->buf) == NULL) {
		return M_RECORD_EOF;
	}

	status = parse_record_pcre(ext_conf, record, conf->buf);

	if (status == M_RECORD_CORRUPT) {
		M_DEBUG1(ext_conf->debug_level, M_DEBUG_SECTION_PARSING, M_DEBUG_LEVEL_WARNINGS,
			 "affected Record: %s\n",
			 conf->buf->ptr
			 );
	}

	return status;
}
