/*
 * Copyright (c) 2004 Marc Balmer <marc@msys.ch>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <errno.h>
#include <sys/stat.h>
#include <signal.h>
#include <unistd.h>
#include <ctype.h>
#include <netdb.h>
#include <err.h>
#include <syslog.h>
#include <regex.h>

#include "smtp-vilter.h"

#ifdef LINUX
#include <sys/param.h>
#include "strlfunc.h"
#include "sys/queue.h"
#else
#include <sys/queue.h>
#include <sys/syslimits.h>
#endif

/* Configuration file read by vilter_init() when no path is passed in. */
#define	ATTACHMENT_CONF	"/etc/smtp-vilter/attachment.conf"

/*
 * MAXSTR sizes the field/value buffers used while reading the
 * configuration.  TIMEOUT, MAXBUF and MAXTRIES are not referenced by the
 * code in this file.
 */
#define TIMEOUT 	60
#define MAXBUF		1024
#define MAXSTR		64
#define MAXTRIES	256

/*
 * Path of a file whose contents replace removed attachments; set from the
 * "attachment-notification" directive, NULL when not configured.
 */
char *attachment_notification;

/* Parts of the message currently being scanned (filled by vilter_scan()). */
SLIST_HEAD(, part) dhead;
/* List head type for the arguments of one header (see analyze_header()). */
SLIST_HEAD(argshead, arg);
/* Compiled "unwanted-filename" patterns. */
SLIST_HEAD(pat_head, pattern) fname_pats;
/* Compiled "unwanted-content-type" patterns. */
struct pat_head ctype_pats;

/*
 * One body part of a multipart message.  start and end point into the
 * mmapped message; nothing is copied.
 */
struct part {
	/* First byte after the boundary line that opens the part. */
	char *start;
	/* Last byte of the part (inclusive), so the part is end - start + 1 bytes. */
	char *end;
	/* Set to 1 by analyze_part() when a pattern matched; the part is then dropped. */
	int skip;
	SLIST_ENTRY(part) parts;
};

/*
 * One ';'-separated element of a header value, either "name" or
 * "name=value".  name and value point into the message text and are not
 * NUL-terminated; use the accompanying lengths.
 */
struct arg {
	char *name;
	/* NULL when the element has no '=' part. */
	char *value;
	int name_len;
	int value_len;
	SLIST_ENTRY(arg) args;
};

/* A configured regular expression: its source text and compiled form. */
struct pattern {
	/* Heap copy of the pattern text, used in log and reason messages. */
	char *pat;
	/* Result of regcomp() on pat; released with regfree() in vilter_exit(). */
	regex_t preg;
	SLIST_ENTRY(pattern) pats;
};

#ifdef FUTURE
/* The base64 alphabet; a character's index is its 6-bit value. */
char *base64_chars = 
             "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
             "abcdefghijklmnopqrstuvwxyz"
             "0123456789+/";

/*
 * Map a base64 character to its index in base64_chars.
 *
 * Returns 0..63 for characters of the alphabet and 64 (the length of the
 * alphabet) for anything else, including the NUL character.
 */
static char
find_base64(char c)
{
	const char *hit;

	hit = strchr(base64_chars, c);
	if (hit == NULL)
		return (char) strlen(base64_chars);

	/* For c == '\0' strchr() yields the terminator, i.e. index 64. */
	return (char) (hit - base64_chars);
}

/*
 * Tell whether c belongs to the base64 alphabet (letters, digits, '+'
 * and '/').  Returns 1 if so, 0 otherwise.
 */
static int
is_base64(unsigned char c)
{
	if (c == '+' || c == '/')
		return 1;

	return isalnum(c) ? 1 : 0;
}
#endif /* FUTURE */

static struct pattern *
compile_pattern(char *value, int cflags)
{
	struct pattern *p;
	int error;
	
	p = malloc(sizeof(struct pattern));
	if (p == NULL)
		err(1, "attachment: out of memory");
	bzero(p, sizeof(struct pattern));
	p->pat = strdup(value);
	if (p->pat == NULL)
		err(1, "attachment: out of memory");
	if ((error = regcomp(&p->preg, p->pat, cflags)) != 0) {
		warnx("attachment: error in regular expression %s", p->pat);
		switch (error) {
			case REG_NOMATCH:
				warnx("regexec() failed to match");
				break;
			case REG_BADPAT:
				warnx("invalid regular expression");
				break;
			case REG_ECOLLATE:
				warnx("invalid collating element");
				break;
			case REG_ECTYPE:
				warnx("invalid character class");
				break;
			case REG_EESCAPE:
				warnx("\\ applied to unescapable character");
				break;
			case REG_ESUBREG:
				warnx("invalid backreference number");
				break;
			case REG_EBRACK:
				warnx("brackets [ ] not balanced");
				break;
			case REG_EPAREN:
				warnx("parentheses ( ) not balanced");
				break;
			case REG_EBRACE:
				warnx("braces { } not balanced");
				break;
			case REG_BADBR:
				warnx("invalid repetition count(s) in { }");
				break;
			case REG_ERANGE:
				warnx("invalid character range in [ ]");
				break;
			case REG_ESPACE:
				warnx("ran out of memory");
				break;
			case REG_BADRPT:
				warnx("?, *, or + operand invalid");
				break;
#ifndef LINUX
			case REG_EMPTY:
				warnx("empty (sub)expression");
				break;
			case REG_ASSERT:
				warnx("``can't happen'' --you found a bug");
				break;
			case REG_INVARG:
				warnx("invalid argument, e.g., negative-length string");
#endif
				break;
		}
		err(1, "attachment: error compiling regular expression %s, error code %d", p->pat, error);
	}

	return p;
}

/*
 * Initialise the attachment backend from its configuration file.
 *
 * cfgfile  path of the configuration file, or NULL to use ATTACHMENT_CONF
 *
 * Recognised directives:
 *   attachment-notification  file whose contents replace removed parts
 *   unwanted-content-type    regular expression matched against content types
 *   unwanted-filename        regular expression matched against file names
 *   case-sensitive           "true"/"false"; affects only the patterns
 *                            that follow it in the file
 *
 * A missing configuration file is not an error; both pattern lists then
 * stay empty.  Invalid patterns terminate the process (see
 * compile_pattern()).  Always returns 0.
 */
int
vilter_init(char *cfgfile)
{
	FILE *fp;
	char field[MAXSTR];
	char value[MAXSTR];
	struct pattern *p;
	int cflags = REG_EXTENDED | REG_ICASE | REG_NOSUB;

	attachment_notification = NULL;

#ifdef FUTURE
	warnx("future features are enabled");
#endif

	SLIST_INIT(&fname_pats);
	SLIST_INIT(&ctype_pats);

	if (cfgfile == NULL)
		cfgfile = ATTACHMENT_CONF;

	if ((fp = fopen(cfgfile, "r")) == NULL) {
		if (verbose)
			warnx("attachment: configuration file %s for attachment backend not found, using default values", cfgfile);
		return 0;
	}

	if (verbose)
		warnx("attachment: using configuration from file %s", cfgfile);

	/* read_conf() returns 0 for as long as it delivers a field/value pair. */
	while (!read_conf(fp, field, sizeof(field), value, sizeof(value))) {
		if (!strcmp(field, "attachment-notification")) {
			/* The last directive wins; release an earlier value. */
			free(attachment_notification);
			attachment_notification = strdup(value);
			if (attachment_notification == NULL)
				warnx("attachment: out of memory, using built-in notification text");
		} else if (!strcmp(field, "unwanted-content-type")) {
			p = compile_pattern(value, cflags);
			SLIST_INSERT_HEAD(&ctype_pats, p, pats);

			if (verbose)
				warnx("attachment: adding unwanted content-type pattern %s", value);
		} else if (!strcmp(field, "unwanted-filename")) {
			p = compile_pattern(value, cflags);
			SLIST_INSERT_HEAD(&fname_pats, p, pats);

			if (verbose)
				warnx("attachment: adding unwanted filename pattern %s", value);
		} else if (!strcmp(field, "case-sensitive")) {
			if (!strcmp(value, "true"))
				cflags &= ~REG_ICASE;
			else if (!strcmp(value, "false"))
				cflags |= REG_ICASE;
			else
				warnx("attachment: unknown argument to case-sensitive command, %s", value);
		} else if (verbose)
			warnx("attachment: unknown command %s=%s", field, value);
	}
	fclose(fp);

	return 0;
}

/*
 * Find the start of the next line.
 *
 * Scans from p for a CR, then steps over it and over a directly following
 * LF.  eom is an exclusive limit for the scan: the byte at eom itself is
 * never examined.
 *
 * Returns the position reached, or NULL if that position is eom.
 */
char *
sol(char *p, char *eom)
{
	char *cur;

	for (cur = p; cur < eom; cur++) {
		if (*cur == '\r')
			break;
	}

	/* Still inside the buffer, so the loop stopped on a CR. */
	if (cur < eom) {
		cur++;
		if (cur < eom && *cur == '\n')
			cur++;
	}

	if (cur == eom)
		return NULL;
	return cur;
}

/*
 * Length of the line starting at p: the number of bytes before the first
 * CR, or before eom if no CR comes first.
 */
int
mstrlen(const char *p, const char *eom)
{
	const char *cur;

	for (cur = p; cur < eom; cur++) {
		if (*cur == '\r')
			break;
	}

	return (int) (cur - p);
}

/*
 * Search for character c starting at p.
 *
 * The scan advances while it is below eom; the byte at the position where
 * it stops (possibly eom itself) is then compared with c.  Returns a
 * pointer to the match or NULL.
 */
const char *
mstrchr(const char *p, int c, const char *eom)
{
	const char *cur = p;

	for (;;) {
		if (!(cur < eom))
			break;
		if (*cur == c)
			break;
		cur++;
	}

	if (*cur != c)
		return NULL;
	return cur;
}

/*
 * Debug helper: print up to count bytes starting at p to stdout, stopping
 * early at eom, followed by a newline.
 */
void
mprint(char *p, int count, char *eom)
{
	char *cur = p;
	int left = count;

	for (;;) {
		if (left-- == 0)
			break;
		if (cur >= eom)
			break;
		putchar(*cur);
		cur++;
	}
	putchar('\n');
}

/*
 * Debug helper: print the line starting at p (up to the first CR or eom)
 * to stdout, enclosed in double quotes and followed by a newline.
 */
void
mputs(const char *p, const char *eom)
{
	const char *cur;

	putchar('"');
	for (cur = p; cur < eom && *cur != '\r'; cur++)
		putchar(*cur);
	putchar('"');
	putchar('\n');
}


	
/*
 * Parse one header field, including folded continuation lines, and split
 * its value into arguments.
 *
 * priv       backend data; priv->sdata is used for syslog_r()
 * hdr        first byte of the header line
 * eom        end of the message buffer
 * name_len   out: number of bytes of the field name (text before ':')
 * arguments  initialised list that receives one struct arg per
 *            ';'-separated element of the value, in order of appearance.
 *            The entries point into the message and are owned (and freed)
 *            by the caller.
 *
 * Returns a pointer past this header, NULL when the value extends to eom,
 * or - when the line contains no ':' - the position just after where the
 * name scan stopped, in which case no arguments are added.
 *
 * If memory for an argument cannot be allocated, the failure is logged and
 * splitting stops; arguments collected so far remain in the list, which
 * may therefore be empty.
 */
char *
analyze_header(struct be_data *priv, char *hdr, char *eom, int *name_len, struct argshead *arguments)
{
	char *value;
	int value_len;

	char *p;
	struct arg *a, *la = NULL;
	char *eov;

	/* The field name runs up to the ':' (or to CR/eom if there is none). */
	for (p = hdr, *name_len = 0; p < eom && *p != '\r' && *p != ':'; ++*name_len, ++p)
		;

	if (*p == '\r' || p == eom)
		return ++p;

	/* Skip the ':' and the white space in front of the value. */
	for (value = ++p; value < eom && (*value == ' ' || *value == '\t'); ++value)
		;

	/*
	 * Measure the value.  A line that starts with a space or tab
	 * continues the previous one, so keep going across such lines.
	 */
	p = value;
	value_len = -1;
	do {
		++value_len;

		while (p < eom && *p != '\r') {
			++value_len;
			++p;
		}
		if (p < eom && *p == '\r') {
			p++;	/* Step over the CR */
			++value_len;
		}
		if (p < eom)
			p++;	/* Position at first char of next line */

	} while ((p < eom) && (*p == ' ' || *p == '\t'));

	/* Now split the value part */

	p = value;
	eov = p + value_len;

	do {

		/* Skip whitespace */

		while (p < eov && (*p == ' ' || *p == '\t'))
			++p;

		a = calloc(1, sizeof(*a));

		if (a == NULL) {
			/* Log first so that %m still sees the allocation's errno. */
			syslog_r(LOG_ERR, priv->sdata, "attachment: unable to allocate memory for header, %m");
			warnx("memory allocation failure");
			break;
		}

		a->name = p;

		while (p < eov && *p != '=' && *p != ';' && *p != '\n' && *p != '\r') {
			++p;
			++a->name_len;
		}

		if (p < eov && *p == '=') {
			++p;

			/* The value may be quoted; the quotes are not part of it. */
			if (p < eov && *p == '"')
				++p;

			a->value = p;

			while (p < eov && *p != '"' && *p != ';' && *p != '\r' && *p != '\n') {
				++p;
				++a->value_len;
			}
		}
		if (p < eov && *p == ';')
			++p;
		if (p < eov)
			++p;

		/* Append, so the list keeps the order of the header text. */
		if (la == NULL) {
			SLIST_INSERT_HEAD(arguments, a, args);
		} else {
			SLIST_INSERT_AFTER(la, a, args);
		}

		la = a;
	} while (p < eov);

	if (*eov == '\r')
		++eov;

	return eov == eom ? NULL : eov + 1;	/* Possibly omit the + 1 */
}


/*
 * Check the headers of one message part against the configured patterns.
 *
 * priv     backend data; priv->sdata is used for syslog_r()
 * p        the part to examine; p->skip is set to 1 on a match
 * eom      end of the message buffer
 * pattern  buffer that receives a description of the matching pattern
 * patlen   size of that buffer
 *
 * The first argument of a Content-Type header is matched against
 * ctype_pats, and the value of every argument whose name starts with
 * "filename" (in any header) against fname_pats.  Processing stops at the
 * empty line that ends the part's headers.
 *
 * Returns 1 if the part is unwanted, 0 otherwise.  When memory for a
 * temporary copy cannot be allocated the failure is logged and that
 * particular check is skipped.
 *
 * NOTE(review): header and argument names are compared case-sensitively,
 * so e.g. "content-type:" is not recognised - confirm whether that is
 * intended.
 * NOTE(review): the reason text "filname pattern: " is misspelled; it is
 * left untouched here in case something matches on it.
 */
int
analyze_part(struct be_data *priv, struct part *p, char *eom, char *pattern, int patlen)
{
	int inhdr = 1;
	char *m;
	char *hdr;
	int name_len;
	struct argshead arguments;
	struct arg *a;
	char *filename, *ctype;
	struct pattern *pat;
	int skip = 0;
	int base64 = 0;
#ifdef FUTURE
	char *body;
	unsigned char encoded[4], decoded[3];
#endif

	m = p->start;
	do {

		if (mstrlen(m, p->end) == 0) {
			/* An empty line ends the headers of this part. */
			inhdr = 0;

#ifdef FUTURE
			body = sol(m, eom);
			/* Four encoded characters are needed for three decoded bytes. */
			if (base64 && body != NULL && body < (eom - 3)) {
				warnx("first two bytes of message body are %c %c", *body, *(body + 1));
				encoded[0] = (unsigned char) find_base64(*body);
				encoded[1] = (unsigned char) find_base64(*(body + 1));
				encoded[2] = (unsigned char) find_base64(*(body + 2));
				encoded[3] = (unsigned char) find_base64(*(body + 3));

				decoded[0] = (encoded[0] << 2) + ((encoded[1] & 0x30) >> 4);
				decoded[1] = ((encoded[1] & 0xf) << 4) + ((encoded[2] & 0x3c) >> 2);
				decoded[2] = ((encoded[2] & 0x3) << 6) + encoded[3];

				warnx("%c%c%c = %02x %02x", body[0], body[1], body[2], decoded[0], decoded[1]);

				/* 0xff 0xd8 is how a JPEG file starts. */
				if (decoded[0] == 0xff && decoded[1] == 0xd8) {
					warnx("found a jpeg image");
					p->skip = skip = 1;
					strlcpy(pattern, "JPEG IMAGE", patlen);
				}
			}
#endif
		} else {

			SLIST_INIT(&arguments);
			hdr = m;

			m = analyze_header(priv, hdr, eom, &name_len, &arguments);

			if (name_len >= strlen("Content-Type") && !strncmp(hdr, "Content-Type", strlen("Content-Type"))) {
				/* The content type itself is the first argument. */
				a = SLIST_FIRST(&arguments);

				if (a != NULL && a->name_len > 0) {
					ctype = malloc(a->name_len + 1);

					if (ctype == NULL) {
						syslog_r(LOG_ERR, priv->sdata, "attachment: unable to allocate memory for content-type, %m");
					} else {
						/* regexec() needs a NUL-terminated copy. */
						strncpy(ctype, a->name, a->name_len);
						ctype[a->name_len] = 0;

						SLIST_FOREACH(pat, &ctype_pats, pats) {
							if (regexec(&pat->preg, ctype, 0, NULL, 0) == 0) {
								p->skip = skip = 1;
								strlcpy(pattern, "content-type pattern: ", patlen);
								strlcat(pattern, pat->pat, patlen);
								if (verbose)
									warnx("content-type pattern %s matches content type %s", pat->pat, ctype);
							}
						}
						free(ctype);
					}
				}
			} else if (name_len >= strlen("Content-Transfer-Encoding") && !strncmp(hdr, "Content-Transfer-Encoding", strlen("Content-Transfer-Encoding"))) {
				SLIST_FOREACH(a, &arguments, args) {
					if (a->name_len >= strlen("base64") && !strncmp(a->name, "base64", strlen("base64")))
						base64 = 1;
				}
			} else if (name_len >= strlen("Content-Disposition") && !strncmp(hdr, "Content-Disposition", strlen("Content-Disposition"))) {
				;
			}

			/* A filename argument is checked whichever header carries it. */
			SLIST_FOREACH(a, &arguments, args) {
				if (a->name_len < strlen("filename") || strncmp(a->name, "filename", strlen("filename")) || a->value == NULL)
					continue;

				filename = malloc(a->value_len + 1);

				if (filename == NULL) {
					syslog_r(LOG_ERR, priv->sdata, "attachment: unable to allocate memory for filename, %m");
					continue;
				}
				strncpy(filename, a->value, a->value_len);
				filename[a->value_len] = 0;
				SLIST_FOREACH(pat, &fname_pats, pats) {
					if (regexec(&pat->preg, filename, 0, NULL, 0) == 0) {
						p->skip = skip = 1;
						strlcpy(pattern, "filname pattern: ", patlen);
						strlcat(pattern, pat->pat, patlen);
						if (verbose)
							warnx("filename pattern %s matches filename %s", pat->pat, filename);
					}
				}
				free(filename);
			}

			/* The arguments only reference the message; free the nodes. */
			while (!SLIST_EMPTY(&arguments)) {
				a = SLIST_FIRST(&arguments);
				SLIST_REMOVE_HEAD(&arguments, args);
				free(a);
			}
		}
	} while (m != NULL && inhdr);

	return skip;
}

/* Template for the rewritten message; lives where tmpnam() would put it. */
#ifdef P_tmpdir
#define ATT_TMP_TEMPLATE	P_tmpdir "/vilter.XXXXXX"
#else
#define ATT_TMP_TEMPLATE	"/tmp/vilter.XXXXXX"
#endif

/* Write all len bytes of buf to fd, retrying short writes; 0 or -1. */
static int
att_write_all(int fd, const void *buf, size_t len)
{
	const char *cur = buf;
	ssize_t n;

	while (len > 0) {
		n = write(fd, cur, len);
		if (n == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		cur += n;
		len -= (size_t) n;
	}
	return 0;
}

/* Write "--" boundary tail to fd; 0 or -1. */
static int
att_write_boundary(int fd, const char *boundary, int boundary_len, const char *tail)
{
	if (att_write_all(fd, "--", 2) == -1 ||
	    att_write_all(fd, boundary, (size_t) boundary_len) == -1 ||
	    att_write_all(fd, tail, strlen(tail)) == -1)
		return -1;
	return 0;
}

/* True if at least strlen(prefix) bytes remain in [m, eom] and match prefix. */
static int
att_has_prefix(const char *m, const char *eom, const char *prefix)
{
	size_t len = strlen(prefix);

	if (m > eom || (size_t) (eom - m) + 1 < len)
		return 0;
	return strncmp(m, prefix, len) == 0;
}

/*
 * Write the filtered message to newfd: a notification part followed by
 * every part of dhead that is not marked to be skipped; 0 or -1.
 */
static int
att_write_replacement(int newfd, const char *boundary, int boundary_len)
{
	static const char preamble[] = "This is a multi-part message in MIME format.\r\n\r\n";
	static const char notice_hdr[] = "Content-Type: text/plain\r\n\r\n";
	static const char notice_txt[] = "Vaporized an unwelcome attachment\r\n\r\n";
	struct part *p;
	FILE *fp;
	char buf[1024];
	size_t nread;
	int saved_errno;

	if (att_write_all(newfd, preamble, strlen(preamble)) == -1 ||
	    att_write_boundary(newfd, boundary, boundary_len, "\r\n") == -1 ||
	    att_write_all(newfd, notice_hdr, strlen(notice_hdr)) == -1)
		return -1;

	if (attachment_notification != NULL && (fp = fopen(attachment_notification, "r")) != NULL) {
		while ((nread = fread(buf, 1, sizeof(buf), fp)) > 0) {
			if (att_write_all(newfd, buf, nread) == -1) {
				/* Keep the write error for the caller's %m. */
				saved_errno = errno;
				fclose(fp);
				errno = saved_errno;
				return -1;
			}
		}
		fclose(fp);
	} else if (att_write_all(newfd, notice_txt, strlen(notice_txt)) == -1)
		return -1;

	SLIST_FOREACH(p, &dhead, parts) {
		if (p->skip == 0 && (p->end - p->start) > 0) {
			/* p->end is the last byte of the part, hence the + 1. */
			if (att_write_boundary(newfd, boundary, boundary_len, "\r\n") == -1 ||
			    att_write_all(newfd, p->start, (size_t) (p->end - p->start) + 1) == -1)
				return -1;
		}
	}

	return att_write_boundary(newfd, boundary, boundary_len, "--\r\n");
}

/*
 * Scan a message file for unwanted attachments and, if any are found,
 * write a copy of the message without them.
 *
 * priv     backend data; priv->sdata is used for syslog_r()
 * fn       path of the message file.  When attachments were removed the
 *          path of the newly written file is copied into this buffer.
 *          NOTE(review): the size of fn is not known here; as before, the
 *          caller's buffer is assumed to hold a temporary file name.
 * chroot   only used in a log message
 * virus    buffer that receives a description of the matching pattern
 * namelen  size of that buffer
 *
 * Returns SCAN_OK when the message was processed (whether or not parts
 * were removed) and SCAN_ERROR when the file could not be read or the
 * filtered copy could not be written.  With no patterns configured, or
 * for an empty file, SCAN_OK is returned without looking at the message.
 *
 * The rewritten file is created with mkstemp(), so its name cannot be
 * guessed and pre-created by another user.
 */
int
vilter_scan(struct be_data *priv, char *fn, char *chroot, char *virus, int namelen)
{
	char *msg;
	int fd;
	struct stat statbuf;
	struct part *p = NULL;
	struct part *lp = NULL;
	char *m, *q;
	char *eom;	/* End of message, points to the last byte of the mmapped file */
	char *boundary = NULL;
	int boundary_len = 0;
	int inhdr = 1;
	int skip = 0;
	char newfn[PATH_MAX];
	int newfd;
	int rc = SCAN_OK;

	if (SLIST_EMPTY(&fname_pats) && SLIST_EMPTY(&ctype_pats)) {
		return SCAN_OK;
	}

	if (stat(fn, &statbuf)) {
		syslog_r(LOG_ERR, priv->sdata, "attachment: can't stat file to scan (%s %s), %m", chroot, fn);
		return SCAN_ERROR;
	}

	/* An empty file holds no attachments and cannot be mmapped. */
	if (statbuf.st_size == 0)
		return SCAN_OK;

	if ((fd = open(fn, O_RDONLY, 0)) == -1) {
		syslog_r(LOG_ERR, priv->sdata, "attachment: can't open file to scan, %m");
		return SCAN_ERROR;
	}

	if ((msg = mmap(0, statbuf.st_size, PROT_READ, MAP_PRIVATE | MAP_FILE, fd, (off_t) 0L)) == MAP_FAILED) {
		syslog_r(LOG_ERR, priv->sdata, "attachment: can't mmap file, %m");
		close(fd);
		return SCAN_ERROR;
	}

	SLIST_INIT(&dhead);

	eom = msg + statbuf.st_size - 1;	/* eom points to the last byte */

	/*
	 * Pass 1: walk the message line by line.  In the message header look
	 * for the multipart boundary; in the body cut the message into parts
	 * at every boundary line.
	 */
	m = msg;
	do {
		if (inhdr == 1) {
			if (att_has_prefix(m, eom, "Content-Type: ")) {
				for (q = m; q < eom && isspace((unsigned char) *q) == 0; q++)
					;
				while (q < eom && isspace((unsigned char) *q))
					q++;

				if (att_has_prefix(q, eom, "multipart")) {

					boundary = q;
					while (boundary < (eom - strlen("boundary=")) && strncmp(boundary, "boundary=", strlen("boundary="))) {
						boundary++;
					}
					while (boundary < eom && *boundary != '=')
						++boundary;
					if (boundary < eom)
						++boundary;

					if (boundary < eom && *boundary == '"')
						++boundary;

					/* See RFC-2046 for characters allowed in a boundary */

					for (q = boundary, boundary_len = 0; q < eom && (
						isalpha((unsigned char) *q) || isdigit((unsigned char) *q) ||
						*q == '\'' || *q == '(' || *q == ')' ||
						*q == '+' || *q == '_' || *q == ',' ||
						*q == '-' || *q == '.' || *q == '/' ||
						*q == ':' || *q == '=' || *q == '?'); q++, boundary_len++)
						;
				}
			}
		} else if (boundary != NULL) {
			if (mstrlen(m, eom) >= boundary_len + 2 &&
			    !strncmp(m, "--", 2) &&
			    !strncmp(m + 2, boundary, boundary_len)) {

				/*
				 * p->end points to the last byte of the part, so
				 * to write the complete part, one has to write
				 * p->end - p->start + 1 bytes.
				 */

				if (p != NULL) {
					p->end = m - 1;
					lp = p;
				}

				if ((mstrlen(m, eom) >= boundary_len + 4) &&
				    !strncmp(m + 2 + boundary_len, "--", 2)) {
					/* closing boundary: no further part starts here */ ;
				} else if ((p = calloc(1, sizeof(*p))) != NULL) {
					if ((p->start = sol(m, eom)) != NULL) {
						/* Append to keep the parts in message order. */
						if (lp == NULL)
							SLIST_INSERT_HEAD(&dhead, p, parts);
						else
							SLIST_INSERT_AFTER(lp, p, parts);
					} else {
						free(p);
						p = NULL;
					}
				} else
					syslog_r(LOG_ERR, priv->sdata, "memory allocation error, not all message parts will be written");
			}
		}

		/* The first empty line ends the message header. */
		if (inhdr == 1 && mstrlen(m, eom) == 0)
			inhdr = 0;

		m = sol(m, eom);
	} while (m != NULL);

	/* A last part without a closing boundary runs to the end. */
	if (p != NULL && p->end == NULL)
		p->end = eom;

	/* Pass 2: check every part against the patterns. */
	SLIST_FOREACH(p, &dhead, parts) {
		skip += analyze_part(priv, p, eom, virus, namelen);
	}

	if (verbose)
		warnx("will skip %d attachments", skip);

	/* Pass 3: write the message without the unwanted parts. */
	if (skip > 0) {
		snprintf(newfn, sizeof(newfn), "%s", ATT_TMP_TEMPLATE);

		if ((newfd = mkstemp(newfn)) == -1) {
			syslog_r(LOG_ERR, priv->sdata, "attachment: can't create temporary file, %m");
			rc = SCAN_ERROR;
		} else if (att_write_replacement(newfd, boundary, boundary_len) == -1) {
			syslog_r(LOG_ERR, priv->sdata, "attachment: can't write filtered message, %m");
			close(newfd);
			unlink(newfn);
			rc = SCAN_ERROR;
		} else if (close(newfd) == -1) {
			syslog_r(LOG_ERR, priv->sdata, "attachment: can't write filtered message, %m");
			unlink(newfn);
			rc = SCAN_ERROR;
		} else
			strcpy(fn, newfn);
	}

	/* Release everything, also when one of the steps fails. */
	if (munmap(msg, statbuf.st_size)) {
		syslog_r(LOG_ERR, priv->sdata, "attachment: can't munmap file, %m");
		rc = SCAN_ERROR;
	}

	if (close(fd)) {
		syslog_r(LOG_ERR, priv->sdata, "attachment: can't close file, %m");
		rc = SCAN_ERROR;
	}

	while (!SLIST_EMPTY(&dhead)) {
		p = SLIST_FIRST(&dhead);
		SLIST_REMOVE_HEAD(&dhead, parts);
		free(p);
	}

	return rc;
}

/* Return the human-readable name of this backend. */
char *
vilter_name(void)
{
	static char backend_name[] = "Attachment Filter (attachment)";

	return backend_name;
}

/* Return the kind of backend this is: one that reports unwanted content. */
int
vilter_type(void)
{
	const int backend_type = SCAN_UNWANTED;

	return backend_type;
}

void
vilter_exit(void)
{
	struct pattern *p;
	
	while (!SLIST_EMPTY(&fname_pats)) {
		p = SLIST_FIRST(&fname_pats);
		SLIST_REMOVE_HEAD(&fname_pats, pats);
		regfree(&p->preg);
		free(p->pat);
		free(p);
	}

	while (!SLIST_EMPTY(&ctype_pats)) {
		p = SLIST_FIRST(&ctype_pats);
		SLIST_REMOVE_HEAD(&ctype_pats, pats);
		regfree(&p->preg);
		free(p->pat);
		free(p);
	}
	
	if (attachment_notification != NULL)
		free(attachment_notification);
}
