/*
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Niels Provos.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/types.h>
#include <sys/queue.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <ctype.h>
#include <netdb.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <signal.h>
#include <err.h>
#include <errno.h>
#include <time.h>

#include "config.h"

#include <event.h>

#include "connection.h"
#include "http.h"
#include "db.h"
#include "robots.h"

/* Prototypes for functions used in this file */
ssize_t atomicio(ssize_t (*f)(), int, void *, size_t);
void http_connectioncb(int, short, void *);
int http_isrobotstxt(struct uri *);

/* Debug verbosity level; defined in another translation unit */
extern int debug;
/*
 * Print a debug message when the verbosity is at least x.  y is a complete,
 * parenthesised fprintf argument list, e.g. DFPRINTF(2, (stderr, "...")).
 * NOTE(review): this expands to a bare `if', so an `else' that follows the
 * macro would attach to it.
 */
#define DFPRINTF(x,y)   if (debug >= x) fprintf y

/* URLs waiting to be fetched; filled by http_add(), drained by http_postevent() */
TAILQ_HEAD(uri_list, uri_small) uriqueue;
/* Handlers registered with http_register_dispatch(), walked by http_dispatch() */
TAILQ_HEAD(dispatch_list, dispatch) dispatchqueue;

/* Scratch buffer http_readbody() reads into when no content length is known */
u_char body[32768];
/* Complete "User-Agent: ..." request line built by http_setuseragent() */
char *useragent = NULL;
/* Copy of the name passed to http_setuseragent(), kept for robots.txt handling */
char *rawname = NULL;
/* Copy of the file name passed to http_init(); http_savestate() writes to it */
char *statefile = NULL;

/* Optional callback invoked by http_parseheader() with the Location of a redirect */
void (*http_movecb)(struct uri *, char *) = NULL;

/* http_postevent() refuses to start a connection once connections reaches this */
int http_maxconnects = HTTP_MAXCONNECTS;
/* Number of connections currently open */
int connections;

/* When non-zero, robots.txt is consulted before a URL is fetched */
int use_robots = 1;
/* Set by signal handler if we are supposed to terminate */
int terminate = 0;

/*
 * Bookkeeping for the number of URLs waiting in uriqueue.  Both macros are
 * wrapped in do { } while (0) so that each one behaves like exactly one
 * statement, also in front of an `else'.
 */

/* Count one more queued URL and remember the largest queue size seen */
#define STATS_INCFIFO	\
	do { \
		http_stats.fifosize++; \
		if (http_stats.fifosize > http_stats.maxsize) \
			 http_stats.maxsize = http_stats.fifosize; \
	} while (0)

/* Count one queued URL less */
#define STATS_DECFIFO	\
	do { \
		http_stats.fifosize--; \
	} while (0)

/* Counters describing the crawl so far; printed by http_print_stats() */
struct stats {
	size_t bodybytes;	/* sum of uri->bdlen over http_dispatch() calls */
	size_t headerbytes;	/* sum of uri->hdlen over http_dispatch() calls */
	size_t gets;		/* http_dispatch() calls for GET requests */
	size_t heads;		/* http_dispatch() calls for other requests */
	time_t start;		/* time at which http_init() ran */
	size_t fifosize;	/* maintained by STATS_INCFIFO/STATS_DECFIFO */
	size_t maxsize;		/* largest fifosize observed */
} http_stats;

/*
 * Allocate a zero-filled uri whose connection and save descriptors are
 * marked as unused (-1).  Returns NULL if the allocation fails.
 */
struct uri *
uri_new(void)
{
	struct uri *fresh = calloc(1, sizeof (struct uri));

	if (fresh != NULL) {
		fresh->fd = -1;
		fresh->save_fd = -1;
	}

	return (fresh);
}

/*
 * Release a uri together with everything it owns: its event, its
 * descriptors and its uri, header, body and format buffers.
 *
 * Closing the connection descriptor frees a connection slot, so queued
 * URLs are started right away until http_postevent() reports that no
 * further request can be started.
 */
void
uri_free(struct uri *uri)
{
	if (event_initialized(&uri->ev))
		event_del(&uri->ev);
	
	/* If this is an aborted robots.txt, we treat it as no robots.txt */
	if (use_robots)
		http_isrobotstxt(uri);

	if (uri->fd != -1) {
		close(uri->fd);
		connections--;

		/* Try to start new connections */
		if (TAILQ_FIRST(&uriqueue) != NULL) {
			while (http_postevent() != -1)
				continue;
		}
	}
	
	if (uri->save_fd != -1)
		close(uri->save_fd);

	/* free(NULL) is a no-op, so the buffers need no individual guards */
	free(uri->uri);
	free(uri->header);
	free(uri->body);
	free(uri->format);
	free(uri);
}

/*
 * Allocate a queue entry with a retry count of zero.
 *
 * The entry is zero-filled so that uri->uri starts out as NULL, which is
 * what uri_small_free() tests for.  Returns NULL if the allocation fails.
 */
struct uri_small *
uri_small_new(void)
{
	struct uri_small *uri;

	uri = calloc(1, sizeof (struct uri_small));
	if (uri == NULL)
		return (NULL);

	uri->retry = 0;

	return (uri);
}

/* Release a queue entry together with the URL string it owns. */
void
uri_small_free(struct uri_small *uri)
{
	/* free() ignores NULL, so an entry without a URL is fine too */
	free(uri->uri);
	free(uri);
}

/*
 * Signal handler: ignore any further SIGINT/SIGPIPE and raise the
 * terminate flag.  errno is preserved across the handler.
 */
void
terminate_handler(int sig)
{
	const int saved_errno = errno;

	signal(SIGINT, SIG_IGN);
	signal(SIGPIPE, SIG_IGN);
	terminate = 1;

	errno = saved_errno;
}

/*
 * Reset the module: empty both queues, initialise robots handling when it
 * is enabled, and clear the connection count and the statistics.
 *
 * When filename is given, a copy of it is kept in statefile and
 * terminate_handler is installed for SIGINT and SIGPIPE.
 *
 * Returns 0 on success and -1 if the copy or a handler could not be set up.
 */
int
http_init(char *filename)
{
	TAILQ_INIT(&uriqueue);
	TAILQ_INIT(&dispatchqueue);

	if (use_robots)
		http_robots_init();

	connections = 0;

	memset(&http_stats, 0, sizeof (http_stats));
	http_stats.start = time(NULL);

	/* Without a state file there is nothing more to prepare */
	if (filename == NULL)
		return (0);

	if ((statefile = strdup(filename)) == NULL) {
		perror("strdup");
		return (-1);
	}

	/* Setup signal handler */
	if (signal(SIGINT, terminate_handler) == SIG_ERR ||
	    signal(SIGPIPE, terminate_handler) == SIG_ERR) {
		perror("signal");
		return (-1);
	}

	return (0);
}

/*
 * Print the request, byte, throughput and queue statistics to stdout,
 * followed by the robots statistics when robots handling is enabled.
 */
void
http_print_stats(void)
{
	time_t spent;
	size_t bytes;

	spent = time(NULL) - http_stats.start;
	/* Rates are per second; avoid dividing by zero within the first one */
	if (spent <= 0)
		spent = 1;
	bytes = http_stats.headerbytes + http_stats.bodybytes;

	/* size_t counters are cast so that they match the %lu conversions */
	fprintf(stdout, "%lu GET for body %lu Kbytes\n",
		(unsigned long)http_stats.gets,
		(unsigned long)(http_stats.bodybytes / 1024));
	fprintf(stdout, "%lu HEAD for header %lu Kbytes\n",
		(unsigned long)http_stats.heads,
		(unsigned long)(http_stats.headerbytes / 1024));
	fprintf(stdout, "% 8.3f Requests/sec\n",
		(float)(http_stats.gets + http_stats.heads)/ spent);
	fprintf(stdout, "Throughput %f KBytes/sec\n",
		(float) bytes / 1024 / spent);
	fprintf(stdout, "Max fifo size: %lu, %f Kbyte, current: %lu/%d\n",
		(unsigned long)http_stats.maxsize,
		(float) http_stats.maxsize *
		(sizeof(struct uri_small) + 80)/1024,
		(unsigned long)http_stats.fifosize, connections);

	if (use_robots)
		http_robots_print_stats();
}

/*
 * Replace the stored user agent.  rawname receives a copy of name, and
 * useragent the complete "User-Agent: ..." request line built from name,
 * HTTP_NAME and HTTP_VERSION.
 *
 * Returns 0 on success.  Returns -1 if a copy could not be allocated; in
 * that case useragent is NULL.
 */
int
http_setuseragent(char *name)
{
	char line[1024];

	/* Drop the previous values; free() ignores NULL */
	free(useragent);
	useragent = NULL;
	free(rawname);

	/* Safe the raw name for robots.txt handling */
	if ((rawname = strdup(name)) == NULL)
		return (-1);

	snprintf(line, sizeof (line), "User-Agent: %s %s/%s\r\n",
		 name, HTTP_NAME, HTTP_VERSION);

	if ((useragent = strdup(line)) == NULL)
		return (-1);

	return (0);
}

/*
 * Append a handler for documents whose format starts with type.
 *
 * The type pointer is stored as is, not copied.  cb is the function
 * http_dispatch() calls for a matching document.
 *
 * Returns 0 on success and -1 if the entry could not be allocated.
 */
int
http_register_dispatch(char *type, int (*cb)(struct uri *))
{
	struct dispatch *dispatch;

	dispatch = malloc(sizeof (struct dispatch));
	if (dispatch == NULL) {
		/* __FUNCTION__ is not a string literal; pass it as an argument */
		warn("%s: malloc", __FUNCTION__);
		return (-1);
	}
	dispatch->type = type;
	dispatch->cb = cb;

	TAILQ_INSERT_TAIL(&dispatchqueue, dispatch, next);

	return (0);
}


/*
 * Send the HTTP/1.0 request for uri and wait for the response header.
 *
 * The file descriptor needs to be connected.  A GET is sent when
 * HTTP_REQUEST_GET is set in uri->flags, a HEAD otherwise.  The port is
 * added to the Host header only when it differs from HTTP_DEFAULTPORT.
 *
 * If the URL cannot be split, the request does not fit into the buffer,
 * or it cannot be written completely, the uri is released with
 * uri_free() and must not be used by the caller afterwards.
 */
void
http_fetch(struct uri *uri)
{
	char request[1024];
	char sport[NI_MAXSERV];
	char *host, *file;
	struct timeval tv;
	u_short port;
	size_t reqlen;
	int n;

	if (http_hostportfile(uri->uri, &host, &port, &file) == -1) {
		fprintf(stderr, "%s: illegal url: %s\n", __FUNCTION__,
			uri->uri);
		uri_free(uri);
		return;
	}
	/* fprintf(stdout, "Fetching: %s:%d %s\n", host, port, file); */
	snprintf(sport, sizeof(sport), "%d", port);

	n = snprintf(request, sizeof(request),
		     "%s %s HTTP/1.0\r\n"
		     "%s"
		     "Host: %s%s%s\r\n\r\n",
		     uri->flags & HTTP_REQUEST_GET ? "GET" : "HEAD", file,
		     useragent != NULL ? useragent : "",
		     host,
		     port != HTTP_DEFAULTPORT ? ":" : "",
		     port != HTTP_DEFAULTPORT ? sport : "");
	/* A truncated request would lack the terminating empty line */
	if (n < 0 || (size_t)n >= sizeof(request)) {
		fprintf(stderr, "%s: request too long for %s\n",
			__FUNCTION__, uri->uri);
		uri_free(uri);
		return;
	}
	reqlen = (size_t)n;

	if (atomicio(write, uri->fd, request, reqlen) != (ssize_t)reqlen) {
		warn("%s: write for %s", __FUNCTION__, uri->uri);
		uri_free(uri);
		return;
	}

	event_set(&uri->ev, uri->fd, EV_READ,
		  http_readheader, uri);
	timerclear(&tv);
	tv.tv_sec = HTTP_READTIMEOUT;
	event_add(&uri->ev, &tv);
}

/*
 * Open a connection to host:port with connection_new() and arrange for
 * http_connectioncb() to run with uri once the descriptor becomes
 * writable or HTTP_CONNECT_TIMEOUT seconds have passed.
 *
 * On success the connection count is incremented and the new descriptor
 * is returned; storing it in uri->fd is left to the caller.  Returns -1
 * if connection_new() fails.
 */
int
http_newconnection(struct uri *uri, char *host, u_short port)
{
	struct timeval tv;
	int fd;

	fd = connection_new(host, port);
	if (fd == -1) {
		/* __FUNCTION__ is not a string literal; pass it as an argument */
		fprintf(stderr, "%s: connection_new(%s,%d) failed\n",
			__FUNCTION__, host, port);
		return (-1);
	}

	connections++;

	event_set(&uri->ev, fd, EV_WRITE, http_connectioncb, uri);
	timerclear(&tv);
	tv.tv_sec = HTTP_CONNECT_TIMEOUT;
	event_add(&uri->ev, &tv);

	return (fd);
}

/*
 * Retry a connection that failed or timed out.
 *
 * The retry counter is incremented; while it does not exceed
 * HTTP_MAXRETRY the old descriptor is closed and a new connection to the
 * same host is started.  When the limit is exceeded, the URL cannot be
 * split, or no new connection can be opened, the uri is released and
 * another queued URL is given a chance with http_postevent().
 */
void
http_connection_retry(struct uri *uri)
{
	char *host;
	u_short port;

	uri->retry++;

	if (uri->retry > HTTP_MAXRETRY)
		goto fail;

	/* As we are closing a file descriptor here, try to start
	 * a new connection on failure
	 */
	close(uri->fd);
	/* Mark the descriptor as gone so that uri_free() cannot close it again */
	uri->fd = -1;
	connections--;

	if (http_hostportfile(uri->uri, &host, &port, NULL) == -1)
		goto fail;
	uri->fd = http_newconnection(uri, host, port);
	if (uri->fd == -1)
		goto fail;

	return;
 fail:
	/* __FUNCTION__ is not a string literal; pass it as an argument */
	warnx("%s: failed retry: %s", __FUNCTION__, uri->uri);
	uri_free(uri);

	http_postevent();
}

void
http_connectioncb(int fd, short which, void *arg)
{
	int error;
	socklen_t errsz = sizeof(error);
	struct uri *uri = arg;

	if (which == EV_TIMEOUT)
		goto retry;

	/* Check if the connection completed */
	if (getsockopt(uri->fd, SOL_SOCKET, SO_ERROR, &error,
		       &errsz) == -1) {
		warn(__FUNCTION__": getsockopt for %s", uri->uri);
		uri_free(uri);
		return;
	}

	if (error) {
		if (error == ETIMEDOUT || error == ECONNREFUSED)
			goto retry;

		fprintf(stderr,
			__FUNCTION__": %s failed: %s (%d)\n",
			uri->uri, strerror(error), error);
		uri_free(uri);
		return;
	}

	http_fetch(uri);

	return;

 retry:
	http_connection_retry(uri);
}

/*
 * Separate host, port and file from a URI.
 *
 * url must start with HTTP_PREFIX (compared case-insensitively).  The
 * host is everything up to the first '/', minus an optional ":port"
 * suffix; the file is the remainder with a leading '/', or "/" when there
 * is none.  Without an explicit port HTTP_DEFAULTPORT is reported.
 *
 * phost, pport and pfile may each be NULL when the caller is not
 * interested.  *phost and *pfile point into static buffers of 1024 bytes
 * that are overwritten by the next call; longer components are truncated.
 *
 * Returns 0 on success, -1 if the prefix does not match or the port is
 * not a number between 1 and 65535.
 */
int
http_hostportfile(char *url, char **phost, u_short *pport, char **pfile)
{
	static char host[1024];
	static char file[1024];
	char *p, *p2;
	int len;
	u_short port;

	len = strlen(HTTP_PREFIX);
	if (strncasecmp(url, HTTP_PREFIX, len))
		return (-1);

	url += len;

	strlcpy(host, url, sizeof (host));

	p = strchr(host, '/');
	if (p != NULL) {
		*p = '\0';
		p2 = p + 1;
	} else
		p2 = "";

	/* Generate request file */
	snprintf(file, sizeof(file), "/%s", p2);

	p = strchr(host, ':');
	if (p != NULL) {
		long value;

		*p = '\0';
		/*
		 * Range-check before narrowing: assigning the parsed number
		 * straight to a u_short would silently wrap values such as
		 * 65616 or -1 into a different, valid-looking port.
		 */
		value = strtol(p + 1, NULL, 10);
		if (value <= 0 || value > 65535)
			return (-1);
		port = (u_short)value;
	} else
		port = HTTP_DEFAULTPORT;

	if (phost != NULL)
		*phost = host;
	if (pport != NULL)
		*pport = port;
	if (pfile != NULL)
		*pfile = file;

	return (0);
}

/*
 * Queue the URLs recorded in a state file.
 *
 * Every line has the form "<g|h> <depth> <url>": the request type, a
 * non-zero depth and a URL starting with HTTP_PREFIX.  Each valid line is
 * handed to http_add_bfs().  Reading stops at the first line that does
 * not have this form.
 *
 * Returns 0 when the whole file was read and -1 on a corrupt line.  The
 * process exits if the file cannot be opened.
 */
int
http_restore_state(char *filename)
{
	FILE *state;
	char line[2048], *field, *rest;
	int type, depth;
	int linenum = 1;
	int corrupt = 0;

	if ((state = fopen(filename, "r")) == NULL)
		err(1, "fopen");

	fprintf(stdout, "Resuming from saved state...\n");
	while (fgets(line, sizeof (line), state) != NULL) {
		/* Treated as corrupt until every field has been accepted */
		corrupt = 1;

		/* Cut the line at the first CR or LF */
		line[strcspn(line, "\r\n")] = '\0';

		/* First field: request type */
		rest = line;
		field = strsep(&rest, " ");
		if (*field == 'g')
			type = HTTP_REQUEST_GET;
		else if (*field == 'h')
			type = HTTP_REQUEST_HEAD;
		else
			break;

		/* Second field: depth; what follows it is the URL */
		field = strsep(&rest, " ");
		if (field == NULL || rest == NULL)
			break;
		depth = atoi(field);
		if (depth == 0)
			break;

		if (strncasecmp(rest, HTTP_PREFIX, strlen(HTTP_PREFIX)) != 0)
			break;

		http_add_bfs(type, rest, depth);

		linenum++;
		corrupt = 0;
	}
	fclose(state);

	if (corrupt) {
		fprintf(stderr, "Line %d in state file corrupt.\n", linenum);
		return (-1);
	}

	fprintf(stdout, "Continuing with %d urls.\n", linenum - 1);
	return (0);
}

/*
 * Write every URL still waiting in uriqueue to the state file, one
 * "<g|h> <depth> <url>" line each, emptying the queue in the process.
 *
 * The entries are released once written.  A warning is printed when the
 * file could not be written completely.  The process exits if the state
 * file cannot be opened.
 */
void
http_savestate(void)
{
	struct uri_small *uri;
	FILE *fpstate = NULL;
	int linenum = 0;
	int failed;

	fprintf(stdout, "Terminating...\n");

	if ((fpstate = fopen(statefile, "w")) == NULL)
		err(1, "fopen");

	while ((uri = TAILQ_FIRST(&uriqueue)) != NULL) {
		TAILQ_REMOVE(&uriqueue, uri, next);

		fprintf(fpstate, "%c %d %s\n",
			uri->flags & HTTP_REQUEST_GET ? 'g' : 'h',
			uri->depth, uri->uri);

		linenum++;

		/* Nothing refers to the entry any more once it left the queue */
		uri_small_free(uri);
	}

	/* Buffered output may only fail when it is flushed by fclose() */
	failed = ferror(fpstate);
	if (fclose(fpstate) == EOF)
		failed = 1;
	if (failed)
		warnx("%s: could not write all state to %s",
		      __FUNCTION__, statefile);

	fprintf(stdout, "Terminated with %d saved urls.\n", linenum);
}

int
http_postevent(void)
{
	struct uri *uri;
	struct uri_small *suri, *next;
	char *host = NULL;
	u_short port;

	if (connections >= http_maxconnects) {
		DFPRINTF(2, (stderr, __FUNCTION__": %d >= http_maxconnects\n",
			     connections));
		return (-1);
	}

	if (terminate) {
		if (connections == 0) {
			http_savestate();
			return (-1);
		}

		fprintf(stdout,
			"Terminating: %3d of %3d connections pending.\n",
			connections, http_maxconnects);
		return (-1);
	}

	if ((uri = uri_new()) == NULL)
		return (-1);

	for (suri = TAILQ_FIRST(&uriqueue); suri; suri = next) {
		next = TAILQ_NEXT(suri, next);

		if (http_hostportfile(suri->uri, &host, &port, NULL) == -1) {
			fprintf(stderr, __FUNCTION__": illegal url: %s\n",
				suri->uri);

			TAILQ_REMOVE(&uriqueue, suri, next);
			uri_small_free(suri);
			continue;
		}

		if (use_robots) {
			struct http_robots *htrobot;

			if ((htrobot = http_robots_find(host, port)) == NULL) {
				if (http_robots_get(host, port) != -1)
					continue;
			} else {
				if (htrobot->flags & HTROBOT_PENDING)
					continue;
				if (!http_robots_allow(htrobot, suri->uri)) {
					fprintf(stderr, __FUNCTION__
						": robots diallowed url: %s\n",
						suri->uri);
				
					TAILQ_REMOVE(&uriqueue, suri, next);
					STATS_DECFIFO;

					uri_small_free(suri);
					continue;
				}
			}
			/* Everything okay */
		}

		uri->fd = http_newconnection(uri, host, port);
		if (uri->fd == -1) {
			/* These errors might be transient */
			suri->retry++;
			if (suri->retry >= HTTP_MAXRETRY) {
				TAILQ_REMOVE(&uriqueue, suri, next);
				uri_small_free(suri);
			}
			continue;
		}

		/* Everything is fine */
		break;
	}

	/* Nothing to do, go away */
	if (suri == NULL)
		goto out;

	TAILQ_REMOVE(&uriqueue, suri, next);
	STATS_DECFIFO;

	/* Copy small to large uri */
	uri->uri = suri->uri;
	uri->flags = suri->flags;
	uri->depth = suri->depth;

	/* Free small uri */
	suri->uri = NULL;
	uri_small_free(suri);

	return (0);

 out:
	if (uri != NULL)
		uri_free(uri);
	return (-1);
}

/*
 * Queue a URL for fetching.
 *
 * type is stored as the entry's flags (the GET or HEAD request flag) and
 * depth as its depth; url is copied.  With a non-zero bfs the entry is
 * appended to the queue, otherwise it is put at the front.  Afterwards
 * http_postevent() is given the chance to start a request.
 *
 * Returns 0 on success and -1 if memory could not be allocated.
 */
int
http_add(u_short type, char *url, u_short depth, int bfs)
{
	struct uri_small *uri;

	uri = uri_small_new();

	if (uri == NULL) {
		/* __FUNCTION__ is not a string literal; pass it as an argument */
		warn("%s: malloc", __FUNCTION__);
		return (-1);
	}

	uri->uri = strdup(url);
	if (uri->uri == NULL) {
		warn("%s: malloc", __FUNCTION__);
		uri_small_free(uri);
		return (-1);
	}

	/* GET or HEAD */
	uri->flags = type;
	uri->depth = depth;

	if (bfs) {
		TAILQ_INSERT_TAIL(&uriqueue, uri, next);
	} else {
		TAILQ_INSERT_HEAD(&uriqueue, uri, next);
	}
	STATS_INCFIFO;

	http_postevent();

	return (0);
}

/*
 * Event callback that collects the response header.
 *
 * arg is the struct uri being fetched.  Every chunk read is appended to
 * uri->header until HTTP_HEADEREND shows up.  Bytes that follow the end
 * of the header in the same chunk are moved to a fresh uri->body buffer,
 * and the header is then handed to http_parseheader().
 *
 * The uri is released on a timeout, a read error, end of file or an
 * allocation failure.
 */
void
http_readheader(int fd, short event, void *arg)
{
	char line[2048], *p;
	ssize_t n, offset;
	struct timeval tv;
	struct uri *uri = arg;

	if (event == EV_TIMEOUT) {
		/* __FUNCTION__ is not a string literal; pass it as an argument */
		fprintf(stderr, "%s: timeout %s\n", __FUNCTION__, uri->uri);
		uri_free(uri);

		return;
	}

	n = read(uri->fd, line, sizeof(line));
	if (n == -1) {
		if (errno == EINTR || errno == EAGAIN)
			goto readmore;
		warn("%s: read", __FUNCTION__);
		uri_free(uri);

		return;
	} else if (n == 0) {
		/* Uhm dum */
		fprintf(stderr, "%s: finished read on %s?\n", __FUNCTION__,
			uri->uri);

		uri_free(uri);
		return;
	}
	
	/* One extra byte for the terminator that strstr() relies on */
	p = realloc(uri->header, uri->hdlen + n + 1);
	if (p == NULL) {
		warn("%s: realloc", __FUNCTION__);
		uri_free(uri);

		return;
	}

	uri->header = p;
	memcpy(uri->header + uri->hdlen, line, n);
	uri->hdlen += n;
	uri->header[uri->hdlen] = '\0';

	p = strstr(uri->header, HTTP_HEADEREND);
	if (p == NULL)
		goto readmore;

	/* Offset of the first byte behind the header */
	offset = p + strlen(HTTP_HEADEREND) - uri->header;

	if (offset < uri->hdlen) {
		uri->bdlen = uri->hdlen - offset;
		uri->body = malloc(uri->bdlen + 1);
		if (uri->body == NULL) {
			warn("%s: malloc", __FUNCTION__);
			uri_free(uri);

			return;
		}
		memcpy(uri->body, uri->header + offset, uri->bdlen);
		uri->body[uri->bdlen] = '\0';

		/* Adjust header */
		uri->hdlen = offset;
		uri->header[offset] = '\0';
	}

	http_parseheader(uri);
	return;

 readmore:
	timerclear(&tv);
	tv.tv_sec = HTTP_READTIMEOUT;
	event_add(&uri->ev, &tv);
	return;

}

/*
 * Interpret the response header stored in uri->header.
 *
 * The status line must be "HTTP/1.0" or "HTTP/1.1" followed by a status
 * code.  The Content-Type, Content-Length and Location header lines are
 * picked up; the content type is copied to uri->format and the length is
 * stored in uri->length, with -1 meaning that it is unknown.
 *
 *  - HTTP_OK: a HEAD request is dispatched at once.  For other requests
 *    the body buffer is prepared and http_readbody() is installed as the
 *    read callback.
 *  - HTTP_MOVETEMP / HTTP_MOVEPERM: http_movecb is called with a copy of
 *    the Location value and the uri is released.
 *  - any other code, or a redirect without Location or callback, aborts
 *    the request.
 *
 * The uri is released on every error path.
 */
void
http_parseheader(struct uri *uri)
{
	char *p, *end;
	int major, minor, code;
	char *type = NULL, *length = NULL, *location = NULL;
	struct header parse[] = {
		{"Content-Type: ", &type},
		{"Content-Length: ", &length},
		{"Location: ", &location},
		{NULL, NULL}
	};
	struct header *hdr;
	struct timeval tv;

	if (sscanf(uri->header, "HTTP/%d.%d %d",
		   &major, &minor, &code) != 3 ||
	    major != 1 ||
	    (minor != 0 && minor != 1)) {
		/* __FUNCTION__ is not a string literal; pass it as an argument */
		fprintf(stderr, "%s: illegal header in %s\n", __FUNCTION__,
			uri->uri);

		uri_free(uri);
		return;
	}

	/* Parse header */
	p = uri->header;
	end = p + uri->hdlen;
	while(p < end) {
		char *lend;

		lend = strstr(p, "\r\n");
		if (lend == NULL) {
			fprintf(stderr, "%s: illegal header in %s\n",
				__FUNCTION__, uri->uri);

			uri_free(uri);
			return;
		}

		for (hdr = &parse[0]; hdr->name; hdr++) {
			if (strncasecmp(p, hdr->name,
					strlen(hdr->name)) == 0) {
				*hdr->where = p + strlen(hdr->name);
				break;
			}
		}

		p = lend + 2;
	}

	if (length == NULL)
		uri->length = -1;
	else {
		uri->length = atoi(length);
		/* A negative length is meaningless; read until end of file */
		if (uri->length < 0)
			uri->length = -1;
	}

	if (type != NULL) {
		/* The loop above established that every line ends in CRLF */
		end = strstr(type, "\r\n");

		uri->format = malloc(end - type + 1);
		if (uri->format == NULL) {
			warn("%s: malloc", __FUNCTION__);
			uri_free(uri);
			return;
		}

		memcpy(uri->format, type, end - type);
		uri->format[end - type] = '\0';
	}

	uri->code = code;

	switch (code) {
	case HTTP_OK:
		break;

	case HTTP_MOVETEMP:
	case HTTP_MOVEPERM:
		if (location == NULL || http_movecb == NULL)
			goto error;

		p = location;
		end = strstr(p, "\r\n");

		if ((location = malloc(end - p + 1)) == NULL) {
			warn("%s: malloc", __FUNCTION__);
			uri_free(uri);
			return;
		}

		memcpy(location, p, end - p);
		location[end - p] = '\0';

		/* User call back for move */
		(*http_movecb)(uri, location);

		free(location);
		uri_free(uri);
		return;

	default:
		goto error;
	}

	/* If we just had a HEAD request, we are done now */
	if (uri->flags & HTTP_REQUEST_HEAD) {
		http_dispatch(uri, HTTPDIS_FREE);
		return;
	}
	
	if (uri->length != -1) {
		size_t len;
		u_char *window;

		/*
		 * The header read may already have delivered body bytes.
		 * Ignore whatever exceeds the announced length, so that the
		 * amount still to be read can never become negative.
		 */
		if ((size_t)uri->bdlen > (size_t)uri->length)
			uri->bdlen = uri->length;

		len = uri->length;
		if (len > HTTP_MAXMEM)
			len = HTTP_MAXMEM;
		/* The buffer must hold at least what is already in it */
		if (len < (size_t)uri->bdlen)
			len = uri->bdlen;
		
		window = realloc(uri->body, len + 1);
		if (window == NULL) {
			warn("%s: malloc", __FUNCTION__);
			uri_free(uri);
			return;
		}
		uri->body = window;
		uri->body[len] = '\0';
		uri->bdmemlen = len;
		uri->bdread = uri->bdlen;
	}

	event_set(&uri->ev, uri->fd, EV_READ,
		  http_readbody, uri);
	timerclear(&tv);
	tv.tv_sec = HTTP_READTIMEOUT;
	event_add(&uri->ev, &tv);
	return;

 error:
	fprintf(stderr, "%s: abort %s: %d\n", __FUNCTION__, uri->uri, code);
	uri_free(uri);
	return;
}

/*
 * Event callback that collects the response body.
 *
 * arg is the struct uri being fetched.  Without a known length
 * (uri->length == -1) data is read into the shared scratch buffer and
 * appended to a growing uri->body until end of file.  With a known
 * length data is read straight into uri->body at offset uri->bdread,
 * limited by the bytes still expected and by the room left in the buffer.
 *
 * A finished document is passed to http_dispatch() with HTTPDIS_FREE.
 * When the buffer is full before the document is complete, it is passed
 * on with HTTPDIS_KEEP instead.
 *
 * The uri is released on a timeout, a read error, a short read, an
 * allocation failure or inconsistent buffer bookkeeping.
 */
void
http_readbody(int fd, short event, void *arg)
{
	struct uri *uri = arg;
	struct timeval tv;
	ssize_t n;
	u_char *where;
	ssize_t len;

	if (event == EV_TIMEOUT) {
		/* __FUNCTION__ is not a string literal; pass it as an argument */
		fprintf(stderr, "%s: timeout %s\n", __FUNCTION__, uri->uri);
		uri_free(uri);
		return;
	}

	if (uri->length == -1) {
		where = body;
		len = sizeof(body);
	} else {
		/*
		 * Never compute a read position or size from inconsistent
		 * bookkeeping; that would write outside of uri->body.
		 */
		if (uri->length < 0 ||
		    (long)uri->bdlen > (long)uri->length ||
		    (long)uri->bdread > (long)uri->bdmemlen) {
			fprintf(stderr, "%s: inconsistent body state on %s\n",
				__FUNCTION__, uri->uri);
			uri_free(uri);
			return;
		}

		where = uri->body + uri->bdread;
		len = uri->length - uri->bdlen;
		if (len > uri->bdmemlen - uri->bdread)
			len = uri->bdmemlen - uri->bdread;
	}

	n = read(uri->fd, where, len);
	if (n == -1) {
		if (errno == EINTR || errno == EAGAIN)
			goto readmore;
		warn("%s: read", __FUNCTION__);
		uri_free(uri);

		return;
	} else if (n == 0) {
		if (uri->length != -1 &&
		    uri->length != uri->bdlen) {
			fprintf(stderr, "%s: short read on %s\n",
				__FUNCTION__, uri->uri);
			uri_free(uri);
			return;
		}

		uri->length = uri->bdlen;
		goto done;
	}

	if (uri->length == -1) {
		u_char *p;

		/* Grow the body and append what landed in the scratch buffer */
		p = realloc(uri->body, uri->bdlen + n + 1);
		if (p == NULL) {
			warn("%s: realloc", __FUNCTION__);
			uri_free(uri);

			return;
		}

		uri->body = p;
		memcpy(uri->body + uri->bdlen, body, n);
		uri->bdlen += n;
		uri->body[uri->bdlen] = '\0';
	} else {
		uri->bdlen += n;
		uri->bdread += n;
	}

	if (uri->length == -1 || uri->bdlen < uri->length)
		goto readmore;

	/* We are done with this document */

 done:
	http_dispatch(uri, HTTPDIS_FREE);
	return;

 readmore:
	timerclear(&tv);
	tv.tv_sec = HTTP_READTIMEOUT;
	event_add(&uri->ev, &tv);

	/* The buffer is full: pass on what is there, keeping the uri */
	if (uri->length != -1 && uri->bdmemlen == uri->bdread)
		http_dispatch(uri, HTTPDIS_KEEP);
	
	return;
}

/*
 * If uri refers to "/robots.txt", pass it to http_robots_response() and
 * return 1.  Returns 0 for every other uri, including one without a URL
 * or with a URL that cannot be split.
 */
int
http_isrobotstxt(struct uri *uri)
{
	char *path;

	if (uri->uri != NULL &&
	    http_hostportfile(uri->uri, NULL, NULL, &path) != -1 &&
	    strcmp(path, "/robots.txt") == 0) {
		http_robots_response(uri);
		return (1);
	}

	return (0);
}



/*
 * Account for a document and hand it to the first interested handler.
 *
 * The header and body sizes and the request are added to the statistics.
 * A robots.txt document is consumed by http_isrobotstxt() and released.
 * Otherwise the registered handlers are tried in order: a handler matches
 * when the document's format (uri->format, or "text/html" when unset)
 * starts with its type, and the search stops at the first matching
 * handler whose callback does not return -1.
 *
 * The uri is released when mustfree is non-zero or when no handler
 * accepted it; otherwise it stays alive.
 */
void
http_dispatch(struct uri *uri, int mustfree)
{
	struct dispatch *handler;
	char *format = uri->format != NULL ? uri->format : "text/html";

	http_stats.headerbytes += uri->hdlen;
	http_stats.bodybytes += uri->bdlen;

	if (uri->flags & HTTP_REQUEST_GET)
		http_stats.gets++;
	else
		http_stats.heads++;

	if (use_robots && http_isrobotstxt(uri)) {
		uri_free(uri);
		return;
	}

	for (handler = TAILQ_FIRST(&dispatchqueue); handler != NULL;
	     handler = TAILQ_NEXT(handler, next)) {
		if (strncasecmp(format, handler->type,
				strlen(handler->type)) != 0)
			continue;

		/* Found an interested party */
		if (handler->cb(uri) != -1)
			break;
	}

	/* caller decides, unless nobody wanted the document */
	if (mustfree || handler == NULL)
		uri_free(uri);

	/* otherwise the object is still alive */
}

/*
 * Bring a URL into a canonical form.
 *
 * The fragment ("#...") is dropped, "/./" and repeated slashes are
 * collapsed, and "/.." path components are resolved against the
 * preceding component.  The scheme, host and port part is lower-cased;
 * the path keeps its case.  The port is included only when it differs
 * from HTTP_DEFAULTPORT.
 *
 * Returns a pointer to a static buffer of 1024 bytes that is overwritten
 * by the next call, or NULL if http_hostportfile() rejects the URL.  The
 * path buffer returned by http_hostportfile() is edited in place.
 */
char *
http_normalize_uri(char *uri)
{
	static char normal[1024];
	char *host, *file, *p;
	u_short port;

	if (http_hostportfile(uri, &host, &port, &file) == -1)
		return (NULL);

	if ((p = strchr(file, '#')) != NULL)
		*p = '\0';
	
	/* Remove identities */
	p = file;
	while ((p = strstr(p, "/./")) != NULL) {
		memmove(p, p + 2, strlen(p + 2) + 1);
	}

	p = file;
	while ((p = strstr(p, "//")) != NULL) {
		char *p2 = p + strspn(p, "/");

		memmove(p + 1, p2, strlen(p2) + 1);
	}
	
	/* Deal with ../ */
	p = file;
	while ((p = strstr(p, "/..")) != NULL) {
		char *p2;

		/* "/..name" is an ordinary component, not a parent reference */
		if (p[3] != '/' && p[3] != '\0') {
			p += 3;
			continue;
		}

		/* Find the slash that starts the preceding component, if any */
		for (p2 = p; p2 > file; ) {
			p2--;
			if (*p2 == '/')
				break;
		}

		/* Drop that component together with the "/.." */
		memmove(p2, p + 3, strlen(p + 3) + 1);

		/* Earlier text was scanned already; go on from the joint */
		p = p2;
	}

	/* Collapsing may have consumed the whole path; keep it rooted */
	if (file[0] == '\0') {
		file[0] = '/';
		file[1] = '\0';
	}

	if (port != HTTP_DEFAULTPORT)
		snprintf(normal, sizeof(normal), "http://%s:%d", host, port);
	else
		snprintf(normal, sizeof(normal), "http://%s", host);

	/*
	 * Read and write in separate steps: modifying p and reading *p in
	 * one expression is unsequenced.  tolower() needs an unsigned char
	 * value.
	 */
	for (p = normal; *p != '\0'; p++)
		*p = tolower((unsigned char)*p);

	/* Unix file names should not be lowered */
	strlcat(normal, file, sizeof (normal));

	return (normal);
}

/*
 * Return the normalized form of uri cut off right behind its last '/'.
 *
 * Depends on http_normalize_uri: the result lives in that function's
 * static buffer.  Returns NULL if the URL cannot be normalized.
 */
char *
http_basename(char *uri)
{
	char *normal, *slash;

	if ((normal = http_normalize_uri(uri)) == NULL)
		return (NULL);

	/* This should never happen */
	if ((slash = strrchr(normal, '/')) == NULL)
		return (NULL);

	/* Terminate directly behind the slash */
	slash[1] = '\0';

	return (normal);
}

/*
 * Combine a base URL with a reference found in a document.
 *
 *  - A reference that starts with HTTP_PREFIX or contains a ':' is
 *    returned unchanged.
 *  - A reference that starts with '#' yields NULL.
 *  - A reference that starts with '/' replaces the path of base.
 *  - Anything else is appended to base.
 *
 * In the last two cases the result is normalized with
 * http_normalize_uri(), which may also return NULL.
 */
char *
http_make_uri(char *base, char *rel)
{
	static char normal[1024];
	char *host;
	u_short port;

	if (strncasecmp(rel, HTTP_PREFIX, strlen(HTTP_PREFIX)) == 0 ||
	    strchr(rel, ':') != NULL)
		return (rel);

	if (rel[0] == '#')
		return (NULL);

	if (rel[0] != '/') {
		/* Relative to the base */
		snprintf(normal, sizeof (normal), "%s%s", base, rel);
		return (http_normalize_uri(normal));
	}

	/* Relative to the server root: keep only host and port of base */
	if (http_hostportfile(base, &host, &port, NULL) == -1)
		return (NULL);

	if (port == HTTP_DEFAULTPORT)
		snprintf(normal, sizeof (normal), "http://%s%s",
			 host, rel);
	else
		snprintf(normal, sizeof (normal), "http://%s:%d%s",
			 host, port, rel);

	return (http_normalize_uri(normal));
}

/*
 * Normalize line and return what db_seen() reports for the normalized
 * URL.  A URL that cannot be normalized yields 1.
 */
int
http_mark_seen(char *line)
{
	char *normal = http_normalize_uri(line);

	if (normal == NULL)
		return (1);

	return (db_seen(normal));
}
