/*
 * Copyright (c) 2003-2011
 * Distributed Systems Software.  All rights reserved.
 * See the file LICENSE for redistribution information.
 */

/*
 * uproxy - micro HTTP proxy
 * Invoked as a CGI, the request (which must be a GET or a POST) is forwarded
 * and the reply is returned.
 *
 * The request URI looks like:
 *   .../dacs_uproxy/<proxied-hostname>[/<proxied-path>][?<query-args>]
 * where <proxied-hostname> is a domain name with an optional port number
 *
 * For proxying to be enabled to a given host, a UPROXY_APPROVED directive
 * must specify the <proxied-hostname>.  The "approved list" consists of zero
 * or more directives, each with the following syntax:
 *    [{'http' | 'https'} '://'] hostname [:port] [path]
 * If no scheme is specified, http is used.
 * If a path is given, it is used as a prefix to the <proxied-path>
 * E.g., example.com,https://foo.example.com,http://bar.example.com:8080,
 *         https://baz.example.com:8443/some/path
 *
 * The <proxied-hostname> is matched against each
 * hostname; the first case-insensitive match is used.
 * If <proxied-hostname> includes a port number, that port number must
 * explicitly appear in the approved list entry for it to match.
 * Proxying is rejected if no match is found.
 * E.g., given the request .../dacs_uproxy/foo.example.com/cgi-bin/auggie
 * a UPROXY_APPROVED directive must specify foo.example.com:
 *   UPROXY_APPROVED "http://foo.example.com:8080"
 * This will map the request to http://foo.example.com:8080/cgi-bin/auggie
 *
 * Query arguments apply to the proxied request, not this program,
 * except for DACS_UPROXY (which is not passed on in the forwarded request).
 * Cookies for the forwarded request are not handled.
 */

#ifndef lint
static const char copyright[] =
"Copyright (c) 2003-2011\n\
Distributed Systems Software.  All rights reserved.";
static const char revid[] =
  "$Id: uproxy.c 2528 2011-09-23 21:54:05Z brachman $";
#endif

#include "dacs.h"

/*
 * Name identifying this program.
 * NOTE(review): not referenced directly in this file; presumably picked up
 * by the logging macros from dacs.h -- confirm.
 */
static char *log_module_name = "dacs_uproxy";

/*
 * Print the command line synopsis on stderr and terminate the program
 * with exit status 1.
 */
static void
usage(void)
{
  FILE *out;

  out = stderr;
  fprintf(out, "uproxy %s\n", standard_command_line_usage);
  exit(1);
}

/*
 * The parsed UPROXY_APPROVED entries, stored as a vector of Uri pointers.
 * NULL until init_approved_hosts() has successfully added an entry.
 */
static Dsvec *approved_hosts = NULL;

/*
 * Build the approved-host list from the UPROXY_APPROVED directives.
 * Each directive value is parsed as a URI and appended to approved_hosts.
 * A value that contains no "://" has no explicit scheme, so "http://" is
 * prepended before parsing; this covers both "hostname" and "hostname:port".
 * Return 0 if at least one directive is configured and every directive
 * parses, otherwise log an error and return -1.
 */
static int
init_approved_hosts(void)
{
  Kwv_pair *v;

  approved_hosts = NULL;

  if ((v = conf_var(CONF_UPROXY_APPROVED)) == NULL) {
	log_msg((LOG_ERROR_LEVEL, "UPROXY_APPROVED is not configured"));
	return(-1);
  }

  for (; v != NULL; v = v->next) {
	char *h;
	Uri *uri;

	h = v->val;
	log_msg((LOG_DEBUG_LEVEL, "UPROXY_APPROVED is \"%s\"", h));

	/*
	 * Look for the "://" scheme separator rather than a bare ':' so that
	 * a scheme-less entry that includes a port number (hostname:port)
	 * still gets the default scheme.
	 */
	if (strstr(h, "://") == NULL)
	  h = ds_xprintf("http://%s", h);

	if ((uri = uri_parse(h)) == NULL) {
	  log_msg((LOG_ERROR_LEVEL, "Error parsing UPROXY_APPROVED entry"));
	  return(-1);
	}

	/* The vector is created lazily, when the first entry is accepted. */
	if (approved_hosts == NULL)
	  approved_hosts = dsvec_init(NULL, sizeof(Uri *));

	dsvec_add_ptr(approved_hosts, uri);
  }

  return(0);
}

/*
 * Write STR to FP with the characters that are significant in HTML replaced
 * by character references, so that request-derived text cannot be
 * interpreted as markup.  A NULL STR writes nothing.
 */
static void
uproxy_emit_html_escaped(FILE *fp, const char *str)
{
  const char *p;

  if (str == NULL)
	return;

  for (p = str; *p != '\0'; p++) {
	switch (*p) {
	case '&':
	  fputs("&amp;", fp);
	  break;
	case '<':
	  fputs("&lt;", fp);
	  break;
	case '>':
	  fputs("&gt;", fp);
	  break;
	case '"':
	  fputs("&quot;", fp);
	  break;
	case '\'':
	  fputs("&#39;", fp);
	  break;
	default:
	  putc(*p, fp);
	  break;
	}
  }
}

/*
 * CGI entry point.
 * Validate the request (GET or POST), split PATH_INFO into the proxied
 * hostname and path, match the hostname against the approved list, forward
 * the request to the mapped target URL, and relay the reply on stdout.
 * The DACS_UPROXY query argument is consumed here (the value DEBUG turns on
 * diagnostic HTML output) and is not forwarded.
 * Exits with status 0 after relaying a reply whose HTTP status is 200;
 * exits with status 1 on any error or any other proxied status.
 */
int
main(int argc, char **argv)
{
  int debug, i, st;
  unsigned long clen;
  char *errmsg, *method_str, *path, *path_info, *query_string;
  char *content_type, *http_host, *p, *proxied_via, *qs, *via;
  char *proxied_host, *proxied_port, *proxied_hostname;
  Ds *content, target, via_header;
  Dsvec *dsv;
  Http *h;
  Http_method method;
  Kwv *args, *kwv;
  Uri *uri;

  debug = 0;
  if (dacs_init(DACS_WEB_SERVICE, &argc, &argv, &kwv, &errmsg) == -1) {
	emit_plain_header(stdout);
	if (errmsg == NULL)
	  errmsg = "Unknown error";
	/*
	 * Common error exit: every later failure sets errmsg and jumps here.
	 */
  fail:
	fprintf(stderr, "uproxy: %s\n", errmsg);
	log_msg((LOG_ERROR_LEVEL, "%s", errmsg));

	emit_html_header(stdout, NULL);
	/* errmsg may include text taken from the request, so escape it. */
	uproxy_emit_html_escaped(stdout, errmsg);
	printf("\n");
	emit_html_trailer(stdout);

	exit(1);
	/*NOTREACHED*/
  }

  if (init_approved_hosts() == -1) {
	errmsg = "No approved hosts";
	goto fail;
  }

  if ((method_str = getenv("REQUEST_METHOD")) == NULL) {
	errmsg = "No REQUEST_METHOD?";
	goto fail;
  }

  if ((method = http_string_to_method(method_str)) == HTTP_UNKNOWN_METHOD) {
	errmsg = "Unrecognized HTTP method?";
	goto fail;
  }

  if (method != HTTP_GET_METHOD && method != HTTP_POST_METHOD) {
	errmsg = "Unsupported HTTP method";
	goto fail;
  }

  /* If the request has a body, read all of it from stdin for forwarding. */
  content = NULL;
  content_type = NULL;
  if ((st = cgiparse_get_content_length(&clen)) == -1) {
	errmsg = "Error getting Content-Length";
	goto fail;
  }
  else if (st == 1) {
	if ((content_type = getenv("CONTENT_TYPE")) == NULL)
	  content_type = "application/octet-stream";
	content = ds_init(NULL);
	dsio_set(content, stdin, NULL, clen, 1);
	if (dsio_load(content) == -1) {
	  errmsg = "Error reading content";
	  goto fail;
	}
  }

  if ((path_info = getenv("PATH_INFO")) == NULL) {
	errmsg = "No PATH_INFO?";
	goto fail;
  }

  /* PATH_INFO must start with '/'; step over it. */
  if (*path_info++ != '/') {
	errmsg = "Invalid PATH_INFO?";
	goto fail;
  }

  /* Element 0 is the proxied hostname, the rest is the proxied path. */
  if ((dsv = strsplit(path_info, "/", 0)) == NULL) {
	errmsg = "Error parsing PATH_INFO?";
	goto fail;
  }

  if (dsvec_len(dsv) == 0) {
	errmsg = "Zero length PATH_INFO?";
	goto fail;
  }

  args = NULL;
  if ((query_string = getenv("QUERY_STRING")) != NULL) {
	if (*query_string == '\0')
	  query_string = NULL;
	else {
	  args = cgiparse_string(query_string, NULL, NULL);
	  /* DACS_UPROXY is addressed to this program; do not forward it. */
	  if ((p = kwv_lookup_value(args, "DACS_UPROXY")) != NULL) {
		if (streq(p, "DEBUG"))
		  debug = 1;
		kwv_delete(args, "DACS_UPROXY");
	  }
	}
  }

  if (debug) {
	Html_header_conf *hc;

	hc = emit_html_header_conf(NULL);
	hc->html = 1;
	emit_html_header(stdout, hc);
  }

  if (query_string != NULL && debug) {
	printf("QUERY_STRING = \"");
	uproxy_emit_html_escaped(stdout, query_string);
	printf("\"<br/>\n");
  }

  if (debug) {
	printf("path[0] = \"");
	uproxy_emit_html_escaped(stdout, (char *) dsvec_ptr_index(dsv, 0));
	printf("\"<br/>\n");
  }

  /* Rebuild the proxied path from the components after the hostname. */
  path = "";
  for (i = 1; i < dsvec_len(dsv); i++) {
	char *component;

	component = (char *) dsvec_ptr_index(dsv, i);
	if (debug) {
	  printf("path[%d] = \"", i);
	  uproxy_emit_html_escaped(stdout, component);
	  printf("\"<br/>\n");
	}
	path = ds_xprintf("%s/%s", path, component);
  }

  proxied_hostname = (char *) dsvec_ptr_index(dsv, 0);
  proxied_port = NULL;
  st = net_parse_hostname_port(proxied_hostname, &proxied_host, &proxied_port,
							   NULL);
  if (st == -1) {
	errmsg = "Invalid proxied hostname";
	goto fail;
  }

  /*
   * Use the first approved entry whose hostname matches, ignoring case.
   * If the request names a port, the entry must explicitly give that port.
   */
  for (i = 0; i < dsvec_len(approved_hosts); i++) {
	uri = (Uri *) dsvec_ptr_index(approved_hosts, i);
	if (strcaseeq(proxied_host, uri->host)) {
	  if (proxied_port == NULL
		  || (uri->port_given != NULL && streq(uri->port_given, proxied_port)))
		break;
	}
  }
  if (i == dsvec_len(approved_hosts)) {
	errmsg = ds_xprintf("Request for unapproved host: \"%s\"",
						proxied_hostname);
	goto fail;
  }

  /*
   * The target is the approved entry (scheme, host, optional port, optional
   * path prefix) followed by the proxied path and the remaining query args.
   */
  ds_init(&target);
  if (uri->port_given != NULL)
	ds_asprintf(&target, "%s://%s:%s%s%s", uri->scheme, uri->host,
				uri->port_given,
				(uri->path != NULL) ? uri->path : "", path);
  else
	ds_asprintf(&target, "%s://%s%s%s", uri->scheme, uri->host,
				(uri->path != NULL) ? uri->path : "", path);

  if ((qs = kwv_to_query_string(args)) != NULL && *qs != '\0')
	ds_asprintf(&target, "?%s", qs);

  if (debug) {
	printf("target=\"");
	uproxy_emit_html_escaped(stdout, ds_buf(&target));
	printf("\"<br/>\n");
  }

  /* Parse the assembled target only to confirm that it is a valid URI. */
  if ((uri = uri_parse(ds_buf(&target))) == NULL) {
	errmsg = "Invalid target host URI?";
	goto fail;
  }

  h = http_init(NULL);
  h->method = method;
  h->message_body = content;
  h->content_type = content_type;
  h->response_headers = dsvec_init(NULL, sizeof(char *));
  h->response_kwv = kwv_init(16);

  /*
   * Always create the request header vector: headers are appended below
   * whether or not the DACS-Uproxy-Via header can be generated.
   */
  h->message_headers = dsvec_init(NULL, sizeof(char *));

  http_host = getenv("HTTP_HOST");
  if ((proxied_via = current_uri_script()) != NULL)
	dsvec_add_ptr(h->message_headers,
				  ds_xprintf("DACS-Uproxy-Via: %s/%s",
							 proxied_via, proxied_hostname));

  /* Extend any Via value from the incoming request with this hop. */
  ds_init(&via_header);
  if ((via = getenv("VIA")) != NULL)
	ds_asprintf(&via_header, "Via: %s, ", via);
  else
	ds_asprintf(&via_header, "Via: ");

  if ((p = getenv("SERVER_PROTOCOL")) != NULL)
	ds_asprintf(&via_header, "%s ", p);
  else
	ds_asprintf(&via_header, "1.0 ");
  if (http_host != NULL)
	ds_asprintf(&via_header, "(%s)", http_host);
  else
	ds_asprintf(&via_header, "dacs_uproxy");
  dsvec_add_ptr(h->message_headers, ds_buf(&via_header));

  if ((p = getenv("HTTP_USER_AGENT")) != NULL)
	dsvec_add_ptr(h->message_headers,
				  ds_xprintf("User-Agent: %s", p));

  if ((p = getenv("DACS_APPROVAL")) != NULL)
	dsvec_add_ptr(h->message_headers,
				  ds_xprintf("DACS-Uproxy-Approval: %s", p));

#ifdef NOTDEF
  /* XXX Apache will append this to the header value?? */
  if (http_host != NULL)
	dsvec_add_ptr(h->message_headers,
				  ds_xprintf("Host: %s", http_host));
#endif

  st = http_invoke_request(h, ds_buf(&target), HTTP_SSL_URL_SCHEME, &errmsg);
  if (st != -1 && h->status_code == 200) {
	int reply_len;
	char *reply;

	/*
	 * NOTE(review): the result of http_get_response_body() is stored but
	 * not examined; presumably reply stays NULL on failure and a 200 reply
	 * with no body is relayed -- confirm.
	 */
	reply = NULL;
	reply_len = -1;
	st = http_get_response_body(h, &reply, &reply_len);

	printf("Status: 200\n");
#ifdef NOTDEF
	if ((p = kwv_lookup_value(h->response_kwv, "Content-Type")) != NULL)
	  printf("Content-Type: %s\n", p);
#endif

	/* Relay the proxied response headers unchanged. */
	for (i = 0; i < dsvec_len(h->response_headers); i++) {
	  p = (char *) dsvec_ptr_index(h->response_headers, i);
	  printf("%s\n", p);
	}

	printf("\n");
	/* Flush stdio output before writing the body to the descriptor. */
	fflush(stdout);

	if (reply != NULL)
	  write_buffer(fileno(stdout), reply, reply_len);
  }
  else {
	/* An error occurred or the returned status code was not 200 OK... */
	if (st == -1)
	  printf("Status: 500\n");
	else {
	  printf("Status: %d\n", h->status_code);
	  if ((p = kwv_lookup_value(h->response_kwv, "Location")) != NULL)
		printf("Location: %s\n", p);
	  if ((p = kwv_lookup_value(h->response_kwv, "Server")) != NULL)
		printf("Server: %s\n", p);
	}

	emit_html_header(stdout, NULL);

	printf("<b>The following request could not be proxied ");
	printf("(HTTP status code %d):</b><br/>\n",
		   (st == -1) ? 500 : h->status_code);
	/* The target contains the request's path and query; escape it. */
	printf("<tt><b>");
	uproxy_emit_html_escaped(stdout, ds_buf(&target));
	printf("</b></tt><br/>\n");

	emit_html_trailer(stdout);

	exit(1);
  }

  http_close(h);

  if (debug)
	emit_html_trailer(stdout);

  exit(0);
}
