/*
** Copyright (C) 2001-2008 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**  
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**  
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**  
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**  
*/
#define _GNU_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <ctype.h>

#include "gnuvd.h"

/* Host and port used when the dictionary site is contacted directly. */
static const char* VANDALE_SERVER = "www.vandale.nl";
static const char* VANDALE_PORT   = "80";

/*
 * HTTP request templates. HTTP_REQUEST and HTTP_REQUEST_PROXY each contain
 * _2_ %s fields, both for the (URL-encoded) search term: one in the request
 * line and one in the Referer header that is part of HTTP_HEADERS.
 *
 * VERSION is pasted into the User-Agent header; a fallback definition
 * further down in this file is used when the build does not provide one.
 */
#define HTTP_HEADERS \
"Host: www.vandale.nl\r\n" \
"User-Agent:  GNUVD/" VERSION "\r\n" \
"Accept: text/xml,application/xml,application/xhtml+xml,text/html;" \
"q=0.9,text/plain;q=0.8,video/x-mng,image/png,image/jpeg,image/gif;" \
"q=0.2,text/css,*/*;q=0.1\r\n" \
"Accept-Language: nl, en\r\n" \
"Accept-Charset: UTF-8\r\n" \
"Referer: http://www.vandale.nl/vandale/opzoeken/woordenboek/?zoekwoord=%s\r\n" \
"\r\n"

/* Request used for a direct connection: the request line carries a path. */
#define HTTP_REQUEST \
"GET /vandale/opzoeken/woordenboek/?zoekwoord=%s HTTP/1.0\r\n" HTTP_HEADERS

/* Request used when going through a proxy: the request line carries the full URL. */
#define HTTP_REQUEST_PROXY \
"GET http://www.vandale.nl/vandale/opzoeken/woordenboek/?zoekwoord=%s HTTP/1.0\r\n" HTTP_HEADERS

/*
 * Proxy host and port, filled in by vd_query_new () from the http_proxy
 * environment variable. A NULL HTTP_PROXY_HOST means "connect directly".
 */
static char* HTTP_PROXY_HOST      = NULL;
static char* HTTP_PROXY_PORT      = NULL;

/*
 * Forward declarations of the file-local helpers defined further down.
 *
 * Note: the second parameter of vd_resolve is named 'server', but it is
 * handed to getaddrinfo () as the service argument, and the caller passes
 * a port string for it.
 */
static struct addrinfo*   vd_resolve              (const char* dnsname, const char* server, 
						   VDError *ex);
static int                vd_socket_to_site       (const struct addrinfo* dest_sa, 
						   VDError *ex);
static char*              vd_construct_http_query (const char *search_term, VDError *ex);
static int	          vd_perform_http_query   (int sock, char *query, VDError *ex);
static char*              vd_get_http_results     (int sock, size_t *bytes_read, 
						   VDCallback vd_callback, 
					           /*size_t chunk_size,*/ VDError *ex);
static void               vd_filter_http_buffer   (Buffer *buffer, VDError *ex);
static void               vd_parse_http_proxy     (const char* http_proxy, char **host, 
						   char **port, VDError *ex);

/*
 * Compatibility shims for platforms/builds that lack some facilities.
 * The HAVE_* macros are expected to come from the build configuration.
 */
#if HAVE_HSTRERROR
/*OK*/
#else
#define hstrerror(e) ((const char*)NULL)
#endif /*HAVE_HSTRERROR*/

#if HAVE_STRNDUP
/*OK*/
#else
/*
 * Fallback for strndup: return a freshly allocated, NUL-terminated copy of
 * at most n characters of s, or NULL when out of memory. (Simply mapping
 * strndup to strdup would ignore n and copy the whole string.)
 */
static char*
vd_strndup_fallback (const char *s, size_t n)
{
	size_t len = 0;
	char *copy;

	while (len < n && s[len] != '\0')
		++len;

	copy = malloc (len + 1);
	if (copy) {
		memcpy (copy, s, len);
		copy[len] = '\0';
	}

	return copy;
}
#undef strndup
#define strndup(s,n) vd_strndup_fallback ((s), (n))
#endif /*HAVE_STRNDUP*/

#ifndef VERSION
#define VERSION "?.?"
#endif /*VERSION*/

#ifndef SA_LEN
#define SA_LEN(x) (sizeof(struct sockaddr_in))
#endif /*SA_LEN*/


/*
 * Create a new query object for 'word'.
 *
 * If the http_proxy environment variable is set, it is parsed and the
 * file-level proxy host/port are (re)initialised from it.
 *
 * word: the search term; it is copied.
 * ex:   receives VD_ERR_PROXY (or whatever the proxy parser reports) when
 *       http_proxy cannot be parsed and VD_ERR_MEM when an allocation
 *       fails; it is not modified on success when no proxy is configured.
 *
 * Returns the new query, to be released with vd_query_destroy (), or NULL
 * on error. Nothing is leaked on the error paths.
 */
VDQuery*
vd_query_new (const char *word, VDError *ex)
{
	VDQuery *vd_query;
	const char *proxy;
	
	assert (word);
	assert (ex);

	proxy = getenv ("http_proxy");
	if (proxy) {
		/* drop settings left by an earlier query, so they are not
		 * leaked when the parser stores the new ones */
		free (HTTP_PROXY_HOST);
		free (HTTP_PROXY_PORT);
		HTTP_PROXY_HOST = NULL;
		HTTP_PROXY_PORT = NULL;

		vd_parse_http_proxy (proxy, &HTTP_PROXY_HOST, 
				     &HTTP_PROXY_PORT, ex);
		if (*ex != VD_ERR_OK) {
			/* don't keep half-parsed settings around */
			free (HTTP_PROXY_HOST);
			free (HTTP_PROXY_PORT);
			HTTP_PROXY_HOST = NULL;
			HTTP_PROXY_PORT = NULL;
			return NULL;
		}
	}

	vd_query = malloc (sizeof *vd_query);
	if (!vd_query) {
		*ex = VD_ERR_MEM;
		return NULL;
	}
	
	vd_query->_search_term = strdup (word);
	if (!vd_query->_search_term) {
		free (vd_query);
		*ex = VD_ERR_MEM;
		return NULL;
	}

	vd_query->_result_buffer = buffer_new ();
	if (!vd_query->_result_buffer) {
		free (vd_query->_search_term);
		free (vd_query);
		*ex = VD_ERR_MEM;
		return NULL;
	}

	return vd_query;
}


/*
 * Release a query created with vd_query_new (): its search term, its
 * result buffer and the object itself. Passing NULL is a no-op.
 *
 * The file-level proxy host and port are released as well and reset to
 * NULL, so no stale proxy settings (or dangling pointers) remain.
 */
void
vd_query_destroy (VDQuery* vd_query)
{
	if (!vd_query)
		return;
	
	free (vd_query->_search_term);
	buffer_destroy (vd_query->_result_buffer);

	free (HTTP_PROXY_HOST);
	HTTP_PROXY_HOST = NULL;

	/* the port was allocated together with the host; free it too */
	free (HTTP_PROXY_PORT);
	HTTP_PROXY_PORT = NULL;

	free (vd_query);
}



/*
 * Give read-only access to the result buffer of a query.
 * vd_query must not be NULL.
 */
const Buffer*
vd_query_results (const VDQuery *vd_query)
{
	const Buffer *results;

	assert (vd_query != NULL);

	results = vd_query->_result_buffer;
	return results;
}



/*
 * Give read-only access to the search term stored in a query.
 * vd_query must not be NULL.
 */
const char *
vd_query_search_term (const VDQuery *vd_query)
{
	const char *term;

	assert (vd_query != NULL);

	term = vd_query->_search_term;
	return term;
}



/*
 * Run a query without a progress callback; simply forwards to
 * vd_query_perform_progressive () with a NULL callback.
 */
void
vd_query_perform (const VDQuery *vd_query, VDError *ex)
{
	VDCallback no_callback = NULL;

	vd_query_perform_progressive (vd_query, no_callback, ex);
}




/*
 * Perform the query: resolve the server (or the proxy, when one is
 * configured), connect, send the HTTP request, read the response and
 * filter it into the query's result buffer.
 *
 * vd_query:  the query to run; must not be NULL.
 * call_back: handed on to vd_get_http_results ().
 * ex:        set to VD_ERR_OK on success; otherwise VD_ERR_CONNECT
 *            (resolving or connecting failed), VD_ERR_MEM (the request
 *            could not be built), VD_ERR_HTTP (sending or receiving
 *            failed) or whatever vd_filter_http_buffer () reports.
 *
 * The socket is always closed before returning.
 */
void
vd_query_perform_progressive (const VDQuery *vd_query, VDCallback call_back,
			      VDError *ex)
{
	int sock = -1;
	struct addrinfo *ai, *aip;
	
	int err;
	size_t bytes_read = 0;

	const char *server, *port;
	char *query = NULL, *result;
		
	assert (vd_query);
	assert (ex);

	*ex = VD_ERR_OK;
	
	/* use a proxy? */
	if (HTTP_PROXY_HOST) {
		server = HTTP_PROXY_HOST;
		port   = HTTP_PROXY_PORT;
	} else {
		server = VANDALE_SERVER;
		port   = VANDALE_PORT;
	}

	ai = vd_resolve (server, port, ex);
	if (!ai) {
		*ex = VD_ERR_CONNECT;
		return;
	}

	/* find a socket: try each resolved address until one connects */
	for (aip = ai; aip; aip = aip->ai_next) {
		sock = vd_socket_to_site (aip, ex);
		if (sock >= 0)
			break;
	}

	freeaddrinfo (ai);

	if (sock < 0) {
		*ex = VD_ERR_CONNECT;
		return;
	}

	/* an earlier address may have failed before this one connected;
	 * don't let that stale error survive a successful query */
	*ex = VD_ERR_OK;

	query = vd_construct_http_query (vd_query->_search_term, ex);
	if (!query) {
		*ex = VD_ERR_MEM;
		goto cleanup;
	}
	
	err = vd_perform_http_query (sock, query, ex);
	if (err == -1) {
		*ex = VD_ERR_HTTP;
		goto cleanup;
	}

	result = vd_get_http_results (sock, &bytes_read, 
			              call_back, ex);
	if (!result) 
		*ex = VD_ERR_HTTP;
	else {
		/* presumably the buffer takes over ownership of 'result';
		 * it is not freed here */
		buffer_manage_data (vd_query->_result_buffer, 
				    result, bytes_read);
		vd_filter_http_buffer (vd_query->_result_buffer, ex);
	}
	
 cleanup:
	free (query);
	shutdown (sock, SHUT_RDWR);
	close (sock); /* shutdown () alone does not release the descriptor */
}


/*
 * Look up TCP stream addresses (any address family) for a host.
 *
 * dnsname: host name handed to getaddrinfo ().
 * server:  despite its name, this is the service argument of
 *          getaddrinfo (); the caller passes a port string.
 * ex:      set to VD_ERR_DNS when the lookup fails.
 *
 * Returns the address list (to be released with freeaddrinfo ()) or NULL.
 */
static struct addrinfo* 
vd_resolve (const char* dnsname, const char* server, VDError *ex)
{
	struct addrinfo *found = NULL;
	struct addrinfo wanted;
	int rc;

	memset (&wanted, 0, sizeof wanted);
	wanted.ai_protocol = IPPROTO_TCP;
	wanted.ai_socktype = SOCK_STREAM;
	wanted.ai_family   = AF_UNSPEC;

	rc = getaddrinfo (dnsname, server, &wanted, &found);
	if (rc == 0)
		return found;

	*ex = VD_ERR_DNS;
	return NULL;
}


/*
 * Create a socket for the given address and connect it.
 *
 * dest_sa: one entry of a getaddrinfo () result list.
 * ex:      set to VD_ERR_SOCKET when the socket cannot be created and to
 *          VD_ERR_CONNECT when connecting fails.
 *
 * Returns the connected socket, or -1 on error. On a failed connect the
 * socket is closed again, so trying several addresses does not leak
 * descriptors.
 */
static int
vd_socket_to_site (const struct addrinfo *dest_sa, VDError *ex)
{
	int sock;

	sock = socket (dest_sa->ai_family, dest_sa->ai_socktype,
		       dest_sa->ai_protocol);
	if (sock == -1) {
		*ex = VD_ERR_SOCKET;
		return -1;
	}

	if (connect (sock, dest_sa->ai_addr, dest_sa->ai_addrlen) == -1) {
		close (sock);
		*ex = VD_ERR_CONNECT;
		return -1;
	}

	return sock;
}


/*
 * Build the complete HTTP request for a search term.
 *
 * The term is percent-encoded: alphanumeric characters are copied as-is,
 * every other byte becomes %XX with exactly two hex digits. The encoded
 * term is then inserted into both %s fields of the request template (the
 * proxy variant when a proxy host is configured).
 *
 * search_term: the NUL-terminated term to look up.
 * ex:          set to VD_ERR_MEM when an allocation fails.
 *
 * Returns a malloc'ed request string the caller must free, or NULL.
 */
static char*
vd_construct_http_query (const char *search_term, VDError *ex)
{
	static const char hex_digits[] = "0123456789ABCDEF";

	const size_t term_len = strlen (search_term);
	char *query, *encoded_search_term, *pos;
	size_t len, i;

	/* The worst that can happen is that all chars need encoding */
	encoded_search_term = malloc (3 * term_len + 1);
	if (!encoded_search_term) {
		*ex = VD_ERR_MEM;
		return NULL;
	}

	pos = encoded_search_term;
	for (i = 0; i < term_len; i++) {
		/* go through unsigned char: bytes >= 0x80 must neither be
		 * passed to isalnum () as negative values nor be
		 * sign-extended when formatted as hex */
		const unsigned char c = (unsigned char) search_term[i];

		if (isalnum (c)) 
			*(pos++) = (char) c;
		else {
			*(pos++) = '%';
			*(pos++) = hex_digits[c >> 4];
			*(pos++) = hex_digits[c & 0x0F];
		}
	}
	*pos = '\0';
	
	/* template + two copies of the term + terminator; the two "%s"
	 * in the template leave a few bytes of slack */
	len = strlen (HTTP_PROXY_HOST ? HTTP_REQUEST_PROXY : HTTP_REQUEST) + 
		2 * (size_t) (pos - encoded_search_term) + 1;
	
	query = malloc (len);
	if (!query) {
		free (encoded_search_term);
		*ex = VD_ERR_MEM;
		return NULL;
	}	
	
	snprintf (query, len, (HTTP_PROXY_HOST ? HTTP_REQUEST_PROXY : HTTP_REQUEST),
		  encoded_search_term, encoded_search_term);

	free (encoded_search_term);

	return query;
}


/*
 * Send the complete request over the socket.
 *
 * write () may transfer fewer bytes than asked for or be interrupted by a
 * signal, so keep writing until the whole query is out.
 *
 * sock:  connected socket.
 * query: NUL-terminated request; the terminator is not sent.
 * ex:    not used.
 *
 * Returns the number of bytes written, or -1 on error.
 */
static int
vd_perform_http_query (int sock, char *query, VDError *ex)
{
	const size_t total = strlen (query);
	size_t sent = 0;

	(void) ex;

	while (sent < total) {
		ssize_t n = write (sock, query + sent, total - sent);

		if (n < 0) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (n == 0) /* no progress; don't spin forever */
			return -1;

		sent += (size_t) n;
	}

	return (int) sent;
}



/*
 * Read the HTTP response from the socket into a newly allocated buffer.
 *
 * Reading stops at end of stream or once max_buf bytes have been stored;
 * anything beyond that is not read. Each read is limited to the space
 * that is left, so the buffer can never be overrun.
 *
 * sock:        connected socket to read from.
 * bytes_read:  receives the number of bytes stored in the buffer.
 * vd_callback: currently not used (see FIXME).
 * ex:          set to VD_ERR_MEM when the buffer cannot be allocated and
 *              to VD_ERR_READ when reading fails.
 *
 * Returns the malloc'ed buffer (not NUL-terminated), or NULL on error.
 */
static char*
vd_get_http_results (int sock, size_t *bytes_read, 
		     VDCallback vd_callback, /*size_t chunk_size*/ /*FIXME*/
		     VDError *ex)
{
	const size_t max_buf    = 50000;
	const size_t chunk_size = 512;

	char *buf;
	ssize_t result = 0;

	(void) vd_callback;

	buf = malloc (max_buf);
	if (!buf) {
		*ex   = VD_ERR_MEM;
		return NULL;
	}
	
	*bytes_read = 0;
	while (*bytes_read < max_buf) {
		const size_t room = max_buf - *bytes_read;
		const size_t want = room < chunk_size ? room : chunk_size;
		
		result = read (sock, buf + *bytes_read, want);
		if (result < 0 && errno == EINTR)
			continue; /* interrupted by a signal; just retry */
		if (result <= 0)
			break;
		
		*bytes_read += (size_t) result;
	}
			
	if (result < 0) {		
		free (buf);
		*ex   = VD_ERR_READ;
		return NULL;
	}

	return buf;
}



/*
 * Reduce the raw HTTP response in http_buffer to the dictionary entry.
 *
 * Steps:
 *  - look for the start marker (<span class="g1v">) and erase what comes
 *    before it; when the marker is missing, report VD_ERR_NOT_FOUND if
 *    the page contains "Geen resultaat." and VD_ERR_UNAVAILABLE otherwise;
 *  - cut the buffer at the metaNav marker or, failing that, at "href=";
 *  - cut trailing non-alphanumeric characters;
 *  - report VD_ERR_NOT_FOUND when fewer than min_length bytes remain.
 *
 * The code relies on buffer_find () yielding buffer_end () when nothing
 * matches (presumably; see the Buffer implementation).
 *
 * http_buffer: buffer to filter in place; must not be NULL.
 * ex:          only written on error; must not be NULL.
 */
static void
vd_filter_http_buffer (Buffer *http_buffer, VDError* ex)
{
	const size_t min_length = 100;

	int pos;
	
	static const char* word        = "<span class=\"g1v\">";
	static const char* notfound    = "Geen resultaat.";
	static const char* metanav     = "class=\"metaNav\">";
	static const char *href        = "href=";

	assert (http_buffer);
	assert (ex);
	
	pos = buffer_find (http_buffer, word, strlen(word));
	if (pos <  buffer_end (http_buffer)) {
		buffer_erase (http_buffer, 0, pos);
	} else {
		/* nothing was found or not available*/
		pos = buffer_find (http_buffer, notfound, strlen(notfound));
		if (pos < buffer_end (http_buffer)) 
			*ex = VD_ERR_NOT_FOUND;
		else
			*ex = VD_ERR_UNAVAILABLE;
		return;
	}

	/* cut trailing stuff; we try metaNav first, then href, to be
	*  safe against tiny changes in the webpage...
	*/
	pos = buffer_find (http_buffer, metanav, strlen(metanav));
	if (pos == buffer_end (http_buffer)) 
		pos = buffer_find (http_buffer, href, strlen(href));
	
	if (pos < buffer_end (http_buffer))
		buffer_erase (http_buffer, pos, buffer_end(http_buffer) - pos);
	


	/* find the last alphanumeric character; the page is UTF-8, so the
	 * byte must go through unsigned char before it reaches isalnum () */
	for (pos = buffer_end(http_buffer) - 1; pos >= buffer_begin(http_buffer); --pos) {
		char c = buffer_at (http_buffer, pos);
		if (isalnum((unsigned char) c))
			break;
	}
	/* NOTE(review): the erase starts at pos itself, so the character
	 * found above appears to be removed as well -- confirm intended */
	if (pos != buffer_begin(http_buffer) && pos < buffer_end(http_buffer))
		buffer_erase (http_buffer, pos, buffer_end(http_buffer) - pos);
	
	if (buffer_length (http_buffer) < min_length)
		*ex = VD_ERR_NOT_FOUND;
} 



/*
 * Split a proxy specification of the form [http://]host:port[/] into its
 * host and port parts.
 *
 * http_proxy: the specification, e.g. the value of $http_proxy.
 * host, port: on success, receive newly allocated strings the caller must
 *             free; any previous values are overwritten without being
 *             freed. They are left untouched on failure.
 * ex:         set to VD_ERR_OK on success and to VD_ERR_PROXY when the
 *             specification is empty, has no port, has an empty host or
 *             port, or when memory runs out.
 */
static void
vd_parse_http_proxy (const char* http_proxy, char **host, char **port, VDError *ex)
{
	static const char scheme[] = "http://";
	const size_t scheme_len = sizeof (scheme) - 1;

	const char *begin, *end, *colon, *p;
	char *new_host, *new_port;

	assert (host);
	assert (port);
	assert (ex);

	if (!http_proxy || !*http_proxy) {
		*ex = VD_ERR_PROXY;
		return;
	}

	begin = http_proxy;
	end   = http_proxy + strlen (http_proxy);

	/* skip the scheme first, so that its ':' can never be mistaken
	 * for the separator in front of the port */
	if (strncmp (begin, scheme, scheme_len) == 0)
		begin += scheme_len;

	/* ignore the closing '/', if any */
	if (end > begin && end[-1] == '/')
		--end;

	/* the port follows the last ':' */
	colon = NULL;
	for (p = begin; p < end; ++p)
		if (*p == ':')
			colon = p;

	/* port not found, or nothing left for the host or the port */
	if (!colon || colon == begin || colon + 1 == end) {
		*ex = VD_ERR_PROXY;
		return;
	}

	new_host = strndup (begin, (size_t) (colon - begin));
	new_port = strndup (colon + 1, (size_t) (end - colon - 1));
	if (!new_host || !new_port) {
		free (new_host);
		free (new_port);
		*ex = VD_ERR_PROXY;
		return;
	}

	*host = new_host;
	*port = new_port;
	*ex   = VD_ERR_OK;
}


const char*
vd_error_string (int err)
{
	switch (err) {
	case VD_ERR_OK:
		return "No error";
	case VD_ERR_NOT_FOUND:
		return "Not found";
	case VD_ERR_UNAVAILABLE:
		return "Dictionary is currently unavailable";
	case VD_ERR_CONNECT:
		return "Error connecting to site";
	case VD_ERR_SOCKET:
		return "Error creating socket";
	case VD_ERR_PROXY:
		return "Proxy error";
	case VD_ERR_DNS:
		return "DNS error";
	case VD_ERR_READ:
		return  "Read error";
	case VD_ERR_HTTP:
		return "HTTP error";
	case VD_ERR_MEM:
		return "Memory error";
	case VD_ERR_POINTER:
		return "Invalid pointer";
         case VD_ERR_PARAM:
		return "Parameter error";
	default:
		return "Unknown error";
	}
}
