/* $Id: http_gw.c,v 1.11 1997/05/03 11:57:21 lexa Exp $ */
/*  
    (C) 1996, Alex Tutubalin, lexa@lexa.ru
       HTTP gateway (cyrproxy)

*/



#include	<stdio.h>
#include	<ctype.h>
#include 	<string.h>
#include	<syslog.h>
#include 	<stdlib.h>
#include 	<stdio.h>
#include 	<sys/errno.h>
#include	<stdarg.h>
#include	<unistd.h>
#ifdef _AIX
#include <fcntl.h>
#else
#include	<sys/fcntl.h>
#endif
#include	<sys/stat.h>
#include 	<sys/types.h>
#include 	<sys/time.h>


#include 	"policy.h"
#include 	"readconf.h"
#include 	"gateway.h"


/* Set by http_request() when the protocol token of the request line starts
   with "HTTP/1"; read by http_response() to decide whether the server's
   header lines are forwarded to the client. */
static int  IsHTTP_1_0=0;

/* Size used for line reads and body chunks in this file: half of cpBSIZE
   (cpBSIZE itself is defined outside this file). */
#define cpBUFSIZE (cpBSIZE/2)
/* url_translate() puts its result in this buffer; http_request() and
   http_response() also use it as scratch space when tokenizing a
   Content-Length header line. */
static char urlbuf[cpBUFSIZE*3+1];
/* Minimum / maximum of two values.  Arguments are evaluated twice, so do not
   pass expressions with side effects. */
#define mmin(a,b) ((a)<(b)?(a):(b))
#define mmax(a,b) ((a)>(b)?(a):(b))
/* Next whitespace-delimited token via strtok(): pass a buffer to start
   tokenizing it (the buffer is modified), pass NULL to continue. */
#define strws(a)  strtok(a," \t\r\n")

/* Body kinds stored in RequestType / ResponseType: TEXT bodies are charset
   translated, BINARY bodies are copied unchanged. */
#define TEXT	1
#define BINARY	0


/*
   Returns 1 when `word` is NULL or consists solely of blanks, tabs,
   carriage returns and newlines (including the empty string); 0 otherwise.
*/
static int
empty_line(char *word)
{
  if (word == NULL)
    return 1;

  /* strspn() gives the length of the leading run of whitespace; the line is
     empty exactly when that run reaches the terminating NUL. */
  return word[strspn(word, " \t\r\n")] == '\0';
}


/*
   unescape() converts every %xx sequence in `str` into the byte it encodes
   and writes every other character as '\char' (a backslash followed by the
   character), so that escape() can later perform the reverse operation
   correctly.

   dest - output buffer, NUL terminated on return.  At most
          2*strlen(str)+1 bytes are written.
   str  - NUL terminated input; not modified.

   A '%' that is not followed by two hex digits is malformed input: the '%'
   and up to two following characters are dropped.  The skip never steps
   over the terminating NUL, so a '%' near the end of the string cannot make
   the scan run past the end of the input.

   Note that "%00" is decoded to a NUL byte, which terminates the output
   early for callers that treat `dest` as a C string.
 */
static void
unescape(unsigned char *dest, unsigned char *str)
{
  unsigned char *p, *dst;
  char hex[] = "0x00";
  int skipped;

  for (p = str, dst = dest; *p; )
    {
      if (*p == '%')
        {
          /* p[2] is only examined when p[1] is a hex digit, i.e. not NUL.
             The bytes are unsigned char, which is what <ctype.h> needs. */
          if (isxdigit(p[1]) && isxdigit(p[2]))
            {
              hex[2] = (char) p[1];
              hex[3] = (char) p[2];
              *dst++ = (unsigned char) strtol(hex, NULL, 16);
              p += 3;
            }
          else
            {
              /* Malformed escape: drop '%' plus at most two more chars,
                 stopping at the end of the string. */
              for (skipped = 0; skipped < 3 && *p; skipped++)
                p++;
            }
        }
      else /* mark a literal character with a leading backslash */
        {
          *dst++ = '\\';
          *dst++ = *p++;
        }
    }
  *dst = 0;
}


/* c2h: map a value 0..15 to its upper-case hexadecimal digit character. */
#define c2h(v) ((v)<10?(v)+'0':(v)-10+'A')
/*
   Store the 4-byte string "%XY" (including the terminating NUL) in dst,
   where X is derived from val>>4 and Y from the low four bits of val.
*/
static void
chr2hex(unsigned char *dst,unsigned int val)
{
  unsigned int high = val >> 4;
  unsigned int low  = val & 0xf;

  dst[0] = '%';
  dst[1] = c2h(high);
  dst[2] = c2h(low);
  dst[3] = 0;
}


/*
   Complementary function to unescape().

   Characters that are preceded by a backslash in `str` are copied to `dest`
   without the backslash; every other character is written as a three
   character %XX sequence.  `dest` is NUL terminated on return and needs
   room for up to 3*strlen(str)+1 bytes.

   A backslash that is the very last character of `str` has nothing to
   protect.  It is dropped and processing stops there, so the scan never
   advances past the terminating NUL.
*/
static void
escape(unsigned char *dest, unsigned char *str)
{
  unsigned char *dst,*p;

  for(p=str,dst=dest;*p;p++)
    {
      if(*p=='\\')
        {
          if(p[1]==0) /* dangling backslash at end of input */
            break;
          *dst++=*++p; /* previously literal char -> copy it, skip backslash */
        }
      else
        {
          chr2hex(dst,(unsigned int)*p);
          dst+=3;
        }
    }
  *dst=0;
}


/*
   Translate a %-escaped string between character sets.

   The input is decoded with unescape() into a local work buffer, the
   decoded bytes are passed through TranslateBuf() with
   client_enc->tablefrom, and the result is re-encoded with escape() into
   the file-static buffer `urlbuf`, where the caller picks it up.
*/
void
url_translate(char *url,encoding *client_enc)
{
  unsigned char wrkbuf[cpBUFSIZE*3+1];
  size_t worklen;

  unescape(wrkbuf, (unsigned char *) url);

  worklen = strlen((char *) wrkbuf);
  TranslateBuf(wrkbuf, worklen, client_enc->tablefrom);

  escape((unsigned char *) urlbuf, wrkbuf);
}

/*
   Case-insensitive prefix test: returns 1 when `string` starts with `pat`,
   0 otherwise.  A NULL or empty `string` never matches.
*/
static int
strmatch(char *string, char *pat)
{
  size_t patlen;

  if (string == NULL || string[0] == '\0')
    return 0;

  patlen = strlen(pat);
  return strncasecmp(string, pat, patlen) == 0;
}

/*
   Write the NUL terminated string `str` (without its terminator) to the
   descriptor `file` with a single write() call and return write()'s result.
*/
static int
writestr(int file, char *str)
{
  size_t len = strlen(str);

  return (int) write(file, str, len);
}

/* Client header lines starting with one of these prefixes are not
   forwarded to the server. */
char *rejectedClientHeaders[]=
{
  "Connection:"
};

#define NrejectedClientHeaders (sizeof(rejectedClientHeaders)/sizeof(rejectedClientHeaders[0]))

/*
   Must this client header line be passed on to the server?
   Returns 0 when `str` starts (case-insensitively, see strmatch()) with
   one of the rejected prefixes, 1 otherwise.
*/
static int
PassToServer(char *str)
{
  size_t idx = 0;

  while (idx < NrejectedClientHeaders)
    {
      if (strmatch(str, rejectedClientHeaders[idx]))
        return 0;
      idx++;
    }
  return 1;
}

/* Server header lines starting with one of these prefixes are not
   forwarded to the client. */
char *rejectedServerHeaders[]=
{
  "Connection:",
  "Keep-Alive:"
};
#define NrejectedServerHeaders (sizeof(rejectedServerHeaders)/sizeof(rejectedServerHeaders[0]))

/*
   Must this server header line be passed on to the client?
   Returns 0 when `str` starts (case-insensitively, see strmatch()) with
   one of the rejected prefixes, 1 otherwise.
*/
static int
PassToClient(char *str)
{
  size_t idx = 0;

  while (idx < NrejectedServerHeaders)
    {
      if (strmatch(str, rejectedServerHeaders[idx]))
        return 0;
      idx++;
    }
  return 1;
}



/*
   http_request(): reads the client's request from descriptor 0 and writes
   the modified request to netfd.

   First call (no request line parsed yet):
     - reads the request line, splits it into method, URL and protocol,
       translates the URL with url_translate() and sends
       "<method> <url> HTTP/1.0\n" to the server;
     - if the protocol token starts with "HTTP/1", copies the client headers
       to the server, dropping those rejected by PassToServer(), appending a
       marker to User-Agent and recording Content-Length / Content-Type;
       then terminates the header section (adding an
       "Accept: text/x-cyrillic-koi8" header unless the client already
       offered x-cyrillic-koi8) and forwards Content-Length bytes of body.
       Text and form-urlencoded bodies are run through url_translate(),
       anything else is copied unchanged;
     - otherwise (no protocol token) sends a fixed set of headers.

   Later calls (the method is remembered in a static): read up to cpBUFSIZE
   bytes, pass them through TranslateBuf() and write them to netfd.

   Returns ERR_NOERROR on success, ERR_RDERR / ERR_WRTERR on read / write
   failure, and ERR_NODATA when the header stream ends without h_checkerr(0)
   reporting an error.

   `buf` is the shared I/O buffer declared outside this file.
   NOTE(review): this code assumes buf holds more than cpBUFSIZE bytes
   (http_response() reads cpBSIZE bytes into it) -- confirm against its
   declaration.
*/
static enum GWerrors
http_request(int netfd, encoding *enc)
{
  char *url,*Proto,*z,*method;
  int n = 0,m,ClientAcceptKoi8=0;

  static char *HTTP_Method=NULL;
  static int RequestLength = 0, RequestType = BINARY;

  if (HTTP_Method) {
    /* We've read and parsed the header on a previous pass */

    n = h_read(0,buf,cpBUFSIZE);
    NetworkDebug("http_request(1): read returns %d",n);
    if(n>0){
      /* NOTE(review): the table argument is 0 here, not enc->tablefrom --
         confirm that this is intended. */
      TranslateBuf(buf,n,0);
      m = write(netfd,buf,n);
      NetworkDebug("http_request(2): write: %d",m);
      if(m!=n)
        return ERR_WRTERR;
    }else
      return ERR_RDERR;
    return ERR_NOERROR;
  }

  if (h_fgets(buf, cpBUFSIZE,0,0) == NULL)
    {
      NetworkDebug("http_request(3): gets: NULL");
      return ERR_RDERR;
    }

  /* A request line made only of whitespace yields no token at all;
     strdup(NULL) would crash, so reject it here. */
  method = strws(buf);
  if (method == NULL)
    return ERR_RDERR;

  HTTP_Method = strdup(method);
  url = strws(NULL);
  Proto = strws(NULL);

  IsHTTP_1_0=strmatch(Proto,"HTTP/1");

  /* empty_line() also covers a NULL url and a failed strdup() */
  if (empty_line(HTTP_Method) || empty_line(url))
    return ERR_RDERR;
  /* url points into buf; url_translate() copies the result to urlbuf
     before buf is overwritten below */
  url_translate(url,enc);
  sprintf(buf, "%s %s HTTP/1.0\n",HTTP_Method,urlbuf);

  m = writestr(netfd,buf);
  NetworkDebug("http_request(4): write: %d",m);
  if (m <= 0)
    return ERR_WRTERR;

  if (IsHTTP_1_0) {
    while ((z=h_fgets(buf, cpBUFSIZE, 0, 0)) != NULL) {
      ClientAcceptKoi8 +=
        strmatch(buf,"Accept:") && strstr(buf, "x-cyrillic-koi8");
      if (strmatch(buf, "User-Agent:")) {
        unsigned char *bp;
        /* strip the trailing line terminator, then append our marker */
        for (bp = buf + strlen(buf);
             bp > buf && (strchr("\n\r",*bp) || *bp == 0);
             bp--)
          *bp = 0;
        strcat(buf, " Modified via cyrproxy gateway\r\n");
      }
      if (strmatch(buf, "Content-Length:"))     {
        char *value;
        strcpy(urlbuf,buf); /* leave buf untouched */
        strws(urlbuf);      /* skip the header name */
        value = strws(NULL);
        /* The value token is missing when the header is empty or has no
           whitespace after the colon; a negative length is meaningless and
           must never be used as a read size.  Treat both as "no body". */
        RequestLength = value ? atoi(value) : 0;
        if (RequestLength < 0)
          RequestLength = 0;
      }
      if (strmatch(buf, "Content-Type:")
          && (strstr(buf, "text/")
              || strstr(buf, "application/x-www-form-urlencoded")))
        RequestType = TEXT;
      if (empty_line(buf))
        break; /* end of headers */
      if (PassToServer(buf) && (writestr(netfd, buf) <= 0)) return ERR_WRTERR;
    }
    if(z == NULL)
      {
        NetworkDebug("http_request(5): gets: NULL,err: %d",h_checkerr(0));
        return h_checkerr(0)?ERR_RDERR:ERR_NODATA;
      }

    /* Terminate the header section, announcing KOI8 support on behalf of
       the client if it did not do so itself. */
    if(writestr(netfd,ClientAcceptKoi8 ?
                "\r\n":"Accept: text/x-cyrillic-koi8\r\n\r\n") < 0)
      {
        NetworkDebug("http_request(6): write error");
        return ERR_WRTERR;
      }

    if (RequestLength) {
      if (RequestType == TEXT) {        /* text */
        while (RequestLength)  {
          n = mmin(RequestLength,cpBUFSIZE);
          /* both end of file (0) and an error (<0) end the transfer */
          if ((n = h_read(0,buf, n)) <= 0)
            {
              NetworkDebug("http_request(7): h_read: %d",n);
              return ERR_RDERR;
            }
          else
            NetworkDebug("http_request(8): h_read: %d",n);
          RequestLength -= n;
          /* h_read() delivers raw bytes; url_translate() needs a C string */
          buf[n] = 0;
          url_translate(buf,enc); /* We must handle all escapes -> it may be POST request */
          m=writestr(netfd, urlbuf);
          NetworkDebug("http_request(9): writestr: %d",m);
          if(m!=strlen(urlbuf))
            return ERR_WRTERR;
        }
      }  else { /* binary data */
        while (RequestLength)  {
          n = mmin(RequestLength,cpBUFSIZE);
          m = h_read(0,buf,n);
          NetworkDebug("http_request(10): h_read: expected %d, got %d",n,m);
          if (m<=0)
            return ERR_RDERR;
          /* forward exactly the m bytes that were read, not the n requested */
          if (write(netfd, buf, m) < 0)
            return ERR_WRTERR;
          RequestLength -= m;
        }
      }
    }
  }  else    {
    /* Not HTTP 1.x */
    if (writestr(netfd, "Accept: */*\r\nAccept: text/x-cyrillic-koi8\r\nUser-Agent: cyrproxy/UNIX\r\n\r\n")	  < 0)
      {
        NetworkDebug("http_request(11): writestr failed");
        return ERR_WRTERR;
      }
  }
  return ERR_NOERROR;
}


/*
   http_response(): reads the server response from netfd and writes it to
   file descriptor 1 (stdout).

   Header phase (runs until the blank line ending the headers has been seen;
   this is remembered in a static flag):
     - a "Content-Type:" line containing "text/" and enc->servercharset
       marks the body as TEXT;
     - a "Content-Length:" line with a usable value sets the body length;
     - when enc->changecharset is set and enc->clientcharset is non-empty,
       the "charset=" part of a text/ Content-Type line is rewritten to
       enc->clientcharset (the rest of that line is replaced);
     - header lines are forwarded only when the request was HTTP/1.x and
       PassToClient() accepts them.

   Body phase: TEXT bodies are read in cpBUFSIZE chunks, translated with
   TranslateBuf(enc->tableto) and written out.  Other bodies are copied
   unchanged, either until h_read() stops returning data or, when a
   Content-Length was seen in this call, for exactly that many bytes.

   Returns ERR_NOERROR, ERR_RDERR or ERR_WRTERR.  In the TEXT branch the
   process exits (status 0) after closing netfd when no data was read or
   h_checkerr() reports an error.

   NOTE(review): haveContLen is an ordinary local while ResponseLength is
   static, so a Content-Length seen in one call is not honoured by a later
   call -- confirm whether that is intended.
*/
static enum GWerrors
http_response(int netfd,encoding *enc)
{
  int		haveContLen=0, n;
  static int gh = 0;               /* non-zero once the headers are done */
  static int ResponseLength = 0;
  static int ResponseType = BINARY;
  char *z;

  if(!gh)
    {
      while ((z=h_fgets(buf, cpBUFSIZE,netfd ,0)) != NULL)
        {
          /* Header */
          n = strlen(buf);
          if (strmatch(buf,"Content-Type:") && strstr(buf, "text/") && strstr(buf,enc->servercharset))
            ResponseType = TEXT;
          else if (strmatch(buf, "Content-Length:"))
            {
              char *value;
              strcpy(urlbuf,buf);   /* tokenize a copy, buf is forwarded as is */
              strws(urlbuf);        /* skip the header name */
              value = strws(NULL);
              /* The value token is missing when the header is empty or has
                 no whitespace after the colon.  A negative length must
                 never reach h_read() as a size.  In both cases the header
                 is ignored and the body is read until end of data. */
              if (value != NULL && atoi(value) >= 0)
                {
                  ResponseLength = atoi(value);
                  haveContLen = 1;
                }
            }
          /* change charset for text/ documents */
          if (enc->changecharset && *(enc->clientcharset)
              && strmatch(buf,"Content-Type")
              && strstr(buf,"text/") )
            {
              char *p = strstr(buf,"charset=");
              if(p)
                sprintf(p,"charset=%s\n",enc->clientcharset);
#if 0 /* commented out. I'm not sure, that we must force charset addition
	 for documents, which not contains it! */
              else
                {
                  p=strchr(buf,'\n');
                  if(p)
                    {
                      if(*(p-1)=='\r') p--;
                      sprintf(p,"; charset=%s\n",enc->clientcharset);
                    }
                  else
                    {
                      strcat(buf,"; charset=");strcat(buf,enc->clientcharset);
                      strcat(buf,"\n");
                    }
                }
#endif
              n=strlen(buf);   /* the line may have changed length */
            }

          if (IsHTTP_1_0 && PassToClient(buf) && (write(1, buf, n)) < 0)
            return ERR_WRTERR;
          if (empty_line(buf))
            {
              gh=1;
              break;
            }
        }
      if(!z)
        return ERR_RDERR;
    }

  /* Response Body */
  if (ResponseType== TEXT)
    {
      int i=0;   /* number of chunks read in this call */
      while ((n = h_read(netfd,buf,cpBUFSIZE))>0)
        {
          i++;
          TranslateBuf(buf,n,enc->tableto);
          if (write(1, buf,n) !=n)
            return ERR_WRTERR;
        }

      if(!i||h_checkerr(netfd)){
        /* the original author replaced "return ERR_RDERR" with a direct
           exit here */
        close(netfd);
        exit(0);
      }
    }
  else  /* Binary response */
    {
      if(!haveContLen)
        {
          int i=0;   /* number of chunks read in this call */
          while ((n = h_read(netfd,buf,cpBSIZE)) > 0) {
            i++;
            if (write(1, buf, n) < n)
              return ERR_WRTERR;
          }
          if(!i||h_checkerr(netfd)){
            return ERR_RDERR;
          }
        }
      else /* have ContentLen */
        while (ResponseLength)
          {
            n = mmin(ResponseLength,cpBSIZE);
            if ((n = h_read(netfd,buf,n))<=0) return ERR_RDERR;
            if(write(1,buf,n) != n) return ERR_WRTERR;
            ResponseLength -=n;
          }
    }
  return ERR_NOERROR; /* all data processed ? */
}

/*
   Entry point of the HTTP gateway: hands the connection to simple_gw()
   with http_request() and http_response() as its request and response
   handlers, passing netfd, enc and timeout through unchanged.
*/
void
HttpGW(int netfd, encoding *enc, int timeout)
{
  simple_gw(netfd, http_request, http_response, enc, timeout);
}


