/*
  jugtailVctl.c, written by Rhett "Jonzy" Jones 

  Jonzy's Universal Gopher Hierarchy Excavation And Display.
  Excavates through gopher menus and displays the hierarchy
  of the menus encountered

  Copyright (C) 1993, 1994 University of Utah Computer Center.

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program (look for the file called COPYING);
  if not, write to the Free Software Foundation, Inc.,
  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA           
*/


/* Description:	Contains routines needed to handle the "veronica.ctl" file,
 *	which is the control file for veronica, by handling the
 *	"Disallow" gopher paths and creating a file to pass to
 *	veronica which does not contain these "Disallow" paths.
 */

#include "stdinc.h"
#include "jugtail.h"
#include "sockets.h"
#include "tree.h"
#include "utils.h"

/* Routines private to this file. */
/* SIGALRM handler installed by ReadCtlFile; jumps back through 'env'. */
static void TooMuchTime4Read (int sig);
/* Fetches a control file from a gopher server into 'tmpfilename'. */
static int ReadCtlFile (char *hostStr, char *portStr);
/* Walks the host tree and processes the control file of each host. */
static void BuildDisallowList (TreeType *node);
/* Prefix test used when matching selectors against "Disallow" paths. */
static int StrInStr (char *s1, char *s2);
/* Is the item covered by an entry of the given "Disallow" list? */
static short DisallowItem (char *sStr, char *hStr, char *pStr, List *list);
/* Should the item be written to the veronica file? */
static short Item4Veronica (char *sStr, char *hStr, char *pStr);

/* Routines with external linkage. */
/* Debug dump of the "Disallow" list. */
void PrintTheList (void);
/* Entry point: writes the culled data file for veronica. */
void CreateVeronicaFile (TreeType *root);

/* from jugtail.c */
/* Called here as Unlink (tmpfilename); a non-zero result is reported as an
   error. */
extern int Unlink ();

/* from jugtailConf.c */
/* Called here as ReadLine (file, buffer, size); its result is compared
   against EOF. */
extern int ReadLine ();

/* Size of the line buffer used by CreateVeronicaFile. */
#define BUFFERLIMIT	2048

/* Value handed to alarm() in ReadCtlFile, i.e. the number of seconds to wait
   for a reply before giving up (5 minutes). */
#define READTIMEOUT	(5 * 60)
/* Saved by setjmp() in ReadCtlFile, target of longjmp() in TooMuchTime4Read. */
static jmp_buf env;

/* Selectors of the control files ReadCtlFile asks a server for.  The table
   is walked from the first entry until the terminating 0, so the order of
   the entries is the order in which the files are requested.  The first
   variant below is compiled out; the second one is the one in use. */
#if(0)				/* which one to use?  Check with Steve. */
static char *ctlName[] = { "0/etc/veronica.ctl",
  "0/veronica.ctl",
  "0/robots.txt",
  0
};
#else
static char *ctlName[] = { "0/veronica.ctl",
  "0/robots.txt",
  "0/etc/veronica.ctl",
  0
};
#endif

/* The "Disallow" list.  This file only reads it (DisallowItem, via
   Item4Veronica, and PrintTheList); nothing here appends to it.
   NOTE(review): presumably it is filled while ReadConfFile processes a
   control file -- confirm in "jugtailConf.c". */
List *disallowHead = (List *) NULL,	/* Head of the "Disallow" paths list. */
  *disallowTail = (List *) NULL;	/* Tail of the "Disallow" paths list. */

/* Set by BuildDisallowList to the host/port whose control file is being
   handed to ReadConfFile, and reset to EMPTYSTRING right afterwards. */
char *vctlHost = EMPTYSTRING,	/* The veronica control file host. */
  *vctlPort = EMPTYSTRING;	/* The veronica control file port. */

extern short Host2Search ();	/* Defined in "jugtailConf.c". */
extern int ReadConfFile ();	/* Defined in "jugtailConf.c". */
extern char *GetString ();	/* Defined in "tree.c". */

/* These variables are defined in "jugtailConf.c". */
/* In this file 'rdPtr' is the stream the server reply is read from and
   'rptPtr' receives all debug and error messages; 'wtPtr' is not used
   directly here. */
extern FILE *wtPtr, *rdPtr, *rptPtr;

/*****************************************************************************
 * TooMuchTime4Read is the handler ReadCtlFile installs for SIGALRM, so it
 * is entered only when the time allowed for reading the reply has run out.
 * It notes the event when debugging and then jumps back to the point saved
 * in 'env', which makes the matching setjmp() return 1.
 ****************************************************************************/
static void
TooMuchTime4Read (int sig)
{
  (void) sig;			/* The signal number is of no interest. */

  if (debug)
    fputs ("In TooMuchTime4Read\n", rptPtr);

  longjmp (env, 1);

}				/* TooMuchTime4Read */

/*****************************************************************************
 * ReadCtlFile checks if the gopher server "hostStr" running out port
 * "portStr" has "0/etc/veronica.ctl", "0/veronica.ctl", or "0/robots.txt"
 * and if so acquires this veronica control file, adds the host/port to
 * the table of control files we have checked and/or acquired, and then
 * adds the "Disallow" lines to the list of gopher paths to disallow.
 ****************************************************************************/
static int
ReadCtlFile (char *hostStr, char *portStr)
     /* hostStr: The host to get the control file from.
        portStr: The port the host is listening to. */
{
  int error = 0,		/* Did we get an error? */
    gotIt = 0;			/* Did we get the control file? */
  FILE *fPtr;			/* Pointer to the control file. */
  char *rsltLine,		/* The resultant line of data. */
    **controlFile;		/* The control file to retrieve. */

  for (controlFile = ctlName; !error && *controlFile && !gotIt; controlFile++)
    if (!(error = ContactHost (hostStr, Str2Int (portStr))))
      if ((fPtr = fopen (tmpfilename, "w")))
	{
	  SendString (*controlFile);
	  SendString ("\r\n");

	  /* Set things up so we don't wait for ever waiting to read. */
	  signal (SIGALRM, TooMuchTime4Read);
	  alarm (READTIMEOUT);
	  if (setjmp (env))
	    {
	      if (debug)
		fprintf (rptPtr, "Too much time waiting to read\n");
	      CloseReadNwriter ();
	      fclose (fPtr);
	      return (0);
	    }

	  while ((rsltLine = GetString (rdPtr)))
	    {
	      fprintf (fPtr, "%s", rsltLine);
	      if (strstr (rsltLine, "User-agent")
		  && strstr (rsltLine, "veronica"))
		gotIt = 1;
	    }

	  /* We got our request to deactivate the alarm. */
	  alarm (0);
	  signal (SIGALRM, SIG_IGN);

	  CloseReadNwriter ();
	  fclose (fPtr);
	  if (gotIt)
	    return (1);
	}
      else
	error =
	  fprintf (rptPtr, "error: ReadCtlFile cannot create [%s]\n",
		   tmpfilename);
    else
      PostContactHostError (error, hostStr, portStr);
  return (0);

}				/* ReadCtlFile */

/*****************************************************************************
 * BuildDisallowList visits every node of the tree 'node' in order (left
 * subtree, the node itself, right subtree).  The word of a node is taken
 * to be "host<TAB>port".  For every host that Host2Search accepts, the
 * control file is fetched with ReadCtlFile and, if one was acquired, handed
 * to ReadConfFile and then removed.  While ReadConfFile runs, 'vctlHost'
 * and 'vctlPort' name the host and port the control file came from.
 ****************************************************************************/
static void
BuildDisallowList (TreeType *node)
     /* node: Root of the (sub)tree of hosts to process, may be NULL. */
{
  char *tab,			/* Position of the tab. */
   *hStr,			/* The host string. */
   *pStr;			/* The port string. */

  if (!node)
    return;

  BuildDisallowList (node->left);

  /* Break the string up into the host and port parts. */
  hStr = node->word;
  tab = strchr (hStr, '\t');
  if (!tab)
    {
      /* Without a tab there is no port to contact, and writing through
         the null pointer would crash.  Report the entry and move on. */
      fprintf (rptPtr, "error: no port found in [%s]\n", hStr);
    }
  else
    {
      *tab = '\0';
      pStr = tab + 1;

      if (debug)
        fprintf (rptPtr, "Processing [%s]\n", hStr);

      if (Host2Search (hStr))
        {
          if (ReadCtlFile (hStr, pStr))
            {
              if (debug)
                fprintf (rptPtr, " SUCCESS with ctl");
              vctlHost = hStr;
              vctlPort = pStr;
              ReadConfFile (tmpfilename);
              if (Unlink (tmpfilename))
                fprintf (rptPtr, "error: %d could not delete [%s]\n", errno,
                         tmpfilename);
              /* hStr and pStr point into the node, don't leave the
                 globals referring to it. */
              vctlHost = vctlPort = EMPTYSTRING;
            }
          else if (debug)
            fprintf (rptPtr, " FAILURE with ctl");
          if (debug)
            fprintf (rptPtr, "\n");
        }
      else if (debug)
        fprintf (rptPtr, " NEVER MIND\n");

      /* Restore the information in case we want to use it again. */
      *tab = '\t';
    }

  BuildDisallowList (node->right);

}				/* BuildDisallowList */

/*****************************************************************************
 * StrInStr returns true if 's1' is the first part of 's2', and false
 * otherwise.  This routine is used to cull the gopher pathway 's2' if
 * it is a pathway under the pathway 's1'.
 *
 * 's1' counts as the first part of 's2' when all of 's1' matches the start
 * of 's2' and 's2' either ends there as well or continues with a character
 * that is neither alphanumeric nor white space.  So "/foo" covers "/foo"
 * and "/foo/bar", but not "/foobar" or "/foo bar".
 ****************************************************************************/
static int
StrInStr (char *s1, char *s2)
{

  /* Skip over the part the two strings have in common. */
  while (*s1 && *s1 == *s2)
    {
      s1++;
      s2++;
    }

  /* Some of 's1' is left over, so it can not be the first part of 's2'. */
  if (*s1)
    return (0);

  /* Both strings ended together: they are identical. */
  if (!*s2)
    return (1);

  /* 's2' goes on; accept it only if it continues at a separator.  The
     casts keep the <ctype.h> calls defined for characters above 127. */
  if (!isalnum ((unsigned char) *s2) && !isspace ((unsigned char) *s2))
    return (1);

  return (0);

}				/* StrInStr */

/*****************************************************************************
 * DisallowItem walks 'list' and returns true as soon as it finds an entry
 * whose selector is the first part of 'sStr' (see StrInStr) and whose host
 * and port are exactly 'hStr' and 'pStr'.  It returns false if there is no
 * such entry, and also whenever 'sStr' is the empty string.
 ****************************************************************************/
static short
DisallowItem (char *sStr, char *hStr, char *pStr, List *list)
     /* sStr: The selector string.
        hStr: The host string.
        pStr: The port string.
        list: Head of the list.    */
{
  List *entry;			/* The entry being compared. */

  /* An empty selector is never disallowed. */
  if (*sStr == '\0')
    return (0);

  for (entry = list; entry; entry = entry->next)
    {
      if (StrInStr (entry->info.sStr, sStr)
          && strcmp (entry->info.hStr, hStr) == 0
          && strcmp (entry->info.pStr, pStr) == 0)
        return (1);
    }

  return (0);

}				/* DisallowItem */

/*****************************************************************************
 * Item4Veronica decides whether the gopher item given by 'sStr', 'hStr',
 * and 'pStr' belongs in the data base for veronica.  The answer is true
 * only when both of these hold:
 *  1) Host2Search accepts the host 'hStr'.
 *  2) DisallowItem does not find the item in the "Disallow:" list.
 * The list is not consulted when the host is rejected.
 ****************************************************************************/
static short
Item4Veronica (char *sStr, char *hStr, char *pStr)
{
  if (!Host2Search (hStr))
    return (0);

  if (DisallowItem (sStr, hStr, pStr, disallowHead))
    return (0);

  return (1);

}				/* Item4Veronica */

/*****************************************************************************
 * PrintTheList writes every entry of the "Disallow" list, starting at
 * 'disallowHead', to the report file as "[selector] [host] [port]".  It
 * does nothing unless 'debug' is set.
 ****************************************************************************/
void
PrintTheList (void)
{
  List *item;			/* The entry to print. */

  if (!debug)
    return;

  fputs ("Printing the disallow items ...\n", rptPtr);

  item = disallowHead;
  while (item)
    {
      fprintf (rptPtr, "[%s] [%s] [%s]\n",
               item->info.sStr, item->info.hStr, item->info.pStr);
      item = item->next;
    }

}				/* PrintTheList */

/*****************************************************************************
 * CreateVeronicaFile builds the data file which will be passed off
 * to veronica.  It first collects the "Disallow" paths of the servers in
 * 'root' (BuildDisallowList), then copies the data file 'fileName' to the
 * file 'veronica', leaving out every item Item4Veronica rejects.
 *
 * Each line of the data file is expected to hold the tab separated fields
 * display string, selector, host and port, optionally followed by a tab
 * and gopher plus information.  Lines without the gopher plus part are
 * written with "\r\n" at the end, the others with "\n".  Lines with fewer
 * fields are counted but not copied.
 *
 * The program exits with status 1 if either file name is missing.  Failure
 * to open or to completely write a file is reported to 'rptPtr'.
 ****************************************************************************/
void
CreateVeronicaFile (TreeType *root)
     /* root: Servers we need to get the veronica.ctl file from. */
{
  int dataLines = 0,		/* Number of lines in the data file. */
    veronicaLines = 0;		/* Number of lines in the veronica file. */
  FILE *inFp,			/* Pointer to the data file. */
   *outFp;			/* Pointer to the veronica file. */
  char line[BUFFERLIMIT],	/* Line from the data file. */
   *dStr,			/* Display string from data file. */
   *sStr,			/* Selector string from data file. */
   *hStr,			/* Host string from data file. */
   *pStr,			/* Port string from data file. */
   *gPlus,			/* Is this a gopher plus item? */
   *remainder;			/* Any gopher plus specs. */

  if (debug)
    {
      int i;			/* A loop counter. */
      fprintf (rptPtr, "In CreateVeronicaFile()\n");
      PrintTree (root, 2);
      fprintf (rptPtr, "Printing searchHosts ...\n");
      for (i = 0; i < numSearchHosts; i++)
        fprintf (rptPtr, "searchHosts[%2i] = [%s]\n", i, searchHosts[i]);
    }

  BuildDisallowList (root);

  if (debug)
    PrintTheList ();

  if (!fileName || !*fileName || !veronica || !*veronica)
    {
      fprintf (rptPtr, "Major problems: null file names.  Aborting\n");
      exit (1);
    }

  if (!(inFp = fopen (fileName, "r")))
    {
      fprintf (rptPtr, "error: could not open [%s] for reading.\n", fileName);
      return;
    }

  if (!(outFp = fopen (veronica, "w")))
    {
      fprintf (rptPtr, "error: could not open [%s] for writing.\n",
               veronica);
      fclose (inFp);
      return;
    }

  while (ReadLine (inFp, line, BUFFERLIMIT) != EOF)
    {
      dataLines++;

      /* Parse the line.  All four pieces are requested before any of
         them is looked at, so the tokenizer is always driven the same
         way no matter what the line contains. */
      dStr = MyStrTok (line, '\t');
      sStr = MyStrTok ((char *) NULL, '\t');
      hStr = MyStrTok ((char *) NULL, '\t');
      remainder = MyStrTok ((char *) NULL, '\0');

      /* NOTE(review): this assumes MyStrTok reports a missing field with
         a null pointer, the way strtok() does -- confirm.  Such a line
         can not be a complete item and must not reach strchr() and
         friends, so it is dropped. */
      if (!dStr || !sStr || !hStr || !remainder)
        {
          if (debug)
            fprintf (rptPtr, "Skipping incomplete line %d of [%s]\n",
                     dataLines, fileName);
          continue;
        }

      /* A tab after the port means gopher plus information follows.
         This has to be determined before OnlyDigits gets the string. */
      gPlus = strchr (remainder, '\t');
      pStr = OnlyDigits (remainder);
      if (!pStr)
        {
          if (debug)
            fprintf (rptPtr, "Skipping line %d of [%s]: no port\n",
                     dataLines, fileName);
          continue;
        }

      if (Item4Veronica (sStr, hStr, pStr))
        {
          if (gPlus)
            {
              /* NOTE(review): presumably OnlyDigits ends the port string
                 at the tab, so the gopher plus part starts one past the
                 end of the port -- confirm. */
              remainder = pStr + strlen (pStr) + 1;
              fprintf (outFp, "%s\t%s\t%s\t%s\t%s\n", dStr, sStr,
                       hStr, pStr, remainder);
            }
          else
            fprintf (outFp, "%s\t%s\t%s\t%s\r\n", dStr, sStr, hStr, pStr);
          veronicaLines++;
        }
    }

  /* Buffered output is written when the file is closed, so this is where
     a full disk or similar shows up. */
  if (fclose (outFp) == EOF)
    fprintf (rptPtr, "error: could not finish writing [%s].\n", veronica);

  if (debug)
    fprintf (rptPtr,
             "%s contains %d items, %s contains %d items, with %d items culled\n",
             fileName, dataLines, veronica, veronicaLines,
             dataLines - veronicaLines);

  fclose (inFp);

}				/* CreateVeronicaFile */
