///////
   //
   //    ht://Check main function
   //
   //    Part of the ht://Check package
   //
   //    Copyright (c) 1999-2004 Comune di Prato - Prato - Italy
   //    Some Portions Copyright (c) 1995-2000 The ht://Dig Group <www.htdig.org>
   //    Author: Gabriele Bartolini - Prato - Italy <angusgb@users.sourceforge.net>
   //
   //    For copyright details, see the file COPYING in your distribution
   //    or the GNU General Public License version 2 or later 
   //    <http://www.gnu.org/copyleft/gpl.html>
   //
   //    $Id: htcheck.cc,v 1.26 2003/12/30 09:38:47 angusgb Exp $
   //
///////

#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */

#include <signal.h>
#include <errno.h>

#include "HtDefaults.h"
#include "HtDateTime.h"
#include "Scheduler.h"

// If we have this, we probably want it.
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif

// We want to get system information, right?
// Do we have the sys/utsname.h include file?
#ifdef HAVE_SYS_UTSNAME_H
#include <sys/utsname.h>
#endif

#include "htcheck.h"

//
// Global variables
//
// These are defined at namespace scope without 'static'; within this file
// they are shared between main(), the SIGINT handler and the helpers.
//

   // The scheduler driving the whole run. Allocated at the start of main()
   // and deleted at its end; also used by htcheck_exit() and ShowInfo().
   Scheduler   *main_scheduler;

   // User agent string handed to the scheduler. Built in main() from the
   // "user_agent" configuration value, VERSION and SysInfo.
   String      UserAgent;

   // Description of the host system. Filled in by set_sys_info() from
   // uname(); main() falls back to "not recognized system" when that fails.
   String      SysInfo;
   

// Debug

   // Verbosity level: starts at 0 and is incremented once per -v option.
   int         debug=0;

// Function prototypes
//
// All helpers below are file-local (static) and are defined after main().

// Prints the command line help and terminates the program (exit status 0).
static void usage();
// Prints the version line and terminates the program (exit status 0).
static void version();
// Prints "! htcheck: <msg>" to standard output and terminates the program
// with exit status 1.
static void reportError(char *msg);
// Fills the global SysInfo from uname(); returns false when that is not
// possible.
static bool set_sys_info();

// SIGINT handler: asks the main scheduler to stop.
static void htcheck_exit(int);
// Prints the reports produced by the main scheduler.
static void ShowInfo(const Configuration &config);

// Program entry point.
//
// Parses the command line, loads the configuration file, configures the
// global scheduler and selects the database. Then, depending on the
// scheduler's initialization level (set from the -i / -k options), it
// either runs a new crawl or only shows the reports for an existing
// database.
//
// Parameters:
//    ac, av - argument count and argument vector of the process.
//
// Exit status: control falls off the end of main() on normal completion,
// which is equivalent to returning 0. Fatal errors leave through
// reportError(), which calls exit(1); -h, -r and unrecognized options leave
// through usage() or version(), which call exit(0).
int main(int ac, char **av)
{

///////
   //	Local variables
///////

   String configFile = DEFAULT_CONFIG_FILE;	// Configuration file
   Configuration config;   // Configuration dictionary
   
   int c;                  // Option character returned by getopt
   String Start_URL = 0;   // Start URL (read from the configuration below)
   String DB_Name = 0;     // Database name (-D option or configuration)

   // Short options accepted on the command line; a ':' after a letter
   // means that the option requires an argument (-c and -D).
   const String options_list="vsikc:D:hr";

   // Set the global pointer to the main scheduler object
   // NOTE(review): a plain new-expression normally throws instead of
   // returning null, so this check is presumably never taken - confirm.
   if (! (main_scheduler = new Scheduler()))
      reportError("Scheduler creation failed");

#ifdef HAVE_GETOPT_H
   // Long option aliases: --help maps to 'h' and --version maps to 'r'.
   // The all-zero entry terminates the table.
   const struct option long_options[] =
   {
      { "help", 0, 0, 'h'},
      { "version", 0, 0, 'r'},
      { 0, 0, 0, 0},
   };
#endif

   int stats = 0;               // Incremented once per -s
   int erase = 0;               // Set to 1 by -i and by -k
   bool drop_database = true;   // Cleared by -k

///////
   //	Set the system information
///////

   // Done before option parsing because usage() and version() print SysInfo
   if (!set_sys_info())
      SysInfo = "not recognized system";

///////
   //	Retrieving options from command line with getopt
///////

   // Only the loop header depends on HAVE_GETOPT_H; the body is shared.
#ifdef HAVE_GETOPT_H
   while((c = getopt_long(ac, av, (const char *)options_list, long_options, 0)) != -1)
#else
   while((c = getopt(ac, av, (const char *)options_list)) != -1)
#endif
   {
      switch (c)
      {
         // Help: usage() calls exit(0), so the break is never reached
         case 'h':
            usage();
            break;
         // Version: version() calls exit(0), so the break is never reached
         case 'r':
            version();
            break;
         // Each -v raises the verbosity by one
         case 'v':
            debug++;
            break;
         // Each -s raises the statistics level by one
         case 's':
            stats++;
            break;
         // Alternative configuration file
         case 'c':
            configFile=optarg;
            break;
         // Initialize; drop_database keeps its default (true)
         case 'i':
            erase=1;
            break;
         case 'k':
            erase=1; // drop the tables, but ...
            drop_database=false; // ... don't drop the database
            break;
         // The -U option (start URL from the command line) is disabled
         //case 'U':
            //Start_URL=optarg;
            //break;
         // Database name given on the command line
         case 'D':
            DB_Name=optarg;
            break;
         // getopt reports an unrecognized option or a missing argument
         case '?':
            usage();
            break;
      }
   }

   if(debug>0)
   {
      cout << "ht://Check " << VERSION << endl;
      cout << "Initialization" << endl;
   }


///////
   //	Default configuration
///////

   if(debug>0)
      cout << " Assigning configuration default values" << endl;

   // Defaults are loaded first; config.Read() is called afterwards
   config.Defaults(defaults);


///////
   //	Configuration file reading
///////

   if(debug>0)
      cout << " Reading configuration file " << configFile << endl;

   // Abort early when the configuration file is not readable
   if(access((char *)configFile, R_OK) < 0)
   {
      reportError(form("Unable to find configuration file '%s'", configFile.get()));
   }

   config.Read(configFile);

///////
   //   Set Options
///////

   if(debug>0)
      cout << " Setting options " << endl;

   // Set the argument list info
   main_scheduler->SetArgumentList(&ac, &av);

   // Set the debug level of the scheduler
   main_scheduler->SetDebugLevel(debug);

   // Set the scheduler stats level
   main_scheduler->SetStatsLevel(stats);

   // Set the scheduler initialization level
   main_scheduler->SetInitializationLevel(erase);

   // Set the scheduler flag for keeping (or dropping) the existing database
   main_scheduler->SetDropDatabase(drop_database);

   // Set the scheduler options
   main_scheduler->SetOptions(config);

   // Set the User agent string to be used by the spider.
   // The result has the form "<user_agent>/<VERSION> (<SysInfo>)".
   UserAgent = config["user_agent"];
   UserAgent << "/" << VERSION
      << " (" << SysInfo << ")";
   main_scheduler->SetUserAgent(UserAgent);

   // Catch the initial URL list
   // (nothing above assigns Start_URL, since the -U option is disabled)
   if (Start_URL.length()==0)
      Start_URL=config["start_url"];

   // Determine the name of the Database
   // (a name given with -D takes precedence over the configuration)
   if (DB_Name.length()==0)
      DB_Name=config["db_name"];

   // Checks for a database name prepend string and, when one is
   // configured, puts it in front of the database name
   if (config["db_name_prepend"].length())
   {
      String db_name_tmp = config["db_name_prepend"];
      db_name_tmp << DB_Name;
      DB_Name = db_name_tmp;
   }
      


///////
   //   Initialization
///////

   // Create the database
   
   switch (main_scheduler->SelectDatabase(DB_Name))
   {
      // Database error (reportError() exits, the break is never reached)
      case (Scheduler::Scheduler_DBError):
         main_scheduler->GetDB()->DisplayError();
         reportError("Database error");
         break;
         
      // Memory error: the message is taken from errno
      case (Scheduler::Scheduler_MemoryError):
         reportError(strerror(errno));
         break;
         
      // All right
      case (Scheduler::Scheduler_Interrupted):
      case (Scheduler::Scheduler_OK):
         break;
   }


   if(debug>0 || stats>0)
      cout << "Started ht://Check-ing     "
         << main_scheduler->GetStartTime()->GetAscTime() << endl;

   if (main_scheduler->GetInitializationLevel())
   {
   
      // We gotta perform a new dig.
   
      main_scheduler->Initial(Start_URL);

      if(debug>0)
         cout << "Ready to start the 'crawl'" << endl;

      // Set the signal masks

      struct sigaction action;
      struct sigaction old_action;

      // Both structures are zero-filled before use
      memset((char*)&action, '\0', sizeof(struct sigaction));
      memset((char*)&old_action, '\0', sizeof(struct sigaction));
   
      // SIGINT is routed to htcheck_exit(), which asks the scheduler to stop
      action.sa_handler = htcheck_exit;

      // The previous disposition is stored in old_action but is not
      // restored anywhere in this function
      sigaction(SIGINT, &action, &old_action);

      // Start the process
   
      switch (main_scheduler->Run())
      {
         // Database error (reportError() exits, the break is never reached)
         case (Scheduler::Scheduler_DBError):
            main_scheduler->GetDB()->DisplayError();
            reportError("Database error");
            break;
         
         // Memory error: the message is taken from errno
         case (Scheduler::Scheduler_MemoryError):
            reportError(strerror(errno));
            break;
         
         // Interrupted: no reports are shown in this case
         case (Scheduler::Scheduler_Interrupted):
            cout << "Interrupted ... Closing gracefully" << endl;
            break;
         
         // All right: the reports are shown only when -s was given
         case (Scheduler::Scheduler_OK):
            if (stats>0) ShowInfo(config);
            break;
      }
   }
   else
   {
      // We use a previous database. We only show again the results.

      if(debug>0)
         cout << "Using existent database " << DB_Name << endl;

      // Here the reports are shown regardless of the -s option
      ShowInfo(config);
      
      // The finish time is set explicitly on this path, since Run() is
      // not called
      main_scheduler->SetFinishTime();
   
   }
      
   // That's the end

   if(debug>0 || stats>0)
   {
      cout << "Finished ht://Check-ing    "
         << main_scheduler->GetFinishTime()->GetAscTime() << endl;

      int seconds = main_scheduler->GetRunningTime();
      cout << "ht://Check running for " << seconds << " seconds";

      // From one minute on, also print an "(H hrs M min S sec)" breakdown;
      // the hours and minutes parts are left out when they are zero
      if (seconds >= 60)
      {
      
         int hours = seconds / 3600;
         int seconds2 = seconds - hours * 3600;   // seconds left after whole hours
         int minutes = seconds2 / 60;
         seconds2 -= minutes * 60;                // seconds left after whole minutes

         String SecStr = " (";

         if (hours) SecStr << hours << " hrs ";
         if (minutes) SecStr << minutes << " min ";
         SecStr << seconds2 << " sec)";

         cout << SecStr;

      }
      
      cout << endl;
      
   }

   // Frees the main scheduler object - dynamically created
   if (main_scheduler)
      delete main_scheduler;

   // No explicit return statement: reaching the end of main() returns 0
}


// Prints the command line help to standard output and terminates the
// program with exit status 0. Never returns to the caller.
//
// The output consists of the synopsis line, a line with the program
// version and the system information, and the description of each option.
// The synopsis lists every short flag accepted by main()'s getopt string
// ("vsikc:D:hr"), including -k.
void usage()
{
   // Synopsis
   cout << "usage: htcheck  [-iksvhr] [-c configfile] [-D dbname]"
      << " [--help] [--version]" << endl;

   // Version and system information
   cout << "ht://Check " << VERSION
      << " - " << SysInfo << endl << endl;

   cout << "Options:" << endl;

   // Flags without an argument
   cout << "\t-v\tVerbose mode (more 'v's increment verbosity)"
      << endl << endl;

   cout << "\t-s\tStatistics (broken links, etc...) available"
      << endl << endl;

   cout << "\t-i\tInitialize the database (completely drop a previous db)"
      << endl << endl;

   cout << "\t-k\tInitialize the database (drop tables, keep the db)"
      << endl << endl;

   // Options taking an argument
   cout << "\t-c configfile" << endl;
   cout << "\t\tConfiguration file" << endl << endl;

   cout << "\t-D dbname" << endl;
   cout << "\t\tName of the database" << endl << endl;

   // Informational options, each with its short form
   cout << "\t--help\tDisplay this"
      << endl;
   cout << "\t-h\tSame as --help"
      << endl << endl;

   cout << "\t--version\tDisplay version"
      << endl;
   cout << "\t-r\tSame as --version"
      << endl << endl;

   exit(0);
}

// Prints the program name, its version and the system information on one
// line of standard output, then terminates the program with exit status 0.
// Never returns to the caller.
void version()
{
   cout << "ht://Check ";
   cout << VERSION;
   cout << " - ";
   cout << SysInfo;
   cout << endl;

   exit(0);
}

// Reports a fatal error and terminates the program.
//
// Writes "! htcheck: " followed by msg and an empty line to standard
// output, then calls exit(1). Never returns to the caller.
//
//    msg - text describing the error; it is streamed as it is.
void reportError(char *msg)
{
   cout << "! htcheck: ";
   cout << msg;
   cout << "\n\n";

   exit(1);
}

// Handler that main() installs for SIGINT before starting the crawl.
//
// Tells the user that the interruption was noticed and asks the global
// scheduler to stop. The signal number argument is not used.
//
// NOTE(review): stream output is not among the functions POSIX lists as
// async-signal-safe, and whether Scheduler::Stop() may be called from a
// signal handler cannot be told from this file - confirm.
void htcheck_exit(int)
{
   cout << "Program interrupted. Please wait for a graceful close.";
   cout << endl;

   main_scheduler->Stop();
}

// Checks the outcome of one of the scheduler's report calls and terminates
// the program when it signals a failure.
//
//    status - value returned by the report call.
//
// On Scheduler::Scheduler_DBError the database error is displayed and the
// program exits through reportError("Database error"); on
// Scheduler::Scheduler_MemoryError it exits through reportError() with the
// message for the current errno. Any other value (Scheduler_Interrupted and
// Scheduler_OK in particular) lets the caller continue.
static void abortOnReportFailure(int status)
{
   switch (status)
   {
      // Database error
      case (Scheduler::Scheduler_DBError):
         main_scheduler->GetDB()->DisplayError();
         reportError("Database error");
         break;

      // Memory error
      case (Scheduler::Scheduler_MemoryError):
         reportError(strerror(errno));
         break;

      // All right
      default:
         break;
   }
}

// Shows the information retrieved so far by asking the main scheduler for
// each report in turn.
//
//    config - configuration dictionary; only the boolean
//             "summary_anchor_not_found" is read here, to decide whether
//             the anchors report is requested.
//
// Does not return if a report call signals a database or memory error
// (see abortOnReportFailure() above).
void ShowInfo(const Configuration &config)
{
   // Status Codes
   abortOnReportFailure(main_scheduler->ShowStatusCode());

   // Broken Links
   abortOnReportFailure(main_scheduler->ShowBrokenLinks());

   // Broken Anchors (only when enabled in the configuration)
   if (config.Boolean("summary_anchor_not_found"))
   {
      abortOnReportFailure(main_scheduler->ShowAnchorNotFound());
   }

   // ContentTypes of successfully retrieved or checked Urls
   abortOnReportFailure(main_scheduler->ShowContentTypesPerServer());
}

// Stores a description of the host system in the global SysInfo string,
// in the form "<sysname> <release> <machine>" as reported by uname().
//
// Returns true on success. Returns false, leaving SysInfo as it was, when
// <sys/utsname.h> was not available at build time or when uname() fails.
bool set_sys_info()
{

#ifdef HAVE_SYS_UTSNAME_H

   struct utsname host;

   // uname() signals a failure by returning -1
   if (uname (&host) != -1)
   {
      // Discard the previous content before appending the three fields
      SysInfo.trunc();

      SysInfo << host.sysname << " "
         << host.release << " "
         << host.machine;

      return true;
   }

   return false;

#else

   // Built without uname() support
   return false;

#endif

}
