#!/usr/bin/perl
$|=1;
# Copyright 1999 Jose M. Vidal
# Jose M. Vidal, vidal@multiagent.com, http://jmvidal.ece.sc.edu
#
# This program is free software.  You can redistribute it and/or modify
# it under the terms of the GNU General Public License
#
# $Id: search.pl,v 1.29 2000/05/24 14:29:54 jmvidal Exp $

use CGI param, header, escape;
use Fcntl;

## ---- Site configuration --------------------------------------------------
$dbase      = "urls.db";                       ## Default db file, used when no "db" parameter is sent.
$otherbase  = "/etc/bk2site/searchbase.html";  ## Name of the template file the results are spliced into.
$searchprog = "/cgi-bin/bk2site/search.pl";    ## CHANGE this if your program is somewhere else.

## ---- Request parameters --------------------------------------------------
## Everything returned by param() comes straight from the request, so each
## value is validated against a fully anchored pattern (\A ... \z) and is
## HTML-escaped before it is echoed back in an error message.
print header();

$database = param("db");      ## Name of db file.
if ($database eq "") {
  $database = $dbase;
};
## The WHOLE value must consist of word characters, dots and slashes.
## NOTE(review): "../" and absolute paths are still accepted, as before;
## tighten this if the database must live under a fixed directory.
unless ($database =~ /\A[\w.\/]+\z/ ) {
  print "Bad database " . CGI::escapeHTML($database) . " name";
  exit;
}

$query = param("q");

## Page size: a positive integer.  Zero is rejected because the pager at
## the bottom of the page advances by $num per step.
$num = param("num");
if ($num eq "") {
  $num = 20;
};
unless (($num =~ /\A[0-9]+\z/) && ($num > 0)) {
  print "Bad num";
  exit;
}

## Index of the first result to show.  It may be absent (the default is
## applied later, just before the results are printed), otherwise it must
## be all digits.
$stq = param("stq");
unless (($stq eq "") || ($stq =~ /\A[0-9]+\z/)) {
  print "Bad stq " . CGI::escapeHTML($stq);
  exit;
}

## URL-encoded copy of the query, for use inside links.
$escquery = escape($query);

# Open the database strictly read-only.  sysopen() is used rather than
# two-argument open() because $database comes from the request and
# two-argument open() would interpret mode characters or pipes in it.
sysopen(DB, $database, O_RDONLY) ||
  die ("Can't open database(urls.db)= $database: $!");

# The first line identifies the file format and decides how many lines make
# up one record: 10 with a trailing metadata line, 9 without.
# chomp() (not chop()) is used for all header lines so that a line without a
# trailing newline does not lose its last real character.
$checkDB = <DB>;
chomp($checkDB);
$usingmetadata = 0;
if ($checkDB eq "#bk2site urls.db generated file metadata") {
  $numlines = 10;
  $usingmetadata = 1;
}
elsif ($checkDB eq "#bk2site urls.db generated file") {
  $numlines = 9;
}
else {
  die ("Invalid database: $database");
}

# Three more header lines follow the signature:
#   - the HTML template used for each site hit,
#   - the HTML substituted for %NEW on recently created entries,
#   - the number of days an entry counts as "new".
$urltemplate = <DB>;
chomp($urltemplate);
$newgif = <DB>;
chomp($newgif);
$timecutoff = <DB>;
chomp($timecutoff);

# Entries created after this epoch time get the %NEW markup.
$oldesttime = time - $timecutoff * 86400;

# Open the page template.  Three-argument open() keeps the file name from
# ever being parsed for a mode, and $! tells the log why the open failed.
open(OTHERBASE, '<', $otherbase) ||
  die ("Can't open $otherbase: $!");

# Collect every directive named in the per-site template, i.e. the first
# argument of each %IFCOMHAS(directive)(replacement).  A scratch copy is
# consumed one occurrence at a time so $urltemplate itself stays intact.
$temp = $urltemplate;
while ( $temp =~ /%IFCOMHAS\((.*?)\)\((.*?)\)/ ){
  push @directives, $1;
  $temp =~ s/%IFCOMHAS\(.*?\)\(.*?\)//;
};


# Copy the template up to the <!--bk2site--> marker, filling in the
# placeholders on the way.  Request-supplied values are HTML-escaped first:
# inserting them raw would let a crafted query inject markup into the page
# (or even fake the <!--bk2site--> marker itself).
$htmlquery    = CGI::escapeHTML($query);
$htmlnum      = CGI::escapeHTML($num);
$htmldatabase = CGI::escapeHTML($database);

p1: while (<OTHERBASE>) {
  s/%QUERY/$htmlquery/g;       # query as typed, safe for HTML text/attributes
  s/%ESCQUERY/$escquery/g;     # URL-encoded query, for links
  s/%NUMBER/$htmlnum/g;        # results per page
  s/<!--bk2site:search-->/$htmldatabase/g;
  if (/(.*)<!--bk2site-->(.*)/){
    # Text before the marker goes out now; text after it on the same line
    # is kept (without its newline) and printed once the results are done.
    print "$1";
    $afterComment = $2;
    last p1;
  }
  print;
}
print "<!-- Code below was automatically generated by bk2site-->\n";

# Slurp the rest of the database.  Every record occupies $numlines
# consecutive lines, so field F of record R lives at index R*$numlines + F.
@allRecords = <DB>;
## 0 is the type
## 1 is the ParentTitle
## 2 is the url or relative dir
## 3 is the Title
## 4 is the comment
## 5 is the creation time
## 6 is the last modified time
## 7 is the last visit time
## 8 is the number of user hits
## 9 is meta data (only present in the "metadata" file format)

$typeN = 0;
$parentTitleN = 1;
$urlN = 2;
$titleN = 3;
$commentN = 4;
$creationtimeN = 5;
$modifiedtimeN = 6;
$visittimeN = 7;
$urlhitsN = 8;
$metaN = 9;

# Count whole records only: a truncated file (or stray trailing lines) must
# not produce a fractional count that makes later loops visit a partial
# record.
$numRecords = int(($#allRecords + 1)/$numlines);

# Start every record with zero keyword hits and the identity ordering.
# The bound is "<" so that no phantom entry past the last record is created.
for ($i=0; $i < $numRecords; ++$i){
  $numHits[$i] = 0; ##keyword hits
  $order[$i] = $i; ## given them an initial ordering
}

$categoryMatches = 0;
$siteMatches = 0;

# Score every record against the query.  For each record:
#   1. directives (the %IFCOMHAS names) are cut out of the comment (or,
#      failing that, the metadata) so they are not highlighted as text,
#   2. every query word of two or more characters is counted and wrapped
#      in <B>..</B> in the title and the comment,
#   3. the highlighted title/comment are stored back into @allRecords, the
#      comment with its directives re-appended for the rendering stage.
# All user and template text is passed through quotemeta() before it is
# used as a pattern, so characters such as ( [ | \ can neither break the
# regex nor be interpreted as regex syntax.
if (length($query) > 1) {
  # The word list does not depend on the record, so build it once;
  # one-character words are ignored.
  @words = grep { length($_) >= 2 } split(/ +/, $query);

  for ($i=0; $i < $numRecords; ++$i){ #do search
    $cr = ($i * $numlines);
    $title = $allRecords[$cr + $titleN];
    $comment = $allRecords[$cr + $commentN];
    $comment =~ s/<BR>/ /g;
    if ($usingmetadata){
      $meta = $allRecords[$cr + $metaN];
    };

    @hasdirectives = ();
    foreach $dir (@directives) { #directives are strings that appear %IFCOMHAS(here)(new stuff)
      # An empty pattern would make Perl reuse the last successful regex.
      next if $dir eq "";
      $temp = quotemeta($dir); #escape all nasties, to allow *cool* to be a directive
      $found = ($comment =~ s/$temp//g); #take *cool* out of comment and place it in hasdirectives.
      if (!$found && $usingmetadata) {
        $found = ($meta =~ s/$temp//g);
      };
      if ($found) {
        push @hasdirectives, $dir;
      };
    };

    foreach $q2 (@words) {
      $oldNumHits = $numHits[$i];

      foreach $dir (@hasdirectives){ #search also on the directives that were taken out of the comment
        if ($q2 eq $dir) {
          $numHits[$i]++;
        }
      };

      $q = quotemeta($q2); #escape all nasties

      # s///g yields the number of replacements, which is the hit count.
      $numHits[$i] += ($title =~ s/($q)/<B>$1<\/B>/gi);

      if ($usingmetadata){
        # The metadata is only consulted when the comment had no hit.
        $numHits[$i] += ($comment =~ s/($q)/<B>$1<\/B>/gi || $meta =~ s/($q)/<B>$1<\/B>/gi);
      }
      else {
        $numHits[$i] += ($comment =~ s/($q)/<B>$1<\/B>/gi);
      };

      # Count the record once, at the moment it receives its first hit.
      if (($oldNumHits == 0) && ($numHits[$i] > 0)) {
        if (($allRecords[$cr + $typeN]) eq "FOLDER\n") {
          $categoryMatches++;
        }
        elsif (($allRecords[$cr + $typeN]) eq "LEAF\n") {
          $siteMatches++;
        }
        else {
          print "ERROR: bad format in database file\n";
          exit;
        };
      };
    };

    $allRecords[$cr + $titleN] = $title;
    foreach $dir (@hasdirectives) {
      $comment .= $dir . " ";
    };
    $allRecords[$cr + $commentN] = $comment;
  };
};

# Rank the records: most keyword hits first.
@order = sort {$numHits[$b] <=> $numHits[$a]} @order;

# Results are numbered from 1.  A missing start index, a stq of 0 or any
# value that is not a positive number all mean "start at the first result",
# so the "Showing" line below can never start at 0.
if (($stq eq "") || ($stq < 1)) {
  $stq = 1;
};

# Summary line; the "Showing a -- b" part only appears when the matches do
# not fit on a single page.
$totalMatches = $categoryMatches + $siteMatches;
print "<CENTER><B>$categoryMatches category matches and $siteMatches site matches.";
if ($totalMatches > $num) {
  $endpoint = $stq + $num;
  # The last result shown is capped at the total number of matches.
  $endpointm1 = ($totalMatches < $endpoint) ? $totalMatches : ($endpoint - 1);
  print " Showing $stq -- $endpointm1.";
}
print "</B></CENTER>\n";

$count = 0;    # running index of the matches walked so far
$printed = 0;  # number of results written to this page

# List the matching categories (FOLDER records), best match first.  Matches
# before index $stq are counted but not printed; once $num results have been
# printed the page is full and control jumps to the pager.
if (($categoryMatches > 0) && ($categoryMatches >= $stq)) {
  print "<B>Category Matches: $categoryMatches</B><BR>\n<UL>\n";
 cont1:  for ($i=0; $i < $numRecords; ++$i){
    $rn = $order[$i];
    $base = $rn * $numlines;
    next unless (($numHits[$rn] > 0) && ($allRecords[$base + $typeN] eq "FOLDER\n"));

    $count++;
    next if ($count < $stq);

    $pt = $allRecords[$base + $parentTitleN];
    chop($pt);
    $url = $allRecords[$base + $urlN];
    $url =~ s/\?/%3F/g;
    chop($url);
    $title = $allRecords[$base + $titleN];
    chop($title);
#urls for directories have searchtorootpath already prepended.
    print "<LI>$pt<A HREF =\"$url\">$title</A>\n";
    $printed++;
    if ($printed >= $num) {
      # Close the list before leaving; jumping out with the <UL> still open
      # would leave the rest of the page nested inside it.
      print "</UL>\n";
      goto end;
    };
  }
  print "</UL>\n";
}


# Site results continue the numbering of the category results: result
# indices run 1..$categoryMatches for categories and continue from there
# for sites.  When the category list was skipped because this page starts
# beyond it, the running index must still begin after the categories;
# otherwise the first sites of a later page would be miscounted.
$count = $categoryMatches;

# List the matching sites (LEAF records) grouped under their parent title.
# The outer loop finds the best remaining hit; the inner loop then emits
# every hit sharing that parent and clears its hit count so that it is not
# picked again.  The page window ($stq .. $stq+$num-1) is measured against
# the combined category + site numbering.
if (($siteMatches > 0) && (($categoryMatches + $siteMatches) >= $stq)){
  print "<B>Site Matches: $siteMatches</B><BR>\n<UL>\n";
 cont2:  for ($i=0; $i < $numRecords; ++$i){
    $rn = $order[$i];
    if (($numHits[$rn] > 0) && ($allRecords[($rn*$numlines) + $typeN] eq "LEAF\n")) {
      $pt = $allRecords[($rn*$numlines)+$parentTitleN];
      chop($pt);
      $notprinteddir = 1;   # the group heading is printed lazily, see below
      for ($j=0; $j < $numRecords; ++$j){
        $rn = $order[$j];
        $npt = $allRecords[($rn*$numlines)+$parentTitleN];
        chop($npt);
        next unless (($numHits[$rn] > 0)
                     && ($allRecords[($rn*$numlines) + $typeN] eq "LEAF\n")
                     && ($npt eq $pt));

        $numHits[$rn] = 0; #so we dont get chosen again.
        $count++;
        if ($count < $stq) {
          next;
        };
        # Open the group only when its first visible entry is about to be
        # printed, so groups lying entirely before this page print nothing.
        if ($notprinteddir){
          print "<LI>$pt\n<UL>\n";
          $notprinteddir = 0;
        };

        $base = $rn * $numlines;
        $url = $allRecords[$base + $urlN];
        chop($url);
        $title = $allRecords[$base + $titleN];
        chop($title);
        $title =~ tr/_/ /;
        $comment = $allRecords[$base + $commentN];
        chop($comment);
        $numberofhits = $allRecords[$base + $urlhitsN];

        # For each of the three timestamps derive: day of month, month
        # (1-12), two-digit year (zero padded, so 2005 gives "05"),
        # four-digit year, and the full local-time string.
        $creationtime = $allRecords[$base + $creationtimeN];
        ($ctday, $ctmon, $ctyear) = (gmtime $creationtime)[3, 4, 5];
        $ctyearf = $ctyear + 1900;
        $ctyear = sprintf("%02d", $ctyear % 100);
        $ctfull = localtime $creationtime;
        $ctmon++;

        $visittime = $allRecords[$base + $visittimeN];
        ($vtday, $vtmon, $vtyear) = (gmtime $visittime)[3, 4, 5];
        $vtyearf = $vtyear + 1900;
        $vtyear = sprintf("%02d", $vtyear % 100);
        $vtfull = localtime $visittime;
        $vtmon++;

        $modtime = $allRecords[$base + $modifiedtimeN];
        ($mtday, $mtmon, $mtyear) = (gmtime $modtime)[3, 4, 5];
        $mtyearf = $mtyear + 1900;
        $mtyear = sprintf("%02d", $mtyear % 100);
        $mtfull = localtime $modtime;
        $mtmon++;

        # Placeholder => value, applied in exactly this order to both the
        # site template and the %NEW snippet.
        @fills = (
          ['%URL',       $url],
          ['%TITLE',     $title],
          ['%HITS',      $numberofhits],
          ['%DAYCRE',    $ctday],
          ['%DAYVIS',    $vtday],
          ['%DAYMOD',    $mtday],
          ['%MONTHCRE1', $ctmon],
          ['%MONTHVIS1', $vtmon],
          ['%MONTHMOD1', $mtmon],
          ['%YEARCRE',   $ctyear],
          ['%YEARFCRE',  $ctyearf],
          ['%YEARVIS',   $vtyear],
          ['%YEARFVIS',  $vtyearf],
          ['%YEARMOD',   $mtyear],
          ['%YEARFMOD',  $mtyearf],
          ['%TIMEFCRE',  $ctfull],
          ['%TIMEFVIS',  $vtfull],
          ['%TIMEFMOD',  $mtfull],
        );

        $urlhtml = $urltemplate;
        foreach $fill (@fills) {
          ($tag, $value) = @$fill;
          $urlhtml =~ s/$tag/$value/g;
        };

        # Resolve each %IFCOMHAS(directive)(replacement): when the comment
        # contains the directive, the directive is removed from the comment
        # and the replacement is kept; otherwise the construct vanishes.
        # quotemeta() makes the directive match literally whatever
        # punctuation it contains.
        while ( $urlhtml =~ /%IFCOMHAS\((.*?)\)\((.*?)\)/ ){
          $v1 = quotemeta($1);
          $v2 = $2;
          # An empty pattern would make Perl reuse the previous regex, so an
          # empty directive is treated as "not present".
          if (($v1 ne "") && ($comment =~ s/$v1//g)) {
            $urlhtml =~ s/%IFCOMHAS\(.*?\)\(.*?\)/$v2/;
          }
          else {
            $urlhtml =~ s/%IFCOMHAS\(.*?\)\(.*?\)//;
          };
        };

        # Entries created after the cutoff get the filled-in "new" snippet.
        if ($creationtime > $oldesttime) {
          $newgifhtml = $newgif;
          foreach $fill (@fills) {
            ($tag, $value) = @$fill;
            $newgifhtml =~ s/$tag/$value/g;
          };
          $urlhtml =~ s/%NEW/$newgifhtml/g;
        }
        else {
          $urlhtml =~ s/%NEW//g;
        };

        # %CONDDASH becomes a dash only when there is a comment to separate.
        if ($comment ne "") {
          $urlhtml =~ s/%CONDDASH/-/g;
        }
        else {
          $urlhtml =~ s/%CONDDASH//g;
        };
        $urlhtml =~ s/%COMMENT/$comment/g;

        print "<LI>$urlhtml";
        $printed++;
        if ($printed >= $num) {
          # Page is full: close the group list and the outer list.
          print "</UL>\n</UL>\n";
          goto end;
        };
      }
      # Close the group only if it was actually opened on this page.
      unless ($notprinteddir) {
        print "</UL>\n";
      };
    }
  }
  print "</UL>\n";
}

end:

# Pager: one entry per page, labelled with the index of the first result on
# that page (1, 1+$num, 1+2*$num, ...).  The page containing the current
# start index is shown in bold, the others are links.  Result indices are
# 1-based, so each link carries stq = first index of its page; that way
# consecutive pages never overlap.  The pager is skipped for a non-positive
# $num because the loop advances by $num per step.
if ((($siteMatches + $categoryMatches) > $num) && ($num >= 1)){
  print "<center>";
  # Values placed in the link are URL-encoded; "&amp;" is the correct way
  # to write the parameter separator inside an HTML attribute.
  $escnum = escape($num);
  $escdatabase = escape($database);
  # A start index below 1 is displayed as the first page.
  $curstq = ($stq < 1) ? 1 : $stq;
  for ($i =0; $i < ($siteMatches + $categoryMatches); $i += $num){
    $ip1 = $i + 1;
    if (($curstq >= $ip1) && ($curstq < ($ip1 + $num))){
      print " <b>$ip1</b>";
    }
    else {
      print " <a href=\"$searchprog?num=$escnum&amp;q=$escquery&amp;stq=$ip1&amp;db=$escdatabase\">$ip1</a> ";
    }
  }
  print "</center>";
};
print "<!-- Code above was automatically generated by bk2site-->\n";
# Remainder of the template line that held the <!--bk2site--> marker.
print $afterComment;

# Copy the rest of the template, filling in the same placeholders as in the
# part above the results.  Request-supplied values are HTML-escaped so that
# a crafted query cannot inject markup into the page.
$htmlquery = CGI::escapeHTML($query);
$htmlnum   = CGI::escapeHTML($num);

p2: while (<OTHERBASE>){
  s/%QUERY/$htmlquery/g;     # query as typed, safe for HTML text/attributes
  s/%ESCQUERY/$escquery/g;   # URL-encoded query, for links
  s/%NUMBER/$htmlnum/g;      # results per page
  print;
}

# Release both input files.
close(OTHERBASE);
close(DB);
