#!/usr/bin/perl -w
#----------------------------------------------------------------------------
# This script grabs the web pages associated with arachNIDS IDS numbers, and
# extracts the reference information associated with them.  Current references
# include CVE (and CAN) and Bugtraq identifiers. It then grabs the latest
# vision.rules file from www.whitehats.com, and appends the reference
# information to the rules in that file.  The output is written to the file
# named by the -l option.
#
# Usage: 
# 
# fetch_arachNIDS.pl -l <file> [-s <start number>] [-I] [-m <max number file>]
#
#    Options: -l <file>            : Write output to the file <file>
#             -s <start number>    : IDS number to start retrieving rules for
#                                    in ascending order.
#             -I                   : Insert IDMEF information in the rules.
#             -m <max number file> : The file that contains the highest
#                                    IDS number that was received from the
#                                    last execution of this script.  If
#                                    the file does not exist, it will be
#                                    created.  Regardless, the highest IDS
#                                    number received during this execution
#                                    will be placed into the file before
#                                    exit.
#
#
# The -m option may require a little more explanation.  It is basically
# used to store the IDS number of the last rule received (n), so that the
# next time the script is run, querying resumes at rule number n rather than
# starting over at 1.  This way, we can avoid downloading, parsing, and
# constructing rules that we already have.
#
# Note: This script downloads and searches web pages for patterns.
#       Consequently, this is a time consuming process.
#
# Joe McAlerney, Silicon Defense, joey@silicondefense.com
#-----------------------------------------------------------------------------

#$count = $ARGV[0];
#$idmef = $ARGV[1];

# Turn on autoflush for STDOUT so messages appear as soon as they are printed.
$| = 1;

# Remote locations: the complete rule file, and the prefix to which an IDS
# number is appended to form the address of that record's web page.
$vision_rules_url = 'http://www.whitehats.com/ids/vision.rules';
$url_head         = 'http://www.whitehats.com/IDS/';

# Keyword/value text added to a rule when IDMEF output is requested (-I):
# one form for web based rules, one for everything else.
$idmef_web     = ' idmef: web;';
$idmef_default = ' idmef: default;';

# Working storage: reference text keyed by IDS number, and the scratch
# string used while the text for a single IDS number is being assembled.
%ref_hash = ();
$data     = '';

# Settings and their defaults; -I and -l on the command line override the
# last two.
($debug, $idmef, $log_to) = (0, 0, '');

# Parse the command line.  With no arguments at all, show the usage text and
# stop.
unless(@ARGV) {
  usage();
  exit;
}

# Loop on the number of remaining arguments rather than on the truth of the
# next one, so that an argument of "0" or "" does not end parsing early.
while(@ARGV)
{
    $arg = shift @ARGV;
    my $problem;             # set when the current option cannot be accepted

    if($arg eq '-s') {       # Grab the start number
        $count = shift @ARGV;
        # The start number is later placed into a URL that is passed to
        # lynx through the shell, so only a plain non-negative integer is
        # accepted here.
        unless(defined $count && $count =~ /^\d+\z/) {
            $problem = "Option -s requires a numeric start number.";
        }
    }
    elsif($arg eq '-I') {    # Are we building rules for the log_idmef plugin?
        $idmef = 1;
    }
    elsif($arg eq '-l') {    # File to write the new rules to
        $log_to = shift @ARGV;
        unless(defined $log_to && $log_to ne '') {
            $problem = "Option -l requires a file name.";
        }
    }
    elsif($arg eq '-m') {    # File holding the highest IDS number received
        $max_IDS_num_file = shift @ARGV; # The -s value will take precedence
        unless(defined $max_IDS_num_file && $max_IDS_num_file ne '') {
            $problem = "Option -m requires a file name.";
        }
    }
    else {
        $problem = "Invalid command line option \'$arg\'.";
    }

    if(defined $problem) {
        print "$problem\n\n";
        usage();
        exit;
    }
}

# Do some initial setup with the arguments

# The three-argument form of open keeps the file name from being read as part
# of the open mode; $! is reported so the reason for a failure is visible.
open(OUT, '>', $log_to) || die "ERROR: Can not open output file $log_to: $!\n";

# Open the max IDS number file and try to extract the last number.  If it
# exists, and -s wasn't one of the arguments to this script, assign the last
# max IDS number to $count.  The "+>>" mode creates the file when it is
# missing and does not truncate an existing one; the seek moves back to the
# beginning so the first line can be read.
if(defined $max_IDS_num_file) {
   open(MAX, '+>>', $max_IDS_num_file) || die "ERROR: Can not open max IDS number file $max_IDS_num_file: $!\n";
   seek(MAX,0,0);
   $last_max = <MAX>;
   close(MAX);

   if(defined $last_max && !defined $count) {
       # Only a bare integer is accepted; surrounding whitespace (such as a
       # trailing newline) is dropped.  The value ends up in a URL given to
       # lynx through the shell and is used as a hash key, so anything else
       # is ignored.  Retrieval resumes at the stored number itself.
       if($last_max =~ /^\s*(\d+)\s*$/) {
           $count = $1;
       }
       else {
           warn "WARNING: Ignoring non-numeric contents of max IDS number file $max_IDS_num_file\n";
       }
   }
}

# If -s wasn't used, or there was no usable stored max IDS number
unless(defined $count) {
  $count = 1;
}
# Grab the latest vision.rules file from whitehats.com and store it in a file
# appropriately named vision.rules

if($debug) { print "\n\nGetting vision rules...\n"; }
$vision_rules = `lynx -source $vision_rules_url`;

# An empty result means lynx could not be run or returned nothing.  Stop
# here rather than carry on with an empty rule set.
if(!defined $vision_rules || $vision_rules eq "") {
   die "ERROR: Could not retrieve $vision_rules_url\n";
}

# "+>" opens the local copy for both writing and reading; after the write,
# the seek moves back to the start so the copy can be scanned.
open(VISION_RULES, '+>', './vision.rules') || die "Can't open vision.rules\n";
print VISION_RULES $vision_rules;
seek(VISION_RULES, 0, 0);

# Now find the largest IDS number.  $1 is only consulted when the match on
# the current line succeeded.
$max = 0;
while(<VISION_RULES>) {
   if(/IDS(\d+)/ && $1 > $max) { $max = $1; }
}
seek(VISION_RULES, 0, 0);            # reset back to the first line.

# Nothing to do when the starting number has already reached (or passed) the
# largest number in the rule file.  die ends the script immediately, so the
# max IDS number file is left untouched and the next run starts from the same
# place.
if($count >= $max){
   die "There are no new records in the arachNIDS database.\n";
}

# Fill our hash with reference information

# $count is interpolated into a shell command line below and is used as the
# key into %ref_hash, so make sure it is a bare non-negative integer first.
# Surrounding whitespace (for example a trailing newline) is dropped so the
# key has the same form as the numbers extracted from the rules.
unless(defined $count && $count =~ /^\s*(\d+)\s*$/) {
   die "ERROR: The starting IDS number is not a non-negative integer\n";
}
$count = $1 + 0;

# Walk every number from $count up to $max.  The continue block advances the
# counter on every pass, including those cut short with "next".
LOOP: while ($count <= $max) {

  $url = "$url_head$count";
  $text_data = `lynx -dump $url`;
  $text_data = "" unless defined $text_data;   # lynx could not be started

  # Not all numbers are IDS numbers
  if($text_data =~ /Problem:\sCan't\sopen\srecord/) {
     if($debug) { print "IDS number url returned can't open record\n"; }
     next LOOP;                                      # Ok, try the next one
  }

  # Every record gets an arachNIDS reference; CVE and Bugtraq references are
  # added only when the page text contains them.
  $data = " reference: arachNIDS,IDS$count;";

  if($text_data =~ /CVE\s.+(C\w{2}\-.+)\n/)          # Look for CVE information
  {
    $data .= " reference: cve,$1;";
  }

  if($text_data =~ /BUGTRAQ\sID.*BugtraqID\s*(\d+)\n/) # Bugtraq information
  {
    $data .= " reference: bugtraq,$1;";
  }

  $ref_hash{$count} = $data;
}
continue {
  $count++;
}

# Read the local copy of vision.rules again and write out every rule for
# which reference information was collected, with that information inserted
# just before the rule's closing parenthesis.  $max_rule_num tracks the
# highest IDS number written.
$max_rule_num = 0;

while(<VISION_RULES>) {
    $rule = $_;
    ($rule_num) = $rule =~ /IDS(\d+)/;

    if(!defined $rule_num) {          # weeding out things that are not rules
       if($debug) { print "rule_num not defined for RULE:\n\t$rule\n"; }
    }
    elsif(!exists $ref_hash{$rule_num}) {
       # No reference text was gathered for this number (for example, it is
       # below the starting number), so the rule is not written out.
       if($debug) { print "IDS$rule_num does not exist in ref_hash for RULE:\n\t$rule\n"; }
    }
    else {
       # Text to insert: the references, followed by the idmef keyword and
       # value when -I was given.
       my $addition = $ref_hash{$rule_num};
       if($idmef) {
          if($rule =~ /WEB|ColdFusion|FrontPage|IIS/i) { # web based rules
             $addition .= $idmef_web;
          }
          else {
             $addition .= $idmef_default;
          }
       }

       # Insert before the LAST ")" on the line.  Matching the first one
       # would put the text in the wrong place whenever an earlier part of
       # the rule contains a ")" of its own.
       $rule =~ s/\)([^)]*)\z/${addition})$1/;

       if($rule_num > $max_rule_num) { $max_rule_num = $rule_num; }
       print OUT $rule;                 # output the new rule
    }

    if($debug) { print "------------------------------------------------\n"; }
}

clean_exit();




################### Subroutines #################
#################################################

# clean_exit
#
# Closes the VISION_RULES and OUT handles and, when a max IDS number file was
# named with -m, stores the highest IDS number written during this run in it.
#
# The file is only opened for writing when there is a number to store.
# Opening it with ">" truncates it, so opening it first and deciding
# afterwards would wipe the previously stored number on a run that wrote no
# rules.
#
# Takes no arguments.  Reads the globals $max_IDS_num_file and $max_rule_num.
# Dies if the max IDS number file cannot be opened for writing.
sub clean_exit {
  close(VISION_RULES);
  close(OUT);

  # $max_rule_num may still be undefined if no rule was processed.
  if(defined $max_IDS_num_file && defined $max_rule_num && $max_rule_num > 0) {
     open(MAX, '>', $max_IDS_num_file) || die "ERROR: Can not open max IDS number file $max_IDS_num_file\n";
     print MAX $max_rule_num;
     close(MAX);
  }
}

# usage
#
# Prints the command line synopsis and the description of each option to
# standard output.  Takes no arguments.
sub usage {
   # One entry per output line; a newline is added to each when printing.
   my @help_lines = (
      'USAGE: ',
      '',
      '# fetch_arachNIDS.pl -l <file> [-s <start number>] [-I] [-m <max number file>]',
      '',
      ' Options:',
      '            -l <file>            : Write output to the file <file>',
      '            -s <start number>    : IDS number to start retrieving rules',
      '                                   for in ascending order.',
      '            -I                   : Insert IDMEF information in the rules.',
      '            -m <max number file> : The file that contains the highest',
      '                                   IDS number that was received from the',
      '                                   last execution of this script.  If',
      '                                   the file does not exist, it will be',
      '                                   created.  Regardless, the highest IDS',
      '                                   number received during this execution',
      '                                   will be placed into the file before',
      '                                   exit.',
   );
   print join('', map { "$_\n" } @help_lines);
}

