#!/usr/bin/perl

# snort_alert_parse.pl, distributed as part of Snortsnarf v111500.1
# Author: James Hoagland, Silicon Defense (hoagland@SiliconDefense.com)
# copyright (c) 2000 by Silicon Defense (http://www.silicondefense.com/)
# Released under GNU General Public License, see the COPYING file included
# with the distribution or http://www.silicondefense.com/snortsnarf/ for
# details.

# snort_alert_parse.pl is a file that contains helpful functions for
#   parsing snort alert files and decomposing alerts.

# Portions of this file are based on code in snortsnarf.pl by Stuart
#   Staniford that was based on code written by Joe McAlerney.

# Please send complaints, kudos, and especially improvements and bugfixes to
# hoagland@SiliconDefense.com.  As described in GNU General Public License, no
# warranty is expressed for this program.

# Used by: snortsnarf.pl, sel_to_add.pl, extr_alerts.pl
# Depends on: nada

# version tag of this parsing library, published as a global in package main
$::sap_version = "v111500.1";

# three-letter English month abbreviation => month number (1 through 12);
# used to turn the month names found in syslog and portscan log lines into
# numbers
my %monthnum;
@monthnum{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)}= (1 .. 12);

# next_alert takes a file handle and returns the next snort alert found by
# reading that file handle, along with the format of the alert ('fullalert',
# 'fastalert', 'syslog', or 'portscan'), or returns undef if EOF was
# encountered (i.e., there are no more alerts)
# If possible, this should really auto-detect the format of the alert without
# hints
# Arguments: $fh - a readable file handle; any further arguments are ignored.
# Returns: the list (alert text, format name) for the next alert, or undef
#   once the handle is exhausted.
# Lines are read into a lexical variable, so the caller's $_ (and whatever it
#   may be aliased to, e.g. a foreach element) is never modified.
sub next_alert {
	my($fh)= @_;

	while (defined(my $line= <$fh>)) {
		next if $line =~ /^\s*$/;	  # ignore blank lines
		if ($line =~ /^\s*\[\*\*\]/) { # full alert format
			if ($line =~ /spp_portscan/ || $line =~ /spp_anomsensor: Threshold adjusted/) {
				# non-packet message: it is followed by one more line,
				# which is read and thrown away
				my $discarded= <$fh>;
				next;						 # ok, lets try this again...
			}
			# a full alert extends up to the next blank line (or EOF)
			my $alert= $line;
			while (defined(my $more= <$fh>)) {
				last if $more =~ /^\s*$/;
				$alert.= $more;
			}
			return ($alert,'fullalert');
		}
		if ($line =~ /spp_portscan/ || $line =~ /spp_anomsensor: Threshold adjusted/) {
			next; # only full alert format has second line for non-packet alerts
		}
		if ($line =~ /^\s*\d+\/\d+\-[\d\:\.]+\s+\[\*\*\]/) { # fast alert format
			return ($line,'fastalert');
		}
		#Nov 14 11:39:37 host6 last message repeated 2 times
		next if $line =~ m/^\w+\s+\d+\s+[\d:]+\s+\S+\s+last\s+message\s+repeated/i;
		if ($line =~ /^\w+\s+\d+\s+[\d:]+\s+\S+\s+([^\[]+)\[\d+\]/) { # syslog format
			my $prog= $1;
			next unless $prog =~ /snort/;	# syslog line from some other program
			return ($line,'syslog');
		}
		if ($line =~ /^\w+\s+\d+\s+\d\d\:\d\d:\d\d\s+\d+\.\d+\.\d+\.\d+\:\d+\s+->/) { # portscan log format
			return ($line,'portscan');
		}
		chomp($line);
		warn "unknown alert format for line: $line; skipping\n";
	}
	# EOF; an explicit undef is returned (also in list context) exactly as
	# before, so existing "defined" checks on the first element keep working
	return undef;
}

# retrieve the alerts from the given files and number (position)
# ["file:pos","file:pos"]
# the last argument is reference to a hash encoding information about the
# files mentioned; the filename used in the list is the key and the value is
# a list ref of [the file format, the full file path]
# Arguments: a list of "file:pos" strings (pos being the 1-based number of
#   the alert within that file) followed by the file information hash ref.
# Returns: the list of parsed alerts (hash refs produced by parse_alert),
#   grouped by file and in increasing position order within each file.
# The "file:pos" strings passed in are not modified.  A string without a
#   trailing ":<digits>" is reported with a warning and skipped.
# Dies if a file cannot be opened; warns if a file holds fewer alerts than the
#   highest position asked for.
sub get_alerts_parsed {
	my $fileinfo= pop(@_);
	my %to_get= ();
	foreach my $spec (@_) {
		# split on the last ":<digits>" using captures, so that neither the
		# caller's string nor the global $_ is touched
		if ($spec =~ /^(.*):(\d+)$/s) {
			push(@{$to_get{$1}},$2); # add pos to list for file
		} else {
			warn "ignoring malformed alert location \"$spec\" (expected file:pos)\n";
		}
	}
	my(@alerts)= ();
	foreach my $file (keys %to_get) {
		my ($fileformat,$filepath)= @{$fileinfo->{$file}};
		my (@poss)= sort {$a <=> $b} @{$to_get{$file}};
		# lexical handle and 3-arg open; the defined() test matters because a
		# 3-arg open on an undefined path would open an anonymous temp file
		my $fh;
		unless (defined($filepath) && open($fh,'<',$filepath)) {
			die "could not open $filepath for reading alerts from: $!";
		}
		my $count= 0; # number of alerts read from this file so far
		while (@poss) {
			my ($alert,$format)= next_alert($fh,$fileformat);
			unless (defined($alert)) {
				warn "could not find $poss[0] alerts in $filepath, only $count";
				last;
			}
			$count++;
			# the same position may have been requested more than once; each
			# request gets its own parsed copy
			while (@poss && $poss[0] == $count) {
				shift(@poss);
				push(@alerts,parse_alert($alert,$format));
			}
		}
		close $fh;
	}
	return @alerts;
}

# parses given alert text assuming that it is in a given format ('fullalert',
# 'fastalert', 'syslog', or 'portscan').  A hash reference is returned with
# 'field name' => 'contents' for various fields of interest.  Note that not
# all fields are extracted and recorded, just those we have needed.
# Arguments: $alerttext - the raw text of one alert; $format - its format
#   name.  Any format other than 'fullalert', 'fastalert' or 'syslog' is
#   treated as 'portscan'.
# Fields that may be set in the returned hash: text, format, sig, month, date,
#   time, src, sport, dest, dport, proto, flags and ICMP_type.  Month names
#   found in syslog and portscan lines are converted through %monthnum.
# $_ serves as the working buffer but is localized first, so the caller's $_
#   (and anything it is aliased to) is left untouched.
sub parse_alert {
	my($alerttext,$format)= @_;
	$alerttext =~ s/\s+$//;
	my %alert= ('text' => $alerttext,'format' => $format);
	local $_= $alerttext;
	
	if ($format eq 'fullalert') {
		my(@lines)= split("\n",$alerttext);
		$_= shift(@lines);
		
		# ---- Process the first line -----
		#
		# the first line just holds the attack id
		s/^\[\*\*\]\s*//; s/\s*\[\*\*\]\s*$//;
		$alert{'sig'} = $_;
			# Note: does not handle preprocessor log output

		# ---- Process the second line -----		
		#
		$_= shift(@lines);
				
		# fixed-column timestamp: MM/DD-hh:mm:ss.uuuuuu
		$alert{'month'} = substr($_,0,2);
		$alert{'date'} = substr($_,3,2);
		$alert{'time'} = substr($_,6,15);
		my $remainder =	 substr($_,22,(length $_));	 # grab the rest for regex matching

		my $e_option;
		if ($remainder =~ /^[\dA-Fa-f]+\:/)
		{
			# Looks like an ethernet address - assume -e was set in snort command line
			$e_option = 1;
			# We could parse for ethernet stuff here but we don't 
			# feel like it right now.
		}
		else
		{
			# No -e option
			$e_option = 0;
			# turn "src:port -> dest:port" into "src:port-dest:port" and split it
			$remainder =~ s/ \-\> /-/; 
			my ($source,$destination) = split('-',$remainder);
			($alert{'src'},$alert{'sport'}) = split(':',$source);
			($alert{'dest'},$alert{'dport'}) = split(':',$destination);
		}
		
		# ---- Process the third line -----
		#
		$_= shift(@lines);
				
		if($e_option)
		{
			# Ethernet stuff was on the previous line and now the IP source
			# and destination are here at the beginning of the third line.
			($alert{'src'},$alert{'sport'},$alert{'dest'},$alert{'dport'},$remainder) = /^(\d+\.\d+\.\d+\.\d+)\:(\d+)\s+\-\>\s+(\d+\.\d+\.\d+\.\d+)\:(\d+)\s+(.*)$/;
			unless(defined $alert{'src'})
			{
				#ICMP case
				($alert{'src'},$alert{'dest'},$remainder) = /^(\d+\.\d+\.\d+\.\d+)\s+\-\>\s+(\d+\.\d+\.\d+\.\d+)\s+(.*)$/;
			}		   
			$_ = $remainder;			  
		}
		($alert{'proto'}) = /^(\w*)\s+/;
		#my($ttl,$tos,$id,$df); # not stored
		# new format: TCP TTL:60 TOS:0x0 ID:3260 IpLen:20 DgmLen:77 DF
		# old format: TCP TTL:128 TOS:0x0 ID:50079  DF
		#($alert{'proto'},$ttl,$tos,$id,$df) = /^(\w*)\sTTL\:(\d*)\sTOS\:(\w*)\sID\:(\d*)\s?\s?(DF)?$/;

		# ---- Process the fourth line -----
		#
		$_= shift(@lines);				
				
		if ($alert{'proto'} eq "TCP") {
			($alert{'flags'}) = /^([SFRUAP12\*]*)\s+/;
			#my($seq,$ack,$win); # not stored
			# old format: *****PA* Seq: 0x82A8A42   Ack: 0xDA791923   Win: 0x2238
			# new format: ***AP*** Seq: 0xBDEE451F  Ack: 0xAC995B17  Win: 0x3EBC  TcpLen: 20
			#($alert{'flags'},$seq,$ack,$win) = /^([SFRUAP12\*]*)\sSeq\:\s(\w*)\s*Ack\:\s(\w*)\s*Win\:\s(\w*)$/;
		} elsif ($alert{'proto'} eq "UDP") {
			# my($UDPlength); # not stored
			# ($UDPlength) = /^Len\:\s(\d*)$/;
		} elsif ($alert{'proto'} eq "ICMP") {
			my($ICMPid,$seq); # not stored
			if (/^ID/) {
				($ICMPid,$seq,$alert{'ICMP_type'}) = /^ID\:(\d*)\s*Seq\:(\d*)\s*(.*)/;
			} else {
				($alert{'ICMP_type'}) = /^(.*)/;
				# ($ICMPid,$seq) = (undef,undef); 
			}
		}

		# ---- Process the fifth line if there is one -----
		#
		$_= shift(@lines);				
				
		#if(defined($_) && $_ ne '') {
			#my $TCPoptions = "";  # not stored
			#$TCPoptions = substr($line5,16,(length $line5));
		#}
	} elsif ($format eq 'fastalert') {
		# each s/// below consumes the leading part of the line it has parsed
		s/^\s*(\d+)\/(\d+)\-(\S*)//;
		($alert{'month'},$alert{'date'},$alert{'time'})= ($1,$2,$3);
		s/^\s+\[\*\*\]\s*(.+)\s*\[\*\*\]\s*//;
		$alert{'sig'}= $1;
		    # note: not handled: just a message (i.e. no packet) (e.g., from spp_portscan) -- ignore
		$alert{'sig'} =~ s/\s+$//;
		if (/:/) { 
			($alert{'src'},$alert{'sport'},$alert{'dest'},$alert{'dport'})= /^([\d\.]+):(\d+)\s*->\s*([\d\.]+):(\d+)/;
		} else { # just addresses
			($alert{'src'},$alert{'dest'})= /^([\d\.]+)\s*->\s*([\d\.]+)/;
			$alert{'proto'}= 'ICMP';
		}
	} elsif ($format eq 'syslog') {
		s/(\w+)\s+(\d+)\s+([\d:]+)\s+(\S+)\s+([^\[]+)\[\d+\]:\s*//;
		my($logginghost,$loggingprog); # not stored
		($alert{'month'},$alert{'date'},$alert{'time'},$logginghost,$loggingprog)=($1,$2,$3,$4,$5);
		$alert{'month'}= $monthnum{$alert{'month'}};
		
		# strip the trailing address pair, if any; what is left is the signature
		if (s/\s*:\s*([\d\.]+)\:(\d+)\s*->\s*([\d\.]+)\:(\d+)\s*$//) {
			($alert{'src'},$alert{'sport'},$alert{'dest'},$alert{'dport'})= ($1,$2,$3,$4);
		} elsif (s/\s*:\s*([\d\.]+)\s*->\s*([\d\.]+)\s*$//) {
			($alert{'src'},$alert{'dest'})= ($1,$2);
			$alert{'proto'}= 'ICMP';
		} else {
			# must be just a message (not really handled)
		}
		$alert{'sig'}= $_;
	} else { # $format eq 'portscan'
  		s/^(\w+)\s+(\d+)\s+(\d\d\:\d\d:\d\d)\s+//;
		($alert{'month'},$alert{'date'},$alert{'time'})= ($1,$2,$3);
 		$alert{'month'}= $monthnum{$alert{'month'}};
 		s/^(\d+\.\d+\.\d+\.\d+)\:(\d+)\s+\-\>\s+(\d+\.\d+\.\d+\.\d+)\:(\d+)\s+//;
 		($alert{'src'},$alert{'sport'},$alert{'dest'},$alert{'dport'})= ($1,$2,$3,$4);
 		s/^(\w+)\s*//;
 		$alert{'proto'} = $1;
		$alert{'proto'}= 'TCP' unless ($alert{'proto'} eq 'UDP') || ($alert{'proto'} eq 'ICMP');  # was SYN, etc
 		if ($alert{'proto'} eq 'TCP') {
	 		($alert{'flags'}) = /^(\S+)/;
	 	}
		
		# the signature is synthesized; 'flags' is only set for TCP
		$alert{'sig'} = "$alert{'proto'} $alert{'flags'} scan";
	}
	
	# unlike in the old snortsnarf code, we do not record 'file' and we record format, not file type
	
	return \%alert;
}

# grab_alerts_of_type reads the given log files and collects the alerts that
# mention a given IP address on one side of the "->" arrow.
# Arguments:
#   $ip      - the IP address to look for; it is matched literally and must
#              not be part of a longer run of digits
#   $end     - 'src' to require $ip before the arrow (optionally followed by
#              ":port"), 'dest' to require it after the arrow
#   $include - undef to accept every matching alert; otherwise a string where
#              'a' selects anomaly reports (per is_anom_rept) and 'g' selects
#              all other alerts
#   @logs    - paths of the log files to read
# Returns: a list of array refs [alert text (trailing whitespace removed),
#   position of the alert in its file counting from 1, log path, file type
#   ('scan', 'syslog' or 'alert', guessed from the path), alert format].
# Dies if a log cannot be opened.
sub grab_alerts_of_type {
	my($ip,$end,$include,@logs)= @_;
	my @alerts= ();

	# quote the address so that "." only matches a dot, and fence it with
	# digit look-arounds so that e.g. 2.3.4.5 does not match inside 192.3.4.5
	# and 1.2.3.4 does not match the start of 1.2.3.45
	my $quoted_ip= quotemeta($ip);
	my $src_re= qr/(?<!\d)$quoted_ip(?::\d+)?\s+->/;
	my $dest_re= qr/->\s+$quoted_ip(?!\d)/;

	foreach my $log (@logs) {
		my $file_type;
		if($log =~ /scan/) { # file type not really needed but provided in @alerts for backwards compatability
			$file_type = 'scan';
		} elsif($log =~ /syslog/ || $log =~ /messages/) {
			$file_type = 'syslog';
		} else {
			$file_type = 'alert';
		}

		# lexical handle and 3-arg open; the defined() test matters because a
		# 3-arg open on an undefined path would open an anonymous temp file
		my $fh;
		unless (defined($log) && open($fh,'<',$log)) {
			die "could not open log \"$log\" for reading: $!";
		}
		my $count= 0; # position of the current alert within this log
		while (1) {
			my ($alert_text,$format)= next_alert($fh);
			last unless defined($alert_text);
			$count++;
			next unless ($end eq 'src' && $alert_text =~ $src_re) || ($end eq 'dest' && $alert_text =~ $dest_re);
			my $isanom= is_anom_rept($alert_text);
			if (!defined($include) || (defined($isanom) && $include =~ /a/) || (!defined($isanom) && $include =~ /g/)) {
				$alert_text =~ s/\s+$//;
				push(@alerts,[$alert_text,$count,$log,$file_type,$format]);
			}
		}
		close $fh;
	}
	return @alerts;
}

# date_time_from_alert_text extracts the time stamp from unparsed alert text.
# Arguments: $alert - the raw alert text; $format - 'fullalert', 'fastalert'
#   or 'syslog'; anything else is treated as 'portscan'.
# Returns: the list (month number, day of month, time string).  Month names
#   in syslog and portscan lines are converted through %monthnum.  When the
#   text does not have the expected layout the elements are undef rather than
#   leftovers of an earlier regex match.
# The global $_ is not modified.
sub date_time_from_alert_text {
	my($alert,$format)= @_;
	
	if ($format eq 'fullalert') {
		# the time stamp opens the second line, in fixed columns:
		# MM/DD-hh:mm:ss.uuuuuu
		my ($line2)= $alert =~ /^.*\n(.*)/;
		return(undef,undef,undef) unless defined($line2);
		return(substr($line2,0,2),substr($line2,3,2),substr($line2,6,15));
	} elsif ($format eq 'fastalert') {
		my ($mo,$day,$time)= $alert =~ /^\s*(\d+)\/(\d+)\-(\S*)/;
		return($mo,$day,$time);
	} elsif ($format eq 'syslog') {
		my ($moname,$day,$time)= $alert =~ /(\w+)\s+(\d+)\s+([\d:]+)/;
		return(defined($moname) ? $monthnum{$moname} : undef,$day,$time);
	} else { # $format eq 'portscan'
		my ($moname,$day,$time)= $alert =~ /^(\w+)\s+(\d+)\s+(\d\d\:\d\d:\d\d)/;
		return(defined($moname) ? $monthnum{$moname} : undef,$day,$time);
	}
}

# alert_time_index maps an alert's time stamp to a number of seconds since
# the start of the year, suitable for ordering alerts chronologically within
# one year (alerts carry no year).
# Arguments: $alert - either a parsed alert (hash ref with 'month', 'date' and
#   'time' fields) or raw alert text; $format - the alert format, only used
#   when $alert is raw text.
# Returns: the index as a number; it carries a fractional part when the time
#   string has fractional seconds.
sub alert_time_index {
	my($alert,$format)= @_;
	
	my($mo,$day,$time);
	if (ref($alert)) {
		$mo= $alert->{'month'};
		$day= $alert->{'date'};
		$time= $alert->{'time'};
	} else {
		($mo,$day,$time)= date_time_from_alert_text($alert,$format);
	}
	
	# Number of days before the first of each month, indexed by month number
	# (slot 0 is unused).  Leap-year month lengths are used for the whole
	# table so that no two calendar days share a day number; in a non-leap
	# year this merely leaves an unused day number between Feb 28 and Mar 1.
	my @mostartday= (0,0,31,60,91,121,152,182,213,244,274,305,335);
	my $dayno= $mostartday[$mo]+($day-1);
	my($h,$m,$s)= split(':',$time,3);
	return ((($dayno*24+$h)*60 + $m)*60 +$s);
}

# if this parsed or unparsed alert is an anomaly report, return the anomaly
# score otherwise return undef
sub is_anom_rept {
	my($alert)= @_;

	# a parsed alert (hash ref) carries its message in the 'sig' field; an
	# unparsed alert is simply the text itself
	my $text;
	if (ref($alert)) {
		$text= $alert->{'sig'};
	} else {
		$text= $alert;
	}

	# not an anomaly report unless the spp_anomsensor message is present;
	# both the "Anomaly" and "Anomoly" spellings are recognized
	return undef unless $text =~ /spp_anomsensor:\s*Anom[ao]ly threshold exceeded:\s*(\d+\.\d+)/;

	# the captured decimal number is the anomaly score
	return $1;
}

1;

# $Id: snort_alert_parse.pl,v 1.16 2000/11/15 22:40:28 jim Exp $
