#!/usr/local/bin/php
<?php

/*
 * Gaby Vanhegan <gaby@vanhegan.net> 2004-12-03
 *
 * Munge logs from spamassassin and put them into an rrd file
 * for handling by rrdtool.
 */

// Sample log line:
// Feb  8 00:44:14 trim spamd[15044]: clean message (2.2/6.5) for gaby:32767 in 6.1 seconds, 2114 bytes. 
// Feb  8 00:45:16 trim spamd[7020]: identified spam (17.0/6.5) for druid:32767 in 6.6 seconds, 3475 bytes. 

/*
 * Print a message to standard output, one output line per line of the
 * message, each prefixed with the current local time as "[Y-m-d H:i:s] ".
 * Whitespace around the message as a whole is stripped before splitting.
 */
function log_msg ( $input ) {
	foreach ( explode( "\n", trim( $input ) ) as $text ) {
		// The timestamp is taken afresh for every line printed
		$stamp	= date( "Y-m-d H:i:s" );
		echo "[" . $stamp . "] " . $text . "\n";
		}
	}

// Where are we?  Where do RRD's go?  Where do the logs come from?
$BASE		= "/home/gaby/prel/rrd";
$BASE_RRD	= "/var/symon/rrds/localhost";
$LOG		= "/var/log/spamassassin.0";

// No log?  Nothing to do.  This is checked before the temp file is
// requested: tempnam() creates the file on disk, so exiting afterwards
// would leave an empty temp file behind on every run.
if ( !( file_exists( $LOG ) ) ) { exit; }

$LOG_TMP	= tempnam( "/tmp", "rrd-spamassassin" );
if ( $LOG_TMP === false ) {
	log_msg( "Could not create a temporary file in /tmp" );
	exit( 1 );
	}

// Work on a private copy of the log rather than on the log itself
if ( !( copy( $LOG, $LOG_TMP ) ) ) {
	unlink( $LOG_TMP );
	log_msg( "Could not copy $LOG to $LOG_TMP" );
	exit( 1 );
	}

// Without a valid handle the read loop further down cannot run
$fh		= fopen( $LOG_TMP, "r" );
if ( $fh === false ) {
	unlink( $LOG_TMP );
	log_msg( "Could not open $LOG_TMP for reading" );
	exit( 1 );
	}

// Half-hourly totals, keyed by the start of the half hour (a unix
// timestamp that is a multiple of 1800).  Every bucket holds the number
// of ham and spam messages plus their summed score, scan time and size.
$lists		= array();

// Number of log lines recognised as spamd results
$count		= 0;

// Now going through our log data...  The result of fgets() is compared
// against false explicitly so that a line PHP treats as falsy (a final
// "0" without a newline) cannot end the loop early.
while ( ( $line = fgets( $fh ) ) !== false ) {

	// Captures: 1 = month and day, 2 = time of day, 3 = verdict text,
	// 4 = score, 5 = scan time in seconds, 6 = message size in bytes.
	// Scores can be negative, hence the optional minus sign on capture 4.
	$hit	= preg_match( "/^(.+) (\d+:\d+:\d+) trim spamd\[.+\]: (.+) \((-?\d+\.\d+)\/\d+\.\d+\) for .+ in (\d+\.\d+) seconds, (\d+) bytes.+$/", $line, $matches );

	// Not a spamd result line
	if ( !$hit ) { continue; }

	$count++;

	//if ( !( $count % 50 ) ) { log_msg( "$count hits" ); }

	// The log line carries no year, so the current one is assumed.
	// NOTE(review): lines written in December but processed in January
	// parse as future dates and are clamped to "now" below - confirm
	// whether they should be dated to the previous year instead.
	$the_date	= $matches[1] . " " . date( "Y" );
	$the_hour	= $matches[2];

	$the_time	= strtotime( $the_date . " " . $the_hour );

	// An unparseable date would otherwise be filed under timestamp 0
	if ( $the_time === false ) { continue; }

	// Ensure we're not working in the future here
	if ( $the_time > time() ) { $the_time = time(); }

	// Round down to the start of the half hour (1800 seconds)
	$the_time	= ( $the_time - ( $the_time % 1800 ) );

	// Get the score, time and bytes
	$score		= (float) $matches[4];
	$time		= (float) $matches[5];
	$bytes		= (int) $matches[6];

	// What was it?  Any verdict text containing "spam" (in any case)
	// counts as spam, everything else as ham.  preg_match() replaces
	// eregi(), which no longer exists as of PHP 7.
	$kind		= preg_match( "/spam/i", $matches[3] ) ? "spam" : "ham";

	// Start every bucket from zero so that += never reads a missing key
	if ( !( isset( $lists[ $the_time ] ) ) ) {
		$lists[ $the_time ]	= array(
			"ham"			=> 0,
			"spam"			=> 0,
			"ham_score"		=> 0,
			"spam_score"	=> 0,
			"ham_time"		=> 0,
			"spam_time"		=> 0,
			"ham_bytes"		=> 0,
			"spam_bytes"	=> 0
			);
		}

	// Only the counters for this message's kind change
	$lists[ $the_time ][ $kind ]			+= 1;
	$lists[ $the_time ][ $kind . "_score" ]	+= $score;
	$lists[ $the_time ][ $kind . "_time" ]	+= $time;
	$lists[ $the_time ][ $kind . "_bytes" ]	+= $bytes;

	}

// Close and remove the temp file please
fclose( $fh );
unlink( $LOG_TMP );

// Sort out data in ascending date please
ksort( $lists );

// Feed the aggregated buckets to rrdtool, in the order $lists holds them
$rrd_file		= "spamassassin.rrd";
$command_base	= "/usr/local/bin/rrdtool update $BASE_RRD/$rrd_file";

// One "timestamp:value:value:..." argument per bucket.
// NOTE(review): the value order below presumably has to match the order
// of the data sources in the rrd file - confirm against its definition.
$rows			= array();

foreach ( $lists as $the_time => $values ) {
	$rows[]	= implode( ":", array(
		$the_time,
		$values[ "ham" ],
		$values[ "spam" ],
		$values[ "spam_time" ],
		$values[ "ham_time" ],
		$values[ "spam_score" ],
		$values[ "ham_score" ],
		$values[ "spam_bytes" ],
		$values[ "ham_bytes" ]
		) );
	}

// Hand the rows over at most 30 per rrdtool call (presumably to keep the
// command line short).  With no rows there are no batches, so rrdtool is
// never called without data.
foreach ( array_chunk( $rows, 30 ) as $batch ) {

	$command	= $command_base . " " . implode( " ", $batch );

	//log_msg( $command );

	// shell_exec() gives null when the command prints nothing; the cast
	// keeps trim() from being handed a non-string.
	print trim( (string) shell_exec( $command ) );
	}

?>
