#!/bin/sh
#
# Copyright (c) 2010-2012, The OpenDKIM Project.  All rights reserved.
#
# $Id: opendkim-genstats,v 1.26 2010/10/27 06:18:45 cm-msk Exp $
#
# Script to generate some HTML-ized statistics for OpenDKIM

###
### Setup
###
### Each OPENDKIM_* environment variable, when set and non-empty, overrides
### the built-in default shown next to it.
###

# MySQL client invocation used for every query; asks for HTML output
MYSQL_CMD='mysql --html'

# Name of the database holding the statistics
DB="${OPENDKIM_DB:-opendkim}"

# Account used to connect to the database
USER="${OPENDKIM_USER:-opendkim}"

# Password for that account
PASSWORD="${OPENDKIM_PASSWORD:-password}"

# Final location of the report, and the per-process scratch file (suffix is
# this shell's PID) that is filled in first and renamed when complete
REALOUTPUT="${OPENDKIM_OUTPUT:-/var/www/docs/opendkim/report.html}"
OUTPUT="${REALOUTPUT}.$$"

###
### NO user-serviceable parts beyond this point
###

# Full client command line.  It is expanded unquoted by the rest of the
# script so that the shell splits it back into the command and its options.
# NOTE(review): the password is passed as a command-line option here, which
# exposes it to anything that can list process arguments -- consider a
# client option file instead.
MYSQL="${MYSQL_CMD} --user=${USER} --password=${PASSWORD} --database=${DB}"

# output a header
#
# Creates (or truncates) the scratch file and writes everything that precedes
# the first report section: the HTML preamble, the page heading with the
# generation time, and the table of contents linking to each section anchor.
# The here-document delimiter is unquoted so that $(date) is expanded; the
# rest of the text contains nothing else the shell would expand.
# "$OUTPUT" is quoted so that a report path containing whitespace works.
cat > "$OUTPUT" << EOF
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
  <head>
   <meta http-equiv="content-type" content="text/html;charset=us-ascii">
   <title>
    OpenDKIM Statistics Report
   </title>
  </head>

  <body>
<h1>OpenDKIM Statistics Report generated at $(date) </h1>
<hr>
<ul>
<li> <a href="#total_records">Total messages received</a>
<li> <a href="#reporters">Reporting hosts and record counts</a>
<li> <a href="#signature_counts">Overall message signature counts</a>
<li> <a href="#dnssec">DNSSEC results</a>
<li> <a href="#dnssec_trend">DNSSEC trend</a>
<li> <a href="#passfail">Overall pass/fail rates for signed mail</a>
<li> <a href="#error_codes">Top ten error codes</a>
<li> <a href="#third_party">Author vs. third-party signatures, non-MLM traffic</a>
<li> <a href="#unique_domains">Count of unique signed From: domains in sample</a>
<li> <a href="#top_signing_domains">Top 10 signing domains by signature count</a>
<li> <a href="#top_passing_domains">Top 10 signing domains by passing signature percentage</a>
<li> <a href="#top_failing_domains">Top 10 signing domains by failed signature percentage</a>
<li> <a href="#ip_correlation">Correlation of IP addresses to signed From: domains</a>
<li> <a href="#use_counts">Signing domain use counts</a>
<li> <a href="#signing_trend_msg">Trend in message signing rates</a>
<li> <a href="#signing_trend_domain">Trend in domain signing rates</a>
<li> <a href="#atps_use">ATPS use</a>
</ul>
<hr>
EOF

# Section "total_records": number of messages from enabled reporters, and
# the sum of SIGN(sigcount) over them (the count of signed messages,
# assuming sigcount is never negative).
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="total_records"></a>'
  echo "<p> Total records (messages) received: </p>"
  $MYSQL --execute="SELECT COUNT(*) as count, SUM(SIGN(sigcount)) AS signed FROM messages JOIN reporters ON messages.reporter = reporters.id WHERE enabled = 1"
  echo ""
} >> "$OUTPUT"

# Section "reporters": one row per reporter with its message count, its
# firstseen value and the newest msgtime, busiest reporter first.
# Unlike the other sections this query has no "enabled = 1" filter.
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="reporters"></a>'
  echo "<p> Reporting hosts and record counts: </p>"
  $MYSQL --execute="SELECT reporters.name AS reporter, COUNT(*) AS messages, reporters.firstseen AS since, MAX(msgtime) AS last FROM messages JOIN reporters ON messages.reporter = reporters.id GROUP BY messages.reporter ORDER BY COUNT(*) DESC"
  echo ""
} >> "$OUTPUT"

# Section "signature_counts": how many messages (from enabled reporters)
# carried each distinct sigcount value.
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="signature_counts"></a>'
  echo "<p> Overall message signature counts: </p>"
  $MYSQL --execute="SELECT sigcount, COUNT(*) AS messages FROM messages JOIN reporters ON messages.reporter = reporters.id WHERE enabled = 1 GROUP BY sigcount"
  echo ""
} >> "$OUTPUT"

# Section "dnssec": number of signatures (from enabled reporters) per
# dnssec code, ordered by code.  The codes themselves are defined outside
# this script (the report points readers at libopendkim/dkim.h).
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="dnssec"></a>'
  echo "<p> DNSSEC results (see libopendkim/dkim.h): </p>"
  $MYSQL --execute="SELECT dnssec AS 'DNSSEC code', COUNT(*) AS count FROM signatures JOIN messages ON messages.id = signatures.message JOIN reporters ON messages.reporter = reporters.id WHERE enabled = 1 GROUP BY dnssec ORDER BY dnssec"
  echo ""
} >> "$OUTPUT"

# Section "dnssec_trend": per calendar month, the number of distinct signing
# domains whose signatures have dnssec code 0 or 2 (code meanings are
# defined outside this script -- presumably in libopendkim/dkim.h).
# MONTHNAME() is listed in GROUP BY as well as MONTH(): it does not change
# the grouping, but servers running with ONLY_FULL_GROUP_BY reject a
# selected expression that is not grouped.  The explicit ORDER BY keeps the
# months chronological; GROUP BY alone does not guarantee any row order.
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="dnssec_trend"></a>'
  echo "<p> DNSSEC trend: </p>"
  $MYSQL --execute="SELECT COUNT(DISTINCT domain) AS domains, YEAR(messages.msgtime) AS year, MONTHNAME(messages.msgtime) AS month FROM signatures JOIN messages ON messages.id = signatures.message JOIN reporters ON reporters.id = messages.reporter WHERE enabled = 1 AND (dnssec = 0 OR dnssec = 2) GROUP BY YEAR(messages.msgtime), MONTH(messages.msgtime), MONTHNAME(messages.msgtime) ORDER BY YEAR(messages.msgtime), MONTH(messages.msgtime)"
  echo ""
} >> "$OUTPUT"

# Section "passfail": total number of signatures from enabled reporters,
# with the sums of their pass and fail_body columns.
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="passfail"></a>'
  echo "<p> Overall pass/fail rate for signed mail: </p>"
  $MYSQL --execute="SELECT COUNT(*) AS signatures, SUM(pass) AS passed, SUM(fail_body) AS 'failed(body)' FROM signatures JOIN messages ON messages.id = signatures.message JOIN reporters ON reporters.id = messages.reporter WHERE enabled = 1"
  echo ""
} >> "$OUTPUT"

# Section "error_codes": the ten most frequent signature error codes, by
# name, among signatures from enabled reporters.  Codes 0 and 28 are left
# out (their meaning is defined outside this script; the report points
# readers at libopendkim/dkim.h).
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="error_codes"></a>'
  echo "<p> Top ten error codes (see libopendkim/dkim.h): </p>"
  $MYSQL --execute="SELECT sigerrorcodes.name AS error, COUNT(*) AS instances FROM signatures JOIN sigerrorcodes ON sigerrorcodes.id = signatures.sigerror JOIN messages ON messages.id = signatures.message JOIN reporters ON reporters.id = messages.reporter WHERE enabled = 1 AND NOT sigerror = 0 AND NOT sigerror = 28 GROUP BY sigerror ORDER BY instances DESC LIMIT 10"
  echo ""
} >> "$OUTPUT"

# Section "third_party": two tables -- signatures whose signing domain
# equals the message's from_domain (author signatures) and signatures whose
# signing domain differs (third-party signatures), each with the sum of the
# pass column.
# NOTE(review): the heading says "non-MLM traffic" but neither query
# contains a visible mailing-list filter -- confirm against the schema.
# A trailing blank line is emitted like every other section does.
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="third_party"></a>'
  echo "<p> Author vs. third-party signatures, non-MLM traffic: </p>"
  $MYSQL --execute="SELECT COUNT(*) AS 'author sigs', SUM(pass) AS 'passed' FROM signatures JOIN messages ON messages.id = signatures.message JOIN reporters ON messages.reporter = reporters.id WHERE enabled = 1 AND from_domain = domain"
  $MYSQL --execute="SELECT COUNT(*) AS 'third-party sigs', SUM(pass) AS 'passed' FROM signatures JOIN messages ON messages.id = signatures.message JOIN reporters ON reporters.id = messages.reporter WHERE reporters.enabled = 1 AND NOT messages.from_domain = signatures.domain"
  echo ""
} >> "$OUTPUT"

# Section "unique_domains": number of distinct from_domain values among
# non-anonymized messages from enabled reporters.
# NOTE(review): the heading says "signed" but the query has no sigcount
# condition, so unsigned messages are counted too -- confirm the intent.
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="unique_domains"></a>'
  echo "<p> Count of unique signed From: domains in sample: </p>"
  $MYSQL --execute="SELECT COUNT(DISTINCT from_domain) AS domains FROM messages JOIN reporters ON reporters.id = messages.reporter WHERE enabled = 1 AND anonymized = 0"
  echo ""
} >> "$OUTPUT"

# Section "top_signing_domains": the ten from_domain values with the
# largest total sigcount, restricted to non-anonymized messages from
# enabled reporters.
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="top_signing_domains"></a>'
  echo "<p> Top 10 signing domains by signature count: </p>"
  $MYSQL --execute="SELECT domains.name, SUM(sigcount) AS signatures FROM messages JOIN domains ON domains.id = messages.from_domain JOIN reporters ON reporters.id = messages.reporter WHERE enabled = 1 AND anonymized = 0 GROUP BY from_domain ORDER BY SUM(sigcount) DESC LIMIT 10"
  echo ""
} >> "$OUTPUT"

# Section "top_passing_domains": the ten signing domains with the highest
# percentage of passing signatures (ties broken by signature count),
# restricted to non-anonymized messages from enabled reporters.
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="top_passing_domains"></a>'
  echo "<p> Top 10 signing domains by passing signature percentage: </p>"
  $MYSQL --execute="SELECT domains.name, COUNT(*) AS signatures, 100*SUM(pass)/COUNT(*) AS pct_passed FROM signatures JOIN domains ON domains.id = signatures.domain JOIN messages ON signatures.message = messages.id JOIN reporters ON reporters.id = messages.reporter WHERE enabled = 1 AND anonymized = 0 GROUP BY signatures.domain ORDER BY pct_passed DESC, signatures DESC LIMIT 10"
  echo ""
} >> "$OUTPUT"

# Section "top_failing_domains": the ten signing domains with the highest
# percentage of non-passing signatures (ties broken by signature count),
# restricted to rows with ignored = 0 on non-anonymized messages from
# enabled reporters.
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="top_failing_domains"></a>'
  echo "<p> Top 10 signing domains by failed signature percentage: </p>"
  $MYSQL --execute="SELECT domains.name, COUNT(*) AS signatures, 100-100*SUM(pass)/COUNT(*) AS pct_failed FROM signatures JOIN domains ON domains.id = signatures.domain JOIN messages ON signatures.message = messages.id JOIN reporters ON reporters.id = messages.reporter WHERE enabled = 1 AND anonymized = 0 AND ignored = 0 GROUP BY signatures.domain ORDER BY pct_failed DESC, signatures DESC LIMIT 10"
  echo ""
} >> "$OUTPUT"

# Section "ip_correlation": the ten source IP addresses that sent signed
# (sigcount > 0), non-anonymized mail for the largest number of distinct
# from_domain values, counting enabled reporters only.
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="ip_correlation"></a>'
  echo "<p> Correlation of IP addresses to signed From: domains: </p>"
  $MYSQL --execute="SELECT ipaddrs.addr, COUNT(DISTINCT from_domain) AS domains FROM messages JOIN ipaddrs ON messages.ip = ipaddrs.id JOIN reporters ON reporters.id = messages.reporter WHERE enabled = 1 AND sigcount > 0 AND anonymized = 0 GROUP BY messages.ip ORDER BY domains DESC LIMIT 10"
  echo ""
} >> "$OUTPUT"

# Section "use_counts": a histogram of how often signing domains appear.
# The inner query counts signatures per signing domain (enabled reporters
# only); the outer query counts how many domains share each of those
# counts and shows the ten smallest counts.
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="use_counts"></a>'
  echo "<p> Signing domain use counts: </p>"
  $MYSQL --execute="SELECT msgcount, COUNT(*) AS domains FROM (SELECT domain, COUNT(*) AS msgcount FROM signatures JOIN messages ON messages.id = signatures.message JOIN reporters ON reporters.id = messages.reporter WHERE enabled = 1 GROUP BY 1) a GROUP BY 1 ORDER BY 1 ASC LIMIT 10"
  echo ""
} >> "$OUTPUT"

# Section "signing_trend_msg": per calendar month, 100 * SUM(SIGN(sigcount))
# / COUNT(*) over messages from enabled reporters (the percentage of signed
# messages, assuming sigcount is never negative), in chronological order.
# MONTHNAME() is listed in GROUP BY as well as MONTH(): it does not change
# the grouping, but servers running with ONLY_FULL_GROUP_BY reject a
# selected expression that is not grouped.
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="signing_trend_msg"></a>'
  echo "<p> Trend in message signing rates: </p>"
  $MYSQL --execute="SELECT YEAR(msgtime) AS year, MONTHNAME(msgtime) AS month, (100. * SUM(SIGN(sigcount)) / COUNT(*)) AS '% signed' FROM messages JOIN reporters ON reporters.id = messages.reporter WHERE enabled = 1 GROUP BY year, MONTH(msgtime), MONTHNAME(msgtime) ORDER BY year,MONTH(msgtime)"
  echo ""
} >> "$OUTPUT"

# Section "signing_trend_domain": per calendar month, the number of
# distinct signing domains seen in signatures from enabled reporters, in
# chronological order.
# MONTHNAME() is listed in GROUP BY as well as MONTH(): it does not change
# the grouping, but servers running with ONLY_FULL_GROUP_BY reject a
# selected expression that is not grouped.
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="signing_trend_domain"></a>'
  echo "<p> Trend in domain signing rates: </p>"
  $MYSQL --execute="SELECT COUNT(DISTINCT domain) AS domains, YEAR(msgtime) AS year, MONTHNAME(msgtime) AS month FROM signatures JOIN messages ON signatures.message = messages.id JOIN reporters ON reporters.id = messages.reporter WHERE enabled = 1 GROUP BY year, MONTH(msgtime), MONTHNAME(msgtime) ORDER BY year, MONTH(msgtime)"
  echo ""
} >> "$OUTPUT"

# Section "atps_use": number of distinct from_domain values among messages
# from enabled reporters whose atps column is anything other than -1
# (presumably -1 marks "no ATPS data" -- confirm against the schema).
# $MYSQL is expanded unquoted on purpose (command plus options);
# "$OUTPUT" is quoted so a report path containing whitespace works.
{
  echo '<a name="atps_use"></a>'
  echo "<p> ATPS use: </p>"
  $MYSQL --execute="SELECT COUNT(DISTINCT from_domain) AS domains FROM messages JOIN reporters ON reporters.id = messages.reporter WHERE enabled = 1 AND NOT atps = -1"
  echo ""
} >> "$OUTPUT"

# output a footer
#
# Appends the completion time, the script's revision tag and the closing
# HTML tags to the scratch file.
{
  echo "<p> Report completed $(date) </p>"
  echo "<hr>"
  # single quotes keep the revision keyword literal ($Id must not expand)
  echo '<font size="-1"> <i> $Id: opendkim-genstats,v 1.26 2010/10/27 06:18:45 cm-msk Exp $ </i> </font>'
  echo "  </body>"
  echo "</html>"
} >> "$OUTPUT"

# all done!
#
# Rename the finished scratch file over the published report.  Both paths
# are quoted so a location containing whitespace works.  If the rename
# fails, remove the scratch file instead of leaving it behind and report
# the failure through the exit status.
if ! mv -- "$OUTPUT" "$REALOUTPUT"
then
  rm -f -- "$OUTPUT"
  exit 1
fi
exit 0
