use FindBin;
die unless ($FindBin::Script =~ m/^(.*?)\.PL$/);
open(STDOUT, ">$1") || die;
print <DATA>
__END__
#! /usr/local/bin/perl

#  flowdumper - a grep(1)-like utility for raw flow files
#  Copyright (C) 1998-2001  Dave Plonka
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

# $Id: flowdumper.PL,v 1.32 2001/06/11 16:18:09 dplonka Exp $
# Dave Plonka <plonka@doit.wisc.edu>

use FindBin;
use Cflow qw(:flowvars :tcpflags :icmptypes :icmpcodes 1.028);
use POSIX; # for strftime
use Getopt::Std;
use IO::File;
use Socket; # for inet_aton, inet_ntoa

if (!getopts('havI:e:E:nsScrRp:') ||
    $opt_h ||
    1 < ($opt_s + $opt_S + $opt_r + $opt_R) ||
    1 < ($opt_n + $opt_p) ||
    1 < ($opt_n + ($opt_r || $opt_R))) {
   print STDERR <<_EOF_
usage: $FindBin::Script [-h] [-v] [-s|S|r|R] [-a|n] [[-I expr] -e expr [-E expr]] [-c] [flow_file [...]]
       -h - shows this usage information (mnemonic: 'h'elp)
       -a - show all flows, implied if "-e" is not specified (mnemonic: 'a'll)
       -v - verbose - show warnings (mnemonic: 'v'erbose)
       -I expr - evaluate expression initially, before flow processing
		 (mnemonic: 'I'nitial expression)
       -e expr - evaluate expression once per flow (mnemonic: 'e'xpression)
       -E expr - evaluate expression after flow processing is complete
	         (mnemonic: 'E'ND expression)
       -c - print number of flows matched in input (mnemonic: 'c'ount) 
       -s - print flows in short (one-line) format, ignored with "-n"
            (mnemonic: 's'hort)
       -S - print flows in the "old" short (one-line) format, ignored with "-n"
            (mnemonic: 'S'hort)
       -r - print flows in the raw/binary flow file format, ignored with "-n"
	    (mnemonic: 'r'aw)
       -R - "repack" print flows in the raw/binary flow file format,
	    requires "-e", ignored with "-n" (mnemonic: 'R'eraw)
       -n - don't print matching flows (mnemonic: like "perl -n" or "sed -n")
       -p prefix_mappings_file - read file containing IPv4 prefix mappings
	                         (mnemonic: 'p'refixes)

       If no flow file arguments are specified, standard input will be used.

   Do "perldoc $FindBin::Script" for full details.
_EOF_
   ;
   exit($opt_h? 0 : 2)
}

if ($opt_S) {
   $opt_s = 1
}

Cflow::verbose($opt_v);

if ($opt_p) {
   eval "use Net::Patricia";
   die("$@") if "$@";
   eval "use Net::Netmask";
   die("$@") if "$@";

   $pt = new Net::Patricia;
   die unless ref($pt);

   my $fh = new IO::File $opt_p, 'r';
   die "open \"$opt_p\": $!\n" unless ref($fh);

   while (<$fh>) {
      if (!m|(\d+\.\d+\.\d+\.\d+/\d+)\s+->\s+(\d+\.\d+\.\d+\.\d+/\d+)|) {
	 warn "$opt_p: can't grok this line: $_";
	 next
      }
      my($prefix, $encprefix) = ($1, $2);
      my $netmask = new Net::Netmask $encprefix;
      die unless ref($netmask);
      my $hosts = new Net::Patricia;
      die unless ref($hosts);
      die unless $pt->add_string($prefix, { encnet => $netmask,
		                            n      => 0,
		                            hosts  => $hosts });
   }
   if ($opt_v) {
      $pt->climb(sub { warn("$opt_p: ", $_[0]->{encnet}->desc, "\n") })
   }
}

if (!$opt_e) {
   if ($opt_n) {
      $wanted = \&wantall
   } else {
      if ($opt_s) {
         $wanted = \&shortprintflow
      } elsif ($opt_r) {
         $wanted = \&printrawflow
      } elsif ($opt_R) {
         $wanted = \&printrerawflow
      } else {
         $wanted = \&printflow
      }
   }
} else {
   if ($opt_n) {
      $wanted = eval "sub () { $opt_e }";
   } else {
      if ($opt_s) {
         $wanted = eval "sub () { ($opt_e) && &shortprintflow }";
      } elsif ($opt_r) {
         $wanted = eval "sub () { ($opt_e) && &printrawflow }";
      } elsif ($opt_R) {
         $wanted = eval "sub () { ($opt_e) && &printrerawflow }";
      } else {
         $wanted = eval "sub () { ($opt_e) && &printflow }";
      }
   }
   die $@ if $@
}

if ($opt_I) {
   eval $opt_I
}

$ratio = Cflow::find($wanted, (-1 != $#ARGV)? @ARGV : '-');

if ($opt_c) {
   my ($count, $total) = split('/', $ratio);
   printf STDERR "matched %d of %d flows\n", $count, $total
}

if ($opt_E) {
   eval $opt_E
}

exit 0;

sub wantall () {
   return 1
}

sub shortprintflow () {
   if (!$opt_S && 1 == $protocol) {
   printf "%s %.15s -> %.15s ICMP_%s %u %u\n",
      $localtime,
      $srcip,
      $dstip,
      $ICMPTypeCode,
      $pkts,
      $bytes
   } elsif (!$opt_S && 6 == $protocol) {
   printf "%s %.15s.%hu -> %.15s.%hu %hu$TCPFlags %u %u\n",
      $localtime,
      $srcip,
      $srcport,
      $dstip,
      $dstport,
      $protocol,
      $pkts,
      $bytes
   } else {
   printf "%s %.15s.%hu -> %.15s.%hu %hu %u %u\n",
      $localtime,
      $srcip,
      $srcport,
      $dstip,
      $dstport,
      $protocol,
      $pkts,
      $bytes
   }
}

sub printflow () {
printf <<_EOF_
FLOW
  index:          0x%x
  router:         %s
  src IP:         %s
  dst IP:         %s
  input ifIndex:  %d
  output ifIndex: %d
  src port:       %d
  dst port:       %d%s
  pkts:           %d
  bytes:          %d
  IP nexthop:     %s
  start time:     %s
  end time:       %s
  protocol:       %d
  tos:            %d
  src AS:         %d
  dst AS:         %d
  src masklen:    %d
  dst masklen:    %d
  TCP flags:      0x%x%s
  engine type:    %d
  engine id:      %d
_EOF_
   ,
   $Cflow::index,
   $exporterip,
   $srcip,
   $dstip,
   $input_if,
   $output_if,
   $srcport,
   $dstport,
   1 == $protocol? " $ICMPTypeCode" : '',
   $pkts,
   $bytes,
   $nexthopip,
   scalar(localtime($startime)),
   scalar(localtime($endtime)),
   $protocol,
   $tos,
   $src_as,
   $dst_as,
   $src_mask,
   $dst_mask,
   $tcp_flags,
   6 == $protocol? " $TCPFlags" : '',
   $engine_type,
   $engine_id
}

sub printrawflow () {
   syswrite(STDOUT, $Cflow::raw, length $Cflow::raw)
}

sub printrerawflow () {
   syswrite(STDOUT, $Cflow::reraw, length $Cflow::raw)
}

sub ENCODE() {
   my $net;

   return(1) unless $opt_p;

   foreach my $ipref (\$Cflow::exporter,
		      \$Cflow::srcaddr,
		      \$Cflow::dstaddr,
		      \$Cflow::nexthop) {
      # do a lookup to see if its a routable destionation:
      if ($net = $pt->match_integer($$ipref)) {

	 die unless 'HASH' eq ref($net);
	 die unless ref($net->{encnet});
	 die unless ref($net->{hosts});
	 die unless defined($net->{n});
  
         # do a lookup in the $net->{hosts} p-trie to determine if we've
         # already determined a mapping for this host IP address:
         my $encip;
         if (!($encip = $net->{hosts}->match_exact_integer($$ipref))) {
            # this is a "new" ip, create the new mapping...
            # inc `n' to get the next available encoded host addr in this net:
            $encip = unpack("N", inet_aton($net->{encnet}->nth($net->{n})));
            $net->{hosts}->add_string(inet_ntoa(pack("N", $$ipref)), $encip)
	       or die;
            $net->{n}++;
         }

         # at this point $encip is the host-ordered long encoded IP address:
         $$ipref = $encip
      } else {  
         warn(inet_ntoa(pack("N", $$ipref)), " was non-routable - skipped.\n");
	 return 0
      }
   }

   return 1
}

=head1 NAME

flowdumper - a grep(1)-like utility for raw flow files

=head1 SYNOPSIS

   flowdumper [-h] [-v] [-s|S|r] [-a|n] [[-I expr] -e expr [-E expr]] [-c] [flow_file [...]]

but usually just:

   flowdumper [-s] -e expr flow_file [...]

=head1 DESCRIPTION

B<flowdumper> is a grep(1)-like utility for selecting and processing
flows from cflowd or flow-tools raw flow files.  The selection criteria
are specified by using the C<-e> option described below.

B<flowdumper>'s primary features are the ability to:

=over 4

=item *

Print the content of raw flow files in one of two built-in formats or a
format of the users own.  The built-in "long" format is much like that
produced by the flowdump command supplied with cflowd.  The "short",
single-line format is suitable for subsequent post-processing by
line-oriented filters like sed(1).

=item *

Act as a filter, reading raw flow input from either file(s) or standard
input, and producing filtered raw flow output on standard output.  This
is similar to how grep(1) is often used on text files.

=item *

Select flows according to practically any criteria that can be
expressed in perl syntax.

=back

The "flow variables" and other symbols available for use in the C<-e>
expression are those made available by the Cflow module when used like
this:

   use Cflow qw(:flowvars :tcpflags :icmptypes :icmpcodes);

See the Cflow perl documentation for full details on these values (i.e.
"perldoc Cflow".)

Most perl syntax is allowed in the expressions specified with the C<-e>,
C<-I>, and C<-E> options.  See the perl man pages for full details on
operators ("man perlop") and functions ("man perlfunc") available for
use in those expressions.

If run with no arguments, filters standard input to standard output.

The options and their arguments, roughly in order of usefulness, are:

=over 4

=item C<-h>

shows the usage information

mnemonic: 'h'elp

=item C<-a>

print all flows

implied if C<-e> is not specified

mnemonic: 'a'll

=item C<-e> expr

evaluate this expression once per flow

mnemonic: 'e'xpression

=item C<-c>

print number of flows matched in input

mnemonic: 'c'ount

=item C<-s>

print flows in short (one-line) format, ignored with C<-n>

mnemonic: 's'hort

=item C<-r>

print flows in the raw/binary flow file format

ignored with C<-n>

mnemonic: 'r'aw

=item C<-R>

"repacks" and print flows in the raw/binary flow file format

requires C<-e>, ignored with C<-n>, useful with C<-p>

mnemonic: 'R'epack raw

=item C<-n>

don't print matching flows

mnemonic: like "perl C<-n>" or "sed C<-n>"

=item C<-S>

print flows in the "old" short (one-line) format

ignored with C<-n>

mnemonic: 'S'hort

=item C<-v>

be verbose with messages

mnemonic: 'v'erbose

=item C<-I> expr

eval expression initially, before flow processing

practically useless without C<-e>

mnemonic: 'I'nitial expression

=item C<-E> expr

eval expression after flow processing is complete

practically useless without C<-e>

mnemonic: 'E'ND expression

=item C<-p> prefix_mappings_file

read file containing IPv4 prefix mappings in this format (one per line):

   10.42.69.0/24 -> 10.69.42.0/24
   ...

When specifying this option, you can, and should at some point, call
the ENCODE subroutine in your expressions to have it encode the IP
address flowvars such as $Cflow::exporter, $Cflow::srcaddr,
$Cflow::dstaddr, and $Cflow::nexthop.

mnemonic: 'p'refixes

=back

=head1 EXAMPLES

Print all flows, in a multi-line format, to a pager:

   $ flowdumper -a flows.* |less

Print all the UDP flows to another file using the raw binary flow format:

   $ flowdumper -re '17 == $protocol' flows.current > udp_flows.current

Print all TCP flows which have the SYN bit set in the TCP flags:

   $ flowdumper -se '6 == $protocol && ($TH_SYN & $tcp_flags)' flows.*

Print the first 10 flows to another file using the raw binary flow format:

   $ flowdumper -I '$n = 10' -re '$n-- or exit' flows.*0 > head.cflow

Print all flows with the start and end time using a two-line format:

   $ flowdumper -se 'print scalar(localtime($startime)), "\n"' flows.*

Print all flows with the specified source address using a short,
single-line format:

   $ flowdumper -se '"10.42.42.42" eq $srcip' flows.*

Do the same thing in a quicker, but less obvious, way:

   $ flowdumper -I'
      use Socket;
      $addr = unpack("N", Socket::inet_aton("10.42.42.42"));
   ' -se '$addr == $srcaddr'  flows.*

(This latter method runs quicker because inet_aton(3) is only called
once, instead of once per flow.)

Print all flows with a source address within the specifed network/subnet:

   $ flowdumper \
   -I'use Socket;
      $mask = unpack("N", Socket::inet_aton("10.42.0.0"));
      $width = 16' \
   -se '$mask == ((0xffffffff << (32-$width) & $srcaddr)' flows.*

Print all flows where either the source or the destination address, but
not both, is within the specified set of networks or subnets:

   $ flowdumper \
   -I 'use Net::Patricia;
       $pt = Net::Patricia->new;
       map { $pt->add_string($_, 1) } qw( 10.42.0.0/16
					  10.69.0.0/16 )' \
   -se '1 == ($pt->match_integer($srcaddr) +
	      $pt->match_integer($dstaddr))' flows.*

Count the total number of "talkers" (unique source host addresses) by
piping them to sort(1) and wc(1) to count them:

   $ flowdumper \
   -I 'use Net::Patricia;
       $pt = Net::Patricia->new;
       map { $pt->add_string($_, 1) } qw( 10.42.0.0/16
					  10.69.0.0/16 )' \
   -ne '$pt->match_integer($srcaddr) and print "$srcip\n"' flows.* \
   |sort -u |wc -l

Count the total number of "talkers" (unique source host addresses) that
are within a the specified networks or subnets:

   $ flowdumper \
   -I 'use Net::Patricia;
       $pt = new Net::Patricia;
       map { $pt->add_string($_, 1) } qw( 10.42.0.0/16
					  10.69.0.0/16 );
       $talkers = new Net::Patricia' \
   -ne '$pt->match_integer($srcaddr) &&
        ($talkers->match_integer($srcaddr) or
         $talkers->add_string($srcip, 1))' \
   -E 'printf("%d\n", $talkers->climb( sub { 1 } ))' flows.*

(For large numbers of flows, this latter method is quicker because it
populates a Net::Patricia trie with the unique addresses and counts the
resulting nodes rather than having to print them to standard output and
then having to sort them to determine how many are unique.)

Select the TCP flows and "ENCODE" the IP addresses according to the
prefix encodings specified in "prefix_encodings.txt":

   $ flowdumper -p prefix_encodings.txt -se '6 == $protocol && ENCODE'

Produce a new raw flow file with the IP addresses ENCODEd according to
the prefix encodings specified in "prefix_encodings.txt":

   $ flowdumper -p prefix_encodings.txt -Re 'ENCODE' flows > flows.enc

=head1 NOTES

This utility was inspired by Daniel McRobb's B<flowdump> utility which
is supplied with cflowd.  B<flowdumper> was originally written as
merely a sample of what can be done with the Cflow perl module, but has
since been developed into a more complete tool.

=head1 BUGS

There are perhaps some pathological combinations of options that
currently do not produce usage error messages, but should.

Since the expression syntax is that of perl itself, there are lots of
useless expressions that will happily be accepted without complaint.
This is particular troublesome when trying to track down typos, for
instance, with the flow variable names.

This script probably has the same bugs as the Cflow module, since it's
based upon it.

=head1 AUTHOR

Dave Plonka <plonka@doit.wisc.edu>

Copyright (C) 1998-2001  Dave Plonka.  This program is free
software; you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License,
or (at your option) any later version.

=head1 SEE ALSO

perl(1), Socket, Net::Netmask, Net::Patricia, Cflow.

=cut
