#!/usr/bin/env perl

#
# Use example: find -name '*.so' | finterpose
#

my %common_symbols = (
		      '__bss_start' => 1,
		      '__invoke_dynamic_linker__' => 1,
		      '_DYNAMIC' => 1,
		      '_edata' => 1,
		      '_end' => 1,
		      '_fini' => 1,
		      '_init' => 1,
		      '__bss_start@Base' => 1,
		      '__invoke_dynamic_linker__@Base' => 1,
		      '_DYNAMIC@Base' => 1,
		      '_edata@Base' => 1,
		      '_end@Base' => 1,
		      '_fini@Base' => 1,
		      '_init@Base' => 1,
		      );

my @exception_regexps;

import Strict;
use File::Basename;

# misc. argument options / defaults
my $opt_debug = 0;

sub add_sym($$$)
{
    my ($symbol_hash, $symbol, $file) = @_;
    if (!defined $symbol_hash->{$symbol}) {
	my @libs;
	$symbol_hash->{$symbol} = \@libs;
    }
    # versioning can create duplicates
    for my $fname (@{$symbol_hash->{$symbol}}) {
	return if ($fname eq $file);
    }
    push @{$symbol_hash->{$symbol}}, $file;
}

sub read_symbols($$)
{
    my $file = shift;
    my $symbol_hash = shift;
    my $pipe;

#    print "Read '$file'\n";

    $dumpsw = '-T';

    open ($pipe, "objdump $dumpsw $file |") || die "Can't objdump $dumpsw $file: $!";
    while (<$pipe>) {
	/([0-9a-f]*)\s+([gw ])\s+..\s+(\S*)\s*([0-9a-f]+)(.*)$/; # || next;

	my ($address, $linkage, $type, $size, $symbol_n_ver) = ($1, $2, $3, $4, $5);
	my ($opt_ver, $symbol, $version) = ( '', '', '' );

	next if (!$symbol_n_ver || !$type);

	if ($symbol_n_ver =~ m/\s([\s\(])(\S+)[\)\s]+(\S+)/) {
	    $opt_ver = $1; $version = $2; $symbol = $3;
	} else {
	    $symbol = $symbol_n_ver;
	}
	$symbol =~ s/^\s*//;
	$symbol =~ s/\s*$//;

	next if (!$symbol || !$type);

#	print "Symbol '$symbol' version '$version' type '$type' '$linkage' addr $address, size $size\n";

# Filter out things we're not interested in

# remove undefined / external references
	next if ($type eq '*UND*');
# remove weak symbols
	next if ($linkage =~ m/w/);
# remove section names
	next if ($symbol =~ m/^\./);
# remove version symbols
	next if ($symbol eq $version);

# FIXME - Ignores versioning - too nasty for now ...
	add_sym ($symbol_hash, $symbol, $file);
    }
    close ($pipe);
}

sub add_deps ($$$$)
{
    my $lib_dups = shift;
    my $lib = shift;
    my $libs = shift;
    my $symbol = shift;

#    print "Add deps for $lib: @{$libs}\n";
    for my $other_lib (@{$libs}) {
	($other_lib eq $lib) && next;
#	print "Add deps from $lib: to $other_lib for $symbol\n";
	if (!defined $lib_dups->{$lib}->{$other_lib}) {
	    my @list;
	    $lib_dups->{$lib}->{$other_lib} = \@list;
#	    my $cnt = keys %{$lib_dups->{$lib}};
#	    print "Add other lib $cnt\n";
	}
	push @{$lib_dups->{$lib}->{$other_lib}}, $symbol;
    }
}

sub ignore_symbol($)
{
    my $symbol = shift;
    return 1 if (defined $common_symbols{$symbol});
    for my $regex (@exception_regexps) {
#	print "Match '$symbol' vs '$regex'\n";
	if ($symbol =~ m/^$regex$/) {
	    return 1;
	}
    }
    return 0;
}

# Linux / gcc only - so far.
sub disp_sym($)
{
    my $raw = shift;
    return $raw if (! $raw =~ /^_Z/);

    my $filtered = `c++filt $raw`;
    $filtered =~ s/[\r\n]*$//;
    return $filtered;
}

my %symbol_table;
my %lib_dups;

my $option_cross_lib = 0;

my @exception_files;

for my $arg (@ARGV) {
    if ($arg eq '--help' || $arg eq '-h') {
	print "finterpose [options] [list-of-exception-files]\n";
	print " finds genuine interposing uses across many libraries read from stdin\n";
	print " options:\n";
	print "  --per-symbol: dump on symbol basis [default]\n";
	print "  --cross-lib: dump on a per-library basis\n";
    } elsif ($arg eq '--cross-lib') {
	$option_cross_lib = 1;
    } elsif ($arg eq '--per-symbol') {
	$option_cross_lib = 0;
    } else {
	push @exception_files, $arg;
    }
}

print "Reading exceptions ";
for my $fname (@exception_files) {
    my $fh;
    open ($fh, $fname) || die "Can't open $fname: $!";
    print ".";
    while (<$fh>) {
	s/[\r\n]*$//;
	m/^\#/ && next;
	m/^$/ && next;
#	print "Exception '$_'\n";
	push @exception_regexps, $_;
    }
    close ($fh);
}
print "\n\n";

print "Reading symbols:\n";
while (<STDIN>) {
    my $fname = $_;
    $fname =~ s/[\r\n\t]*$//;
    next if (! -f $fname);
    if (-l $fname) {
	my $link = readlink ($fname);
	if ($link =~ /^\//) {
	    $fname = $link;
	} else {
	    $fname = dirname ($fname) . "/" . $link;
	}
    }
    next if (defined $lib_dups{$fname});

    print "$fname ";
    my %lib_stats;
    $lib_dups{$fname} = \%lib_stats;
    read_symbols ($fname, \%symbol_table);
}
print "\n\n";

print "Removing singletons & filtering exceptions...\n";
for my $symbol (keys %symbol_table) {
    my $libs = $symbol_table{$symbol};
    if (@{$libs} == 1 || ignore_symbol ($symbol)) {
	delete $symbol_table{$symbol};
	next;
    }
}
print "\n";

if (!$option_cross_lib) { # symbol-set dump

    # detect substantially identical libraries ?
    my %dup_syms;

    # normalise libs to a string
    for my $sym (keys %symbol_table) {
	my $lib_string = join (' ', @{$symbol_table{$sym}});
	if (!defined $dup_syms{$lib_string}) {
	    my @sym_list;
	    $dup_syms{$lib_string} = \@sym_list;
	}
	push @{$dup_syms{$lib_string}}, $sym;
    }

    # prune probable duplicates
    my @dup_libs;
    for my $lib_list (sort { $a cmp $b } keys %dup_syms) {
	if (@{$dup_syms{$lib_list}} > 10) {
	    push @dup_libs, $lib_list;
	    delete $dup_syms{$lib_list};
	}
    }

    print "Probable duplicate libraries:\n";
    for my $dup (@dup_libs) {
	print "\t$dup\n";
    }
    print "\n";

    print "Duplicate symbols:\n";

    for my $lib_list (sort { $a cmp $b } keys %dup_syms) {
	print "\t$lib_list implement:\n";
	print "\t\t";
	for my $sym (@{$dup_syms{$lib_list}}) {
	    print disp_sym ($sym) . " ";
	}
	print "\n";
    }
} else { # cross-library dump    
    print "Collating duplicates\n";

    for my $symbol (keys %symbol_table) {
	for my $lib (@{$libs}) {
	    add_deps ($lib_dups, $lib, $libs, $symbol);
	    my $cnt = keys %{($lib_dups->{$lib})};
	}
    }

    print "Duplicate summary\n";

    my $dups = keys %symbol_table;
    print "  count of duplicates: $dups\n";
    
    for my $lib (keys %lib_dups) {
	my $dupcnt = keys %{$lib_dups->{$lib}};
	$dupcnt || next;
	print "Library: '$lib':\n";
	
	my $these_libs = $lib_dups->{$lib};
	for my $dup_lib (sort { $a cmp $b } keys %{$these_libs}) {
	    print "    vs. '$dup_lib': ";
	    for my $sym (@{$these_libs->{$dup_lib}}) {
		print disp_sym($sym) . " ";
	    }
	    print "\n";
	}
    }
}
