package ASD::Function;

#     This file is part of asd.
    
#     asd is free software; you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation; either version 2 of the License, or
#     (at your option) any later version.

#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.

#     You should have received a copy of the GNU General Public License
#     along with this program; if not, write to the Free Software
#     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

#     asd 0.2 Copyright 2004 Antonini Daniele <arpeda@gmail.com>

use strict;
use warnings;
use File::stat;

require Exporter;
require 5.005;

our @ISA = qw(Exporter);

# Export policy: nothing is exported by default (@EXPORT is empty).
# Every public function listed below can be imported by name, or all at
# once with the ':all' tag:
#
#     use ASD::Function ':all';
#
# Helpers whose name starts with an underscore are private to this module
# and are deliberately left out of the list.
our %EXPORT_TAGS = ( 'all' => [ qw( get_man_page_dir 
				    insert_into_hash_title
				    get_man_page_id
				    merge 
				    expand_parameter
				    stat_man_page_to_analize
				    print_initial_statistics
				    print_updated_statistics
				    print_end_statistics
				    structure_pointers
				    store_without_occurrence
				    unzip_man_page ) ] );

# Everything in the ':all' tag may also be requested individually.
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

# Intentionally empty: callers must ask for what they need.
our @EXPORT = qw();

# NOTE(review): the file header says "asd 0.2" while the module version is
# 0.01 -- confirm which one is intended.
our $VERSION = '0.01';

# ANSI terminal escape sequences used to decorate the progress and summary
# output: 'green' selects green text on a black background, 'bold' switches
# bold on and 'normal' resets every attribute.
my %color = ( 'green' => "\033[0;40;32m",
	      'normal' => "\033[0m",
	      'bold' => "\033[1m" );

# Read a man page and return its lines, decompressing it when necessary.
#
# The kind of file is decided from the text after the last dot of $path:
#   - contains "gz"   -> decompressed with zcat
#   - contains "bz2"  -> decompressed with bzcat
#   - contains a digit (e.g. "1", "3pm") -> read as plain text
#   - anything else   -> a message is printed and nothing is read
#
# The path is never handed to a shell: decompressors are started with the
# list form of a pipe open (needs perl 5.8+) and plain files are opened
# directly, so paths containing spaces or shell metacharacters are safe.
#
# Parameters:
#   $path - path of the man page to read
# Returns:
#   a reference to an array with one element per line (newlines kept);
#   the array is empty when the file type is unsupported or cannot be read.
sub unzip_man_page {
    my $path = shift;

    my @man_page;
    my @man_page_file = split( /\./, $path );
    my $extension = pop @man_page_file;

    # External command needed to decode the file; empty for plain text.
    my @decompressor;
    if ( $extension =~ /gz/ ) {
        @decompressor = ('zcat');
    }
    elsif ( $extension =~ /bz2/ ) {
        @decompressor = ('bzcat');
    }
    elsif ( $extension !~ /\d/ ) {
        print "Cannot analyze $path: is not a .bz2 .gz or plain text\n";
        return \@man_page;
    }

    my $fh;
    my $opened = @decompressor
        ? open( $fh, '-|', @decompressor, $path )
        : open( $fh, '<', $path );
    if ( !$opened ) {
        # Best effort, like the rest of the indexer: report and carry on.
        warn "Cannot read $path: $!\n";
        return \@man_page;
    }

    @man_page = <$fh>;

    # A decompressor that fails reports the problem itself on STDERR.
    close $fh;

    return \@man_page;
}

# Write the index to disk (variant without the occurrence file).
#
# Three files are produced, their paths taken from $ref_index_file:
#   'man_page_title' - written by _write_man_page_title()
#   'lessico'        - one text line per word: "<word> <offset>", where
#                      <offset> is the position in the document file at
#                      which that word's posting list starts
#   'document'       - binary: the format string returned by
#                      structure_pointers(), then for every word (sorted)
#                      the packed number of documents followed by one
#                      (docID, frequency) pair per document, docIDs in
#                      ascending numeric order
#
# Frequencies are stored in a single unsigned byte, so values above 255
# are written as 255.  The caller's inverted list is left untouched.
#
# Parameters:
#   $ref_hash_man_page_title - hash ref: man page title => docID
#   $ref_inverted_list       - hash ref: word => { docID => frequency }
#   $ref_index_file          - hash ref holding the three output paths
# Dies when one of the lessico/document files cannot be opened or closed.
sub store_without_occurrence {
    my $ref_hash_man_page_title = shift;
    my $ref_inverted_list = shift;
    my $ref_index_file = shift;

    my ( $structure_string, $num_element_type, $docID_type ) =
        structure_pointers( $ref_inverted_list, $ref_hash_man_page_title );

    _write_man_page_title( $ref_index_file->{'man_page_title'}, $ref_hash_man_page_title );

    my $lessico_file  = $ref_index_file->{'lessico'};
    my $document_file = $ref_index_file->{'document'};

    open my $lessico_fh, '>', $lessico_file
        or die "Cannot write $lessico_file: $!";
    open my $document_fh, '>', $document_file
        or die "Cannot write $document_file: $!";

    # The document file holds packed binary data: no newline translation.
    binmode $document_fh;

    print {$document_fh} $structure_string;

    foreach my $word ( sort keys %$ref_inverted_list ) {
        my $postings = $ref_inverted_list->{$word};

        # Remember where this word's posting list begins.
        print {$lessico_fh} "$word " . tell($document_fh) . "\n";

        print {$document_fh} pack( $num_element_type, scalar( keys %$postings ) );

        foreach my $docID ( sort { $a <=> $b } keys %$postings ) {
            # Clamp on a copy so the caller's data is not modified.
            my $frequency = $postings->{$docID};
            $frequency = 255 if $frequency > 255;

            print {$document_fh} pack( $docID_type . "C", $docID, $frequency );
        }
    }

    # Buffered write errors only show up at close time.
    close $document_fh or die "Cannot close $document_file: $!";
    close $lessico_fh  or die "Cannot close $lessico_file: $!";

    return 1;
}


# Choose the narrowest integer formats able to hold the index counters.
#
# Parameters:
#   $ref_inverted_list       - hash ref: word => { docID => frequency }
#   $ref_hash_man_page_title - hash ref: man page title => docID
#
# Returns a four element array:
#   0) three letter format string written at the top of the document file
#      (one letter each for list length, docID and frequency)
#   1) perl pack() format for the length of a posting list
#   2) perl pack() format for a docID
#   3) perl pack() format for a frequency (always "C")
#
# The list-length format is chosen from the longest posting list, the
# docID format from the number of man page titles.
sub structure_pointers {
    my $ref_inverted_list = shift;
    my $ref_hash_man_page_title = shift;

    # Length of the longest posting list.
    my $longest_list = 0;
    foreach my $postings ( values %$ref_inverted_list ) {
        my $list_length = scalar( keys %$postings );
        $longest_list = $list_length if $longest_list < $list_length;
    }

    my $title_count = scalar( keys %$ref_hash_man_page_title );

    # Map an upper bound to its (header letter, pack format) pair.
    my $width_for = sub {
        my $upper_bound = shift;

        return ( "B", "C" ) if $upper_bound <= 255;      # unsigned byte
        return ( "H", "S" ) if $upper_bound <= 65535;    # unsigned short
        return ( "I", "I" );                             # unsigned int
    };

    my ( $length_letter, $num_element_type ) = $width_for->($longest_list);
    my ( $docID_letter,  $docID_type )       = $width_for->($title_count);

    # The frequency is always a single unsigned byte.
    my @format = (
        $length_letter . $docID_letter . "B",
        $num_element_type,
        $docID_type,
        "C",
    );

    return @format;
}

# Refresh the man page counter shown on the progress line.
#
# Moves the cursor back over the six character wide counter and writes
# the new value in its place.
#
# Parameters:
#   $num_man_pages - reference to the scalar holding the current count
sub print_updated_statistics {
    my ($num_man_pages) = @_;

    my $counter_field = sprintf( "% 6d", ${$num_man_pages} );

    syswrite STDOUT, "\b" x 6;
    syswrite STDOUT, $counter_field;
}

# Print the first version of the progress line.
#
# Writes "* indexing: <count> man pages for about <size> Kb" and then
# moves the cursor back to just after <count>, one backspace for each
# character of the uncoloured trailing text, so that
# print_updated_statistics() can overwrite the counter in place.
#
# Parameters:
#   $ref_statistics - array ref: [0] number of man pages, [1] size in bytes
# Returns:
#   the coloured text that follows the counter
sub print_initial_statistics {
    my ($ref_statistics) = @_;

    my $count_field = sprintf( "% 6d", $ref_statistics->[0] );
    my $size_field  = sprintf( "% 8d", $ref_statistics->[1] / 1024 );

    my $text = " man pages for about $color{'bold'}$size_field Kb$color{'normal'}";

    # Same text without escape sequences: its length is the number of
    # columns the cursor has to travel back.
    my $plain_text = " man_pages for about $size_field Kb";

    syswrite STDOUT, "$color{'green'} *$color{'normal'} indexing: $count_field" . $text;
    syswrite STDOUT, "\b" x length($plain_text);

    return $text;
}

# Print the final summary: number of indexed man pages and index sizes.
#
# Parameters:
#   $num_man_pages      - number of man pages that were indexed
#   $index_file         - hash ref with the paths of the index files
#   $occurrence_get_opt - true when the occurrence file was written too,
#                         in which case its size is reported as well
sub print_end_statistics {
    my ( $num_man_pages, $index_file, $occurrence_get_opt ) = @_;

    print " $color{'green'}*$color{'normal'} indexed $color{'bold'}$num_man_pages$color{'normal'} man pages\n";

    if ($occurrence_get_opt) {
        _print_stat_with_occurrence($index_file);
    }
    else {
        _print_stat_without_occurrence($index_file);
    }
}

# Print the size, in Kb, of the three basic index files.
#
# Parameters:
#   $ref - hash ref with the file paths under the keys 'man_page_title',
#          'lessico' and 'document'
#
# A file that is missing or cannot be examined is reported with a warning
# and skipped, so the remaining sizes are still printed.
sub _print_stat_without_occurrence {
    my $ref = shift;

    print " $color{'green'}*$color{'normal'} size of asd-index:\n";

    # Label shown to the user => key of the path inside $ref.
    my @report = (
        [ 'man_page_title', 'man_page_title' ],
        [ 'lessico',        'lessico' ],
        [ 'inverted list',  'document' ],
    );

    foreach my $entry (@report) {
        my ( $label, $key ) = @$entry;
        my $file = $ref->{$key};

        if ( !defined $file ) {
            warn "No path given for the '$key' index file\n";
            next;
        }

        # stat() is the File::stat version and returns an object, or
        # undef when the file cannot be examined.
        my $st = stat($file);
        if ( !$st ) {
            warn "Cannot stat $file: $!\n";
            next;
        }

        my $size = ( $st->size ) / 1024;
        print "\t$label: $color{'bold'}" . sprintf( "%.0f", $size ) . " Kb$color{'normal'}\n";
    }

    return;
}

# Print the size of the basic index files followed by the size, in Kb,
# of the occurrence file.
#
# Parameters:
#   $ref - hash ref with the index file paths; the occurrence file is
#          looked up under the key 'occurrence'
#
# A missing or unreadable occurrence file is reported with a warning
# instead of aborting the program.
sub _print_stat_with_occurrence {
    my $ref = shift;

    _print_stat_without_occurrence($ref);

    my $file = $ref->{'occurrence'};
    if ( !defined $file ) {
        warn "No path given for the 'occurrence' index file\n";
        return;
    }

    # stat() is the File::stat version: an object, or undef on failure.
    my $st = stat($file);
    if ( !$st ) {
        warn "Cannot stat $file: $!\n";
        return;
    }

    my $size = ( $st->size ) / 1024;
    print "\toccurrence: $color{'bold'}" . sprintf( "%.0f", $size ) . " Kb$color{'normal'}\n";

    return;
}

# Count the man pages that are about to be indexed and add up their sizes.
#
# What gets counted depends on the options, in this order of precedence:
#   1. $ref_file_get_opt is not empty: exactly those files
#   2. $dir_get_opt is set:            the entries of that directory
#   3. otherwise:                      the entries of every sub-directory
#      whose name contains "man" in each directory of $ref_manpath_dir
#
# Hidden entries (names starting with a dot) are ignored.  Symbolic links
# are counted as pages but do not contribute to the total size.
#
# No external command is run: directories are read with opendir(), so
# names containing spaces or shell metacharacters are handled correctly.
#
# Parameters:
#   $ref_manpath_dir  - array ref of manpath directories
#   $dir_get_opt      - single directory to index, or a false value
#   $ref_file_get_opt - array ref of explicit files (may be empty)
# Returns:
#   a two element array: (number of man pages, total size in bytes)
# Dies when a file that is not a symbolic link cannot be examined.
sub stat_man_page_to_analize {
    my $ref_manpath_dir = shift;
    my $dir_get_opt = shift;
    my $ref_file_get_opt = shift;

    my $size_total = 0;
    my $num_man_page = 0;

    if ( $ref_file_get_opt && @$ref_file_get_opt ) {
        # Each file is examined through the path given by the user, so
        # files may live in different directories.
        $num_man_page = scalar @$ref_file_get_opt;
        foreach my $file (@$ref_file_get_opt) {
            $size_total += _size_of_man_page($file);
        }
    }
    else {
        my @directories;

        if ($dir_get_opt) {
            # Counted exactly once, whatever the manpath contains.
            @directories = ($dir_get_opt);
        }
        else {
            foreach my $man_page_dir ( @{ $ref_manpath_dir || [] } ) {
                foreach my $section ( _visible_dir_entries($man_page_dir) ) {
                    next unless $section =~ /man/;
                    next unless -d "$man_page_dir/$section";
                    push @directories, "$man_page_dir/$section";
                }
            }
        }

        foreach my $current_dir (@directories) {
            my @man_pages = _visible_dir_entries($current_dir);

            $num_man_page += scalar @man_pages;
            foreach my $current_man_page (@man_pages) {
                $size_total += _size_of_man_page("$current_dir/$current_man_page");
            }
        }
    }

    my @statistics = ( $num_man_page, $size_total );

    return @statistics;
}

# Return the sorted names of the non-hidden entries of a directory; a
# directory that cannot be read yields a warning and an empty list.
sub _visible_dir_entries {
    my $dir = shift;

    my $dh;
    if ( !opendir( $dh, $dir ) ) {
        warn "Cannot read directory $dir: $!\n";
        return;
    }

    my @entries = sort grep { !/\A\./ } readdir $dh;
    closedir $dh;

    return @entries;
}

# Return the size in bytes of a man page, or 0 for a symbolic link; dies
# when the file cannot be examined.
sub _size_of_man_page {
    my $path = shift;

    return 0 if -l $path;
    die "Cannot stat $path: $!" unless -e $path;

    # "_" reuses the result of the -e test above.
    return ( -s _ ) || 0;
}

# Cope with shell expansion of the argument given to --file.
#
# When the first parameter matches "--file" and there are more than two
# parameters (as happens when the shell expands a wildcard into several
# file names), every parameter that does not match "--file" is appended
# to @$expansion preceded by its own "--file", and @$parameter is then
# replaced by the content of @$expansion.  In any other case both arrays
# are left untouched.
#
# Parameters:
#   $parameter - array ref with the command line parameters (modified)
#   $expansion - array ref receiving the expanded parameters (modified)
sub expand_parameter {
    my ( $parameter, $expansion ) = @_;

    return unless @$parameter;
    return unless $parameter->[0] =~ /--file/ && @$parameter > 2;

    foreach my $argument ( grep { $_ !~ /--file/ } @$parameter ) {
        push @$expansion, "--file", $argument;
    }

    @$parameter = @$expansion;
}

# Return the distinct values of an array, sorted as strings.
#
# Parameters:
#   $ref_array - reference to the array to examine (not modified)
sub _remove_dup {
    my ($ref_array) = @_;

    # A hash slice records every value once, whatever its multiplicity.
    my %seen;
    @seen{ @$ref_array } = ();

    return sort keys %seen;
}

# Run the manpath command and return the directories it reports.
#
# The colon separated output is split into directories; empty entries
# (caused by leading, trailing or doubled colons) are dropped and
# duplicates are removed through _remove_dup(), which also sorts the
# result.
#
# Returns:
#   an array of directory names; empty when manpath prints nothing
sub get_man_page_dir {
    my $tmp_dir = `manpath 2>/dev/null`;

    my @dir_unique;
    if ( defined $tmp_dir ) {
        # chomp only strips a trailing newline; chop would also eat the
        # last character of the last directory when there is none.
        chomp($tmp_dir);

        my @dir = grep { length } split( /:/, $tmp_dir );
        @dir_unique = _remove_dup( \@dir );
    }

    return @dir_unique;
}

# Register a man page title and give it the next free identifier.
#
# Parameters:
#   $ref_hash_man_page - hash ref: title => identifier (modified)
#   $string            - title to register
#   $ref_index         - reference to the last identifier handed out; it
#                        is incremented when a new title is stored
# Returns:
#   the identifier assigned to a newly stored title, or "" when the title
#   is a false value or is already present in the hash
sub insert_into_hash_title {
    my ( $ref_hash_man_page, $string, $ref_index ) = @_;

    return "" unless $string;
    return "" if exists $ref_hash_man_page->{$string};

    my $new_id = ++${$ref_index};

    return $ref_hash_man_page->{$string} = $new_id;
}

# Look up the identifier of a man page title.
#
# Parameters:
#   $ref_hash - hash ref: title => identifier
#   $string   - title to look up
# Returns:
#   the stored identifier, or 0 when the title is not in the hash
sub get_man_page_id {
    my ( $ref_hash, $string ) = @_;

    return exists $ref_hash->{$string} ? $ref_hash->{$string} : 0;
}

# Fold the words of one man page into the global inverted list.
#
# For every word of the page, the value stored for it is recorded under
# the page identifier.  A pseudo word "_{<section>}_" is added as well,
# with value 0, so that the section of the page can be looked up like
# any other word.
#
# Parameters:
#   $ref_hash_dict     - hash ref: word => { identifier => value } (modified)
#   $ref_hash_man_page - hash ref: word => value for the page being merged
#   $section_man_page  - section of the man page
#   $man_page_id       - identifier of the man page
sub merge {
    my ( $ref_hash_dict, $ref_hash_man_page, $section_man_page, $man_page_id ) = @_;

    my $page_key = "$man_page_id";

    $ref_hash_dict->{$_}{$page_key} = $ref_hash_man_page->{$_}
        for keys %$ref_hash_man_page;

    # Section marker.
    $ref_hash_dict->{"_{$section_man_page}_"}{$page_key} = 0;
}

# Write the table of man page titles to a file.
#
# The file gets one line per title, "<identifier> <title>", with the
# titles in sorted order.
#
# Parameters:
#   $file                    - path of the file to create or overwrite
#   $ref_man_page_hash_title - hash ref: title => identifier
# Dies when the file cannot be opened or completely written.
sub _write_man_page_title {
    my $file = shift;
    my $ref_man_page_hash_title = shift;

    open my $title_fh, '>', $file
        or die "Cannot write $file: $!";

    foreach my $word ( sort keys %$ref_man_page_hash_title ) {
        print {$title_fh} "$ref_man_page_hash_title->{$word} $word\n";
    }

    # Buffered write errors only show up at close time.
    close $title_fh
        or die "Cannot close $file: $!";

    return 1;
}

1;
__END__

=head1 NAME

ASD::Function - Perl extension for ASD: a collection of helper functions

=head1 SYNOPSIS

  use ASD::Function ':all';

=head1 DESCRIPTION

A simple collection of functions for ASD.

=head2 EXPORT

None by default.

=head1 AUTHOR

A. U. Thor, E<lt>antonini.daniele@gmail.comE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2005 by A. U. Thor

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.6 or,
at your option, any later version of Perl 5 you may have available.


=cut
