package ASD::Indexer::Parsing;

#     This file is part of asd.
    
#     asd is free software; you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation; either version 2 of the License, or
#     (at your option) any later version.

#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.

#     You should have received a copy of the GNU General Public License
#     along with this program; if not, write to the Free Software
#     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

#     asd 0.1 Copyright 2004 Antonini Daniele <arpeda@gmail.com>

use strict;
use warnings;

require Exporter;
require 5.005;

our @ISA = qw(Exporter);

# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.

# This allows declaration	use ASD::Function ':all';
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
# will save memory.
our %EXPORT_TAGS = ( 'all' => [ qw( get_title
				    get_clean_function
				    remove_man_page_section 
				    clean_man_page ) ] );

our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

our @EXPORT = qw();

our $VERSION = '0.01';

# Preloaded methods go here.

sub get_clean_function {
    my $filename = shift;
    
    my $function = undef;
    my $extension = undef;
    my @filename_splitted;
    
    @filename_splitted = split( /\./, $filename );

    $function = \&clean_man_page;

    return $function;
}

##    Get man page title
##
sub get_title {
    my $ref_man_page = shift;
    my $filename = shift;
    my $section = shift;

    my @tmp = undef;

    my $string = " ";
    my $title = "";
    my $start_name = 0;
    my $end_name = 0;
    my $begin_find = 0;
    my $find = 0;
    my $dot = 0; #if title must be cut

    #throught man page to find begin NAME section
    for my $i ( 0.. $#$ref_man_page ) {

	if ( $ref_man_page->[$i] =~ /^\.th/ ) { #if section name isn't present must use .TH tag
 	    $title = $ref_man_page->[$i];
 	} 
	
	if ( $ref_man_page->[$i] =~ /^\.sh|^\.ss/ ) { #search begin and end of section name

  	    #end NAME section
  	    if ( $begin_find ) {
  		$end_name = $i-1;
  		$find = 1;
  		last;
  	    }

  	    #begin NAME section
  	    if( $ref_man_page->[$i] =~ /name/ ) {
  		$start_name = $i+1;
  		$begin_find = 1;
  	    }
  	} 
    }

    #if NAME is present can elaborate title
    if ( $find ) {

	for my $i ( $start_name..$end_name ) {
	    $string = $string."$ref_man_page->[$i] ";
	}

#	print "_{$string}_\n";
	@tmp = split( /\s-\s/, $string, 2);

	if ( $#tmp ) {
	    if ( $tmp[1] ) {
		if ( length( $tmp[1] ) > 200 ) {
		    $tmp[1] = substr($string, 0, 200) . " ... ";
		}
		$string = $filename." ($section) - ".$tmp[1];
		$string =~ s/^ |$ //g;
	    }
	    else {
		$string = $filename." ($section) - ".$tmp[0];
		$string =~ s/^ |$ //g;
	    }
	}
	else {
	    $string = $filename." ($section) - ".$tmp[0];
	    $string =~ s/^ |$ //g;
	}

	@tmp = ();
    }
    elsif ( $title ) { 
	$title =~ s/^\.[T,t][H,h] //;
	$title =~ s/\n|\"//g;
	$string = $filename." ($section) - ".$title;
    }

    return $string;
}

sub remove_man_page_section {

    my $ref_man_page = shift;
    my $ref_man_page_field_to_delete = shift;

    my $field_to_delete = 0;
    my $tmp_start = 0;
    my $tmp_end = 0;
    my $i = 0;
    my $y = undef;

    while( $i <= $#$ref_man_page ) {

	if( $ref_man_page->[$i] =~ /^\.sh/ ) {

	    if( $field_to_delete ) {

		splice( @$ref_man_page, $tmp_start, ($i-$tmp_start) );
		$field_to_delete = 0;
		$i = $tmp_start;
	    } 

	    for $y (0..$#$ref_man_page_field_to_delete) {

		if(  $ref_man_page->[$i] =~ /$ref_man_page_field_to_delete->[$y]/ ) {

		    $field_to_delete = 1;
		    $tmp_start = $i;
		    last;
		}
	    }
	}
	$i ++;
    }

    #delete at the end of file
    splice( @$ref_man_page, $tmp_start ) if ( $field_to_delete );
}

#some regular expression must to be applied n-times
sub _apply_reg_exp {
    my $ref_string = shift;
    my $reg_exp = shift;
    my $sobstitute = shift;

    my $before;
    my $after;

    do {
	$before = length( $$ref_string );
	$$ref_string =~ s/$reg_exp/$sobstitute/g;
	$after = length( $$ref_string );
    } while ( $before - $after );
    
}

#    Clean man page:
#    - remove all tag of man page
#    - remove all number sequence
#    - double space
#    - convert all word in down case
sub clean_man_page {
    my $ref_man_page = shift;

    my $i = 0;

    while ($i < $#$ref_man_page+1  ) {

	$ref_man_page->[$i] =~ tr/[A-Z]/[a-z]/;
	
	if( $ref_man_page->[$i] =~ /^\.\\\"|^\.\n|^\n|^\.ix|^\.vb/ ) {
	    splice( @$ref_man_page, $i, 1 );
	}
	else {
	    my $before;
	    my $after;

	    $ref_man_page->[$i] =~ s/\\f\(cw\\\*\(c|\\\*\(c\'\\fr|\\\*\(r|\\\*\(l|\\f\(cw|\\f.|\\e|\\\(bu|\\s-1|\\s0|\\&//g;  #some tag \tag

	    $ref_man_page->[$i] =~ s/^\.Nd/-/;
	    _apply_reg_exp( \$ref_man_page->[$i], "\\\\-|\\\\\\(em|--", "-");

 	    $ref_man_page->[$i] =~ s/^\.(\w)+\s|^\.(\w)+\n|^\.\s//g if( $ref_man_page->[$i] !~ /^\.th\s/ && $ref_man_page->[$i] !~ /^\.s[h,s]\s/ ); # tag .TAG

	    $ref_man_page->[$i] =~ s/->/ /g;
	    $ref_man_page->[$i] =~ s/\(|\)/ /g;
 	    $ref_man_page->[$i] =~ s/[^(\w|:|\-|\.)]/ /g;       # tutto ci che non  alfanumerico : - .
	    $ref_man_page->[$i] =~ s/:+\s|\s:+/ /g;
 	    $ref_man_page->[$i] =~ s/\.\s/ /g;
	    $ref_man_page->[$i] =~ s/\.{2,}/ /g;

 	    $ref_man_page->[$i] =~ s/\n|\"|http:\/\///g;

	    _apply_reg_exp( \$ref_man_page->[$i], "\\s\\d((\\w)+(\\d)*)+\\s", " " ); #sequenze di numeri e lettere
	    _apply_reg_exp( \$ref_man_page->[$i], "^\\d((\\w)+(\\d)*)+\\s", " " );
	    
	    _apply_reg_exp( \$ref_man_page->[$i], "\\s(\\d)+-(\\w)+\\s", " " ); #numero-stringa
	    _apply_reg_exp( \$ref_man_page->[$i], "^(\\d)+-(\\w)+\\s", " " );
		
	    _apply_reg_exp( \$ref_man_page->[$i], "\\s(\\d)+(-(\\d)+)+\\s", " " ); #numeri intervallati da -
	    _apply_reg_exp( \$ref_man_page->[$i], "^(\\d)+(-(\\d)+)+\\s", " " );
	    
	    _apply_reg_exp( \$ref_man_page->[$i], "\\s(\\d)+(:(\\d)+)+\\s", " " ); #numeri intervallati da : che terminano con un numero
	    _apply_reg_exp( \$ref_man_page->[$i], "^(\\d)+(:(\\d)+)+\\s", " " );
	    
	    _apply_reg_exp( \$ref_man_page->[$i], "\\s((\\d)+:)+\\s", " " );  #numeri intervallati da : che terminano con :
	    _apply_reg_exp( \$ref_man_page->[$i], "^((\\d)+:)+\\s", " " ); 
	    
	    $ref_man_page->[$i] =~ s/^\s+|\s+$//g;
	    $ref_man_page->[$i] =~ s/ {2,}/ /g;


	    if ( $ref_man_page->[$i] =~ /^\.th\s/ ) {
		splice( @$ref_man_page, 0, $i );
		$i = 1;
		next;
	    }
	    
	    if ( $ref_man_page->[$i] =~ /^\.sh\s/ ) {
		$i++;
	        next;
	    }

	    if ( $ref_man_page->[$i] ) {
		$i++;
	    } else {
		splice( @$ref_man_page, $i, 1 );
	    }
	}
    }
}


1;
__END__
# Below is stub documentation for your module. You'd better edit it!

=head1 NAME

ASD::Function - Perl extension for ASD. It is a collection of function

=head1 SYNOPSIS

  use ASD::Function;

=head1 DESCRIPTION

Simple collection if function for ASD.

=head2 EXPORT

None by default.

=head1 SEE ALSO

Mention other useful documentation such as the documentation of
related modules or operating system documentation (such as man pages
in UNIX), or any relevant external documentation such as RFCs or
standards.

If you have a mailing list set up for your module, mention it here.

If you have a web site set up for your module, mention it here.

=head1 AUTHOR

A. U. Thor, E<lt>arpeda@(none)E<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2005 by A. U. Thor

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.6 or,
at your option, any later version of Perl 5 you may have available.


=cut
