#!/bin/bash

# Copyright (C) 2011 Charles Atkinson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA

# Purpose: analyses an omindex log for failure rates by file name extension

# Usage: 
#    * The current working directory must be this script's directory
#    * Details of synopsis in the usage function.
#    * Automatically outputs to log if there is no tty to log to
#    * To force output to log: export SET_HAVE_TTY_FALSE=true

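# Function call tree
#    +
#    |
#    +-- initialise
#    |   |
#    |   +-- usage
#    |   |
#    |   +-- finalise (on configuration errors)
#    |
#    +-- finalise
#
# Utility functions called from various places (not defined in this file;
# presumably provided by bash_lib.sh, which initialise sources):
#    ck_file msg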

# Function definitions in alphabetical order.  Execution begins after the last function definition.

#--------------------------
# Name: finalise
# Purpose: final logging and get out of here
# Arguments:
#    $1 - return code to exit with (treated as 0 when omitted)
# Globals read:
#    have_tty, false, my_nam
#    parse_cfg_for_bash_rb_log_created, parse_cfg_for_bash_rb_log_fn
# Note: never returns; always exits the script.
#--------------------------
function finalise {

    local rc

    # When "not logging" (a tty is available), delete the temporary log that
    # initialise had parse_cfg_for_bash.rb create.  The file name is quoted so
    # that a log directory containing whitespace is still cleaned up.
    if [[ $have_tty ]]; then
        if [[ ${parse_cfg_for_bash_rb_log_created:-$false} ]]; then
            rm -f -- "$parse_cfg_for_bash_rb_log_fn"
        fi
    fi

    # Final logging
    # ~~~~~~~~~~~~~
    # Return codes of 128 + signal number get an extra message naming the signal
    rc=${1:-0}
    case $rc in
        129 )
            msg I "$my_nam: finalising on SIGHUP"
            ;;
        130 )
            msg I "$my_nam: finalising on SIGINT"
            ;;
        131 )
            msg I "$my_nam: finalising on SIGQUIT"
            ;;
        143 )
            msg I "$my_nam: finalising on SIGTERM"
            ;;
    esac
    msg I "$my_nam: exiting with return code $rc"

    exit "$rc"

}  # end of function finalise

#--------------------------
# Name: initialise
# Purpose: sets up environment, parses command line, sets up logging and parses the config file
# Arguments: the script's command line arguments
# Globals set:
#    cfg_dir, collation_root_dir, emsg, extensions, log_dir, omindex_log_fn
#    parse_cfg_for_bash_rb_log_created, parse_cfg_for_bash_rb_log_fn
#    have_tty (forced to $false when SET_HAVE_TTY_FALSE is set)
# Exits: 0 after -h; 1 on command line errors; via finalise 1 on
#    configuration errors
#--------------------------
function initialise {

    # NOTE(review): several of these names are not referenced below.  They are
    # kept because bash_lib.sh is sourced inside this function, so removing a
    # local could change the scope of any same-named variable the library
    # assigns -- confirm against bash_lib.sh before pruning.
    local bash_lib buf cfg_fn extra_log_text filter found i log_fn my_cfg_fn now

    # Source the bash library
    # ~~~~~~~~~~~~~~~~~~~~~~~
    # Relative path: the current working directory must be the script's directory
    bash_lib=./bash_lib.sh
    source "$bash_lib"
    if [[ $? -ne 0 ]]; then
        echo "Unable to read the bash library, '$bash_lib'. Exiting" >&2
        exit 1
    fi

    # Override tty status
    # ~~~~~~~~~~~~~~~~~~~
    [[ ${SET_HAVE_TTY_FALSE:-$false} ]] && have_tty=$false

    # Parse command line
    # ~~~~~~~~~~~~~~~~~~
    cfg_dir=unset
    collation_root_dir=unset
    emsg=
    omindex_log_fn=unset
    log_dir=unset
    # The leading ':' selects getopts' silent error reporting so the offending
    # option letter is available in OPTARG for our own messages
    while getopts :c:hi:l: opt
    do
        case $opt in
            c )
                cfg_dir="$OPTARG"
                ;;
            h )
                usage verbose
                exit 0
                ;;
            i )
                omindex_log_fn="$OPTARG"
                ;;
            l )
                log_dir="$OPTARG"
                ;;
            : )
                emsg="$emsg"$'\n'"  Option -$OPTARG requires an argument"
                ;;
            * )
                emsg="$emsg"$'\n'"  Invalid option '-$OPTARG'"
                ;;
        esac
    done

    # Test for extra arguments
    # ~~~~~~~~~~~~~~~~~~~~~~~~
    shift $(( OPTIND - 1 ))
    if [[ $* != '' ]]; then
        emsg="$emsg"$'\n'"  Invalid extra argument(s) '$*'"
    fi

    # Test for mandatory options not set and directory permissions
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Every problem is appended to emsg so that all of them are reported together
    if [[ $cfg_dir != 'unset' ]]; then
        buf=$( ck_file "$cfg_dir" d:rx: 2>&1 )
        [[ $buf != '' ]] && emsg="$emsg"$'\n'"  Configuration directory '$cfg_dir': $buf"
    else
        emsg="$emsg"$'\n'"  Mandatory option -c not given"
    fi
    if [[ $omindex_log_fn != 'unset' ]]; then
        buf=$( ck_file "$omindex_log_fn" f:r: 2>&1 )
        [[ $buf != '' ]] && emsg="$emsg"$'\n'"  omindex log file '$omindex_log_fn': $buf"
    else
        emsg="$emsg"$'\n'"  Mandatory option -i not given"
    fi
    if [[ ! $have_tty ]]; then
        # Logging to file: a usable log directory is mandatory
        if [[ $log_dir != unset ]]; then
            log_dir=${log_dir%/}/		# Ensure trailing /
            buf=$( ck_file "$log_dir" d:rx: 2>&1 )
            if [[ $buf = '' ]]; then
                buf=$( ck_file "$log_dir" d:rwx: 2>&1 )
            fi
            [[ $buf != '' ]] && emsg="$emsg"$'\n'"  log directory '$log_dir': $buf"
        else
            emsg="$emsg"$'\n'"  Mandatory option -l not given"
        fi
    else
        log_dir=/tmp/    # Required for parse_cfg_for_bash.rb
    fi

    # Report any command line errors
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if [[ $emsg != '' ]]; then
        echo "$emsg" >&2
        usage
        exit 1
    fi

    # Parse the common configuration file
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set defaults (same as in docoll_lib.rb)
    extensions=(doc docx odp ods odt pdf pps ppsx ppt pptx rtf txt xls xlsx)
    emsg=
    cfg_dir=${cfg_dir%/}/        # Ensure a trailing /
    cfg_fn=${cfg_dir}collate.cfg
    ck_file "$cfg_fn" f:r: || finalise 1
    now=$( date +'%y-%m-%d@%H:%M' )
    parse_cfg_for_bash_rb_log_fn=${log_dir}parse_cfg_for_bash.rb.$now.log
    buf=$( ./parse_cfg_for_bash.rb --config "$cfg_fn" --log "$parse_cfg_for_bash_rb_log_fn" 2>&1 )
    parse_cfg_for_bash_rb_log_created=$true
    # Output that does not begin with "Parameters" is treated as failure;
    # show the parser's log before finalising
    if [[ ! $buf =~ ^Parameters ]]; then
        cat "$parse_cfg_for_bash_rb_log_fn"
        finalise 1
    fi
    collation_root_dir=$( echo "$buf" | grep 'CollationRootDir: ' \
        | sed --regexp-extended -e 's/  CollationRootDir: //' -e 's/[[:space:]]*$//' \
    )
    # Strip the surrounding [ ], quotes, commas and dots to leave a space
    # separated list of extensions
    buf=$( echo "$buf" | grep 'ExtensionsToInclude: ' \
        | sed --regexp-extended -e 's/  ExtensionsToInclude: \[//' \
            -e 's/][[:space:]]*$//' -e 's/[".,]//g' \
    )
    # Unquoted on purpose: word splitting turns the list into array elements
    [[ $buf != '' ]] && extensions=( $buf )

    # Error trap data from the common configuration file
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    buf=$( ck_file "$collation_root_dir" d:rx: 2>&1 )
    if [[ $buf != '' ]]; then
        msg E "$my_nam: collation root directory, '$collation_root_dir': $buf"
        finalise 1
    fi

}  # end of function initialise

#--------------------------
# Name: usage
# Purpose: prints usage message on stderr
# Arguments:
#    $1 - optional; 'verbose' prints the per-option help after the synopsis,
#         anything else prints a pointer to -h
#--------------------------
function usage {

    echo "usage: ${0##*/} -c cfg_dir [-h] -i input_file [-l log_dir]" >&2
    if [[ ${1:-} != 'verbose' ]]
    then
        echo "(use -h for help)" >&2
    else
        # Single quotes keep $SET_HAVE_TTY_FALSE literal: the help must show
        # the variable's name, not whatever value it currently holds
        printf '%s\n' \
            '  where:' \
            '    -c names the configuration directory' \
            '    -h prints this help and exits' \
            '    -i names the input file (omindex log file) to analyse' \
            '    -l names the directory to create log in' \
            '       Required when logging (no tty or $SET_HAVE_TTY_FALSE is true)' \
            '' >&2
    fi

}  # end of function usage

#--------------------------
# Name: main
# Purpose: where it all happens
#    * initialises
#    * aborts if the omindex log reports that the database could not be opened
#    * reports, per configured extension, files in the collation tree and
#      how many of them the omindex log reports as failed
#    * reports the files omindex skipped
#--------------------------
initialise "${@:-}"

# Filter program error messages are so common, varied and voluminous that it
# is impractical to analyse them or include them in a higher level log except
# for the checks below

msg I "$my_nam: analysing $omindex_log_fn"

# Check for database could not be opened
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
buf=$( grep "Database couldn't be opened" "$omindex_log_fn" )
if [[ $buf != '' ]]; then
    msg E "Database could not be opened:"$'\n'"$buf"
    finalise 1
fi

# Get failure data
# ~~~~~~~~~~~~~~~~
# The max_*_len values are column widths for the report below
files_in_tree=0
max_ext_len=3
max_in_tree_len=1
max_failed_len=1
for (( i=0; i<${#extensions[*]}; i++ ))
do
    ext=${extensions[i]}
    [[ ${#ext} -gt $max_ext_len ]] && max_ext_len=${#ext}

    # Count tree entries whose name ends with this extension (any case)
    in_tree[i]=$( find "$collation_root_dir" -iname "*.$ext" | wc -l )
    (( files_in_tree += in_tree[i] ))
    [[ ${in_tree[i]} -eq 0 ]] && continue
    [[ ${#in_tree[i]} -gt $max_in_tree_len ]] && max_in_tree_len=${#in_tree[i]}

    # Count log lines of the form: ...<name>.<ext>" failed...
    failed[i]=$( grep -c -i "\.$ext\" failed" "$omindex_log_fn" )
    [[ ${failed[i]} -eq 0 ]] && continue
    [[ ${#failed[i]} -gt $max_failed_len ]] && max_failed_len=${#failed[i]}

    # Percentage truncated to two decimal places, computed in hundredths of
    # a percent so that values below 1% keep their leading zero
    hundredths=$(( failed[i] * 10000 / in_tree[i] ))
    printf -v pct '%d.%02d' $(( hundredths / 100 )) $(( hundredths % 100 ))
    percentage[i]=$pct
done

# Format prettily
# ~~~~~~~~~~~~~~~
output=$(
    for (( i=0; i<${#extensions[*]}; i++ ))
    do
        if [[ ${in_tree[i]} -ne 0 ]]; then
            printf "  %${max_ext_len}s files: " "${extensions[i]}"
            printf "in tree: %${max_in_tree_len}s, " "${in_tree[i]}"
            printf "failed: %${max_failed_len}s" "${failed[i]}"
            if [[ ${failed[i]} -ne 0 ]]; then
                printf " %5s%%\n" "${percentage[i]}"
            else
                printf "\n"
            fi
        else
            printf "  %${max_ext_len}s files: none in tree\n" "${extensions[i]}"
        fi
    done
    echo "  Total files in tree: $files_in_tree"
)
msg I "$my_nam: "$'\n'"$output"

# Get skipped file messages
# ~~~~~~~~~~~~~~~~~~~~~~~~~
buf=$( grep ': Skipping ' "$omindex_log_fn" )
if [[ $buf = '' ]]; then
    msg I "No files skipped by omindex!"
else
    # One line of captured output per skipped file; no need to scan the log again
    n_files=$( wc -l <<< "$buf" )
    if [[ $n_files -lt 30 ]]; then
        msg W "$n_files files skipped by omindex:"$'\n'"$buf"
    else
        msg W "$n_files files skipped by omindex. List appended to $omindex_log_fn"
        # NOTE(review): this appends to the file being analysed, so analysing
        # the same log again would count these lines a second time -- confirm
        # that is acceptable
        printf '\n\n\n%s\n' "$buf" >> "$omindex_log_fn"
    fi
fi

finalise 0
