#!/bin/bash

# Copyright (C) 2012 Charles Atkinson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA

# Purpose: analyses an omindex log for failure rates by file name extension

# Usage: 
#    * The current working directory must be this script's directory
#    * Details of synopsis in the usage function.
#    * Automatically outputs to log if there is no tty to log to
#    * To force output to log: export SET_HAVE_TTY_FALSE=true

# Function call tree
#    +
#    |
#    +-- initialise
#    |   |
#    |   +-- parse_cmdline
#    |   |   |
#    |   |   +-- usage
#    |   |
#    |   +-- ./parse_cfg_for_bash.rb (external script run inline by initialise; not a function)
#    |
#    +-- finalise
#
# Utility functions called from various places:
#    ck_file msg

# Function definitions in alphabetical order.  Execution begins after the last function definition.

#--------------------------
# Name: finalise
# Purpose: cleans up, logs why the script is stopping and exits
# Arguments: $1 - the return code to exit with
# Globals read: my_nam, parse_cfg_for_bash_rb_log_fn
# Note: never returns; always ends the shell with exit
#--------------------------
function finalise {

    local exit_code signal_note

    # Delete the configuration parser's log, if one was ever named
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if [[ -n ${parse_cfg_for_bash_rb_log_fn:-} ]]; then
        rm -f "$parse_cfg_for_bash_rb_log_fn"
    fi

    # Work out whether the return code is one of the signal codes
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    exit_code=$1
    signal_note=
    case $exit_code in
        129 ) signal_note='finalising on SIGHUP' ;;
        130 ) signal_note='finalising on SIGINT' ;;
        131 ) signal_note='finalising on SIGQUIT' ;;
        143 ) signal_note='finalising on SIGTERM' ;;
    esac

    # Final logging
    # ~~~~~~~~~~~~~
    if [[ -n $signal_note ]]; then
        msg I "$my_nam: $signal_note"
    fi
    msg I "$my_nam: exiting with return code $exit_code"

    exit $exit_code

}  # end of function finalise

#--------------------------
# Name: initialise
# Purpose: sets up environment, parses command line, sets up logging and parses the config file
# Arguments: the script's command line arguments
# Globals read: cfg_dir and log_dir (both set by parse_cmdline), my_nam
# Globals set: collation_root_dir, emsg, extensions, parse_cfg_for_bash_rb_log_fn
# Exits: 1 when the bash library cannot be sourced; calls finalise 1 when the
#    configuration file or the collation root directory is unusable
#--------------------------
function initialise {

    # log_fn is local here; parse_cmdline assigns it while running inside this
    # function so the value is visible below but not after initialise returns.
    # The remaining names are kept local in case the sourced library assigns them.
    local bash_lib buf cfg_fn extra_log_text filter found i log_fn my_cfg_fn now

    # Source the bash library
    # ~~~~~~~~~~~~~~~~~~~~~~~
    bash_lib=./bash_lib.sh
    source "$bash_lib"
    if [[ $? -ne 0 ]]; then
        echo "Unable to read the bash library, '$bash_lib'. Exiting" >&2
        exit 1
    fi

    # Parse command line
    # ~~~~~~~~~~~~~~~~~~
    # "${@:-}" passes a single empty argument when there are none
    parse_cmdline "${@:-}"

    # Redirect output to log and log startup command
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    exec 1>>"$log_fn"
    exec 2>>"$log_fn"
    msg I "$my_nam: started by: $0 $*"

    # Parse the common configuration file
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set defaults (same as in docoll_lib.rb)
    extensions=(doc docx odp ods odt pdf pps ppsx ppt pptx rtf txt xls xlsx)
    emsg=
    cfg_fn=$cfg_dir/collate.cfg
    # Paths are quoted throughout so directories containing whitespace work
    ck_file "$cfg_fn" f:r: || finalise 1
    now=$( date +'%y-%m-%d@%H:%M' )
    parse_cfg_for_bash_rb_log_fn=$log_dir/parse_cfg_for_bash.rb.$now.log
    buf=$( ./parse_cfg_for_bash.rb --config "$cfg_fn" \
        --log "$parse_cfg_for_bash_rb_log_fn" 2>&1 \
    )
    # Successful helper output is expected to begin with "Parameters"
    if [[ ! $buf =~ ^Parameters ]]; then
        # Show whatever the helper said as well as its log; the helper may
        # have failed before it could write the log
        [[ $buf != '' ]] && msg E "$my_nam: unable to parse '$cfg_fn': $buf"
        [[ -r $parse_cfg_for_bash_rb_log_fn ]] && cat "$parse_cfg_for_bash_rb_log_fn"
        finalise 1
    fi
    collation_root_dir=$( grep 'CollationRootDir: ' <<<"$buf" \
        | sed --regexp-extended -e 's/  CollationRootDir: //' -e 's/[[:space:]]*$//' \
    )
    # Reduce '  ExtensionsToInclude: ["a", "b"]' to 'a b'
    buf=$( grep 'ExtensionsToInclude: ' <<<"$buf" \
        | sed --regexp-extended -e 's/  ExtensionsToInclude: \[//' \
            -e 's/][[:space:]]*$//' -e 's/[".,]//g' \
    )
    # Unquoted on purpose: word splitting turns the list into array elements
    [[ $buf != '' ]] && extensions=( $buf )

    # Error trap data from the common configuration file
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Any output from ck_file is treated as a description of the problem
    buf=$( ck_file "$collation_root_dir" d:rx: 2>&1 )
    if [[ $buf != '' ]]; then
        msg E "$my_nam: collation root directory, '$collation_root_dir': $buf"
        finalise 1
    fi

}  # end of function initialise

#--------------------------
# Name: parse_cmdline
# Purpose: parses the command line
# Arguments: the script's command line arguments
# Globals set: cfg_dir, emsg, log_dir, log_fn, omindex_log_fn, tmp_dir
#    (directory names are stored without trailing "/"s)
# Exits: 0 after printing help for -h; 1 after reporting every command
#    line error found
#--------------------------
function parse_cmdline {

    # This script is normally run by omindex.sh which directs stdout and stderr
    # to its own log.

    # OPTIND is local so each call starts parsing from the first argument
    local buf opt OPTIND=1

    cfg_dir=unset
    emsg=
    log_fn=unset
    omindex_log_fn=unset
    tmp_dir=unset
    # The leading ":" selects getopts' silent mode: the offending option
    # letter is delivered in OPTARG so it can be named in the error message
    while getopts :c:hl:o:t: opt
    do
        case $opt in
            c )
                cfg_dir="$OPTARG"
                ;;
            h )
                usage verbose
                exit 0
                ;;
            l )
                log_fn="$OPTARG"
                ;;
            o )
                omindex_log_fn="$OPTARG"
                ;;
            t )
                tmp_dir="$OPTARG"
                ;;
            : )
                emsg="$emsg"$'\n'"  Option '-$OPTARG' requires an argument"
                ;;
            * )
                emsg="$emsg"$'\n'"  Invalid option '-$OPTARG'"
                ;;
        esac
    done

    # Test for extra arguments
    # ~~~~~~~~~~~~~~~~~~~~~~~~
    # A single empty argument (what "${@:-}" passes when there are none) is
    # not treated as an error
    shift $(( OPTIND - 1 ))
    if [[ $* != '' ]]; then
        emsg="$emsg"$'\n'"  Invalid extra argument(s) '$*'"
    fi

    # Test for mandatory options not set and directory permissions
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Any output from ck_file is treated as a description of the problem.
    # Every message is appended to emsg so that all errors are reported.
    if [[ $cfg_dir != 'unset' ]]; then
        buf=$( ck_file "$cfg_dir" d:rx: 2>&1 )
        [[ $buf != '' ]] && emsg="$emsg"$'\n'"  Configuration directory '$cfg_dir': $buf"
    else
        emsg="$emsg"$'\n'"  Mandatory option -c not given"
    fi
    # Remove any trailing "/"s (a loop, so no dependence on shopt extglob)
    while [[ $cfg_dir == */ ]]; do cfg_dir=${cfg_dir%/}; done

    if [[ $log_fn != 'unset' ]]; then
        if [[ $log_fn != /dev/tty ]]; then
            if [[ $log_fn == */* ]]; then
                log_dir=${log_fn%/*}
                [[ $log_dir == '' ]] && log_dir=/    # Log file directly under /
            else
                log_dir=.                            # Bare file name: current directory
            fi
            buf=$( ck_file "$log_dir" d:rx: 2>&1 )
            [[ $buf != '' ]] && emsg="$emsg"$'\n'"  Log directory '$log_dir': $buf"
        else
            log_dir=${tmp_dir:-}
        fi
    else
        emsg="$emsg"$'\n'"  Mandatory option -l not given"
    fi
    log_dir=${log_dir:-}
    while [[ $log_dir == */ ]]; do log_dir=${log_dir%/}; done

    if [[ $omindex_log_fn != 'unset' ]]; then
        buf=$( ck_file "$omindex_log_fn" f:r: 2>&1 )
        [[ $buf != '' ]] && emsg="$emsg"$'\n'"  Omindex log '$omindex_log_fn': $buf"
    else
        emsg="$emsg"$'\n'"  Mandatory option -o not given"
    fi

    if [[ $tmp_dir != 'unset' ]]; then
        buf=$( ck_file "$tmp_dir" d:rx: 2>&1 )
        [[ $buf != '' ]] && emsg="$emsg"$'\n'"  Temporary directory '$tmp_dir': $buf"
    else
        emsg="$emsg"$'\n'"  Mandatory option -t not given"
    fi
    while [[ $tmp_dir == */ ]]; do tmp_dir=${tmp_dir%/}; done

    # Report any command line errors
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if [[ $emsg != '' ]]; then
        echo "$emsg" >&2
        usage
        exit 1
    fi

}  # end of function parse_cmdline

#--------------------------
# Name: usage
# Purpose: prints usage message
# Arguments: $1 - optional; "verbose" adds a description of each option
# Output: everything is written to stderr
#--------------------------
function usage {

    local synopsis

    synopsis="usage: ${0##*/} -c cfg_dir [-h] -l log_file -o omindex_log -t tmp_dir"
    printf '%s\n' "$synopsis" >&2

    if [[ ${1:-} = 'verbose' ]]; then
        # The final empty argument reproduces the blank line after the list
        printf '%s\n' \
            "  where:" \
            "    -c names the directory containing the configuration files" \
            "    -h prints this help and exits" \
            "    -l names the log file.  Use /dev/tty to get logging on screen" \
            "    -t names the directory for temporary files" \
            "    -o names the omindex log to analyse" \
            "" >&2
    else
        printf '%s\n' "(use -h for help)" >&2
    fi

}  # end of function usage

#--------------------------
# Name: main
# Purpose: where it all happens
#    * Initialises (command line, logging, configuration)
#    * Stops with return code 1 if omindex could not open its database
#    * Logs, per configured extension, files in the collation tree and
#      the number and percentage reported as failed in the omindex log
#    * Logs the files omindex skipped, or appends the list to the omindex
#      log when there are 30 or more
#--------------------------
initialise "${@:-}"

# Filter program error messages are so common, varied and voluminous that it 
# is impractical to analyse them or include them in a higher level log except
# for the checks below

msg I "$my_nam: analysing $omindex_log_fn"

# Check for database could not be opened
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
buf=$( grep "Database couldn't be opened" "$omindex_log_fn" )
if [[ $buf != '' ]]; then
    msg E "Database could not be opened:"$'\n'"$buf"
    finalise 1
fi

# Get failure data
# ~~~~~~~~~~~~~~~~
# The max_*_len variables track column widths for the report below
files_in_tree=0
max_ext_len=3
max_in_tree_len=1
max_failed_len=1
for (( i=0; i<${#extensions[*]}; i++ ))
do
    ext=${extensions[i]}
    [[ ${#ext} -gt $max_ext_len ]] && max_ext_len=${#ext}
    # find's diagnostics (unreadable directories such as lost+found) are
    # discarded so that they are not counted as files
    in_tree[i]=$( find "$collation_root_dir" -iname "*.$ext" 2>/dev/null | wc -l )
    (( files_in_tree += in_tree[i] ))
    [[ ${#in_tree[i]} -gt $max_in_tree_len ]] && max_in_tree_len=${#in_tree[i]}
    if [[ ${in_tree[i]} -ne 0 ]]; then
        # Count the log lines reporting a failure for a file with this extension
        failed[i]=$( grep -ci "\.$ext\" failed" "$omindex_log_fn" )
        if [[ ${failed[i]} -ne 0 ]]; then
            [[ ${#failed[i]} -gt $max_failed_len ]] && max_failed_len=${#failed[i]}
            percentage[i]=$( echo "scale=2; (${failed[i]} * 100 ) / ${in_tree[i]}" | bc )
        fi
    fi
done

# Format prettily
# ~~~~~~~~~~~~~~~
output=$( 
    for (( i=0; i<${#extensions[*]}; i++ ))
    do
        if [[ ${in_tree[i]} -ne 0 ]]; then
            printf "  %${max_ext_len}s files: " "${extensions[i]}"
            printf "in tree: %${max_in_tree_len}s, " "${in_tree[i]}"
            printf "failed: %${max_failed_len}s" "${failed[i]}"
            if [[ ${failed[i]} -ne 0 ]]; then
                printf " %5s%%\n" "${percentage[i]}"
            else
                printf "\n"
            fi
        else
            printf "  %${max_ext_len}s files: none in tree\n" "${extensions[i]}"
        fi
    done
    echo "  Total files in tree: $files_in_tree"
)
msg I "$my_nam: "$'\n'"$output"

# Get skipped file messages
# ~~~~~~~~~~~~~~~~~~~~~~~~~
buf=$( grep ': Skipping ' "$omindex_log_fn" )
if [[ $buf = '' ]]; then
    msg I "No files skipped by omindex!"
else
    # Count the captured lines so the number always agrees with the list
    n_files=$( grep -c ': Skipping ' <<<"$buf" )
    if [[ $n_files -lt 30 ]]; then
        msg W "$n_files files skipped by omindex:"$'\n'"$buf"
    else
        # Too many to put in this log; append the list to the omindex log
        msg W "$n_files files skipped by omindex. List appended to $omindex_log_fn"
        echo $'\n\n\nSummary of files that failed indexing\n' >> "$omindex_log_fn"
        echo "$buf" >> "$omindex_log_fn"
    fi
fi

finalise 0
