#!/bin/bash

# Copyright (C) 2011 Charles Atkinson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA

# Purpose: wrapper script for use with omindex --filter option to run unoconv

# Usage:
#   * Option and argument details in the usage function.
#   * Sends output from unoconv to stdout (as required by omindex).
#   * Logs warnings and errors only, to the file named in $UNOCONV_WRAPPER_LOG
#     (if set) unless command line option -l overrides it.

# Notes for developers:
#   * omindex only receives stdout from programs run via the --filter option.
#   * Hence the use of envar $UNOCONV_WRAPPER_LOG which can be set by whatever
#     calls omindex.
#   * The source code is at:
#     https://gitorious.org/xapian/xapian/trees/master/xapian-applications/omega
#     The most pertinent parts are the index_mimetype function in omindex.cc
#     (under comment // Easy "run a command and read UTF-8 text from stdout"
#     cases) and the stdout_to_string function in runfilter.cc.

# Function call tree
#    +
#    |
#    +-- initialise
#    |   |
#    |   +-- usage
#    |
#    +-- ck_soffice_server
#    |
#    +-- start_soffice_server
#    |   |
#    |   +-- ck_soffice_server
#    |
#    +-- run_unoconv
#    |
#    +-- finalise
#
# Utility functions called from various places:
#    ck_file finalise msg

# Function definitions in alphabetical order.  Execution begins after the last function definition.

#--------------------------
# Name: ck_soffice_server
# Purpose: checks whether an OpenOffice.org server is listening on $soffice_port
# Globals: soffice_port (read)
# Return: 0 if netstat shows a soffice.bin process listening on the port;
#   1 if not
#--------------------------
function ck_soffice_server {

    # Is a soffice.bin process listening on the port?
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # The whole test is done by grep so every netstat line is examined on its
    # own.  Trailing blanks after the program name are tolerated because
    # netstat may pad its last column, and the dot in soffice.bin is escaped
    # so it only matches a literal dot.
    # grep's output is discarded rather than using -q so netstat is always read
    # to the end and never sees a closed pipe.
    if netstat --all --numeric --program --tcp 2>/dev/null \
        | grep -E ":$soffice_port .*LISTEN.*/soffice\.bin[[:space:]]*\$" >/dev/null
    then
        return 0
    fi
    return 1

}  # end of function ck_soffice_server

#--------------------------
# Name: finalise
# Purpose: kills the redundant timer subshell or parent shell, removes the
#   temporary file, logs the reason for signal exits and exits
# Arguments: $1 - return code; values 129, 130, 131 and 143 are logged as the
#   matching signal; 137 and 144 (set by the USR1 and CHLD traps) become 1
# Globals: parent_shell_PID, subshell_PID, tmp_fn, tmp_fn_created_flag,
#   my_nam (all read)
# Return: does not return; always exits
#--------------------------
function finalise {

    local rc subshell_ppid

    # Kill any redundant shell
    # ~~~~~~~~~~~~~~~~~~~~~~~~
    if [[ ${parent_shell_PID:-} != '' ]]; then
        # Is this the parent or the subshell?
        if [[ ${subshell_PID:-} != '' ]]; then
            # Parent so kill subshell, but only when that PID is still a child
            # of this script.  ps pads the PPID column with blanks so they are
            # stripped before comparing; "ppid=" suppresses the heading.
            subshell_ppid=$( ps -o ppid= -p "$subshell_PID" )
            subshell_ppid=${subshell_ppid//[[:space:]]/}
            [[ $subshell_ppid == "$$" ]] \
                && /bin/kill -s SIGKILL "$subshell_PID" >/dev/null 2>&1
        else
            # Subshell so kill parent
            /bin/kill -s SIGUSR1 "$parent_shell_PID" >/dev/null 2>&1
        fi
    fi

    # Remove temporary file
    # ~~~~~~~~~~~~~~~~~~~~~
    # The defaults keep this safe if finalise runs before initialise has set
    # the variables.
    [[ ${tmp_fn_created_flag:-} ]] && rm -f -- "${tmp_fn:-}"

    # Final logging
    # ~~~~~~~~~~~~~
    rc=$1
    case $rc in
        129 )
            msg W "$my_nam: finalising on SIGHUP"
            ;;
        130 )
            msg W "$my_nam: finalising on SIGINT"
            ;;
        131 )
            msg W "$my_nam: finalising on SIGQUIT"
            ;;
        137 | 144 )
            # This is for when the subshell kills the parent shell.
            # omega's runfilter.cc re-runs the filter (this script)
            # when the filter reports EINTR (any return code > 127 ?)
            # so set a normal failed return code and do not duplicate
            # the subshell's finalising message
            # SIGUSR1 is expected; SIGCHLD is belt-and-braces
            exit 1
            ;;
        143 )
            msg W "$my_nam: finalising on SIGTERM"
            ;;
    esac

    exit $rc

}  # end of function finalise

#--------------------------
# Name: initialise
# Purpose: sources the bash library, sets traps and configuration defaults,
#   parses the command line, sets up stderr logging and creates the
#   temporary file
# Arguments: the script's command line: [-h] [-l log] [-p port] file
# Globals set: bash_lib, soffice_port, wait_for_soffice_server,
#   wait_for_unoconv, input_fn, tmp_fn, tmp_fn_created_flag; have_tty is
#   cleared when $SET_HAVE_TTY_FALSE is set
# Return: 0 on success; exits 0 after -h; exits 1 (after reporting all errors
#   found and printing usage) on command line or log errors; calls finalise 1
#   if the temporary file cannot be created
#--------------------------
function initialise {

    local buf emsg logging_OK my_log my_log_dir opt

    # Source the bash library
    # ~~~~~~~~~~~~~~~~~~~~~~~
    bash_lib=./bash_lib.sh
    source "$bash_lib"
    if [[ $? -ne 0 ]]; then
        echo "Unable to read the bash library, '$bash_lib'. Exiting" >&2
        exit 1
    fi

    # Set extra traps
    # ~~~~~~~~~~~~~~~
    # The codes are translated to a normal failure by finalise
    trap 'finalise 137' 'USR1'
    trap 'finalise 144' 'CHLD'

    # Override tty status
    # ~~~~~~~~~~~~~~~~~~~
    [[ ${SET_HAVE_TTY_FALSE:-$false} ]] && have_tty=$false

    # Set configuration defaults
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~
    soffice_port=2002
    wait_for_soffice_server=5
    wait_for_unoconv=20
    if [[ ${UNOCONV_WRAPPER_LOG:-} != '' ]]; then
        my_log=$UNOCONV_WRAPPER_LOG
        logging_OK=$true
    else
        my_log=unset
        logging_OK=$false
    fi

    # Parse command line
    # ~~~~~~~~~~~~~~~~~~
    # The leading ':' selects getopts' silent mode so the offending option
    # letter is available in $OPTARG for the error messages.
    emsg=
    while getopts :hl:p: opt
    do
        case $opt in
            h )
                usage verbose
                exit 0
                ;;
            l )
                my_log="$OPTARG"
                if [[ $my_log =~ ^/ ]]; then
                    logging_OK=$true
                else
                    emsg="$emsg"$'\n''  Option -l: path must be absolute'
                    logging_OK=$false
                fi
                ;;
            p )
                soffice_port="$OPTARG"
                # The port is later embedded in a grep pattern and in the
                # soffice -accept string so it must be purely numeric
                [[ $soffice_port =~ ^[0-9]+$ ]] \
                    || emsg="$emsg"$'\n''  Option -p: port must be a number'
                ;;
            : )
                emsg="$emsg"$'\n'"  Option -$OPTARG requires an argument"
                ;;
            * )
                emsg="$emsg"$'\n'"  Invalid option '-$OPTARG'"
                ;;
        esac
    done

    # Get the input file argument
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # A single empty argument is treated as missing because the main sequence
    # passes "${@:-}", which yields one empty word when there are no arguments.
    shift $(( OPTIND - 1 ))
    if [[ $# -eq 0 || ( $# -eq 1 && $1 == '' ) ]]; then
        emsg="$emsg"$'\n''  Missing input file argument'
    elif [[ $# -eq 1 ]]; then
        input_fn=$1
    else
        shift
        emsg="$emsg"$'\n'"  Invalid extra argument(s) '$*'"
    fi

    # Test log permissions
    # ~~~~~~~~~~~~~~~~~~~~
    # Any problem is appended to emsg so earlier command line errors are kept
    if [[ ! $have_tty ]]; then
        if [[ $my_log != unset ]]; then
            if [[ -e $my_log ]]; then
                buf=$( ck_file "$my_log"  f:w: 2>&1 )
                if [[ $buf != '' ]]; then
                    logging_OK=$false
                    emsg="$emsg"$'\n'"  log '$my_log': $buf"
                fi
            else
                # Directory part of the log path: "." when there is no slash
                # and "/" when the only slash is the leading one
                if [[ $my_log == */* ]]; then
                    my_log_dir=${my_log%/*}
                    [[ $my_log_dir == '' ]] && my_log_dir=/
                else
                    my_log_dir=.
                fi
                buf=$( ck_file "$my_log_dir"  d:rwx: 2>&1 )
                if [[ $buf != '' ]]; then
                    logging_OK=$false
                    emsg="$emsg"$'\n'"  directory for log '$my_log': $buf"
                fi
            fi
        fi
    else
        logging_OK=$true
    fi

    # Set up output redirection and logging
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # This script does not log information (which the msg function sends to
    # stdout), only warnings and errors which go to stderr as is appropriate for
    # a script which may be run thousands of times during an omindex run.
    if [[ ! $have_tty ]]; then
        [[ $my_log != unset && $logging_OK ]] && exec 2>>"$my_log"
    else
        exec 2>/dev/tty
    fi

    # Report any command line errors
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if [[ $emsg != '' ]]; then
        echo "$emsg" >&2
        usage
        exit 1
    fi

    # Create temporary file
    # ~~~~~~~~~~~~~~~~~~~~~
    # mktemp's stderr is captured too so that on failure $tmp_fn holds the
    # error text for the log message.
    local rc
    tmp_fn_created_flag=$false
    tmp_fn="$( mktemp "/tmp/$my_nam.XXXXXX" 2>&1 )"
    rc=$?
    if [[ $rc -ne 0 ]]; then
        msg E "Unable to create temporary file:"$'\n'"$tmp_fn"
        finalise 1
    fi
    tmp_fn_created_flag=$true

}  # end of function initialise

#--------------------------
# Name: run_unoconv
# Purpose: runs unoconv on $input_fn under a background timeout and copies
#   the converted text to stdout
# Globals: input_fn, tmp_fn, wait_for_unoconv (read);
#   parent_shell_PID, subshell_PID (set, for use by finalise)
# Return: calls finalise on error, otherwise returns 0
#--------------------------
function run_unoconv {

    local buf

    # Set up a timing subprocess
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Sometimes the soffice.bin server process hangs.  When that happens,
    # omindex will SIGKILL this script after 300 seconds, giving this
    # script no opportunity to log that it has been killed.
    # https://gitorious.org/xapian/xapian/blobs/master/xapian-applications/omega/runfilter.cc
    # Only the timer's stdout is sent to /dev/null: it must never write to or
    # hold open the stream carrying the converted text, while its stderr is
    # kept so the timeout message still reaches the log.
    parent_shell_PID=$$  # In case subshell calls finalise
    (
        # TODO:
        # Try delayed loop with timeout counter, reset whenever syscr value from
        # soffice.bin's and/or unoconv's /proc/<PID>/io increases
        sleep "$wait_for_unoconv"
        msg E "unoconv not finished converting '$input_fn' after $wait_for_unoconv seconds; giving up"
        finalise 1
    ) >/dev/null &
    subshell_PID=$!  # In case this shell calls finalise
    disown $!        # Suppresses the bash job control job ending message

    # Run unoconv, sending output to temporary file
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Redirection order matters: stderr is first pointed at the command
    # substitution (so it lands in $buf), then stdout goes to the file.
    buf=$( unoconv --stdout -f text "$input_fn" 2>&1 >"$tmp_fn" )
    case $buf in
        '' )
            ;;
        'Fatal Python error: PyImport_GetModuleDict: no module dictionary!' )
            # According to http://www.mail-archive.com/ubuntu-bugs@lists.ubuntu.com/msg623148.html
            # the filtered message happens after unoconv starts exiting, presumably
            # after conversion is complete.
            ;;
        *'The provided document cannot be converted to the desired format.' )
            msg E "unoconv could not convert '$input_fn'"
            finalise 1
            ;;
        * )
            msg E "Unexpected stderr from unoconv for '$input_fn':"$'\n'"$buf"
            finalise 1
            ;;
    esac

    # Ensure unoconv did output some text and if so send to stdout
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if [[ -s "$tmp_fn" ]]; then
        cat "$tmp_fn"
    else
        msg E "unoconv did not output any text"
        finalise 1
    fi

    return 0

}  # end of function run_unoconv

#--------------------------
# Name: start_soffice_server
# Purpose: starts an OpenOffice.org server in the background and waits up to
#   $wait_for_soffice_server seconds for it to listen on $soffice_port
# Globals: soffice_port, wait_for_soffice_server (read)
# Return: 0 if OpenOffice.org server is started; otherwise calls finalise
#--------------------------
function start_soffice_server {
    local i

    # The server outlives this script so its stdout is sent to /dev/null: it
    # must not write to, or keep open, the stream that carries the converted
    # text.  Its stderr is left alone so any messages still reach the log.
    soffice -headless -accept="socket,host=127.0.0.1,port=$soffice_port;urp;" -nofirststartwizard >/dev/null &
    disown $!        # Suppresses the bash job control job ending message

    # Poll once a second until the server is listening or time runs out
    for (( i=0; i<wait_for_soffice_server; i++ ))
    do
        sleep 1
        ck_soffice_server && return 0
    done
    msg E "Timed out waiting for $wait_for_soffice_server seconds for OpenOffice.org server to start"
    finalise 1

}  # end of function start_soffice_server

#--------------------------
# Name: usage
# Purpose: prints usage message on stderr
# Arguments: $1 - 'verbose' to print the option descriptions; anything else
#   (or nothing) prints the synopsis and a pointer to -h
#--------------------------
function usage {

    # The synopsis lists every option accepted by the getopts string in
    # initialise plus the mandatory input file
    echo "usage: ${0##*/} [-h] [-l log] [-p port] file" >&2
    if [[ ${1:-} != 'verbose' ]]
    then
        echo "(use -h for help)" >&2
    else
        echo "  where:
    -h prints this help and exits
    -l names the log file which must be an absolute pathname
    -p sets the TCP port of the OpenOffice.org server (default 2002)
    file is the document to convert to text
" >&2
    fi

}  # end of function usage

#--------------------------
# Name: main
# Purpose: top-level sequence: initialise, make sure an OpenOffice.org server
#   is running, convert the input file and finish
#--------------------------
initialise "${@:-}"

# Only start a server when none is already listening
if ! ck_soffice_server; then
    start_soffice_server
fi

run_unoconv

finalise 0
