#!/bin/bash -x

# This file is part of Clusterix, Copyright (C) 2004 Alessandro Manzini, 
# email: a.manzini@infogroup.it

# Clusterix is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Clusterix is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Clusterix; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

# Positional parameters, captured in the order they are passed.

# $1: companion script that checks for the presence of this control process
control_control=${1}
# $2: script run to stop the cluster when the service check fails
stopscript=${2}
# $3: service check script; it must exit with 0 when everything is fine
controlscript=${3}
# $4: log file that progress messages are appended to
log=${4}
# $5: interval handed to sleep at the end of every supervision pass
checkprocfreq=${5}
# $6: number of tries before a service is declared down
countnum=${6}
# $7: seconds to wait between one failed try and the next
failedtrysec=${7}
# $8: label shown in the log for the service test
trymessageservice=${8}
# $9: ps command
ps=${9}
# $10: options given to the ps command
psoption=${10}

# Pattern looked up in the ps output to find the companion process
stringcontrol="${1} ${0}"

# Run a check command, retrying on failure, and report whether it finally passed.
#
# Globals:
#   log - file that receives the timestamped progress messages (appended)
# Arguments:
#   $1 - maximum number of attempts
#   $2 - check command line; expanded unquoted so it splits into command + arguments
#   $3 - seconds to sleep before each retry
#   $4 - label used for the check in the log messages
# Outputs:
#   Appends one line per failed attempt, plus a recovery or give-up line, to $log.
#   Once at least one attempt has failed, prints a "trycount=... onoff=..." debug
#   line to stdout.
# Returns:
#   0 if the command succeeded on any attempt (or if $1 is below 1, see the end
#   of the function), 1 when all $1 attempts failed.
checkroutine () {
    local trycountnum="$1"
    local tryprogram="$2"
    local tryfailedsec="$3"
    local trymessage="$4"
    local trycount=1
    # Value the attempt counter reaches when every allowed attempt has failed.
    local onoff=$((trycountnum + 1))

    # First attempt: nothing is logged when the check passes straight away.
    if $tryprogram
    then
        return 0
    fi
    echo "$(date): Found $trymessage failed. Number of tries: ${trycount}.  Retrying..." >> "$log"
    trycount=$((trycount + 1))

    while [ "$trycount" -le "$trycountnum" ]
    do
        sleep "$tryfailedsec"
        if $tryprogram
        then
            echo "$(date): $trymessage is now good after $trycount number of tries.Ok..." >> "$log"
            echo "trycount=$trycount onoff=$onoff"
            return 0
        fi
        echo "$(date): Found $trymessage failed. Number of tries: ${trycount}.  Retrying..." >> "$log"
        trycount=$((trycount + 1))
    done

    echo "trycount=$trycount onoff=$onoff"
    # With a maximum below 1 the counter is already past the threshold after the
    # first failure; that case has always been reported as success, so keep it.
    if [ "$trycount" -ne "$onoff" ]
    then
        return 0
    fi

    # Report the limit this call was given, not the script-wide setting.
    echo "$(date): $trymessage failed for $trycountnum times. I assume it is down..." >> "$log"
    return 1
}


# Supervision loop; it never terminates on its own.
# Each pass: run the service check, call the stop script if the check reports
# failure, make sure the companion control process is still listed by ps
# (relaunching it in the background when it is not), then sleep.
while true
do
    if ! checkroutine "$countnum" "$controlscript" "$failedtrysec" "$trymessageservice"
    then
        # Unquoted on purpose: a stop command carrying arguments must be split into words.
        $stopscript
    fi

    # Look for the companion's command line in the ps listing. The paths are
    # matched as fixed strings (-F) so characters such as "." are not treated
    # as regex operators. Taking field 2 assumes the chosen ps options put the
    # PID in the second column - TODO confirm against the configured $psoption.
    pid=$($ps $psoption | grep -F -- "$stringcontrol" | grep -v grep | awk '{print $2}')
    # Unquoted: several matching PIDs are printed on a single line.
    echo $pid
    if [ -n "$pid" ]
    then
        echo "$(date): control_control already running..."
    else
        echo "$(date): $control_control not present. Restarting..." >> "$log"
        $control_control "$0" "$stopscript" "$controlscript" "$log" "$checkprocfreq" "$countnum" "$failedtrysec" "$trymessageservice" "$ps" "$psoption" > /dev/null 2>&1 &
    fi

    sleep "$checkprocfreq"
done
