#!/usr/bin/perl -w
use strict ;

=head1 sched_job

A complete network scheduling system solution

=head2 DESCRIPTION

This program takes a job file and executes it.

=head2 USAGE

    Usage : sched_job [-h] [-v] [-n] [-m] [-s serial] [-e|-r id] [-c config.conf] -j job
     --help      : print this help
     --conf f    : use f for config file
     --serial s  : use serial s
     --job job   : execute job
     --nonetwork : disable connexion to sched_master
     --master-sync : send STATUS to master after each event
     --execute id: execute only selected id (don't use condition)
     --retry id :  execute starting from id

=head2 INSTALLATION

Complete installation details are in the README and README.conf files included
with the software. It requires the following libraries, available on CPAN:
 - XML::Mini
 - Net::EasyTCP
 - Event

=head2 CONFIGURATION

    [job]

    ; directories where the following reside:

    ; job file
    job_dir=/path/to/jobdir 

    ;  status information
    work_dir=/path/to/workdir
    
    ; network configuration (see Sched::Net)

    master_ip=localhost
    master_port=5544
    master_passwd=test
    master_retry=3
    db_dir=/path/to/dbdir

=head2 AUTHOR

(C) 2004-2005 Eric Bollengier

You may reach me through the contact info at eric@eb.homelinux.org

=head2 LICENSE

    sched_job, part of the network scheduling system (Sched)
    Copyright (C) 2004-2005 Eric Bollengier
        All rights reserved.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

=cut



use File::Copy qw/copy/ ;
use File::Basename ;

use XML::Mini ;

use Sched ;
use Sched::Job ;
use Sched::NS ;
use Sched::Cmd ;
use Sched::Var ;
use Sched::Cond ;
use Sched::Callback ;
use Sched::Timeout ;

my $VERSION = '$Id: sched_job,v 1.1 2005/04/05 19:45:18 mcgregor Exp $' ;

$ENV{PATH} = '/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin' ;

################################################################
# command-line handling

# Print the usage text on STDOUT, then terminate the program with
# exit status 1.  Takes no argument of its own (anything passed in is
# ignored) and never returns.
sub HELP_MESSAGE
{
    my $usage = <<"END_USAGE" ;
Usage : $0 [-h] [-v] [-m] [-n] [-s serial] [-e|-r id] [-c config.conf] -j job
     --help      : print this help

     --conf f    : use f for config file
     --serial s  : use serial s
     --job job   : execute job

     --nonetwork : disable connexion to sched_master
     --master-sync : send STATUS to master after each event

     --execute id: execute only selected id (don't use condition)
     --retry id :  execute starting from id
END_USAGE

    print $usage ;
    exit (1) ;
}


################################################################
################	Variables	        ################

# Command-line settings.  The values below are the defaults; each one
# can be overridden by the matching GetOptions() entry.
my $serial = Sched::get_serial() ;	              # session identifier
my $file_conf = "$Sched::prefix_etc/job.cfg";	      # configuration file
my $file_job ;		# XML file of the job (--job)
my %defines ;		# extra variables given with --define key=value
my $no_network ;	# true when --nonetwork was given
my $master_sync=0 ;	# keep the master informed of the status

my $execute ;		# --execute id
my $retry ;		# --retry id

my $job ; 		# Sched::Job 

use Getopt::Long ;

# GetOptions() returns false when the command line holds an unknown
# option or an invalid value (it has already reported the problem on
# STDERR).  Stop with the usage text in that case rather than running
# a job with a partially understood command line.
GetOptions("conf=s"   => \$file_conf,
	   "job=s"    => \$file_job,
	   "serial=s" => \$serial,
	   "define=s" => \%defines,
	   "nonetwork" => \$no_network, 
	   "execute=s" => \$execute,
	   "retry=s"  => \$retry,
	   "master-sync" => \$master_sync,
	   "help"     => \&HELP_MESSAGE)
    or HELP_MESSAGE() ;

# the configuration file is mandatory
if (!-r $file_conf) {
    print "E : config file is not readable\n" ;
    HELP_MESSAGE() ;
}

# --execute and --retry both select a starting task: only one is allowed
if ($retry and $execute) {
    print "E : --execute and --retry are incompatible\n" ;
    HELP_MESSAGE() ;
}

################################################################
# timer handling
use Event ;

my $count=0 ;		# number of tasks currently running
my $have_to_sync=0 ;    # set by job_sync(); makes idle_wait_func() send the status to the master
################################################################
# graph traversal

# Start a series of tasks: calls launch_task() on every 'task' child
# returned by getAllChildren() for the given XML node.
# launch_subtask($xml)
sub launch_subtask
{
    my ($xml) = @_ ;

    my $children = $xml->getAllChildren('task') ;

    foreach my $child (@$children) {
	launch_task($child) ;
    }
}

# Start one task and arm a timer with its maxtime.
# Nothing is started when the job is no longer running (the graph is
# being cancelled) or when the task is not in the 'new' state.
# The result of the task's condition decides what happens next:
#   undefined : results are missing (or the script failed); launch_task
#               is handed to Sched::Cond::check_all_result() as callback
#   0         : the task is marked as failed and its sub-tasks are tried
#   otherwise : the command is run
# launch_task($XMLtask)
sub launch_task
{
    my ($t) = @_ ;

    return unless $job->is_running() ;

    $Sched::log->write("D : lancement de " . $t->attribute('id'), 15) ;

    # reuse the command already known for this task, or create it
    my $cmd = Sched::Cmd::get_from_sid($Sched::sid . ":" . $t->attribute('id')) ;
    $cmd = Sched::Cmd->new(xml => $t, serial => $serial) unless $cmd ;

    return unless defined $cmd ;
    return unless $cmd->attribute('state') eq 'new' ;

    my $r   = Sched::Cond::is_ok($cmd->{xml}) ;
    my $sid = $cmd->attribute('sid') ;

    # not all results are available yet, or the script is in error
    if (not defined $r) {
	$Sched::log->write("D : Cond($sid) manque de resultats", 20) ;
	&Sched::Cond::check_all_result($cmd->{xml},
				       \&launch_task,
				       [$cmd]) ;
	return ;
    }

    if ($r == 0) {
	# condition failed
	$Sched::log->write("D : Cond($sid) en erreur", 20) ;
	$cmd->attribute('status', 127) ;
	$cmd->attribute('state', 'failed') ;
	$cmd->attribute('info', 'I : condition failed') ;
	Sched::Callback::call_cb("CHLD $sid") ;	
	launch_subtask($cmd->{xml}) ;
    }
    else {
	# condition fulfilled
	$Sched::log->write("D : Cond($sid) OK", 20) ;
	$cmd->attribute('info', 'I : cond ok') ;
	$count++ ;
	$cmd->run() ;
	Sched::Timeout::add_timer($sid, $cmd->attribute('maxtime')) ;
    }

    job_sync() ;
}

# Cancel the task whose id is given (used for the CANCEL command in
# %proto).  The command is looked up under "<current sid>:<id>"; an
# unknown id is only logged.  After the cancel, a timer of 5 is added
# for the command and the job is synchronised.
sub _task_cancel
{
    my ($id) = @_ ;

    my $cmd = Sched::Cmd::get_from_sid("$Sched::sid:$id") ;

    unless ($cmd) {
	$Sched::log->write("E : impossible de trouver $id") ; 
	return ;
    }

    $cmd->cancel() ;
    Sched::Timeout::add_timer($cmd->attribute('sid'), 5) ;
    job_sync() ;
}

# Answer a TASK_STATUS request for the task whose id is given.
#   - unknown task          : "TASK_STATUS_IS <id> 127" is sent
#   - state finish / failed : "TASK_STATUS_IS <id> <status>" is sent
#   - any other state       : _task_status is registered as callback
#                             on the "CHLD <sid>:<id>" event
sub _task_status
{
    my ($id) = @_ ;

    my $cmd = Sched::Cmd::get_from_sid("$Sched::sid:$id") ;

    if (! $cmd) {
	# send back a NOT FOUND answer
	$Sched::log->write("E : impossible de trouver $id") ; 
	Sched::Net::send_to_master("TASK_STATUS_IS $id 127", 'keep_last') ;
	return ;
    }

    my $state = $cmd->attribute('state') ;

    if ($state ne "finish" and $state ne "failed") {
	# no final status yet
	$Sched::log->write("I : wait CHLD $id...", 15) ;
	Sched::Callback::add_cb("CHLD ${Sched::sid}:$id", \&_task_status, $id);
    } else {
	my $st = $cmd->attribute('status') ;
	Sched::Net::send_to_master("TASK_STATUS_IS $id $st", 'keep_last') ;
    }
}

# Handle a CHLD message (see %proto): $data is an XML string whose
# root holds a 'task' element.  The matching command is updated from
# that element, its timer is removed, the running-task counter is
# decremented, the CHLD callbacks are called and the sub-tasks of the
# command are launched.
sub _task_chld
{
    my ($data) = @_ ;

    my $doc = XML::Mini::Document->new() ;
    $doc->fromString($data) ;

    my $xml = $doc->getRoot()->getElement('task') ;

    unless ($xml) {
	$Sched::log->write("E : impossible de charger $data") ;
	return ;
    }

    my $cmd = Sched::Cmd::get_from_sid($xml->attribute('sid')) ;

    unless ($cmd) {
	$Sched::log->write("E : impossible de trouver le sid (" . $xml->attribute('sid') . ")") ;
	return ;
    }

    $cmd->update_from_xml($xml) ;
    Sched::Timeout::del_timer($cmd->attribute('sid')) ;

    $count-- ;

    $Sched::log->write('Seems to wait network child...', 10) ;
    Sched::Callback::call_cb("CHLD " . $cmd->attribute('sid')) ;
    launch_subtask($cmd->{xml}) ;

    job_sync() ;
}

# Table of the commands understood on the connection to the master; it
# is handed to Sched::Net::set_proto() by the main block.
# Each entry has:
#   arg : undef, or a regular expression (as a string)
#         NOTE(review): presumably used by Sched::Net to validate the
#         argument of the command, undef meaning "no check" - confirm
#   fct : the code reference called for the command
my %proto = (
	     # toggle the $master_sync flag
	     'WATCH' => {
		 arg => undef,
		 fct => sub { 
		 	$master_sync = ! $master_sync ;
			$Sched::log->write("I : master_sync = $master_sync", 10) ;
		},
	     },

	     # answer 'OK' to the master
	     'PING' => {	# no error handling
		 arg => undef,
		 fct => sub {
		     Sched::Net::send_to_master('OK', 'drop') ;
		 },
	     },
	     # accepted, nothing to do
	     'OK' => {	# no error handling
		 arg => undef,
		 fct => sub {
		 },
	     },

	     # send the job status to the master
	     'UPDATE' => {
		 arg => undef,
		 fct => \&job_send_status,
	     },

	     # a task has ended: see _task_chld
	     'CHLD' => {
		 arg => undef,
		 fct => \&_task_chld,
	     },

	     # send signal 15 to this very process
	     'CANCELJOB' => {
		 arg => undef,
		 fct => sub { kill(15, $$) ; },
	     },

	     # cancel one task, the argument is its id
	     'CANCEL' => {
		 arg => "^$Sched::re_cmdid\$",
		 fct => \&_task_cancel,
	     },
	     # calls Sched::Net::_got_error()
	     'QUIT' => {
		 arg => undef,
		 fct => sub {
		     Sched::Net::_got_error() ;
		   },
	     },
	     # ask for the status of one task, the argument is its id
	     'TASK_STATUS' => {
		 arg => "^$Sched::re_cmdid\$",
		 fct => \&_task_status,
	     },
	     # "<id> <number>": accepted and ignored
	     'TASK_STATUS_IS' => {
		 arg => "^$Sched::re_cmdid [0-9]+\$",
		 fct => sub {},
	     },
	     );

# Cancel every started task (state = running) found below the given
# XML node, walking the tree recursively.  A timer of 5 is added for
# each cancelled command.  Always returns 0.
# cancel_running_task($XML)
sub cancel_running_task
{
    my ($root) = @_ ;

    foreach my $task (@{ $root->getAllChildren('task') }) {

	if ($task->attribute('state') and $task->attribute('state') eq "running") {
	    my $cmd = Sched::Cmd::get_from_sid("$Sched::sid:" . $task->attribute('id')) ;
	    $cmd = Sched::Cmd->new(xml => $task, serial => $serial) unless $cmd ;

	    # without a command object this task (and the tasks below
	    # it) is left alone
	    next unless defined $cmd ;

	    $cmd->cancel() ;
	    Sched::Timeout::add_timer($cmd->attribute('sid'), 5) ;
	}

	cancel_running_task($task) ;
    }

    return 0 ;
}


# Tell whether a task is still waiting below the given XML node, i.e.
# a task whose state is empty (not initialised yet) or 'new' (waiting
# for its condition).  The whole tree is searched recursively.
# Returns 1 when such a task exists, 0 otherwise.
# find_waiting_task($XML)
sub find_waiting_task
{
    my ($root) = @_ ;

    foreach my $task (@{ $root->getAllChildren('task') }) {
	my $state = $task->attribute('state') || '' ;

	# not initialised yet, or waiting for its condition
	return 1 if ($state eq "" or $state eq "new") ;

	# same question for the tasks below this one
	return 1 if find_waiting_task($task) ;
    }

    return 0 ;
}

# Send the whole job, serialised with toString(), to the master as a
# STATUS message.
sub job_send_status
{
    my $status = $job->{xml}->toString() ;

    Sched::Net::send_to_master("STATUS " . $status, 'keep_last');
}

# Synchronise the job ($job->sync()) and, when master_sync is on, flag
# that the status has to be sent to the master; the sending itself is
# done by idle_wait_func().
sub job_sync
{
    $job->sync() ;

    if ($master_sync) {
	$have_to_sync = 1 ;
    }
}

################################################################
# idle function

# Callback of the idle and CHLD watchers.
#  1. Collect every command returned by Sched::Cmd::wait(): remove its
#     timer, run its after_cmd script if any, call the CHLD callbacks
#     and launch its sub-tasks.
#  2. When no task is running any more, leave the program if the job
#     is not running, or if no task is left waiting.
#  3. Send the status to the master when a sync has been requested.
# Returns 1 when the program goes on.
sub idle_wait_func
{
    while (my $cmd = Sched::Cmd::wait()) {
	my $sid = $cmd->attribute('sid') ;
	Sched::Timeout::del_timer($sid) ;

	$count-- ;
	job_sync() ;

	$Sched::log->write('Seems to wait child...', 10) ;

	# when an after_cmd is defined, it is executed
	if (defined $cmd->attribute('after_cmd')) {
	    $Sched::log->write("I : execution after_cmd ($sid)") ;

	    my $r = Sched::Cond::eval_script($cmd->{xml}, 'after_cmd') ; 
	    $r = 'undef' unless defined $r ;

	    $Sched::log->write("I : after_cmd ($sid) -> $r") ;
	    $cmd->attribute('after_cmd_status', $r) ;
	}

	Sched::Callback::call_cb("CHLD " . $sid) ;

	launch_subtask($cmd->{xml}) ;
    }

    if ($count == 0) {

	# the job is no longer running: exit with the status it holds
	unless ($job->is_running()) {
	    $job->sync() ;
	    job_send_status() ;
	    Sched::Net::master_close() ;

	    exit ($job->attribute('status')) ;
	}

	# nothing is left waiting: the job is over
	if (!find_waiting_task($job->{xml})) {
	    my $ret = defined $job->attribute('cond')
		? Sched::Cond::is_ok($job->{xml})
		: 1 ;

	    # when the condition is ok, the exit code is OK (0);
	    # an undefined result gives 255
	    $ret = defined $ret ? !$ret : 255 ;

	    $job->attribute('status', sprintf('%i', $ret))  ; 
	    $job->attribute('state', 'finish')  ; 

	    $job->sync() ;
	    job_send_status() ;
	    Sched::Net::master_close() ;

	    exit($ret) ;
	}
    }

    if ($have_to_sync) {
	$have_to_sync = 0 ;
	job_send_status() ;
    }

    return 1 ;
}

# Default text for the 'info' attribute of the job when it is stopped
# by _event_term(); the key is the description (desc) of the watcher
# that fired.
my %info_term = ( SIGINT  => "Recieve Ctrl-C (SIGINT)",
		  SIGTERM => "Recieve Term signal",
		  TIMEOUT => "running time > maxtime") ;

################################################################
# event raised on timeout / SIGTERM / SIGINT

# Stop the job: cancel it, record why it stopped, cancel the running
# tasks, then arm a timer that exits the program after a grace delay.
#
# $evt is the Event object given to the callback; the description of
# its watcher ($evt->w->desc) selects the default text in %info_term.
#
# The status comes from the '=sigterm-status=' entry of Sched::NS
# (255 when it is not defined) and the info text from '=sigterm-info='
# (the %info_term text when it is not set).  A status of 0 gives the
# state 'finish', anything else 'failed'.
sub _event_term
{
    my $evt = shift ;

    # Delay given to the running commands before the forced exit.  The
    # log message below is built from this value so that it cannot
    # disagree with the timer (it used to say 60s for a 30s timer).
    my $grace = 30 ;

    $job->cancel() ;

    # different status ?
    my $r   = Sched::NS::aa('=sigterm-status=') ;
    $r = (defined $r)?$r:255 ;

    my $state = ($r == 0)?"finish":"failed" ;

    my $msg = Sched::NS::aa('=sigterm-info=') || $info_term{$evt->w->desc} ;

    $job->attribute('info', $msg) ;
    $job->attribute('status', $r) ;
    $job->attribute('state', $state) ;

    # stop the tasks which are running
    cancel_running_task($job->{xml}) ;

    my $w = Event->timer(desc => "Arret du graph",
			 cb => sub {
			     job_sync() ;
			     exit($job->attribute('status')) ;
			 },
			 after => $grace) ;
    $w->start() ;
    $Sched::log->write("I : waiting ${grace}s for command completion") ;
}

################################################################
# main
#
# Steps:
#   1. initialise Sched and find the job file
#   2. prepare the work directory "$work_dir/$serial" (a copy of the
#      job file, or the status file of a previous session)
#   3. load the job, register it and the --define variables
#   4. set up the network and the Event watchers
#   5. launch the tasks and enter the event loop
{
    # initialisation
    &Sched::init('job', $file_conf) ;
    &Sched::set_serial($serial) ;

    $Sched::log->write("I : Utilisation du serial : $serial", 10) ;

    # loading of the job file

    my $file ;

    if (not defined $file_job) {
        $Sched::log->write("E : Pas de fichier job specifie") ;
        HELP_MESSAGE() ;
    }

    my $job_dir  = Sched::cfg('job_dir') ;
    my $work_dir = Sched::cfg('work_dir') ;

    unless (-d $job_dir and -d $work_dir) {
        $Sched::log->write("E : erreur d'acces sur job_dir ($job_dir) ou work_dir ($work_dir)") ;
        exit (1) ;
    }

    # the job is looked for as given, then in job_dir, then in job_dir
    # with the .xml suffix
    if (-f $file_job) {
        $file = $file_job ;
    } elsif (-f "$job_dir/$file_job") {
        $file = "$job_dir/$file_job" ;
    } elsif (-f "$job_dir/$file_job.xml") {
        $file = "$job_dir/$file_job.xml" ;
    } else {
        $Sched::log->write("E : Impossible de charger $file_job !") ;
        exit (2) ;
    }

    $Sched::log->write("I : Chargement du fichier job $file", 10) ;

    my $base_file = File::Basename::basename($file) ;
    Sched::set_job_file("$work_dir/$serial/$base_file") ;

    # may already exist (resumed session); the chdir below reports a
    # directory that cannot be used
    mkdir("$work_dir/$serial") ;

    if (-r "$work_dir/$serial/$base_file")
    {
        # a copy is already there: it must be the same job file
        if (Sched::Job::get_md5("$work_dir/$serial/$base_file")
            eq
            Sched::Job::get_md5($file))
        {
            my $j = Sched::Job->new(serial => $serial,
                                    file   => $file) ;

            if (!$j) {
                $Sched::log->write("E : impossible de charger $file") ;
                exit(2) ;
            }

            my $id = $j->attribute('id') ;

            # a status file exists for this job: it is loaded in place
            # of the job file
            if (-r "$work_dir/$serial/status_${serial}_${id}") {
                $Sched::log->write("I : reprise d'une ancienne session") ;
                $base_file = "status_${serial}_${id}" ;
            }

        } else {
            $Sched::log->write("E : mauvaise version du fichier job") ;
            exit(2) ;
        }
    } elsif (!copy($file, "$work_dir/$serial")) {
        $Sched::log->write("E : impossible d'initialiser $file ($!)") ;
        exit(2) ;
    }

    if (!chdir("$work_dir/$serial")) {
        $Sched::log->write("E : impossible d'initialiser $file ($!)") ;
        exit(2) ;
    }

    $job = Sched::Job->new(serial => $serial,
                           file   => $base_file) ;

    if (!$job) {
        $Sched::log->write("E : Erreur dans le chargement du fichier job ($file)") ;
        # Clean up the work space only.  $file is the path of the
        # source job file, which must never be deleted; what has to go
        # is its copy in the work directory, which is the current
        # directory since the chdir above.
        unlink(File::Basename::basename($file)) ;
        rmdir("$work_dir/$serial") ;
        exit (3) ;
    }

    Sched::set_jobid($job->attribute('id')) ;
    Sched::NS::set_jobid($job->attribute('id')) ;
    Sched::NS::register_id('=xml-root=', $job->{xml}) ;
    Sched::set_sid() ;

    $job->sync() ;

    $Sched::log->write("I : Creation de l'espace de travail", 20) ;

    # variables given with --define
    for my $k (keys %defines) {
        &Sched::Var::add_var($k, $defines{$k}) ;
    }

################################################################
# network set-up
    if (!$no_network) {
        &Sched::Net::set_proto(%proto) ;
        &Sched::Net::master_connect() ;
    } else {
        $master_sync = 0 ;
        $Sched::log->write("W : newtork function will be disable") ;
    }

################################################################
# Event set-up

    # collects the ended tasks when the loop is idle
    my $wait_w = Event->idle(desc => "Recuperation des task",
                             min => 5,
                             max => 120,
                             cb => \&idle_wait_func) ;
    $wait_w->start() ;

    # ... and when a child process ends
    my $signal_w = Event->signal(signal => 'CHLD',
                                 desc => "Recuperation des Task",
                                 cb => \&idle_wait_func,
                                 ) ;
    $signal_w->start() ;

    # SIGUSR1: send the status to the master
    my $signal_u = Event->signal(signal => 'USR1',
                                 desc => "Envoi du status au master",
                                 cb => \&job_send_status,
                                 ) ;
    $signal_u->start() ;

    # Timeout of the whole job.  The desc of this watcher and of the
    # two signal watchers below is a key of %info_term.
    my $maxtime =$job->attribute('maxtime') || Sched::cfg('maxtime') || 1000 ;
    my $timer_w = Event->timer(desc => "TIMEOUT",
                               cb => \&_event_term,
                               after => $maxtime) ;
    $timer_w->start() ;

    my $term_w = Event->signal(signal => 'TERM',
                               desc => "SIGTERM",
                               cb => \&_event_term) ;
    $term_w->start() ;

    my $int_w = Event->signal(signal => 'INT',
                              desc => "SIGINT",
                              cb => \&_event_term) ;
    $int_w->start() ;

    # resume mode (--execute / --retry)
    if ($execute or $retry) {
        my $id = $execute || $retry ;

        my $r = Sched::Cmd::find_task_by_id($job->{xml}, $id) ;

        die "E : can't find $id in $file_job" if (!$r) ;

        # reset state, status
        $job->attribute('state', '') ;

        # encode(1) => MQ==
        # (presumably a condition which is always true - confirm
        # against Sched::Cond)
        $r->attribute('cond','MQ==') ;

        # reset of the task and of its sub-tasks (if execute)
        Sched::Cmd::reset_task($r, $execute) ;

        launch_task($r) ;

        # in execute mode, only one task is launched
        $job->cancel() if ($execute) ;

    } else {
        launch_subtask($job->{xml}) ;
    }

    my $ret = Event::loop();
}

# EOF
