#!/usr/bin/perl -w
use strict ;

=head1 sched_master

A complete network scheduling system solution

=head2 DESCRIPTION

sched_master have to do reporting and launch network task.

=head2 DEPENDS

   Compress::Zlib
   Crypt::RSA
   Crypt::Blowfish
   Crypt::Rijndael
   Crypt::RC6
   Crypt::DES_EDE3
   Crypt::DES
   Crypt::CipherSaber
   Crypt::Random
   Crypt::CBC

=head2 USAGE

 Usage : sched_master [-h] [-c master.cfg] [-t] [-d]
    --help      : print this help
    --conf f    : path to config file
    --test      : verify config file and exit
    --debug

=head2 CONFIGURATION

    [master]
    hist_dir=/var/lib/sched/master/hist
    job_dir=/home/eric/travail/sched/src/xml/job
    db_dir=/var/lib/sched/master/db
    dsn=dbname=sched;user=sched;password=xxx
    group=sched
    user=nobody
    
    master_passwd=xxxy
    master_port=5544
    
    view_passwd=yyyy
    view_ip=127.0.0.1

=head2 INSTALLATION

Complete installation details are in the README and README.conf files included
with the software. It works with the XML::Mini, LWP and Net::EasyTCP library
available on CPAN.

=head2 TODO
    
    ser group 
    version des clients connectes

    connexion avec la GUI web
     - verification des versions sur les clients
     - deployment d'une version (via le slave)
     - retrait d'une version (via le slave)

=head2 AUTHOR

(C) 2004-2005 Eric Bollengier

You may reach me through the contact info at eric@eb.homelinux.org

=head2 LICENSE

    sched_slave, part of the network scheduling system (Sched)
    Copyright (C) 2004-2005 Eric Bollengier All rights reserved.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

=cut

my $VERSION = '$Id: sched_master,v 1.4 2005/05/04 19:37:59 mcgregor Exp $' ;

$ENV{PATH} = '/bin:/usr/bin' ;

use File::Copy qw/copy/ ;
use XML::Mini ;
use Net::EasyTCP ;
use Sched ;
use Sched::Cmd ;
use Sched::Job ;

my $server ;
my $db_dir ;

################################################################
# traitement de la ligne de commande

sub HELP_MESSAGE
{
    print "Usage : $0 [-h] [-c master.cfg] [-t] [-d]
    --help	: print this help
    --conf f	: path to config file
    --test	: verify config file and exit
    --debug
" ;
    exit (1) ;
}

use Getopt::Long ;

my $test_conf ;
my $file_conf = "$Sched::prefix_etc/master.cfg" ;
my $debug ;
GetOptions("conf=s"   => \$file_conf,
           "help"     => \&HELP_MESSAGE,
	   "test"     => \$test_conf,
	   "debug"    => \$debug) ;

if (!-f $file_conf or !-r $file_conf) {
    print "E : can't open config file ($file_conf) $!\n" ;
    HELP_MESSAGE() ;
}

################################################################
# verification de la configuration

my %conf_default = ( 'master_port' => 5544,
		     'user'  => 'nobody',
		     'group' => 'nogroup',
		     'view_ip' => '127.0.0.1');

sub valid_conf
{    
    my $ret = 1 ;

    $Sched::log->write("I : reading config file ($file_conf)") ;

    if (!$Sched::cfg->SectionExists('master')) {
	$Sched::log->write("E : can't find [master]") ;
	return 0 ;
    }
    
    # user group
    for my $k qw(master_passwd dsn)
    {
	if (!Sched::cfg($k)) {
	    $Sched::log->write("W : [master]/$k is not defined");
	    $ret = 0 ;
	} else {
	    $Sched::log->write("D : $k = " . Sched::cfg($k), 15)
		if ($test_conf) ;
	}
    }

    for my $k qw(db_dir)
    {
	if (!Sched::cfg($k)) {
	    $Sched::log->write("E : [master]/$k is not defined");
	    $ret = 0 ;
	} elsif(! -d Sched::cfg($k)) {
	    $Sched::log->write("E : [master]/$k is not a directory (" . Sched::cfg($k) . ")");
	    $ret = 0 ;
	} else {
	    $Sched::log->write("D : $k = " . Sched::cfg($k), 15)
		if ($test_conf) ;
	}
    }
	
    for my $k (keys %conf_default)
    {
	if (!Sched::cfg($k)) {
	    $Sched::log->write("W : [master]/$k is not defined");
	    $Sched::log->write("I : $k = $conf_default{$k}") ;
	    $Sched::cfg->newval('master', $k, $conf_default{$k}) ;
	} else {
	    $Sched::log->write("D : $k = " . Sched::cfg($k), 15)
		if ($test_conf) ;
	}
    }

    $Sched::cfg->WriteConfig(\*STDOUT) if ($test_conf) ;
   
    return $ret ;
}

################################################################


use DBI ;

my %req_sql = (
 'search_host_ip' => 'SELECT count(*) FROM host WHERE name=? and ip=?',
 'add_host_ip'    => 'INSERT INTO host(name,ip) VALUES (?,?)',
  
 'add_job' => 'INSERT INTO job(id, md5, host) VALUES (?,?,?)',
 'last_job_no' => 'SELECT last_value from job_job_no_seq',

 'add_task' => 'INSERT INTO task(job_no,id,host,xml) VALUES (?,?,?,?)',
 'search_task' => 'SELECT host,xml FROM task WHERE job_no=? and id=?',
 'search_job' => 'SELECT job_no,host FROM job WHERE id=? and md5=?',
 'search_task_jobid_md5' 
      => 'SELECT task.host AS host,task.xml AS xml 
	  FROM task INNER JOIN job ON job.job_no = task.job_no 
	  WHERE job.id=? and job.md5=? and task.id=?',
 'update_job_status' => 'UPDATE job_hist SET xml=? WHERE job_sid=?',
 'update_job_end_status' => 
         'UPDATE job_hist SET xml=?, end_date=?, status=? WHERE job_sid=?',
 'init_job_status' => 'INSERT INTO job_hist 
                        (job_sid, job_no, start_date, host, xml)
                       VALUES (?,?,?,?,?) ',

 'set_host_online' => 
'UPDATE host 
  SET online = ?
 WHERE name = ? and ip = ?',

 'reset_all_host_online' => 
'UPDATE host 
  SET online = 0',

 'search_host_in_hostgroup' => '
SELECT count(*) 
FROM hostgroup_member 
 INNER JOIN host on hostgroup_member.host_no = host.host_no
 INNER JOIN hostgroup on hostgroup.hostgroup_no = hostgroup_member.hostgroup_no 
 WHERE host.name = ? and hostgroup.name IN (__NB__)',

'add_task_notification' =>'
INSERT INTO task_notify (task_sid, host, date) VALUES (?, ?, now())',

'reset_task_notification' => 'DELETE FROM task_notify WHERE task_sid = ?',

'add_task_hist' => '
INSERT INTO task_hist (task_sid, job_no, date, host) 
VALUES (?, ?, now(), ?)
',

'get_task_nofication_list' => '
SELECT host 
FROM  task_notify
WHERE task_sid = ?
',

'find_running_task' => '
SELECT count(*)
FROM  task_hist 
WHERE task_sid = ? and host = ?
',

'update_task_hist' => '
UPDATE task_hist SET xml = ?, date = now()
WHERE task_sid = ?
',
	       );

my %do_sql ;

my $dbh ;

sub db_init
{
    # postgresql
    $dbh = DBI->connect("DBI:Pg:" . Sched::cfg('dsn'),'', '');
    
    if (!$dbh)
    {
	print "E : erreur de connexion\n" ;
	return 0 ;
    }

    for my $r (keys %req_sql)
    {
	$do_sql{$r} = $dbh->prepare($req_sql{$r}) ||
	    die "E : impossible d'utiliser $r : $req_sql{$r}" ;
    }

    return 1 ;
}

use POSIX qw/strftime/ ;

sub init_job_status($$$$)
{
    my ($sid, $host, $job_no, $xml) = @_ ;
    my $start_date = 'now()' ;

    if ($xml =~ /<job[^>]+start_date=([\'\"])(\d+)\1/) {
	$start_date = POSIX::strftime('%F %T', localtime($2)) ;
    } 

    my $sth = $do_sql{'init_job_status'} ;
    
    my $d = $sth->execute( $sid, $job_no, $start_date, $host, $xml);
    $sth->finish() ;
}

# sid xml job_no
sub update_job_status($$)
{
    my ($sid, $xml) = @_ ;
    my ($sth, $d) ;
    if ($xml =~ /<job[^>]+state=([\'\"])(finish|failed)\1/) {
	if ($xml =~ /<job[^>]+sync_date=([\'\"])(\d+)\1/) {
	    my $d = $2 ;
	    
	    $xml =~ /<job[^>]+status=([\'\"])(\d+)\1/ ;
	    my $s = $2 ;
	    
	    $sth = $do_sql{'update_job_end_status'} ;
	    $d = $sth->execute( $xml,
				POSIX::strftime('%F %T', localtime($d)),
				$s,
				$sid);
	} 
    } else {
	$sth = $do_sql{'update_job_status'} ;
	$d = $sth->execute( $xml, $sid);
    }
    $sth->finish() ;
    return $d ;
}

# host, 'hostgroup1, hostgroup2'
sub search_host_in_hostgroup
{
    my $host = shift ;

    my $sql = $req_sql{search_host_in_hostgroup} ;
    my $arg = join(',', map { '?' } @_) ;

    $sql =~ s/__NB__/$arg/ ;
    my $sth = $dbh->prepare($sql) 
	|| return 0 ;

    $sth->execute($host, @_);
    my @row = $sth->fetchrow_array ;

    $sth->finish() ;

    return $row[0] ;
}

#
sub reset_all_host_online
{
    my $sth = $do_sql{'reset_all_host_online'} ;
    $sth->execute( @_ );    

    $sth->finish() ;
}

# val, name, ip
sub set_host_online
{
    my $sth = $do_sql{'set_host_online'} ;
    $sth->execute( @_ );

    $sth->finish() ;
}

# id, md5
sub search_job($$)
{
    my $sth = $do_sql{'search_job'} ;
    my $d = $sth->execute( @_ );
    my ($job,$host) = $sth->fetchrow_array ;

    $sth->finish() ;

    return ($job,$host) ;
}

# job_sid, id, host, xml
sub add_task($$$$)
{
    my $sth = $do_sql{'add_task'} ;
    my $d = $sth->execute( @_ );

    $sth->finish() ;
}

# id, md5, hostlist
sub add_job($$$)
{
    my $sth = $do_sql{'add_job'} ;
    my $d = $sth->execute( @_ );
    #my $last = $dbh->{mysql_insertid} ;
    #if (!$last) {		# postgresql
    $sth = $do_sql{'last_job_no'} ;
    $sth->execute() ;
    my ($last) = $sth->fetchrow_array ;
    $sth->finish() ;
    #}
    return $last ;
}

# job.id, job.md5, task.id
sub search_task2($$$)
{
    my $sth = $do_sql{'search_task_jobid_md5'} ;
    my $d = $sth->execute( @_ );

    my $hash_ref ;

    if ($d) {
	$hash_ref = $sth->fetchrow_hashref;
    }

    $sth->finish() ;

    return $hash_ref ;
}

# job.sid, task.id
sub search_task($$)
{
    my $sth = $do_sql{'search_task'} ;
    my $d = $sth->execute( @_ );

    my $hash_ref ;

    if ($d) {
	$hash_ref = $sth->fetchrow_hashref;
    }

    $sth->finish() ;
    
    return $hash_ref ;
}

# name, ip
sub search_host_ip($$)
{
    my $sth = $do_sql{'search_host_ip'} ;
    $sth->execute( @_ );
    my @row = $sth->fetchrow_array ;

    $sth->finish() ;

    return ($row[0] ne 0) ;
}

# name, ip
sub add_host($$)
{
    my $sth = $do_sql{'add_host_ip'} ;
    $sth->execute( @_ );

    $sth->finish() ;
}

# task_sid, host
sub add_task_notification($$)
{
    my $sth = $do_sql{'add_task_notification'} ;
    unless ($sth->execute( @_ )) {
	$Sched::log->write("E : can't add notification $_[0] ($sth->errstr)") ;
    }
    $sth->finish() ;
}

# task_sid
sub reset_task_notification($)
{
    my $sth = $do_sql{'reset_task_notification'} ;
    unless ($sth->execute( @_ )) {
       $Sched::log->write("E : can't reset notification $_[0] ($sth->errstr)");
    }
    $sth->finish() ;
}

# task_sid, job_no, host
sub add_task_hist($$$)
{
    my $sth = $do_sql{'add_task_hist'} ;
    unless ($sth->execute( @_ )) {
       $Sched::log->write("E : can't add task_hist $_[0] ($sth->errstr)");
    }
    $sth->finish() ;
}

# task_sid, host
sub have_lanch_task($$)
{
    my $sth = $do_sql{'find_running_task'} ;

    $sth->execute( @_ );

    my ($count)  = $sth->fetchrow_array ;

    $sth->finish() ;
    
    return $count ;
}

# xml,task_sid
sub update_task_hist($$)
{
    my $sth = $do_sql{'update_task_hist'} ;
    unless ($sth->execute( @_ )) {
	$Sched::log->write("E : can't update $_[1] status ($sth->errstr)") ;
    }
    $sth->finish() ;
}

################################################################

my %wait_to_send ;

my %_master_cmd_list ;

# $socket->serial() => hostname
my %_master_serial ; 

# $host|$id	=> $serial
my %_master_conn ;

# $host|$id	=> $job_no
my %_master_job ;

# host => $serial => $socket
sub get_socket($)
{
    my $host = shift ;

    return undef if (!defined $_master_conn{$host}) ;

    my $serial = $_master_conn{$host} ;
    return get_socks_from_serial($serial) ;
}

# $serial => $socket
sub get_socks_from_serial($)
{
    my ($serial) = @_ ;

    for my $c ($server->clients()) {
	return $c if ($c->serial() eq $serial) ;
    }
    return undef ;
}

sub _master_cmd
{
    my ($host, $id, $simulate) = @_ ;

    my ($s, $h, $j) = split(/:/, $host) ;

    my $rep = search_task($_master_job{$host}, $id) ; 

    if (!defined $rep) {
	$Sched::log->write("E : error while searching $host $id");
	send_to($host, "CHLD <task sid='$host:$id' id='$id' status='255' state='failed' info='chargement impossible du fichier sur le master'/>") ;
	return ;
    }

    my $target = $rep->{host} ;

    my $task_sid = "$host:$id" ;

    my $cmd = new Sched::Cmd(serial => $s,
			     data => $rep->{xml},
			     sid => $task_sid) ;
    if (!$cmd) {
	$Sched::log->write("E : error while creating Sched::Cmd from ($id)");
	send_to($host, "CHLD <task sid='$task_sid' id='$id' status='255' state='failed' info='impossible de parser $s:$id'/>") ;
	return ;			     
    }

    my $command = Sched::Cmd::to_xml($cmd) ;
    $cmd->dispose() ;

    if ($simulate) {
	send_to($target, "SIM $command") ;
    } else {
	send_to($target, "CMD $command") ;
    }

    add_task_hist($task_sid, $_master_job{$host}, $target) ;
    add_task_notification($task_sid, $host) ;

    # on note que $host souhaite avoir un retour de sa commande
    #push @{$_master_cmd_list{"$target:$task_sid"}}, $host ;    
}

sub _master_cmd_sim
{
    _master_cmd(@_, 1) ;
}

END {
    if ($dbh) {
	$dbh->disconnect() or print $dbh->errstr ;
    }
}

# expression reguliere pour valider les echanges

my $re_job = '^[\w\d\.-]+:[\w\d\.-]+:[\w\d\.-]+$' ;
my $re_task = '^([\w\d\.-]+:[\w\d\.-]+:[\w\d\.-]+):([\w\d\.-]+)$' ;
my $re_slave = '^[\w\d\.-]+$' ;
my $re_register = '^[\w\d\.-]+:[\w\d\.-]+:[\w\d\.-]+\s+[\w\d]+$' ;
my $re_view = '^[\w\d_\.-]+$' ;

my %proto ;

my %proto_view = (
	'UPDATE' => {
		arg => $re_job,
		fct => sub {
			my ($host, $arg) = @_ ;
			my $j = get_socket($arg) ;
			$j->send('UPDATE') if ($j) ;
		},
	    },
	'WATCH' => {
		arg => $re_job,
		fct => sub {
			my ($host, $arg) = @_ ;
			my $j = get_socket($arg) ;
			$j->send('WATCH') if ($j) ;
		},
	    },
	'CANCEL' => {
		arg => $re_task,
		fct => sub {
			my ($host, $arg) = @_ ;
			$arg =~ $re_task ;
			my $j = get_socket($1) ;
			$j->send("CANCEL $2") if ($j) ;
		},
	    },
	'CANCELJOB' => {
		arg => $re_job,
		fct => sub {
			my ($host, $arg) = @_ ;
			my $j = get_socket($arg) ;
			$j->send('CANCELJOB') if ($j) ;
		},
	    },
	'QUIT' => {
		arg => undef,
		fct => sub {
			#my ($host, $arg) = @_ ;
			#my $j = get_socket($host) ;
			#$j->close() if ($j) ;
		},
	},
	) ;

# protocol reseau master <-> slave
my %proto_slave = (
		   'PING' => {	# pas de gestion des erreurs
		       arg => undef,
		       fct => sub {
			   my ($host, $arg) = @_ ;
			   my $slave = get_socket($host) ;
			   $slave->send('OK') if ($slave) ;
		       },
		   },
		   		   
		   'CHLD' => {
		       from => undef,
		       arg => undef,
		       fct => \&_master_chld,
		   },
		   'NOP' => {
		       arg => undef,
		       fct => sub {
		       },
		   },
		   'OK' => {
		       arg => undef,
		       fct => sub {
		       },
		   },
		   ) ;

# protocol reseau master <-> job
my %proto_job = (
		 'PING' => {	# pas de gestion des erreurs
		     arg => undef,
		     fct => sub {
			 my ($host, $arg) = @_ ;
			 my $slave = get_socket($host) ;
			 $slave->send('OK') if ($slave) ;
		     },
		 },
		 'CANCEL' => {
		     arg => '^[\w\d_\.-]+$',
		     fct => \&_master_cancel,
		 },

		 'STATUS' => {
		     arg => undef,
		     fct => sub {
			 my ($host, $arg) = @_ ;
			 
			 my ($s, $h, $j) = split(/:/, $host) ;

			 update_job_status($host, $arg) ;
		     },
		 },
		 'CMD' => {
		     arg => '^[\w\d_\.-]+$',
		     fct => \&_master_cmd,
		 },
		 'SIM' => {
		     arg => '^[\w\d_\.-]+$',
		     fct => \&_master_cmd_sim,
		 },
		 'NOP' => {
		     arg => undef,
		     fct => sub {
		     },
		 },
		 'OK' => {
		     arg => undef,
		     fct => sub {
		     },
		 },
		 'GET_TASK_STATUS' => {
		     arg =>  $re_task,
		     fct => sub {
			 # trouver le bon destinataire
			 # envoyer un TASK_STATUS
			 # ajouter un callback sur TASK_STATUS_IS
		     },
		 },
		 'TASK_STATUS_IS' => {
		     arg =>  '^[\w\d\.-]+ \d+$',
		     fct => sub {
			 # trouver le SID
			 # declencher le callback
		     },
		 },
		 
		 );

# protocol reseau master <-> ?
my %proto_init = (
		  'REGISTER' => {
		      arg => $re_register,
		      fct => \&_master_register,
		  },
		  
		  'HELO' => {
		      arg => $re_slave,
		      fct => \&_master_helo,
		  },
		  'EHLO' => {
		      arg => '^[\w\d_\.-]+$',
		      fct => \&_master_ehlo,
		  },
		  
		  ) ;
# TODO :
# o il faut envoyer les commandes que si le job
#   qui la soumet est la. sinon on stocke.
# o il faudrait pouvoir annuler un ordre
#   si c'est un CANCEL et que le CMD est pas encore
#   envoy, il faut le supprimer et renvoyer un message
#   d'erreur au job

# envoi une commande a un host, ou bien on la spool
sub send_to
{
    my $host = shift ;
    my $what = shift ;
    
    my $mode = shift || 'keep' ;

    my $slave = get_socket($host) ;
    if ((!defined $slave) or (!$slave->send($what))) {

	if (!defined $wait_to_send{$host}) {
	    _init_tie($host) ;
	}

	my $wts = $wait_to_send{$host} ;
	
	if ($mode eq 'drop') {
	    # on ne fait rien
	    return ;

	} elsif ($mode eq 'keep_last') {

	    # on ne prend que la derniere commande
	    # STATUS, UPDATE, WATCH...

	    my ($cmd, undef) = split(/\s+/, $what, 2) ;

	    # on parcourt la liste (a l'envers) et suppression
	    # des commandes d'avant

	    for (my $i = $#{$wts} ; $i >= 0 ; $i--) {
		if (   (${$wts}[$i]) 
		    && (${$wts}[$i] =~ /^$cmd\b/)
		   ) 
		{
		    splice(@{$wts}, $i, 1) ;
		}
	    }

	    push @{$wts}, $what ;

	} elsif ($mode eq 'keep') {

	    # on garde la commande
	    push @{$wts}, $what ;

	} elsif ($mode eq 'cancel') {

	    # on efface toutes les commandes $arg
	    # et si on trouve rien, on ajoute la nouvelle
	    # commande a la fin

	    my $push = 1 ;
	    my $arg = shift || '' ;
	    
	    for (my $i = $#{$wts} ; $i >= 0 ; $i--) {
		if (${$wts}[$i] eq $arg) {
		    splice(@{$wts}, $i, 1) ; ;
		    $push = 0 ;
		}
	    }

	    if ($push) {
		push @{$wts}, $what ;
	    }

	} else {

	    $Sched::log->write("E : send_to : mode $mode is unknow") ;

	}
    }
}

sub _init_tie
{
    my ($host) = @_ ;

    my @a ;
    tie @a,  'Tie::File', "$db_dir/$host", recsep => '' ;
    $wait_to_send{$host} = \@a ;
}

sub reconnect
{
    my ($host) = @_ ;

    my $slave = get_socket($host) ;
    return if (!$slave) ;

    if (! defined $wait_to_send{$host}) {
	return if (! -f "$db_dir/$host") ;
	_init_tie($host) ;
    }

    while (my $what = shift @{$wait_to_send{$host}}) {

	if (!$slave->send($what)) {
	    unshift @{$wait_to_send{$host}}, $what ;
	    $slave->close() ;
	    return ;
	}
    } 

    untie @{ $wait_to_send{$host} } ;
    delete $wait_to_send{$host} ;
    unlink ("$db_dir/$host") ;
}

sub ip_match_host($$)
{
    my ($ip, $host) = @_ ;
    my ($nb) = search_host_ip($host, $ip) ;
    return $nb ;
}

# il faut verifier que le job vient du bon host...
#
# TODO : 
#   - ajouter dans task_hist au lancement
#   - verifier au retour que cela provient bien
#     de la bonne machine
#   - stocker le resultat dans la table
#   - notifier tous les job qui attendent le resultat
# notification de fin d'un job

sub _master_chld
{
    my ($host, $data) = @_ ;

    my $xml = xml_load('task', $data) ;

    return if (!defined $xml) ;

    my $sid = $xml->attribute('sid') || ''; 
    
    if (! have_lanch_task($sid, $host)) {
	$Sched::log->write("W : $host claims to have launch $sid") ;
	return ;
    }

    my $sth = $do_sql{'get_task_nofication_list'} ;

    $sth->execute( $sid );

    while ( my ($target)  = $sth->fetchrow_array) {
	send_to($target, "CHLD $data") ;
    } 

    $sth->finish() ;

    update_task_hist($data, $sid) ;
    reset_task_notification($sid) ;
}

sub _master_register
{
    my ($client, $s_h_j_md5) = @_ ;

    my ($s_h_j, $md5) = split(/\s+/, $s_h_j_md5) ; ;

    my ($serial, $host, $job) = split(/:/, $s_h_j) ;   

    if (!ip_match_host($client->remoteip(),
		       $host))
    {
	$Sched::log->write("E : host ip don't match") ;
	$client->send('ERR') ;
	$client->close() ;
	return ;
    }

    # TODO : voir pour le client->serial
    if ( (exists $_master_serial{$client->serial()}) ||
	 (exists $_master_conn{$s_h_j})
       ) 
    {
	$Sched::log->write("E : client have already subscribe $serial") ;
	$client->send('ERR') ;
	$client->close() ;

	$client = get_socket($s_h_j) ;
	$client->send('PING') ;

	return ;
    }
    
    my ($job_no,$job_host) = search_job($job, $md5) ;
    
    if (! $job_no) {
	$Sched::log->write("E : $job is not registred") ;
	$client->send('ERR') ;
	$client->close() ;
	return ;
    }

    if ($job_host !~ /(^|,)$host(,|$)/i) {
	my @hostgroup = split(/\s*,\s*/, $job_host) ;
	if (!search_host_in_hostgroup($host, @hostgroup)) {
	    $Sched::log->write("E : $host can't load $job") ;
	    $client->send('ERR') ;
	    $client->close() ;
	    return ;			     	
	}
    }

    init_job_status($s_h_j, $host, $job_no, '') ;
	
    $client->send('OK') ;

    $_master_serial{$client->serial()} = $s_h_j ;
    $_master_conn{$s_h_j} = $client->serial() ;
    $_master_job{$s_h_j} = $job_no ;

    $Sched::log->write("I : [" . $client->serial() .
		       "] job ($s_h_j) have successfully registred") ;

    reconnect($s_h_j) ;
}

sub xml_load($$)
{
    my ($what, $file) = @_ ;

    my $xml = new XML::Mini::Document() ;
    $xml->parse($file) ;
    $xml = $xml->getRoot()->getElement($what) if ($xml) ;

    return $xml ;
}

my $_counter = 0 ;
sub _master_ehlo
{
    my ($client, $pwd) = @_ ;

    if ($client->remoteip() ne Sched::cfg('view_ip')) 
    {
	$Sched::log->write("E : view ip don't match") ;
	$client->send('ERR') ;
	$client->close() ;
	return ;
    }    

    if ($pwd ne Sched::cfg('view_passwd'))
    {
	$Sched::log->write("E : view mdp don't match") ;
	$client->send('ERR') ;
	$client->close() ;
	return ;
    }

    $client->send('OK') ;

    $_counter++ ;
    my $id = "view $_counter" ;

    $_master_serial{$client->serial()} = $id;
    $_master_conn{$id} = $client->serial() ;

    $Sched::log->write("I : [" . $client->serial() . 
		       "] view ($id) have successfully registred") ;
}


sub _master_helo
{
    my ($client, $host) = @_ ;

    if (!ip_match_host($client->remoteip(),
		       $host))
    {
	$Sched::log->write("E : host ip don't match") ;
	$client->send('ERR') ;
	$client->close() ;
	return ;
    }

    if (defined $_master_conn{$host}) 
    {
	$Sched::log->write("E : [$host] have already register") ;
	$client->send('ERR') ;
	$client->close() ;

	$client = get_socket($host) ;
	$client->send('PING') ;

	return ;
    }

    set_host_online(1, $host, $client->remoteip()) ;

    $client->send('OK') ;
    
    $_master_serial{$client->serial()} = $host ;
    $_master_conn{$host} = $client->serial() ;

    $Sched::log->write("I : [" . $client->serial() . 
		       "] slave ($host) have successfully registred") ;
    
    reconnect($host) ;
}

sub _master_cancel
{
    my ($host, $id) = @_ ;

    my ($s, $h, $j) = split(/:/, $host) ;

    my $rep = search_task($_master_job{$host}, $id) ; 

    if (!defined $rep) {
	$Sched::log->write("E : CANCEL $id impossible (error while loading $j:$id)") ;
	return ;
    }

    my $target = $rep->{host} ;
    
    send_to($rep->{host}, "CANCEL $host:$id") ;

    ## en attendant la memoire entre 2 redemarrage
    #push @{$_master_cmd_list{"$target:$host:$id"}}, $host 
    #   if (not defined $_master_cmd_list{"$target:$host:$id"}) ;

    # Plusieurs cas possible
    #  - job qui arrete un autre job
    #  - job qui arrete une task d'un job
    #  - job qui arrete une task d'un slave
    #  - cancel d'un cmd pas encore sur le slave

    #  - gui qui arrete une task d'un slave
    #  - gui qui arrete une task d'un job
    #  - gui qui arrete un job
}

sub _connected() {
    my $client = shift;
    my $serial = $client->serial();

    my $oldfh = select($client->socket()); $| = 1; select($oldfh);

    $proto{$serial} = \%proto_init ;
}

sub _disconnected() {
    my $client = shift;
    my $serial = $client->serial();

    if (defined $_master_serial{$serial}) {
	my $host = $_master_serial{$serial} ;
	delete $_master_serial{$serial} ;
	delete $_master_conn{$host} ;

	if (defined $_master_job{$host}) {
	    # job
	    delete $_master_job{$host} ;
	} else {
	    # host
	    set_host_online(0, $host, $client->remoteip()) ;
	}
    }

    delete $proto{$serial} ;
    $Sched::log->write("I : $serial disconnected", 10) ;
}

sub _gotdata_master
{
    my $client = shift;
    my $serial = $client->serial();
    my $data = $client->data();

    if (!$data) {
	$client->close() ;
	return ;
    }

    print localtime() . " DEBUG : $serial : $data\n" if ($debug) ;

    $Sched::log->write("I : [$serial] Data from " 
		       . $client->remoteip() 
		       . " E=" . ($client->encryption()  || '')
		       . " C=" . ($client->compression() || ''),
		       20) ;
    
    my ($cmd, $arg) = split(/\s+/, $data, 2) ;

    if (!defined $proto{$serial}) {
	$Sched::log->write("E : can't find protocol for $serial") ;
	$client->close() ;
	return ;
    }

    my %p = %{$proto{$serial}} ;

    if (defined $p{$cmd}) {
        if ((!$p{$cmd}->{arg}) || ($arg =~ $p{$cmd}->{arg})) {
	    if (($cmd eq 'REGISTER') || ($cmd eq 'HELO') || ($cmd eq 'EHLO')) {
		
		$p{$cmd}->{fct}($client,
				$arg) ;

		$proto{$serial} = \%proto_slave if ($cmd eq  'HELO') ;
		$proto{$serial} = \%proto_view  if ($cmd eq  'EHLO') ;
		$proto{$serial} = \%proto_job   if ($cmd eq 'REGISTER') ;

	    } else {
		my $host = $_master_serial{$serial} ;
		$p{$cmd}->{fct}($host,
				$arg) ;
	    }
        } else  {
	    $Sched::log->write("W : error in command ($cmd $arg)", 10) ;
            $client->send("ERR dans la commande ($cmd $arg)") ;
        }
    } else {
	$Sched::log->write("W : unknow command ($cmd)", 10) ;
        $client->send("ERR commande inconnue ($cmd)") ;
    }
}

################################################################
{
    &Sched::init('master', $file_conf) or die "E : error of configuration" ;
    valid_conf() || die "E : Too many error during config verification" ;

    exit 0 if ($test_conf) ;

    ############ DROP ROOT USER ####################################

    my $gid = getgrnam(Sched::cfg('group')) ;

    if ($> == 0) {
	
	my $uid = getpwnam(Sched::cfg('user')) ;
	
	if ( not defined $uid) {
	    die "E : Impossible d'utiliser ". Sched::cfg('user') . " ($!)";
	}

	$> = $< =  $uid ;
    }
    
    $( = $) =  $gid if (defined $gid) ;

    ################################################################

    db_init() ;

    reset_all_host_online() ;

    # on ne remplace pas les expressions de temps
    $Sched::Var::do_strftime = 0 ;

    $db_dir   = Sched::cfg('db_dir') ;

    $server = new Net::EasyTCP( mode =>      "server",
                                port =>      Sched::cfg('master_port'),
                                password =>  Sched::cfg('master_passwd'),
				welcome => "Sched master",
				donotencryptwith => [],
				donotcompresswith => [],
				#donotcompress => 1,
				#donotencrypt => 1,
				) ;
    if (!$server) {
        $Sched::log->write("E : server initialisation error ($@)") ;
        exit(1) ;
    }

    $server->setcallback(data       => \&_gotdata_master,
                         connect    => \&_connected,
                         disconnect => \&_disconnected) ;

    $Sched::log->write("I : server ok", 10) ;

    $SIG{TERM} = $SIG{INT} = sub { 
	$Sched::log->write("I : server is exiting...", 10) ;
         
	$server->stop() ; 
	for my $c ($server->clients()) {
	    if ($c) {
		$c->send('QUIT') ;
		$c->close() ;
	    }
	}
	reset_all_host_online() ;
	sleep(2) ; 
	exit(0) ; 
    }; 

    $server->start() ;

}

# EOF

