/* Schedwi
   Copyright (C) 2007-2015 Herve Quatremain

   This file is part of Schedwi.

   Schedwi is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Schedwi is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/*
 * mod_shell.c
 * Schedwi module to execute external scripts:
 *   - when a job/jobset status changes.  These scripts can be used to
 *     signal failed jobs to a Monitoring server for instance
 *   - to check if a job can start (external constraint)
 *
 * To enable this module, add the following directive in the schedwisrv
 * configuration file:
 *
 *   MODULE_LOAD=mod_shell
 *
 * Two directives define the commands to run.  They must be defined in the
 * schedwisrv configuration file:
 *
 *   MODULE:mod_shell:CMD_CHECK=<command_and_its_parameter>
 *   MODULE:mod_shell:CMD_STATUS=<command_and_its_parameter>
 *
 * MODULE:mod_shell:CMD_CHECK defines a command (with its full path) and
 *   its parameters to call to check if a job/jobset can start.  This command
 *   should return 0 if the job/jobset can start or another value otherwise.
 *   The job/jobset to check is provided through the SCHEDWI_JOBID and
 *   SCHEDWI_JOBPATH environment variables. The following variables are set:
 *     SCHEDWI_JOBID=<workload>_<id> (for instance SCHEDWI_JOBID=20070907_12)
 *     SCHEDWI_JOBPATH=<path_name> (for instance SCHEDWI_JOBPATH=/prod/convert)
 *     SCHEDWI_STARTEPOCH=<sec_since_epoch> (start time - as in time(2))
 *     SCHEDWI_STARTLIMIT=<minutes> (number of minutes after start time for the
 *                                  job/jobset to be started. 0 means no limit)
 *
 * MODULE:mod_shell:CMD_STATUS defines a command (with its full path) and
 *   its parameters to call when a job/jobset status has changed.  This
 *   command should return 0 on success.  The job/jobset is identified by
 *   the SCHEDWI_JOBID and SCHEDWI_JOBPATH environment variables (see above).
 *   The new (current) status of the job/jobset is provided through the
 *   SCHEDWI_STATUS_NEW environment variable and the previous status through
 *   the SCHEDWI_STATUS_OLD environment variable.  The status in
 *   SCHEDWI_STATUS_NEW and SCHEDWI_STATUS_OLD use the following keywords:
 *      WAITING
 *      RUNNING
 *      COMPLETED
 *      FAILED
 *
 * The directives can be used more than once to define several commands.
 * These commands will be run one after the other, in no particular order.
 * However, if several MODULE:mod_shell:CMD_CHECK commands are provided,
 * the check will stop as soon as a command returns a non-zero value.
 *
 * The commands are executed on the same host as the schedwisrv daemon, under
 * the same user account.  Therefore they must be executable by the
 * user/group defined by the USER and GROUP directives in the schedwisrv
 * configuration file.  Also, the standard input/outputs (stdin, stdout
 * and stderr) are not available to the commands (they are closed).
 *
 * One way to compile and install the module is by copying the source file in
 * the archive src directory and to run the following commands:
 *
 * libtool --mode=compile gcc -I. -I.. -DHAVE_CONFIG_H -Wall -c mod_shell.c
 * libtool --mode=compile gcc -I. -I.. -DHAVE_CONFIG_H -Wall -c lib_functions.c
 *
 * libtool --mode=link gcc -g -o mod_shell.la \
 *       -rpath /path/to/install/dir          \
 *       -no-undefined -module -avoid-version mod_shell.lo lib_functions.lo
 *
 * libtool --mode=install install -c mod_shell.la \
 *       /path/to/install/dir/mod_shell.la
 */

#include <schedwi.h>

#if HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif

#include <stdio.h>

#if STDC_HEADERS
#include <stdlib.h>
#include <string.h>
#else
#if HAVE_STDLIB_H
#include <stdlib.h>
#endif
#if HAVE_STRING_H
#include <string.h>
#endif
#endif

#include <xmem.h>
#include <schedwi_system.h>
#include <schedwi_time.h>
#include <job_status_node.h>
#include <result_mgnt.h>
#include <job_status_state.h>


/*
 * Command structure: one node of a singly linked list of command lines
 */
struct cmds {
	/* Command line, duplicated by cmds_list_new() and released by
	 * cmds_list_destroy()
	 */
	char *cmd;
	/* Next node of the list (NULL for the last node) */
	struct cmds *next;
};

/* Head of the check commands linked list (filled by the CMD_CHECK directive) */
static struct cmds *cmds_check  = NULL;
/* Head of the status commands linked list (filled by the CMD_STATUS directive) */
static struct cmds *cmds_status = NULL;


/*
 * Release every node of the provided linked list, together with the
 * command string each node holds.
 *
 * ptr is the first node of the list; it may be NULL (nothing is done then).
 */
static void
cmds_list_destroy (struct cmds *ptr)
{
	struct cmds *following;


	for (; ptr != NULL; ptr = following) {
		/* Remember the successor before the node is released */
		following = ptr->next;
		free (ptr->cmd);	/* free() accepts NULL */
		free (ptr);
	}
}


/**
 * Allocate a standalone list node holding a private copy of the provided
 * command.
 *
 * @param[in] cmd The command line to copy in the new node
 * @return The new node (its next pointer is NULL).  It must be freed by the
 *         caller with cmds_list_destroy()
 */
static struct cmds*
cmds_list_new (const char *cmd)
{
	struct cmds *node;


	node = (struct cmds *) xmalloc (sizeof (*node));
	node->cmd = xstrdup (cmd);
	node->next = NULL;
	return node;
}


/*
 * Look for a command in the provided linked list.  The comparison is an
 * exact (case sensitive) string match.
 *
 * Return:
 *    A pointer to the first node which holds the command or
 *    NULL if the command is not in the list
 */
static struct cmds*
cmds_list_search (struct cmds *head, const char *cmd)
{
	struct cmds *node;


	for (node = head; node != NULL; node = node->next) {
		if (strcmp (node->cmd, cmd) == 0) {
			return node;
		}
	}
	return NULL;
}


/** libtool aliases for the exported symbols.  These aliases are used in order
 * to avoid symbol conflicts on some operating systems.  Each exported symbol
 * (schedwi_init, schedwi_exit, schedwi_conf, schedwi_status and schedwi_check)
 * must be prefixed with "modulename_LTX_" (\c modulename is the name of the
 * module - mod_shell here).  Thanks to these macros, the functions below can
 * be written with their short names.
 */
#define schedwi_init mod_shell_LTX_schedwi_init
#define schedwi_exit mod_shell_LTX_schedwi_exit
#define schedwi_conf mod_shell_LTX_schedwi_conf
#define schedwi_status mod_shell_LTX_schedwi_status
#define schedwi_check mod_shell_LTX_schedwi_check



/** Module initialization function.  This function is called at loading time
 *  and allows the module to initialize its internal data.  It is called only
 *  once, typically when the schedwisrv daemon starts.
 *
 *  This module has no internal data to set up, so the function always
 *  succeeds.
 *
 *  \retval 0     The initialization has succeeded.
 *  \retval other The function failed.  schedwisrv will print an error message
 *                and exit.
 */
int
schedwi_init (void)
{
	/* Nothing to initialize */
	return 0; /* OK */
}


/** Module exit function.  This function is called just before the module is
 *  unloaded and can be used to free internal data (free memory, close opened
 *  files, ...)
 *
 *  Here it releases the two command lists and resets their heads to NULL so
 *  that a later call finds them empty.
 */
void
schedwi_exit (void)
{
	cmds_list_destroy (cmds_check);
	cmds_check = NULL;

	cmds_list_destroy (cmds_status);
	cmds_status = NULL;
}


/*
 * Insert a command at the head of a list unless an identical command is
 * already stored in it.
 *
 * Return:
 *    0 if the command is now in the list (added or already there) or
 *   -1 if the new node could not be created
 */
static int
cmds_list_add_unique (struct cmds **head, const char *cmd)
{
	struct cmds *node;


	if (cmds_list_search (*head, cmd) != NULL) {
		return 0; /* Already known */
	}

	node = cmds_list_new (cmd);
	if (node == NULL) {
		return -1;
	}
	node->next = *head;
	*head = node;
	return 0;
}


/** Module configuration function.  This function is called during the parsing
 *  of the schedwisrv configuration file, for each directive which is intended
 *  for this module.  The format of such a directive is as follows:
 *  \verbatim MODULE:<module_name>:<key> = <value> \endverbatim
 *  \c module_name is the current module name (mod_shell)
 *  \c key is the parameter name (CMD_CHECK or CMD_STATUS, case insensitive).
 *  \c value is the parameter value.  See the schedwisrv configuration file
 *  (schedwisrv.conf) for more details.
 *  \warning The schedwisrv daemon reads its configuration file at startup
 *           but also when it receives the HUP signal.  This schedwi_conf
 *           function may therefore be called several times with the same
 *           parameters during the life of the daemon.  This is why a command
 *           which is already stored is not added a second time.
 *
 *  \param[in] key   The name of the parameter (CMD_CHECK or CMD_STATUS)
 *  \param[in] value The associated value (the command to run)
 *
 *  \retval 0     The provided parameter has successfully been processed.
 *  \retval other An error occurred (-1: the command could not be stored,
 *                1: unknown parameter).  schedwisrv will print an error
 *                message and exit.
 */
int
schedwi_conf (const char *key, const char *value)
{
	/* Check command */
	if (strcasecmp (key, "CMD_CHECK") == 0) {
		return cmds_list_add_unique (&cmds_check, value);
	}

	/* Status command */
	if (strcasecmp (key, "CMD_STATUS") == 0) {
		return cmds_list_add_unique (&cmds_status, value);
	}

	/* Unknown parameter */
	return 1;
}


/*
 * Build the environment variable assignment which describes a job/jobset
 * state.
 *
 * state is the state to describe.  If is_new is not zero, the
 * SCHEDWI_STATUS_NEW variable is used, otherwise SCHEDWI_STATUS_OLD.
 *
 * Return:
 *    A constant string (not to be freed).  A state which is not one of
 *    WAITING, RUNNING, COMPLETED or FAILED is reported as UNKNOWN
 */
static const char *
status_env_var (job_status_state state, int is_new)
{
	switch (state) {
		case JOB_STATUS_STATE_WAITING:
			return (is_new)	? "SCHEDWI_STATUS_NEW=WAITING"
					: "SCHEDWI_STATUS_OLD=WAITING";
		case JOB_STATUS_STATE_RUNNING:
			return (is_new)	? "SCHEDWI_STATUS_NEW=RUNNING"
					: "SCHEDWI_STATUS_OLD=RUNNING";
		case JOB_STATUS_STATE_COMPLETED:
			return (is_new)	? "SCHEDWI_STATUS_NEW=COMPLETED"
					: "SCHEDWI_STATUS_OLD=COMPLETED";
		case JOB_STATUS_STATE_FAILED:
			return (is_new)	? "SCHEDWI_STATUS_NEW=FAILED"
					: "SCHEDWI_STATUS_OLD=FAILED";
		default:
			break;
	}
	return (is_new)	? "SCHEDWI_STATUS_NEW=UNKNOWN"
			: "SCHEDWI_STATUS_OLD=UNKNOWN";
}


/** Job/jobset status change.  Called to signal the change of the status of
 *  a job/jobset.  Every command defined by MODULE:mod_shell:CMD_STATUS is run
 *  with SCHEDWI_STATUS_OLD and SCHEDWI_STATUS_NEW in its environment.  A
 *  failing command does not prevent the remaining ones from being run.
 *
 * \param[in] result A result_t object which contains the job workload, id, ...
 *                   Only its job_id member is used here
 * \param[in] job_name_with_path Full name of the job (with its path)
 * \param[in] new_state New state of the job/jobset
 * \param[in] previous_state Previous state of the job/jobset
 * \param[in] duration How long (in seconds) the job was in the previous_state
 *                     (not used by this module)
 * \param[in] retry_num Retry number if the job has failed and is going to be
 *                      retried (not used by this module)
 * \param[in] status_message Description.  For a failed job it may be the
 *                           reason why it failed. May be empty or NULL
 *                           (not used by this module)
 *
 *  \retval 0     The function has succeeded (all the commands returned 0).
 *  \retval other An error occurred (-1: at least one command did not
 *                return 0).  schedwisrv will log an error message.
 */
int
schedwi_status (const result_t * const result, const char *job_name_with_path,
		job_status_state new_state, job_status_state previous_state,
		long int duration, int retry_num, const char *status_message)
{
	const char *env[2];
	struct cmds *cur;
	int failed;


	/* Previous state first, new state second */
	env[0] = status_env_var (previous_state, 0);
	env[1] = status_env_var (new_state, 1);

	/*
	 * Exec all the provided scripts (defined
	 * by MODULE:mod_shell:CMD_STATUS in the schedwisrv configuration file)
	 */
	failed = 0;
	cur = cmds_status;
	while (cur != NULL) {
		if (schedwi_system (	cur->cmd, result->job_id,
					job_name_with_path,
					env, 2) != 0)
		{
			failed = 1;
		}
		cur = cur->next;
	}

	return (failed) ? -1 : 0;
}


/** Decide whether the provided job/jobset can start or not.  This function is
 *  called  whenever a job/jobset is ready to be started (i.e. its start time,
 *  constraint links and files are okay) to tell if it can actually start.
 *
 *  The commands defined by MODULE:mod_shell:CMD_CHECK are run one after the
 *  other with SCHEDWI_STARTEPOCH and SCHEDWI_STARTLIMIT in their environment.
 *  The check stops at the first command which does not return 0.
 *
 *  \param[in] job_details Job parameters (workload_date, job_id,
 *                         job_name_with_path, ...)
 *  \param[in] start_time  Time (seconds since epoch - see time(2)) at which
 *                         the job should start.
 *  \param[in] start_limit The number of minutes past the start time for
 *                         the job to start.
 *
 *  \retval 0     The job/jobset can start (all the commands returned 0, or
 *                no command is defined).
 *  \retval 1     The job/jobset cannot start yet (keep it waiting).
 *  \retval other An error occurred (-1 is returned).  schedwisrv will log an
 *                error message and the job will not start yet.
 */
int
schedwi_check (	const job_status_node_ptr const job_details,
		time_t start_time,
		short int start_limit)
{
	/*
	 * Each buffer holds the variable name, the `=' sign and the decimal
	 * representation of an integer.  64 bytes are more than enough for
	 * a 64 bits value and its sign.
	 */
	char env_epoch[64];
	char env_limit[64];
	const char *env[2];
	struct cmds *ptr;
	int ret;


	/*
	 * Define some variables:
	 *    SCHEDWI_STARTEPOCH The start time (as in time(2))
	 *    SCHEDWI_STARTLIMIT The number of minutes past the start time for
	 *                       the job to start.
	 *
	 * time_t is not guaranteed to be a long.  It is explicitly converted
	 * so the argument always matches the conversion specifier.
	 */
	snprintf (	env_epoch, sizeof (env_epoch),
			"SCHEDWI_STARTEPOCH=%lld", (long long) start_time);
	snprintf (	env_limit, sizeof (env_limit),
			"SCHEDWI_STARTLIMIT=%d", (int) start_limit);
	env[0] = env_epoch;
	env[1] = env_limit;

	for (ptr = cmds_check; ptr != NULL; ptr = ptr->next) {
		ret = schedwi_system (	ptr->cmd, job_details->workload_jobid,
					job_details->job_name_with_path,
					env, 2);
		if (ret < 0 || ret == 127 || ret >= 300) {
			return -1; /* System error */
		}
		if (ret != 0) {
			return 1;  /* OK but do not start the job/jobset yet */
		}
	}
	return 0; /* GO! */
}

/*-----------------============== End Of File ==============-----------------*/
