/* Schedwi
   Copyright (C) 2014, 2015 Herve Quatremain

   This file is part of Schedwi.

   Schedwi is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Schedwi is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/* job_status_set.c -- Set the status of a job */

#include <schedwi.h>

#if STDC_HEADERS
#include <stdlib.h>
#include <string.h>
#else
#if HAVE_STDLIB_H
#include <stdlib.h>
#endif
#if HAVE_STRING_H
#include <string.h>
#endif
#endif

#if HAVE_STDIO_H
#include <stdio.h>
#endif

#include <module.h>
#include <sql_job.h>
#include <sql_stat.h>
#include <sql_status.h>
#include <sql_hierarchy.h>
#include <lwc_log.h>
#include <startjob.h>
#include <xmem.h>
#include <job_status_set.h>


/*
 * Beginning of the SQL request built by job_status_set_mem_flush().  The
 * comma-separated entries queued by job_status_set_mem_add() are appended
 * after it.
 */
#define SQLSTATUS_SET_BULK "INSERT INTO job_status (workload_date,job_id,status,time_status_set,wait_reason) VALUES "
/*
 * Size (in bytes) of the buffer allocated by job_status_set_mem_add() for
 * each queued entry.
 */
#define UPDATE_STRING_LEN 100
/*
 * When the number of queued entries exceeds this value,
 * job_status_set_mem_add() flushes the list to the database.
 */
#define MAX_PENDING_UPDATE 30000

/*
 * Error callback function for the sql_job_get_retries(), sql_status_get(),
 * sql_job_get_retries_interval(), sql_status_set(), sql_stat_success() and
 * sql_stat_fail() functions.
 *
 * @param data     Not used by this callback.
 * @param msg      Error message to log.  If NULL, a generic database error
 *                 message is logged instead.
 * @param err_code Not used by this callback.
 */
static void
sql_get_error_logger (void *data, const char *msg, int err_code)
{
	if (msg != NULL) {
		/*
		 * The message is passed as an argument rather than as the
		 * format string: it may contain `%' characters which must
		 * not be interpreted as conversion specifications.
		 */
		lwc_writeLog (LOG_ERR, "%s", msg);
	}
	else {
		lwc_writeLog (LOG_ERR,
		_("Database error while trying to retrieve a job parameter"));
	}
}


/**
 * Notify the user provided modules of a status change and then write an
 * informational message with the new status in the log.
 *
 * @param result             Object from which the workload and the job ID
 *                           are read.  Must not be NULL.
 * @param job_name_with_path Full name of the job (used in the log message).
 * @param new_state          New status of the job.
 * @param previous_state     Status of the job before the change.
 * @param duration           Duration forwarded to the modules.
 * @param retry_num          Retry number forwarded to the modules.
 * @param status_message     Status message forwarded to the modules.
 */
static void
call_module_and_log (	const result_t *result, const char *job_name_with_path,
			job_status_state new_state,
			job_status_state previous_state,
			long int duration,
			int retry_num, const char *status_message)
{
	/* First, let the modules know about the new status... */
	module_status (
		result,
		job_name_with_path,
		new_state,
		previous_state,
		duration,
		retry_num,
		status_message);

	/* ...and then keep a trace of the change in the log */
	lwc_writeLog (
		LOG_INFO,
		_("Workload %d: %s (id %lld): %s"),
		result->workload_int,
		job_name_with_path,
		result->job_id_int,
		job_status_state2str (new_state));
}


/**
 * Store the new status of a job in the database, update the job statistics
 * when the job is completed or failed, and finally call the modules and log
 * the change.
 *
 * Nothing is done (and 0 is returned) if `result' is NULL.  The values
 * returned by sql_stat_success() and sql_stat_fail() are not checked.
 *
 * @return 0 on success or -1 if the status cannot be stored in the database
 *         (an error message has been logged by lwc_writeLog())
 */
int
job_status_set (const result_t *result, const char *job_name_with_path,
		job_status_state new_state,
		job_status_state previous_state,
		time_t status_time, long int duration,
		int retry_num, int wait_reason, const char *status_message)
{
	/* No job to update */
	if (result == NULL) {
		return 0;
	}

	/* Store the new status in the database */
	if (sql_status_set (	result->workload_int, result->job_id_int,
				job_status_state_status2int (new_state),
				status_time, retry_num, wait_reason,
				status_message, duration,
				sql_get_error_logger, NULL) != 0)
	{
		return -1;
	}

	/* Only completed and failed jobs are counted in the job_stat table */
	if (new_state == JOB_STATUS_STATE_COMPLETED) {
		sql_stat_success (	result->job_id_int, duration,
					sql_get_error_logger, NULL);
	}
	else if (new_state == JOB_STATUS_STATE_FAILED) {
		sql_stat_fail (	result->job_id_int, duration,
				sql_get_error_logger, NULL);
	}

	/* Notify the modules and log the new status */
	call_module_and_log (	result, job_name_with_path,
				new_state, previous_state,
				duration, retry_num, status_message);
	return 0;
}


/**
 * Change the status of a job.
 *
 * The current status of the job is first read from the database.  If the
 * requested status is JOB_STATUS_STATE_FAILED and the number of retries
 * already done is lower than the maximum allowed for the job, the job is set
 * to JOB_STATUS_STATE_WAITING instead (with the WAIT_REASON_MASK_TIME wait
 * reason and a status time shifted by the retry interval, which is
 * multiplied by 60 before being added to the current time).
 *
 * Nothing is done (and 0 is returned) if the hierarchy list is empty or if
 * `result' is NULL.
 *
 * @return 0 on success or -1 on error (an error message has been logged by
 *         lwc_writeLog())
 */
int
job_change_status (	lwc_LL *hierarchy_list, const result_t *result,
			job_status_state new_state, const char *status_message)
{
	time_t now, previous_change, status_time;
	int retry_limit, retry_count, current_status, reason;
	short int retry_delay;
	long int elapsed;
	char *full_path;
	int ret;


	if (lwc_getNumNode (hierarchy_list) <= 0 || result == NULL) {
		return 0;
	}
	now = time (NULL);

	/* Current status, time of the last change and number of retries */
	if (sql_status_get (	result->workload_int, result->job_id_int,
				&current_status, &previous_change,
				&retry_count, NULL, NULL, NULL,
				sql_get_error_logger, NULL) != 0)
	{
		return -1;
	}

	/* From now on, every exit path must free full_path */
	full_path = hierarchy_list_to_job_full_path (hierarchy_list);
	ret = -1;
	retry_delay = 0;
	reason = 0;

	/*
	 * A failed job which still has retries left is not really failed
	 * yet.  It is set back to waiting so it will be started again later.
	 */
	if (new_state == JOB_STATUS_STATE_FAILED) {
		/* Maximum number of retries allowed for the job */
		if (sql_job_get_retries (	result->workload_int,
						hierarchy_list,
						&retry_limit,
						sql_get_error_logger,
						NULL) != 0)
		{
			goto cleanup;
		}

		if (retry_limit > retry_count) {
			/* Delay before the next try */
			if (sql_job_get_retries_interval (
						result->workload_int,
						hierarchy_list,
						&retry_delay,
						sql_get_error_logger,
						NULL) != 0)
			{
				goto cleanup;
			}
			retry_count++;
			new_state = JOB_STATUS_STATE_WAITING;
			reason = WAIT_REASON_MASK_TIME;
			lwc_writeLog (	LOG_INFO,
	_("Workload %d: %s (id %lld): Failed but is going to be retried"),
					result->workload_int,
					full_path,
					result->job_id_int);
		}
	}

	/*
	 * Use the duration provided in the result object if there is one.
	 * Otherwise, use the time elapsed since the previous status change.
	 */
	elapsed = (result->duration >= 0)	? result->duration
						: now - previous_change;
	status_time = now + retry_delay * 60;

	/* Store the new status in the database */
	if (job_status_set (	result, full_path,
				new_state,
				job_status_state_int2status (current_status),
				status_time, elapsed,
				retry_count, reason,
				status_message) == 0)
	{
		ret = 0;
	}

cleanup:
	free (full_path);
	return ret;
}


/**
 * Force the status of a job/jobset to the one stored in `job_details' and,
 * for a job forced to JOB_STATUS_STATE_RUNNING, start it.
 *
 * Nothing is done (and 0 is returned) if one of the parameters is NULL or if
 * job_status_has_changed() reports that the status has not changed.
 *
 * Depending on the value returned by startjob():
 *   -1 or 1: the start failed.  The job is forced to JOB_STATUS_STATE_FAILED
 *            (by calling this function again) and -1 is returned.
 *   2:       the job is forced to JOB_STATUS_STATE_COMPLETED (by calling
 *            this function again).
 *   other:   nothing more is done and 0 is returned.
 *
 * @return 0 on success or -1 on error (an error message has been logged by
 *         lwc_writeLog())
 */
int
job_status_force (lwc_LL *hierarchy_list, job_status_node_ptr job_details)
{
	int ret;
	result_t *result;
	char node_type;
	char *err_msg;


	if (hierarchy_list == NULL || job_details == NULL) {
		return 0;
	}

	/* Nothing to do if the status is not modified */
	if (job_status_has_changed (job_details) == 0) {
		return 0;
	}

	/* Job or jobset? */
	node_type = hierarchy_list_get_type (hierarchy_list);

	/* Temporary result object required by job_status_set() */
	result = result_new ();
	result_set_id (	result,
			job_details->workload_date, job_details->job_id);
	result_set_duration (result, job_details->duration);

	/* Store the new status in the database */
	ret = job_status_set (	result,
				job_details->job_name_with_path,
				job_details->status,
				job_details->previous_status,
				job_details->time_status_set,
				job_details->duration,
				job_details->retry_num,
				job_details->wait_reason,
				job_details->message);
	result_destroy (result);
	if (ret != 0) {
		return -1;
	}

	/* Only a job (not a jobset) forced to running has to be started */
	if (	   job_details->status != JOB_STATUS_STATE_RUNNING
		|| node_type != JOB)
	{
		return 0;
	}

	err_msg = NULL;
	ret = startjob (hierarchy_list,
			job_details->job_name_with_path,
			job_details->workload_date,
			job_details->job_id,
			&err_msg);

	/* The job failed to start */
	if (ret == -1 || ret == 1) {
		/* The error message is only logged for the return code 1 */
		if (ret == 1 && err_msg != NULL) {
			lwc_writeLog (	LOG_ERR,
			_("Workload %d: %s (id %lld): failed to start: %s"),
					job_details->workload_date,
					job_details->job_name_with_path,
					job_details->job_id,
					err_msg);
		}
		else {
			lwc_writeLog (	LOG_ERR,
			_("Workload %d: %s (id %lld): failed to start"),
					job_details->workload_date,
					job_details->job_name_with_path,
					job_details->job_id);
		}
		/* Force the job to the failed state */
		job_status_node_set_status (job_details,
					JOB_STATUS_STATE_FAILED, 0);
		job_status_node_set_message (job_details, err_msg);
		job_status_force (hierarchy_list, job_details);
		free (err_msg);		/* free(NULL) is a no-op */
		return -1;
	}

	free (err_msg);

	/* Return code 2: the job is forced to the completed state */
	if (ret == 2) {
		job_status_node_set_status (job_details,
					JOB_STATUS_STATE_COMPLETED, 0);
		job_status_node_set_message (job_details, NULL);
		return job_status_force (hierarchy_list, job_details);
	}

	return 0;
}


/**
 * Write the job status in the provided updates buffer to the database.
 *
 * All the entries of the list are concatenated (separated by `,') after
 * SQLSTATUS_SET_BULK and the resulting request is run by sql_non_select().
 * On success the list is emptied.  On error the list is left untouched.
 *
 * @param lst List of strings queued by job_status_set_mem_add().
 * @return 0 on success (or if there is nothing to write).  On error, the
 *         non-zero value returned by sql_non_select() is returned (from the
 *         original documentation: -1 on memory allocation error and -2 on
 *         SQL error -- to be confirmed against sql_non_select()) and an
 *         error message is logged.
 */
int
job_status_set_mem_flush (lwc_LL *lst)
{
	size_t len, prefix_len, used, item_len;
	char *s, *t;
	char *err_msg = NULL;
	int ret;


	/* Room for each entry, its separator and the final `\0' */
	len = lwc_getNumNode (lst) * (UPDATE_STRING_LEN + 2);
	if (len == 0) {
		return 0;
	}
	prefix_len = strlen (SQLSTATUS_SET_BULK);
	len += prefix_len;
	s = (char *) xmalloc (len);
	memcpy (s, SQLSTATUS_SET_BULK, prefix_len);

	/*
	 * Append the entries.  The write offset is tracked in `used' so the
	 * buffer is not scanned from its beginning for every entry (as
	 * strcat() would do), which would be quadratic in the list length.
	 */
	used = prefix_len;
	lwc_rewindLL (lst);
	while ((t = (char *) lwc_nextLL (lst)) != NULL) {
		item_len = strlen (t);
		memcpy (s + used, t, item_len);
		used += item_len;
		s[used++] = ',';
	}
	/* Replace the last character (the trailing `,') by the terminator */
	s[used - 1] = '\0';

	ret = sql_non_select (	NULL, NULL, &err_msg, NULL, NULL, NULL,
				s, SQL_END);
	free (s);
	if (ret != 0) {
		if (err_msg != NULL) {
			/*
			 * The database message must not be used as the
			 * format string: it may contain `%' characters
			 */
			lwc_writeLog (LOG_ERR, "%s", err_msg);
			free (err_msg);
		}
		else {
			lwc_writeLog (LOG_ERR,
		_("Database error while trying to set jobs initial status"));
		}
		return ret;
	}
	lwc_emptyLL (lst, (void (*)(const void *))free);
	return 0;
}


/**
 * Add an entry in the updates buffer.
 *
 * The entry is a string formatted as a SQL values tuple
 * (workload,job_id,status,status_time,wait_reason).  The modules are called
 * and the new status is logged right away, even though the database is only
 * updated when the list is flushed.  The list is flushed by
 * job_status_set_mem_flush() as soon as it contains more than
 * MAX_PENDING_UPDATE entries.
 *
 * Nothing is done (and 0 is returned) if `lst' or `result' is NULL.
 *
 * @return 0 on success, or the value returned by job_status_set_mem_flush()
 *         when the list has been flushed (non-zero on error, in which case
 *         an error message has been logged).
 */
int
job_status_set_mem_add (lwc_LL *lst,
		const result_t *result, const char *job_name_with_path,
		job_status_state new_state,
		job_status_state previous_state,
		time_t status_time, int wait_reason)
{
	char *s;


	if (lst == NULL || result == NULL) {
		return 0;
	}
	s = (char *) xmalloc (UPDATE_STRING_LEN);
	/*
	 * The width of time_t is platform dependent (it is not always a
	 * long int).  It is then explicitly converted to match the
	 * conversion specification.
	 */
	snprintf (	s, UPDATE_STRING_LEN, "(%d,%lld,%d,%lld,%d)",
			result->workload_int,
			result->job_id_int,
			job_status_state_status2int (new_state),
			(long long) status_time,
			wait_reason);
	lwc_addEndLL (lst, s);
	call_module_and_log (	result, job_name_with_path,
				new_state, previous_state,
				0, 0, NULL);
	/* Do not let the list grow too much */
	if (lwc_getNumNode (lst) > MAX_PENDING_UPDATE) {
		return job_status_set_mem_flush (lst);
	}
	return 0;
}


/**
 * Release the provided update list.  The entries still queued in the list
 * are freed without being written to the database: they are lost.
 *
 * @param lst List to destroy.
 */
void
job_status_set_mem_destroy (lwc_LL *lst)
{
	/* Each entry is released by free() */
	void (*release_entry) (const void *);

	release_entry = (void (*)(const void *))free;
	lwc_delLL (lst, release_entry);
}

/*-----------------============== End Of File ==============-----------------*/
