/*
 *
 *   (C) Copyright IBM Corp. 2002, 2003
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: cluster.c
 *
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <pthread.h>

#include "fullengine.h"
#include "cluster.h"
#include "engine.h"
#include "remote.h"
#include "daemon.h"
#include "memman.h"
#include "message.h"
#include "commit.h"


static boolean callback_registered = FALSE;

uint           num_config_nodes = 0;
ece_nodeid_t * config_nodes = NULL;
node_list_t  * config_node_names = NULL;
ece_event_t  * membership = NULL;
node_list_t  * membership_names = NULL;

ece_clusterid_t clusterid = ECE_NO_CLUSTER;
ece_nodeid_t  * my_nodeid = NULL;


static void free_node_list(void * ptr) {

	 node_list_t * node_list = (node_list_t *) ptr;
	 int i;

	 for (i = 0; i < node_list->count; i++) {
		 if (node_list->node_info[i].node_name != NULL) {
			 engine_free((char *) node_list->node_info[i].node_name);
			 node_list->node_info[i].node_name = NULL;
		 }
	 }
}


int evms_get_node_list(node_search_flags_t flags,
		       node_list_t     * * node_list_ptr) {

	int i;
	node_list_t * node_list = NULL;
	node_list_t * list_to_copy;

	LOG_PROC_ENTRY();


	if (flags & ACTIVE_NODES_ONLY) {
		list_to_copy = membership_names;
	} else {
		list_to_copy = config_node_names;
	}

	if (list_to_copy == NULL) {
		*node_list_ptr = NULL;
		LOG_PROC_EXIT_INT(ENODEV);
		return ENODEV;
	}

	node_list = alloc_app_struct(sizeof(node_list_t) + list_to_copy->count * sizeof(node_info_t), free_node_list);

	if (node_list == NULL) {
		LOG_CRITICAL("Error getting memory for a node_list_t.\n");
		LOG_PROC_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	node_list->count = list_to_copy->count;

	/*
	 * Make copies of the node name strings so that the user cannot
	 * trash the ones returned by the ECE.
	 */
	for (i = 0; i < node_list->count; i++) {
		node_list->node_info[i].flags = list_to_copy->node_info[i].flags;
		node_list->node_info[i].node_name = engine_strdup(list_to_copy->node_info[i].node_name);

		if (node_list->node_info[i].node_name == NULL) {
			LOG_ERROR("Error duplicating name string for node %s.\n", list_to_copy->node_info[i].node_name);
			free_node_list(node_list);
			evms_free(node_list);
			LOG_PROC_EXIT_INT(ENOMEM);
			return ENOMEM;
		}
	}

	*node_list_ptr = node_list;

	LOG_PROC_EXIT_INT(0);
	return 0;
}


int engine_get_node_list(node_search_flags_t   flags,
			 const node_list_t * * node_list) {

	LOG_PROC_ENTRY();

	if (flags & ACTIVE_NODES_ONLY) {
		*node_list = membership_names;
	} else {
		*node_list = config_node_names;
	}

	if (*node_list != NULL) {
		LOG_PROC_EXIT_INT(0);
		return 0;

	} else {
		LOG_PROC_EXIT_INT(ENODEV);
		return ENODEV;
	}
}


int evms_set_current_node(char * new_node_name) {

	int rc = 0;
	int i;

	LOG_PROC_ENTRY();

	rc = check_engine_read_access();
        if (rc != 0) {
		LOG_PROC_EXIT_INT(rc);
		return rc;
	}

	for (i = 0; i < num_config_nodes; i++) {
		if (strcmp(new_node_name, config_node_names->node_info[i].node_name) == 0) {

			current_nodeid = &config_nodes[i];
                        local_focus = (current_nodeid == my_nodeid);
			break;
		}
	}

	if (i >= num_config_nodes) {
		engine_user_message(NULL, NULL, _("There is no node named %s in this cluster.\n"), new_node_name);
		rc = EINVAL;
	}

	LOG_PROC_EXIT_INT(rc);
	return rc;
}


const char * nodeid_to_string(const ece_nodeid_t * node) {

	int i = 0;
	const char * node_name = NULL;

	if (node != NULL) {
		if (config_node_names != NULL) {
			for (i = 0; (i < num_config_nodes); i++) {
				if (memcmp(node, &config_nodes[i], sizeof(ece_nodeid_t)) == 0) {
					node_name = config_node_names->node_info[i].node_name;
					break;
				}
			}
		}
	}

	return node_name;
}


static void add_member(const ece_nodeid_t * node) {

	int i;
	const char * add_node_name = nodeid_to_string(node);
	boolean dup_entry;

	LOG_PROC_ENTRY();
	LOG_DEBUG("Add node %s.\n", add_node_name);

	/*
	 * Check to see if we already have the new node.
	 * This can happen when we first register the callback for DELTAS.
	 * The ECE will deliver a member ship followed by the DELTA_JOIN and
	 * DELTA_LEAVE for the members.
	 */
	dup_entry = FALSE;
	for (i = 0; i < membership->num_entries; i++) {
		if (memcmp(&(membership->node[i]), node, sizeof(ece_nodeid_t)) == 0) {

			/* We already have this node. */
			dup_entry = TRUE;
			break;
		}
	}

	if (!dup_entry) {
		memcpy(&(membership->node[membership->num_entries]), node, sizeof(ece_nodeid_t));

		membership_names->node_info[membership->num_entries].flags = NODE_ACTIVE;
		membership_names->node_info[membership->num_entries].node_name = add_node_name;

		membership->num_entries++;
		membership_names->count = membership->num_entries;

		/* Mark the entry in the config_node_names as active. */
		for (i = 0; i < num_config_nodes; i++) {
			if (config_node_names->node_info[i].node_name == add_node_name) {
				config_node_names->node_info[i].flags |= NODE_ACTIVE;
				break;
			}
		}

	} else {
		LOG_WARNING("Node %s is already a member.\n", add_node_name);
	}

	LOG_DEBUG("Membership has %d nodes.\n", membership->num_entries);

	LOG_PROC_EXIT_VOID();
}


static void remove_member(const ece_nodeid_t * node) {

	int i;
	boolean found;
	const char * remove_node_name = nodeid_to_string(node);

	LOG_PROC_ENTRY();
	LOG_DEBUG("Remove node %s.\n", remove_node_name);

	for (i = 0, found = FALSE; (i < membership->num_entries) && !found; i++) {
		if (memcmp(&(membership->node[i]), node, sizeof(ece_nodeid_t)) == 0) {
			/*
			 * Delete the entry by copying all the following
			 * entries, including the empty entry after the
			 * last entry, down one slot.
			 */
			memcpy(&(membership->node[i]), &(membership->node[i+1]), sizeof(ece_nodeid_t) * (membership->num_entries - i));

			/* Similarly for the membership_names */
			memcpy(&(membership_names->node_info[i]), &(membership_names->node_info[i+1]), sizeof(node_info_t) * (membership->num_entries - i));

			/* Update the counts */
			membership->num_entries--;
			membership_names->count = membership->num_entries;
			found = TRUE;
		}
	}

	if (!found) {
		LOG_WARNING("Did not find node %s in our membership.\n", remove_node_name);
	}

	LOG_DEBUG("Membership has %d nodes.\n", membership->num_entries);

	for (i = 0; i < num_config_nodes; i++) {
		if (config_node_names->node_info[i].node_name == remove_node_name) {
			config_node_names->node_info[i].flags &= ~NODE_ACTIVE;
		}
	}

	LOG_PROC_EXIT_VOID();
}


static void new_membership_names(ece_event_t * membership) {

	int i;

	LOG_PROC_ENTRY();

	/* Clear out the current membership names. */
	for (i = 0; i < num_config_nodes; i++) {
		membership_names->node_info[i].flags &= ~NODE_ACTIVE;
		membership_names->node_info[i].node_name = NULL;
	}

	membership_names->count = 0;

	for (i = 0; i < membership->num_entries; i++) {
		membership_names->node_info[i].flags |= NODE_ACTIVE;
		membership_names->node_info[i].node_name = nodeid_to_string(&membership->node[i]);
	}

	membership_names->count = membership->num_entries;

	LOG_PROC_EXIT_VOID();
}


static void * close_engine(void * node) {

	char * node_name = (char *) node;

	char * choices[] = {"Ok"};
	int answer = 0;

	/* Let the callback thread return. */
	usleep(100000);

	evms_close_engine();

	engine_user_message(&answer, choices,
			    _("Node %s has left the cluster!  "
			    "Configuration cannot continue.  "
			    "The Engine is closed.\n"),
			    node_name);

	/* The node name was strdup()ed for this thread.  Free it. */
	engine_free(node);

	return NULL;
}


static void ece_callback(const ece_callback_class_t class,
			 const size_t               size,
			 const void               * data) {

	ece_event_t * event;
	ece_msg_t * msg;
	int i;

	LOG_PROC_ENTRY();

	switch (class) {
	case CALLBACK_MEMBERSHIP:
		event = (ece_event_t *) data;

		LOG_DEBUG("CALLBACK_MEMBERSHIP\n");
		switch (event->type) {
		case DELTA_JOIN:
			LOG_DEBUG("  DELTA_JOIN\n");

			for (i = 0; i < event->num_entries; i++) {
				add_member(&(event->node[i]));
			}
			membership->quorum_flag = event->quorum_flag;
			break;

		case DELTA_LEAVE:
			LOG_DEBUG("  DELTA_LEAVE\n");

			for (i = 0; i < event->num_entries; i++) {
				remove_member(&(event->node[i]));
				if (memcmp(&(event->node[i]), &current_nodeid, sizeof(ece_nodeid_t)) == 0) {

					/*
					 * Launch a separate thread to handle
					 * the close so that this thread can
					 * return to the ECE.
					 */
					pthread_t tid;

					pthread_create(&tid,
						       &pthread_attr_detached,
						       close_engine,
						       engine_strdup(nodeid_to_string(&(event->node[i]))));
				}
			}
			membership->quorum_flag = event->quorum_flag;
			break;

		case MEMBERSHIP:
			LOG_DEBUG("  MEMBERSHIP\n");

			engine_free(membership);
			membership = engine_alloc(size);

			if (membership != NULL) {
				memcpy(membership, event, size);

				new_membership_names(membership);
			}
			break;

		default:
			/* Don't care about other kinds of events. */
			LOG_WARNING("  Unknown message type: %#x\n", event->type);
			break;

		}
		break;

	case CALLBACK_MESSAGE:
		msg = (ece_msg_t *) data;

		LOG_DEBUG("CALLBACK_MESSAGE\n");
		if (engine_mode & ENGINE_DAEMON) {
			daemon_router(msg);
		} else {
			engine_router(msg);
		}
		break;

	default:
		/* Ignore other kinds classes of callbacks. */
		LOG_WARNING("Unknown message class: %#x\n", class);
		break;
	}

	LOG_PROC_EXIT_VOID();
	return;
}


int connect_to_ece() {

	int rc = 0;
	int i;
	u_int32_t timeout = 10;	/* Seconds to wait for membership */
	ece_nodeid_t my_node = ECE_NO_NODE;

	LOG_PROC_ENTRY();

	if (cluster_manager == NULL) {
		LOG_DETAILS("No cluster manager plug-in was loaded.\n");
		LOG_PROC_EXIT_INT(ENODEV);
		return ENODEV;
	}

	status_message(_("Connecting to the cluster manager...\n"));

	ece_funcs = cluster_manager->functions.cluster;

	rc = ece_funcs->get_clusterid(&clusterid);
	if (rc != 0) {
		LOG_WARNING("Error code %d from plug-in %s when asked for the cluster ID.\n", rc, cluster_manager->short_name);

		disconnect_from_ece();

		LOG_PROC_EXIT_INT(rc);
		return rc;
	}

	rc = ece_funcs->get_num_config_nodes(&num_config_nodes);
	if (rc != 0) {
		LOG_WARNING("Error code %d from plug-in %s when asked for the number of configured nodes in the cluster.\n", rc, cluster_manager->short_name);
		LOG_PROC_EXIT_INT(rc);
		return rc;
	}

	config_nodes = engine_alloc(sizeof(ece_nodeid_t) * num_config_nodes);
	if (config_nodes == NULL) {
		LOG_CRITICAL("Error allocating memory for all the node IDs.\n");

		disconnect_from_ece();

		LOG_PROC_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	rc = cluster_manager->functions.cluster->get_all_nodes(&num_config_nodes, config_nodes);

	if (rc != 0) {
		LOG_SERIOUS("Error code %d when getting a list of all the node IDs.\n", rc);

		disconnect_from_ece();

		LOG_PROC_EXIT_INT(rc);
		return rc;
	}

	config_node_names = engine_alloc(sizeof(node_list_t) + sizeof(node_info_t) * num_config_nodes);
	if (config_node_names == NULL) {
		LOG_CRITICAL("Error getting memory for an array of node name pointers.\n");

		config_node_names = NULL;

		disconnect_from_ece();

		LOG_PROC_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	config_node_names->count = num_config_nodes;

	for (i = 0; (i < num_config_nodes) && (rc == 0); i++) {
		uint name_size = 8;     /* Initial allocation size. */

		char * config_node_name = engine_alloc(name_size);

		if (config_node_name == NULL) {
			LOG_CRITICAL("Error getting memory for a node name.\n");

			disconnect_from_ece();

			LOG_PROC_EXIT_INT(ENOMEM);
			return ENOMEM;
		}

		/*
		 * Get the size of the node name string by calling
		 * nodeid_to_string() with a zero length.  nodeid_to_string()
		 * will set the length to the appropriate size.
		 */
		rc = cluster_manager->functions.cluster->nodeid_to_string(&config_nodes[i],
									  config_node_name,
									  &name_size);

		if (rc == ENOSPC) {
			engine_free(config_node_name);
			config_node_name = engine_alloc(name_size);

			if (config_node_name != NULL) {
				rc = cluster_manager->functions.cluster->nodeid_to_string(&config_nodes[i],
											  config_node_name,
											  &name_size);

				if (rc != 0) {
					LOG_SERIOUS("Error code %d when getting the name for a node ID.\n", rc);

					disconnect_from_ece();

					LOG_PROC_EXIT_INT(rc);
					return rc;
				}

			} else {
				LOG_CRITICAL("Error getting memory for a node name.\n");

				disconnect_from_ece();

				LOG_PROC_EXIT_INT(ENOMEM);
				return ENOMEM;
			}
		}

		config_node_names->node_info[i].node_name = config_node_name;
	}

	rc = ece_funcs->get_my_nodeid(&my_node);
	if (rc != 0) {
		LOG_WARNING("Error code %d from plug-in %s when asked for this machine's cluster node ID.\n", rc, cluster_manager->short_name);

		disconnect_from_ece();

		LOG_PROC_EXIT_INT(rc);
		return rc;
	}

	for (i = 0; i < num_config_nodes; i++) {
		if (memcmp(&my_node, &config_nodes[i], sizeof(ece_nodeid_t)) == 0) {
			my_nodeid = &config_nodes[i];
			break;
		}
	}

	membership = engine_alloc(sizeof(ece_event_t) + sizeof(ece_nodeid_t) * num_config_nodes);
	if (membership == NULL) {
		LOG_CRITICAL("Error allocating memory for the cluster membership.\n");

		disconnect_from_ece();

		LOG_PROC_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	membership_names = engine_alloc(sizeof(node_list_t) + (sizeof(node_info_t) * num_config_nodes));
	if (membership_names == NULL) {
		LOG_CRITICAL("Error allocating memory for the cluster membership names.\n");

		membership_names = NULL;

		disconnect_from_ece();

		LOG_PROC_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	rc = ece_funcs->register_callback(DELTAS, ece_callback);
	if (rc != 0) {
		LOG_WARNING("Error code %d from plug-in %s when registering callback function.\n", rc, cluster_manager->short_name);

		disconnect_from_ece();

		LOG_PROC_EXIT_INT(rc);
		return rc;
	}

	callback_registered = TRUE;

	/*
	 * The following call to get_membership() isn't really used to get the
	 * membership.  The membership will be delivered by callbacks.  What the
	 * following loop does is wait until the ECE has the membership
	 * information.  It is quite possible at Engine startup to load the ECE
	 * plug-in and then try to send messages during discovery before the ECE
	 * gets the membership from the lower level cluster manager.  The
	 * result is an EBUSY, which fails the discovery process which in turn
	 * fails the evms_open_engine().
	 */
	evms_get_config_uint32("clustering.membership_timeout", &timeout);

	do {
		/*
		 * Use an empty event because we don't want this
		 * get_membership() to interfere with the membership while the
		 * deltas may be coming in at the same time and updating the
		 * membership.
		 */
		ece_event_t empty_event = {num_entries: 0};

		rc = ece_funcs->get_membership(&empty_event);
		if (rc == EAGAIN) {
			usleep(1000000);
			timeout--;

		} else {
			if (rc == ENOSPC) {
				rc = 0;
			}
		}

	} while ((timeout > 0) && (rc == EAGAIN));

	LOG_PROC_EXIT_INT(rc);
	return rc;
}


int disconnect_from_ece() {

	int rc = 0;

	LOG_PROC_ENTRY();

	status_message(_("Disconnecting from the cluster manager...\n"));

	if (callback_registered) {
		ece_funcs->unregister_callback(ece_callback);
		callback_registered = FALSE;
	}

	if (membership != NULL) {
		engine_free(membership);
		membership = NULL;
	}

	if (membership_names != NULL) {
		engine_free(membership_names);
		membership_names = NULL;
	}

	if (config_nodes != NULL) {
		engine_free(config_nodes);
		config_nodes = NULL;
	}

	if (config_node_names != NULL) {
		int i;

		for (i = 0; i < num_config_nodes; i++) {
			if (config_node_names->node_info[i].node_name != NULL) {
				engine_free((char *) config_node_names->node_info[i].node_name);
				config_node_names->node_info[i].node_name = NULL;
			}
		}

		engine_free(config_node_names);

		config_node_names = NULL;
	}

	num_config_nodes = 0;
	cluster_manager = NULL;
	ece_funcs = NULL;

	LOG_PROC_EXIT_INT(rc);
	return rc;
}


