/*
 * Portions of this file Copyright 1999-2005 University of Chicago
 * Portions of this file Copyright 1999-2005 The University of Southern California.
 *
 * This file or a portion of this file is licensed under the
 * terms of the Globus Toolkit Public License, found at
 * http://www.globus.org/toolkit/download/license.html.
 * If you redistribute this file, with or without
 * modifications, you must include this notice in the file.
 */


/*
 * This file implements the globus_duroc_control_t type and its support routines.
 *
 * globus_duroc_control_t is instantiated to perform DUROC job-control
 * operations; the public interface API is defined in globus_duroc_control.h.
 *
 * these routines are used by globus_duroc_control.c
 */

#include "nexus.h"

#include "globus_gram_client.h"

#include "subjob.h"
#include "job_monitor.h"
#include "control.h"

#include "globus_i_duroc_common.h"

#include <assert.h>
#include <stdio.h>



/*
 * Record of a GRAM state callback that could not be matched to a known
 * subjob when it arrived.  Records are kept on controlp->deferrals and
 * replayed by globus_duroc_control_i_control_link_gram once the matching
 * contact has been linked.
 */
typedef struct globus_duroc_control_i_deferral_s {
  char * globus_gram_contact;  /* heap copy of the subjob GRAM contact; list search key */
  int state;                   /* most recent state reported for that contact */
  int errorcode;               /* error code delivered together with that state */
} globus_duroc_control_i_deferral_t;


/* forward declaration: the handler is defined at the end of this file */
static
void s_checkin_msg_handler (nexus_endpoint_t * endpointp,
			    nexus_buffer_t   * bufferp,
			    nexus_bool_t       is_non_threaded_handler);


/*
 * Handler table installed on the subjob check-in endpoint by
 * s_subjob_checkin_port_init.  Entry 0 is the check-in message handler.
 */
nexus_handler_t s_checkin_handlert[] =
{
  {
    NEXUS_HANDLER_TYPE_NON_THREADED,
    (nexus_handler_func_t) s_checkin_msg_handler
  }
};
/* KEEP THIS TABLE CONSISTENT WITH globus_duroc_runtime.h :
 * CHECKIN_MSG_ID = 0 */

/* number of entries in s_checkin_handlert */
#define CHECKIN_HANDLERT_SIZE 1


/* KEEP THESE DEFINITIONS CONSISTENT WITH duroc-runtime.c */

/* handler IDs used when sending commands over a subjob's command startpoint */
#define RUN_MSG_ID 0  /* not referenced in the visible part of this file */
#define DIE_MSG_ID 1  /* sent by s_checkin_msg_handler to reject a check-in */


/*
 * Set up the subjob check-in port of a control.
 *
 * Steps, in order: initialize the endpoint attributes, install the
 * check-in handler table on them, create the endpoint, store controlp as
 * the endpoint's user pointer, and bind the port's startpoint to the
 * endpoint.
 *
 * Every Nexus failure trips an assert; the function itself always
 * returns 0.
 */
static int
s_subjob_checkin_port_init (globus_duroc_control_checkin_port_t * portp,
                            globus_duroc_control_t              * controlp)
{
  int rc;

  assert (portp != NULL);

  rc = nexus_endpointattr_init (&(portp->epattr));
  assert (rc == 0);

  rc = nexus_endpointattr_set_handler_table (&(portp->epattr),
                                             s_checkin_handlert,
                                             CHECKIN_HANDLERT_SIZE);
  assert (rc == 0);

  rc = nexus_endpoint_init (&(portp->ep), &(portp->epattr));
  assert (rc == 0);

  /* the check-in handler recovers the control from this pointer */
  nexus_endpoint_set_user_pointer (&(portp->ep), (void *) controlp);

  rc = nexus_startpoint_bind (&(portp->sp), &(portp->ep));
  assert (rc == 0);

  return 0;
}

/*
 * Counterpart of s_subjob_checkin_port_init.  Currently a deliberate
 * no-op: the endpoint, its attributes and the bound startpoint are left
 * untouched.
 */
static void
s_subjob_checkin_port_destroy (globus_duroc_control_checkin_port_t * portp)
{
  GLOBUS_IGNORE portp;

  /* don't destroy for now */
}


static int
s_deferral_pred (void * deferral,
		 void * contact)
{
  return utils_streq (((globus_duroc_control_i_deferral_t *) deferral)->globus_gram_contact,
		      (char *) contact);
}


/*
 * GRAM state callback, registered in globus_duroc_control_init.
 *
 * user_arg        -- the globus_duroc_control_t that registered the callback
 * subjob_contact  -- GRAM contact of the subjob whose state changed
 * state/errorcode -- the reported state and its error code
 *
 * If the contact maps to a known job monitor and subjob, the state update
 * is applied and both references obtained by the lookups are released.
 *
 * Otherwise, while the control still has GRAM submissions "open"
 * (open_globus_gram_jobs > 0), the update is stored as a deferral so that
 * globus_duroc_control_i_control_link_gram can replay it later.  Only the
 * latest state per contact is kept.  With no open submissions the update
 * is dropped.
 */
static void
s_subjob_callback_func (void * user_arg,
                        char * subjob_contact,
                        int    state,
                        int    errorcode)
{
  int err;
  globus_duroc_control_t * controlp;
  /* initialized so that a failed lookup can never leave them indeterminate */
  globus_duroc_job_monitor_t *job_monitorp = NULL;
  globus_duroc_subjob_t *subjobp = NULL;

  controlp = ((globus_duroc_control_t *) user_arg);
  assert (controlp!=NULL);

  err = globus_duroc_control_i_job_lookup_by_gram (controlp,
                                                   subjob_contact,
                                                   &job_monitorp);
  assert (!err);

  if ( job_monitorp != NULL ) {
    err = globus_duroc_control_i_subjob_lookup_by_gram (job_monitorp,
                                                        subjob_contact,
                                                        &subjobp);
    assert (!err);
  }

  if ( (job_monitorp != NULL)
       && (subjobp != NULL) ) {
    globus_duroc_control_i_subjob_state_update (controlp,
                                                job_monitorp,
                                                subjobp,
                                                state, errorcode);

    globus_duroc_control_i_subjob_release (controlp, job_monitorp, &subjobp);
    globus_duroc_control_i_job_monitor_release (controlp, &job_monitorp);
    return;
  }

  if ( job_monitorp != NULL ) {
    /* the job was found but the subjob was not: give back the job
     * monitor reference taken by the lookup before deferring */
    globus_duroc_control_i_job_monitor_release (controlp, &job_monitorp);
  }

  err = nexus_mutex_lock (&(controlp->mutex)); assert (!err);

  if ( controlp->open_globus_gram_jobs > 0 ) {
    /* we must defer this state update */
    globus_list_t * existing_deferral_node;
    globus_duroc_control_i_deferral_t * deferralp;

    utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
                 "\nsubjob state update deferred:\n"
                 "subjob GRAM contact >>%s<<\n"
                 "state %d  errorcode %d\n\n",
                 subjob_contact, state, errorcode);

    existing_deferral_node = globus_list_search_pred (controlp->deferrals,
                                                      s_deferral_pred,
                                                      (void *) subjob_contact);
    if ( existing_deferral_node != NULL ) {
      /* update existing deferral */
      deferralp = ((globus_duroc_control_i_deferral_t *)
                   globus_list_first (existing_deferral_node));

      deferralp->state = state;
      deferralp->errorcode = errorcode;
    }
    else {
      /* first deferral for this contact: record a private copy */
      deferralp = ((globus_duroc_control_i_deferral_t *)
                   globus_malloc (sizeof (globus_duroc_control_i_deferral_t)));
      assert (deferralp!=NULL);

      deferralp->globus_gram_contact = utils_strdup (subjob_contact);
      assert (deferralp->globus_gram_contact!=NULL);
      deferralp->state = state;
      deferralp->errorcode = errorcode;

      err = globus_list_insert (&(controlp->deferrals),
                                (void *) deferralp);
      assert (!err);
    }
  }

  err = nexus_mutex_unlock (&(controlp->mutex)); assert (!err);
}


/*
 * Activate the modules this module depends on, in the order
 * COMMON, THREAD, NEXUS, DUCT_CONTROL, GRAM_CLIENT.
 *
 * If any activation fails, the modules activated so far are deactivated
 * again in reverse order.
 *
 * Returns GLOBUS_SUCCESS when all five activations succeed,
 * GLOBUS_FAILURE otherwise.
 */
int
globus_duroc_control_activate (void)
{
  if ( globus_module_activate (GLOBUS_COMMON_MODULE) == GLOBUS_SUCCESS ) {
    if ( globus_module_activate (GLOBUS_THREAD_MODULE) == GLOBUS_SUCCESS ) {
      if ( globus_module_activate (GLOBUS_NEXUS_MODULE) == GLOBUS_SUCCESS ) {
        if ( globus_module_activate (GLOBUS_DUCT_CONTROL_MODULE)
             == GLOBUS_SUCCESS ) {
          if ( globus_module_activate (GLOBUS_GRAM_CLIENT_MODULE)
               == GLOBUS_SUCCESS ) {
            return GLOBUS_SUCCESS;
          }
          /* GRAM client failed: unwind everything activated before it */
          globus_module_deactivate (GLOBUS_DUCT_CONTROL_MODULE);
        }
        globus_module_deactivate (GLOBUS_NEXUS_MODULE);
      }
      globus_module_deactivate (GLOBUS_THREAD_MODULE);
    }
    globus_module_deactivate (GLOBUS_COMMON_MODULE);
  }

  return GLOBUS_FAILURE;
}

/*
 * Deactivate the dependency modules in the reverse of their activation
 * order.  All five deactivations are always attempted.
 *
 * Returns GLOBUS_SUCCESS when every deactivation succeeded, and
 * GLOBUS_FAILURE if at least one of them failed.
 */
int
globus_duroc_control_deactivate (void)
{
  int failures = 0;

  failures += ( globus_module_deactivate (GLOBUS_GRAM_CLIENT_MODULE)
                != GLOBUS_SUCCESS );
  failures += ( globus_module_deactivate (GLOBUS_DUCT_CONTROL_MODULE)
                != GLOBUS_SUCCESS );
  failures += ( globus_module_deactivate (GLOBUS_NEXUS_MODULE)
                != GLOBUS_SUCCESS );
  failures += ( globus_module_deactivate (GLOBUS_THREAD_MODULE)
                != GLOBUS_SUCCESS );
  failures += ( globus_module_deactivate (GLOBUS_COMMON_MODULE)
                != GLOBUS_SUCCESS );

  if ( failures != 0 ) {
    return GLOBUS_FAILURE;
  }
  return GLOBUS_SUCCESS;
}


/*
 * Error helpers for globus_duroc_control_init.  Each one is a comma
 * expression: it reports the failing step and its raw error code through
 * globus_duroc_at_error, then evaluates to the DUROC error code that the
 * caller should return.
 */
#define s_control_init_cb_allow_err(err) \
(globus_duroc_at_error("globus_gram_callback_allow", err), GLOBUS_DUROC_ERROR_GRAM_FAILED)
#define s_control_init_checkin_init_err(err) \
     (globus_duroc_at_error("subjob_checkin_init", err), GLOBUS_DUROC_ERROR_INIT_FAILED)
/* not referenced in the visible part of this file */
#define s_control_init_duct_init_err(err) \
     (globus_duroc_at_error("globus_duct_init", err), GLOBUS_DUROC_ERROR_DUCT_FAILED)
#define s_control_init_demuxt_err(err) \
     (globus_duroc_at_error("hashtable_init", err), GLOBUS_DUROC_ERROR_INIT_FAILED)


/*
 * Initialize a control object.
 *
 * create shared ports:
 * -- GRAM state callback port
 * -- subjob check-in port
 * -- DUCT rendezvous port
 *
 * create demultiplexing tables:
 * -- GRAM job ID to radix subjob ID (callback demux)
 * -- radix subjob ID to job monitor (radix demux)
 *
 * In the code below this amounts to: activate the DUROC control module,
 * initialize the control mutex, register s_subjob_callback_func as the
 * GRAM callback, set up the check-in port, and create the serial-number
 * and GRAM-contact hash tables.
 *
 * Returns GLOBUS_DUROC_SUCCESS on success,
 * GLOBUS_DUROC_ERROR_INVALID_PARAMETER when controlp is NULL,
 * GLOBUS_DUROC_ERROR_INIT_FAILED when module activation fails, or the
 * DUROC error code produced by the failing step.
 *
 * NOTE(review): the error paths below undo the callback registration,
 * the check-in port and the first hash table, but leave the mutex
 * initialized and the module activated.
 */
int globus_duroc_control_init (globus_duroc_control_t * controlp)
{
  int err;

  /* validate the argument before activating anything, so that a rejected
   * call does not leave a stray module activation behind */
  if (controlp==NULL) return GLOBUS_DUROC_ERROR_INVALID_PARAMETER;

  if ( globus_module_activate (GLOBUS_DUROC_CONTROL_MODULE)
       != GLOBUS_SUCCESS )
    return GLOBUS_DUROC_ERROR_INIT_FAILED;

  err = nexus_mutex_init (&(controlp->mutex), NULL); assert (!err);

  err = globus_gram_client_callback_allow (s_subjob_callback_func,
                                           (void *) controlp,
                           (char **) & (controlp->subjob_callback_contact));
  if (err) {
    err = s_control_init_cb_allow_err (err);
    goto control_init_cb_allow_error;
  }
  assert ((controlp->subjob_callback_contact)!=NULL);

  err = s_subjob_checkin_port_init (&(controlp->subjob_checkin_port),
                                    controlp);
  if (err) {
    err = s_control_init_checkin_init_err (err);
    goto control_init_checkin_init_error;
  }

  /* job serial number -> job monitor */
  err = globus_hashtable_init (&(controlp->serialno_hasht),
                               16 /* zero info default size */,
                               globus_hashtable_int_hash,
                               globus_hashtable_int_keyeq);
  if (err) {
    err = s_control_init_demuxt_err (err);
    goto control_init_radixt_init_error;
  }

  /* subjob GRAM contact -> job serial number */
  err = globus_hashtable_init (&(controlp->globus_gram_hasht),
                               16 /* nice default size */,
                               globus_hashtable_string_hash,
                               globus_hashtable_string_keyeq);
  if (err) {
    err = s_control_init_demuxt_err (err);
    goto control_init_gramt_init_error;
  }

  controlp->next_free_serialno = 1;
  controlp->job_monitors = NULL;
  controlp->deferrals = NULL;
  controlp->open_globus_gram_jobs = 0;

  return GLOBUS_DUROC_SUCCESS;


  /* control_init_error_clauses: each label undoes the steps that had
   * succeeded before the failing one, in reverse order

  globus_hashtable_destroy (&(controlp->globus_gram_hasht)); */
 control_init_gramt_init_error:

  globus_hashtable_destroy (&(controlp->serialno_hasht));
 control_init_radixt_init_error:

  s_subjob_checkin_port_destroy (&(controlp->subjob_checkin_port));
 control_init_checkin_init_error:

  globus_gram_client_callback_disallow (controlp->subjob_callback_contact);
 control_init_cb_allow_error:

  return err;
}

/*
 * Counterpart of globus_duroc_control_init.  Currently a no-op: nothing
 * that globus_duroc_control_init set up is torn down here.
 */
void 
globus_duroc_control_destroy (globus_duroc_control_t *controlp)
{
  GLOBUS_IGNORE controlp;

  /* HACK: no-op */
}

/*
 * Hand out the next unused job serial number of this control.
 *
 * The counter is read and advanced while holding the control mutex.
 * Returns the serial number that was current before the increment.
 */
int
globus_duroc_control_i_control_make_job_no (globus_duroc_control_t *controlp)
{
  int rc;
  int issued;

  rc = nexus_mutex_lock (&(controlp->mutex));
  assert (rc == 0);

  issued = controlp->next_free_serialno++;

  rc = nexus_mutex_unlock (&(controlp->mutex));
  assert (rc == 0);

  return issued;
}

/*
 * Register a job monitor with the control.
 *
 * The monitor is entered into the serial-number hash table, keyed by its
 * own serial number, and added to the control's job_monitors list.
 * Insertion failures trip an assert.
 *
 * Always returns GLOBUS_DUROC_SUCCESS.
 */
int
globus_duroc_control_i_control_link_job (globus_duroc_control_t     * controlp,
                                         globus_duroc_job_monitor_t * job_monitorp)
{
  int rc;
  int key;

  /* read the serial number under the monitor's own lock ... */
  rc = nexus_mutex_lock (&(job_monitorp->mutex));
  assert (rc == 0);
  key = job_monitorp->serialno;
  rc = nexus_mutex_unlock (&(job_monitorp->mutex));
  assert (rc == 0);

  /* ... then update the control's tables under the control lock */
  rc = nexus_mutex_lock (&(controlp->mutex));
  assert (rc == 0);

  rc = globus_hashtable_insert (&(controlp->serialno_hasht),
                                (void *) (long) key,
                                (void *) job_monitorp);
  assert (rc == 0);

  rc = globus_list_insert (&(controlp->job_monitors),
                           (void *) job_monitorp);
  assert (rc == 0);

  rc = nexus_mutex_unlock (&(controlp->mutex));
  assert (rc == 0);

  return GLOBUS_DUROC_SUCCESS;
}
/*
 * Build a linearized-startpoint ("LSP") contact string for this control.
 *
 * A copy of the check-in startpoint is taken under the control mutex and
 * written into a local byte buffer behind the decimal text of
 * nexus_user_format() and its terminating NUL.  The buffer is then
 * hex-encoded and prefixed with "LSP".
 *
 * On return *contact points to a globus_malloc'ed string.
 * Always returns GLOBUS_DUROC_SUCCESS; failures trip asserts.
 */
int
globus_duroc_control_i_control_contact_lsp (globus_duroc_control_t  * controlp,
                                            char           ** contact)
{
  int                  rc;
  nexus_startpoint_t   checkin_sp;
  globus_byte_t        raw[GLOBUS_DUCT_MAX_MSG_LENGTH];
  globus_byte_t      * cursor;
  int                  raw_len = 0;

  /* take a private copy of the check-in startpoint */
  rc = nexus_mutex_lock (&(controlp->mutex));
  assert (rc == 0);

  rc = nexus_startpoint_copy (&checkin_sp,
                              &(controlp->subjob_checkin_port.sp));
  assert (rc == 0);

  rc = nexus_mutex_unlock (&(controlp->mutex));
  assert (rc == 0);

  /* the buffer starts with the user-buffer format as decimal text */
  cursor = raw;
  nexus_stdio_lock ();
  sprintf ((char *) cursor, "%d", nexus_user_format());
  nexus_stdio_unlock ();

  /* step over the digits and their terminator:  d d ... d \0 MESG */
  while ( (*cursor) != '\0' ) {
    cursor++;
  }
  cursor++;

  /* the startpoint goes right behind the format string; the cursor is
   * advanced by the call, which is how the total length is obtained */
  nexus_user_put_startpoint_transfer (&cursor, &checkin_sp, 1);
  raw_len = (int) (cursor - raw);
  assert (raw_len <= GLOBUS_DUCT_MAX_MSG_LENGTH);

  /* now hex-encode the buffer and prepend 'LSP' */

  (*contact) = globus_malloc (sizeof(char) * (3 /* LSP */
                                             + (2 * raw_len) + 1 /* hex buff */
                                             + 1 /* \0 */));
  assert ( (*contact) != NULL );

  nexus_stdio_lock ();
  rc = sprintf ( (*contact), "LSP");
  assert (rc == 3);
  nexus_stdio_unlock ();

  globus_l_duroc_hex_encode_byte_array (raw, raw_len, (*contact) + 3);

  /* contact has form:
   *   >L S P hd hd ... hd<
   * "LSP" prefix identifies this as linearized startpoint
   * hex digit substring should be hex_decoded to obtain:
   *   >d d ... d \0 user-sp<
   * "d d ... d" is the user-buffer format
   * user-sp is the startpoint
   */
  return GLOBUS_DUROC_SUCCESS;
}
 
/*
 * Remove a job monitor from the control's bookkeeping: the entry keyed by
 * its serial number in the serial-number hash table, and its node in the
 * job_monitors list.  Asserts that both held exactly this monitor.
 */
void
globus_duroc_control_i_control_unlink_job (globus_duroc_control_t *controlp,
                                           globus_duroc_job_monitor_t *job_monitorp)
{
  int rc;
  int key;
  globus_list_t * entry;
  globus_duroc_job_monitor_t * removed;

  /* fetch the serial number under the monitor's own lock */
  rc = nexus_mutex_lock (&(job_monitorp->mutex));
  assert (rc == 0);
  key = job_monitorp->serialno;
  rc = nexus_mutex_unlock (&(job_monitorp->mutex));
  assert (rc == 0);

  rc = nexus_mutex_lock (&(controlp->mutex));
  assert (rc == 0);

  /* drop the serial-number mapping */
  removed = ((globus_duroc_job_monitor_t *)
             globus_hashtable_remove (&(controlp->serialno_hasht),
                                      (void *) (long) key));
  assert (removed == job_monitorp);

  /* drop the list membership */
  entry = globus_list_search (controlp->job_monitors,
                              (void *) job_monitorp);
  assert (entry != NULL);
  removed = ((globus_duroc_job_monitor_t *)
             globus_list_remove (&(controlp->job_monitors), entry));
  assert (removed == job_monitorp);

  rc = nexus_mutex_unlock (&(controlp->mutex));
  assert (rc == 0);
}

/*
 * Count one more "open" GRAM submission on the control.  While the
 * counter is positive, s_subjob_callback_func defers state callbacks it
 * cannot match; globus_duroc_control_i_control_link_gram decrements it.
 *
 * Always returns GLOBUS_DUROC_SUCCESS.
 */
int
globus_duroc_control_i_control_open_gram (globus_duroc_control_t * controlp)
{
  int rc;

  rc = nexus_mutex_lock (&(controlp->mutex));
  assert (rc == 0);

  controlp->open_globus_gram_jobs++;

  rc = nexus_mutex_unlock (&(controlp->mutex));
  assert (rc == 0);

  return GLOBUS_DUROC_SUCCESS;
}

/*
 * Link a subjob's GRAM contact to a job serial number and close one
 * "open" GRAM submission.
 *
 * controlp -- the control whose tables are updated
 * contact  -- GRAM contact string; a private copy is used as table key
 * serialno -- job serial number stored as the table value
 *
 * If a state callback for this contact was deferred earlier, the deferral
 * is taken off the list and, after the control mutex has been released,
 * replayed through s_subjob_callback_func and freed.  When consuming a
 * deferral brings the open-submission count to zero, all remaining
 * deferrals are discarded.
 *
 * Always returns GLOBUS_DUROC_SUCCESS; table failures trip asserts.
 */
int
globus_duroc_control_i_control_link_gram (globus_duroc_control_t * controlp,
                                          const char * contact,
                                          int serialno)
{
  int err;
  globus_list_t * existing_deferral_node;
  globus_duroc_control_i_deferral_t * deferralp = NULL;

  err = nexus_mutex_lock (&(controlp->mutex)); assert (!err);

  assert( globus_hashtable_lookup (&(controlp->globus_gram_hasht),
                                   (void *) contact)
          == NULL );

  err = globus_hashtable_insert (&(controlp->globus_gram_hasht),
                                 (void *) utils_strdup (contact),
                                 (void *) (long) serialno);
  assert (!err);

  assert (controlp->open_globus_gram_jobs > 0);
  controlp->open_globus_gram_jobs -= 1;

  /* check for deferrals */
  existing_deferral_node = globus_list_search_pred (controlp->deferrals,
                                                    s_deferral_pred,
                                                    (void *) contact);
  if ( existing_deferral_node != NULL ) {
    /* consume existing deferral */
    deferralp = ((globus_duroc_control_i_deferral_t *)
                 globus_list_first (existing_deferral_node));

    globus_list_remove (&(controlp->deferrals),
                        existing_deferral_node);

    if ( controlp->open_globus_gram_jobs == 0 ) {
      /* any other deferrals are garbage, so purge them.
       * A separate pointer is used here: deferralp must keep pointing at
       * the consumed deferral, which is replayed and freed below. */
      while ( ! globus_list_empty (controlp->deferrals) ) {
        globus_duroc_control_i_deferral_t * stalep;

        stalep = ((globus_duroc_control_i_deferral_t *)
                  globus_list_first (controlp->deferrals));
        globus_free (stalep->globus_gram_contact);
        globus_free (stalep);
        globus_list_remove (&(controlp->deferrals), controlp->deferrals);
      }
    }
  }

  err = nexus_mutex_unlock (&(controlp->mutex)); assert (!err);

  if ( deferralp != NULL ) {
    utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
                 "subjob state update reprocessed:\n"
                 "subjob GRAM contact >>%s<<\n"
                 "state %d  errorcode %d\n\n",
                 deferralp->globus_gram_contact,
                 deferralp->state,
                 deferralp->errorcode);

    /* "rethrow" the state callback; this must happen without the control
     * mutex held because the callback locks it itself */
    s_subjob_callback_func ((void *) controlp,
                            deferralp->globus_gram_contact,
                            deferralp->state,
                            deferralp->errorcode);

    globus_free (deferralp->globus_gram_contact);
    globus_free (deferralp);
  }

  return GLOBUS_DUROC_SUCCESS;
}

/*
 * Remove the GRAM-contact mapping for contact from the control, under
 * the control mutex.  Asserts that the removed value was a positive
 * serial number, i.e. that the contact had been linked.
 */
void
globus_duroc_control_i_control_unlink_gram (globus_duroc_control_t *controlp,
                                            const char * contact)
{
  int rc;
  int removed_serialno;

  rc = nexus_mutex_lock (&(controlp->mutex));
  assert (rc == 0);

  removed_serialno = ((int) (long)
                      globus_hashtable_remove (&(controlp->globus_gram_hasht),
                                               (void *) contact));
  assert (removed_serialno > 0);

  rc = nexus_mutex_unlock (&(controlp->mutex));
  assert (rc == 0);
}

/*
 * generate the job contact string for this
 * <control, job_monitor> pair
 *
 * contacts include communications info and a
 * linearized job_monitor state snapshot to allow limited
 * failure-handling by the outside agent
 * (globus_duroc_job_kill can fall back on a 'controlless' kill
 * of the snapshot job if the real control is unavailable)
 *
 * As implemented here, the contact is simply the job monitor's serial
 * number printed in hexadecimal into a freshly globus_malloc'ed buffer
 * that is stored in *contactp.  controlp is not used.
 *
 * Always returns GLOBUS_DUROC_SUCCESS.
 */
int
globus_duroc_control_job_contact (globus_duroc_control_t      * controlp,
                                  globus_duroc_job_monitor_t  * job_monitorp,
                                  char                ** contactp)
{
  int rc;
  int serial;

  GLOBUS_IGNORE controlp;

  /* read the serial number under the monitor lock */
  rc = nexus_mutex_lock (&(job_monitorp->mutex));
  assert (rc == 0);
  serial = job_monitorp->serialno;
  rc = nexus_mutex_unlock (&(job_monitorp->mutex));
  assert (rc == 0);

  assert (contactp != NULL);

  /* the template string only serves to size the buffer */
  (*contactp) = globus_malloc (sizeof(char)
                               * (utils_strlen ("XXXXXXXXXXXXXXX")
                                  + 1));
  assert ((*contactp) != NULL);

  utils_sprintf ((*contactp), "%x", serial);

  return GLOBUS_DUROC_SUCCESS;
}

/*
 * Parse the job serial number out of a job contact string, i.e. the
 * hexadecimal text written by globus_duroc_control_job_contact.
 *
 * Returns the parsed number converted to int, or 0 when job_contact is
 * NULL or does not start with a hexadecimal number.  Callers treat
 * values <= 0 as invalid.
 */
static int
s_job_contact_serialno (const char * job_contact)
{
  /* %x stores through an unsigned int pointer; starting at 0 keeps the
   * result defined when nothing is matched */
  unsigned int parsed = 0;
  int          matched;

  if ( job_contact == NULL ) return 0;

  nexus_stdio_lock ();
  matched = sscanf (job_contact, "%x", &parsed);
  nexus_stdio_unlock ();

  if ( matched != 1 ) return 0;

  return (int) parsed;
}


/*
 * get job_monitor_t record for contact
 * atomically increments record ref_count
 * on success and s_job_release must be called after use
 *
 * The contact is parsed into a serial number and the lookup is delegated
 * to globus_duroc_control_i_job_lookup_by_serialno, whose result is
 * returned unchanged.  A NULL contact is passed on as serial number 0.
 * The callee rejects serial numbers <= 0 with
 * GLOBUS_DUROC_ERROR_INVALID_PARAMETER, so an unusable contact is
 * reported to the caller as an error instead of aborting the process.
 */
int
globus_duroc_control_i_job_lookup (globus_duroc_control_t * controlp,
                                   const char * job_contact,
                                   globus_duroc_job_monitor_t ** job_monitorpp)
{
  int serialno;

  /* never hand a NULL string to the parser */
  serialno = (job_contact != NULL)
    ? s_job_contact_serialno (job_contact)
    : 0;

  return globus_duroc_control_i_job_lookup_by_serialno (controlp,
                                                        serialno,
                                                        job_monitorpp);
}

/* reports a failed mutex lock and evaluates to the DUROC error code */
#define s_job_by_globus_gram_lock_err(err) \
(globus_duroc_at_error("mutex_lock", err), GLOBUS_DUROC_ERROR_NEXUS_FAILED)

/*
 * get job_monitor_t record for a subjob GRAM contact
 * atomically increments record ref_count
 * on success and s_job_release must be called after use
 *
 * The contact is mapped to a job serial number through the control's
 * GRAM hash table; the record itself is then fetched with
 * globus_duroc_control_i_job_lookup_by_serialno.
 *
 * Returns GLOBUS_DUROC_ERROR_INVALID_PARAMETER for a NULL argument,
 * GLOBUS_DUROC_ERROR_NEXUS_FAILED if the control mutex cannot be locked,
 * GLOBUS_DUROC_SUCCESS with *job_monitorpp == NULL when the contact is
 * not in the table, and otherwise the result of the serial-number lookup.
 */
int
globus_duroc_control_i_job_lookup_by_gram (globus_duroc_control_t      * controlp,
                                           const char           * globus_gram_contact,
                                           globus_duroc_job_monitor_t ** job_monitorpp)
{
  int rc;
  int serialno;

  if ( controlp == NULL ) return GLOBUS_DUROC_ERROR_INVALID_PARAMETER;
  if ( globus_gram_contact == NULL ) return GLOBUS_DUROC_ERROR_INVALID_PARAMETER;
  if ( job_monitorpp == NULL ) return GLOBUS_DUROC_ERROR_INVALID_PARAMETER;

  rc = nexus_mutex_lock (&(controlp->mutex));
  if (rc) {
    return s_job_by_globus_gram_lock_err (rc);
  }

  /* the table stores the serial number directly in the value pointer */
  serialno = ((int) (long)
              globus_hashtable_lookup (&(controlp->globus_gram_hasht),
                                       (void *) globus_gram_contact));

  rc = nexus_mutex_unlock (&(controlp->mutex));
  assert (rc == 0);

  if ( serialno > 0 ) {
    return globus_duroc_control_i_job_lookup_by_serialno (controlp,
                                                          serialno,
                                                          job_monitorpp);
  }

  /* unknown contact: not an error, just no record */
  (*job_monitorpp) = NULL;
  return GLOBUS_DUROC_SUCCESS;
}

/* each helper reports a failed mutex lock and evaluates to the DUROC
 * error code to return */
#define s_job_by_serialno_lock_err(err) \
(globus_duroc_at_error("mutex lock", err), GLOBUS_DUROC_ERROR_NEXUS_FAILED)
#define s_job_by_serialno_lock2_err(err) \
(globus_duroc_at_error("mutex lock", err), GLOBUS_DUROC_ERROR_NEXUS_FAILED)

/*
 * get job_monitor_t record for serialno
 * atomically increments record ref_count
 * on success and s_job_release must be called after use
 *
 * Returns GLOBUS_DUROC_ERROR_INVALID_PARAMETER for a NULL pointer
 * argument or serialno <= 0, GLOBUS_DUROC_ERROR_NEXUS_FAILED if a mutex
 * cannot be locked, and GLOBUS_DUROC_SUCCESS otherwise.  On success
 * *job_monitorpp is the record, or NULL when no job has that serial
 * number.  When locking the record's mutex fails, *job_monitorpp is
 * reset to NULL because no reference was taken.
 */
int
globus_duroc_control_i_job_lookup_by_serialno (globus_duroc_control_t       *controlp,
                                               int                   serialno,
                                               globus_duroc_job_monitor_t **job_monitorpp)
{
  int err;

  if ( (controlp==NULL) || (serialno<=0) || (job_monitorpp==NULL) )
    return GLOBUS_DUROC_ERROR_INVALID_PARAMETER;

  err = nexus_mutex_lock (&(controlp->mutex));
  if (err) {
    return s_job_by_serialno_lock_err (err);
  }

  (*job_monitorpp) = ((globus_duroc_job_monitor_t *)
                      globus_hashtable_lookup (&(controlp->serialno_hasht),
                                               (void *) (long) serialno));

  err = nexus_mutex_unlock (&(controlp->mutex)); assert (!err);

  if ( (*job_monitorpp) != NULL ) {
    err = nexus_mutex_lock (&((*job_monitorpp)->mutex));
    if (err) {
      /* the control mutex was already released above, so it must not be
       * unlocked again here; only withdraw the unreferenced record */
      (*job_monitorpp) = NULL;
      return s_job_by_serialno_lock2_err (err);
    }

    (*job_monitorpp)->ref_count += 1;

    err = nexus_mutex_unlock (&((*job_monitorpp)->mutex)); assert (!err);
  }

  return GLOBUS_DUROC_SUCCESS;
}

/*
 * Give back a job monitor reference obtained from one of the
 * globus_duroc_control_i_job_lookup* routines.
 *
 * Currently a no-op: both arguments are ignored, so the ref_count
 * incremented by globus_duroc_control_i_job_lookup_by_serialno is not
 * decremented here.
 */
void
globus_duroc_control_i_job_monitor_release (globus_duroc_control_t      * controlp,
				     globus_duroc_job_monitor_t ** job_monitorpp)
{
  GLOBUS_IGNORE controlp;
  GLOBUS_IGNORE job_monitorpp;
}


/*
 * Ask GRAM to cancel the job behind a subjob.
 *
 * The subjob's contact string is duplicated under the subjob mutex, so
 * the cancel request itself is issued without holding the lock.  The
 * result of the cancel request is deliberately not reported.
 */
void
globus_duroc_control_i_subjob_kill (globus_duroc_subjob_t *subjobp)
{
  int err;
  char * contact;

  err = nexus_mutex_lock (&(subjobp->mutex)); assert (!err);

  contact = utils_strdup (subjobp->contact);
  assert (contact!=NULL);

  err = nexus_mutex_unlock (&(subjobp->mutex)); assert (!err);

  err = globus_gram_client_job_cancel (contact);

  /*  if (err) globus_duroc_at_error ("globus_gram_job_cancel", err); */

  /* the private copy is no longer needed once the request was issued */
  globus_free (contact);
}

/*
 * subjob delete subroutine:
 * -- send GRAM job kill command
 * -- enter removal mapping into callback demux table
 * -- remove GRAM job id from radix demux table
 * -- remove GRAM job data from job monitor
 *
 * In this function: the subjob is looked up by label within the job
 * monitor, killed through globus_duroc_control_i_subjob_kill, and handed
 * to globus_duroc_control_i_subjob_destroy.
 *
 * Returns GLOBUS_DUROC_SUCCESS, or GLOBUS_DUROC_ERROR_UNKNOWN_LABEL when
 * the label lookup reports an error.
 */
int
globus_duroc_control_i_subjob_delete (globus_duroc_control_t      *controlp,
                                      globus_duroc_job_monitor_t *job_monitorp,
                                      const char          *subjob_label)
{
  int rc;
  globus_duroc_subjob_t * victim;

  rc = globus_duroc_control_i_subjob_lookup_by_label (job_monitorp,
                                                      subjob_label,
                                                      &victim);
  if (rc) {
    /* any lookup failure is reported as an unknown label */
    return GLOBUS_DUROC_ERROR_UNKNOWN_LABEL;
  }
  assert (victim != NULL);

  globus_duroc_control_i_subjob_kill (victim);

  globus_duroc_control_i_subjob_destroy (controlp, job_monitorp, &victim);

  return GLOBUS_DUROC_SUCCESS;
}

#if 0
/* NOTE: this stub is compiled out by the enclosing #if 0. */
/*
 * for each GRAM state callback:
 * -- if job ID is mappable to job monitor
 * -- -- enter state update into job monitor
 * -- -- propagate/synthesize state to client ?
 * -- else if job ID is mappable to deferral structure
 * -- -- enter state update into deferral structure
 * -- else if deferral possible
 * -- -- add deferral mapping to callback demux table
 * -- -- enter state update into deferral structure
 */
static int 
s_process_globus_gram_callback (globus_duroc_control_t *controlp,
			 const char     *subjob_contact,
			 int             state,
			 int             errorcode)
{
  GLOBUS_IGNORE controlp;
  GLOBUS_IGNORE subjob_contact;
  GLOBUS_IGNORE state;
  GLOBUS_IGNORE errorcode;

  return 0;
}
#endif

/*
 * Error reporters for s_checkin_msg_handler.  Each one forwards a fixed
 * description and the offending value to globus_duroc_at_error.
 */

/* check-in named a job serial number that could not be looked up */
#define s_checkin_unknown_job_err(serialno) \
globus_duroc_at_error ("bad job serialno", serialno)

/* check-in named a subjob serial number that could not be looked up */
#define s_checkin_unknown_subjob_err(serialno) \
globus_duroc_at_error ("bad subjob serialno", serialno)

/* check-in carried a protocol version other than the supported one */
#define s_checkin_unknown_protocol_err(protocol) \
globus_duroc_at_error ("incompatible checkin protocol", protocol)

/* sending the DIE command back to the subjob failed */
#define s_checkin_kill_err(err) \
globus_duroc_at_error ("nexus_send_rsr", err)

/*
 * for each subjob checkin message:
 * -- enter check-in data into subjob record
 * -- send RUN ? [if committed and complete check-in]
 *
 * Message layout as unpacked below: protocol version (int), command
 * startpoint, job serial number (int), subjob serial number (int).
 *
 * An accepted check-in stores a copy of the command startpoint in the
 * subjob record, marks the subjob as checked in, moves a PENDING or
 * ACTIVE subjob to CHECKED_IN, and polls the job monitor.
 *
 * A check-in is rejected when the protocol version differs from
 * GLOBUS_DUROC_CHECKIN_PROTOCOL_VERSION, when the job or subjob cannot
 * be looked up, or when the subjob has already checked in.  In that case
 * a DIE message carrying the reason is sent over the command startpoint.
 *
 * Lookup references are released on every path that obtained them.  The
 * received startpoint and the incoming buffer are destroyed on every
 * path.
 */
static void
s_checkin_msg_handler (nexus_endpoint_t * endpointp,
                       nexus_buffer_t   * bufferp,
                       nexus_bool_t       is_non_threaded_handler)
{
  int                   err;
  globus_duroc_control_t     * controlp;
  /* zero-initialized: the rejection path prints both serial numbers even
   * when the message was refused before they were unpacked */
  int                   job_serialno = 0;
  int                   subjob_serialno = 0;
  int                   protocol_version;
  int                   die_reason;
  nexus_startpoint_t    command_sp;
  globus_duroc_job_monitor_t * job_monitorp = NULL;
  globus_duroc_subjob_t      * subjobp = NULL;

  GLOBUS_IGNORE is_non_threaded_handler;

  controlp = ((globus_duroc_control_t *)
              nexus_endpoint_get_user_pointer (endpointp));
  assert (controlp!=NULL);

  /* unpack the checkin message */
  err = nxbuff_get_int (bufferp, &protocol_version); assert (!err);
  err = nxbuff_get_startpoint (bufferp, &command_sp); assert (!err);

  if ( protocol_version != GLOBUS_DUROC_CHECKIN_PROTOCOL_VERSION ) {
    /* incompatible runtime library: cancel it! */
    s_checkin_unknown_protocol_err (protocol_version);
    die_reason = GLOBUS_DUROC_ERROR_PROTOCOL_VERSION_MISMATCH;
    goto checkin_unknown_subjob_kill;
  }
  else {
    /* we don't support multiple versions here,
     * so this code is not conditionalized on version number
     * once we get this far
     */
    err = nxbuff_get_int (bufferp, &job_serialno); assert (!err);
    err = nxbuff_get_int (bufferp, &subjob_serialno); assert (!err);

    utils_debug (GLOBUS_DUROC_DEBUG_FLAG, "checking in subjob <%x,%x>...\n",
                 job_serialno, subjob_serialno);

    err = globus_duroc_control_i_job_lookup_by_serialno (controlp,
                                                         job_serialno,
                                                         &job_monitorp);
    if ( err
         || (job_monitorp == NULL) ) {
      s_checkin_unknown_job_err (job_serialno);
      die_reason = GLOBUS_DUROC_ERROR_INVALID_CHECKIN;
      goto checkin_unknown_subjob_kill;
    }

    err = globus_duroc_control_i_subjob_lookup_by_serialno (job_monitorp,
                                                            subjob_serialno,
                                                            &subjobp);
    if ( err
         || (subjobp == NULL) ) {
      s_checkin_unknown_subjob_err (subjob_serialno);
      die_reason = GLOBUS_DUROC_ERROR_INVALID_CHECKIN;
      /* the job lookup succeeded, so give its reference back */
      globus_duroc_control_i_job_monitor_release (controlp, &job_monitorp);
      goto checkin_unknown_subjob_kill;
    }

    err = nexus_mutex_lock (&(subjobp->mutex)); assert (!err);
    if (subjobp->checked_in != GLOBUS_FALSE) {
      /* duplicate check-in for a subjob that already checked in */
      err = nexus_mutex_unlock (&(subjobp->mutex)); assert (!err);
      die_reason = GLOBUS_DUROC_ERROR_INVALID_CHECKIN;
      /* both lookups succeeded, so give both references back */
      globus_duroc_control_i_subjob_release (controlp, job_monitorp, &subjobp);
      globus_duroc_control_i_job_monitor_release (controlp, &job_monitorp);
      goto checkin_unknown_subjob_kill;
    }
    err = nexus_startpoint_copy (&(subjobp->command_sp),
                                 &command_sp);
    assert (!err);
    subjobp->checked_in = GLOBUS_TRUE;

    if ((subjobp->state==GLOBUS_DUROC_SUBJOB_STATE_PENDING)
        ||(subjobp->state==GLOBUS_DUROC_SUBJOB_STATE_ACTIVE))
      /* don't overwrite a failed/done state */
      subjobp->state = GLOBUS_DUROC_SUBJOB_STATE_CHECKED_IN;

    err = nexus_mutex_unlock (&(subjobp->mutex)); assert (!err);

    utils_debug (GLOBUS_DUROC_DEBUG_FLAG, "recorded subjob <%x,%x> checkin\n\n",
                 job_serialno, subjob_serialno);

    globus_duroc_control_i_subjob_release (controlp, job_monitorp, &subjobp);

    globus_duroc_control_i_job_monitor_poll (controlp, job_monitorp);

    globus_duroc_control_i_job_monitor_release (controlp, &job_monitorp);

    nexus_startpoint_destroy (&command_sp);
    nexus_buffer_destroy (bufferp);

    return;
  }


checkin_unknown_subjob_kill:
  /* spurious checkin!  might as well tell it to die.. */
  {
    nexus_buffer_t send_buffer;

    utils_debug (GLOBUS_DUROC_DEBUG_FLAG, "reaping unknown subjob <%x,%x>!\n\n",
                 job_serialno, subjob_serialno);

    err = nexus_buffer_init (&send_buffer, 0, 0); assert (!err);
    err = nxbuff_put_int (&send_buffer,
                          die_reason);
    assert (!err);

    err = nexus_send_rsr (&send_buffer, &command_sp,
                          DIE_MSG_ID,
                          NEXUS_TRUE /* destroy buffer */,
                          NEXUS_TRUE /* always safe */);
    if (err) {
      s_checkin_kill_err (err);
    }
  }

  nexus_startpoint_destroy (&command_sp);
  nexus_buffer_destroy (bufferp);
}



