/*
 * Copyright 1998-2001, University of Notre Dame.
 * Authors: Jeffrey M. Squyres and Arun Rodrigues with Brian Barrett,
 *          Kinis L. Meyer, M. D. McNally, and Andrew Lumsdaine
 * 
 * This file is part of the Notre Dame LAM implementation of MPI.
 * 
 * You should have received a copy of the License Agreement for the Notre
 * Dame LAM implementation of MPI along with the software; see the file
 * LICENSE.  If not, contact Office of Research, University of Notre
 * Dame, Notre Dame, IN 46556.
 * 
 * Permission to modify the code and to distribute modified code is
 * granted, provided the text of this NOTICE is retained, a notice that
 * the code was modified is included with the above COPYRIGHT NOTICE and
 * with the COPYRIGHT NOTICE in the LICENSE file, and that the LICENSE
 * file is distributed with the modified code.
 * 
 * LICENSOR MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED.
 * By way of example, but not limitation, Licensor MAKES NO
 * REPRESENTATIONS OR WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY
 * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE COMPONENTS
 * OR DOCUMENTATION WILL NOT INFRINGE ANY PATENTS, COPYRIGHTS, TRADEMARKS
 * OR OTHER RIGHTS.
 * 
 * Additional copyrights may follow.
 * 
 *	Ohio Trollius
 *	Copyright 1996 The Ohio State University
 *	RBD
 *
 *	$Id: iccreate.c,v 6.5 1999/12/31 21:27:29 jsquyres Exp $
 *
 *	Function:	- create a new inter-communicator
 *	Accepts:	- local intra-communicator
 *			- local leader
 *			- peer communicator
 *			- peer leader
 *			- tag
 *			- new inter-communicator (out)
 *	Returns:	- MPI_SUCCESS or error code
 */

#include <stdlib.h>

#include <lam_config.h>
#include <app_mgmt.h>
#include <blktype.h>
#include <mpi.h>
#include <mpisys.h>
#include <rpisys.h>
#include <terror.h>

/*@

MPI_Intercomm_create - Creates an intercommuncator from two
intracommunicators

Input Paramters:
+ lcomm - Local (intra)communicator
. lleader - Rank in local_comm of leader (often 0)
. pcomm - Remote communicator
. pleader - Rank in peer_comm of remote leader (often 0)
- tag - Message tag to use in constructing intercommunicator; if
multiple 'MPI_Intercomm_creates' are being made, they should use
different tags (more precisely, ensure that the local and remote
leaders are using different tags for each 'MPI_intercomm_create').

Output Parameter:
. newcomm - Created intercommunicator

Notes:

The MPI 1.1 Standard contains two mutually exclusive comments on the
input intracommunicators.  One says that their repective groups must
be disjoint; the other that the leaders can be the same process.
After some discussion by the MPI Forum, it has been decided that the
groups must be disjoint.  Note that the `reason` given for this in the
standard is `not` the reason for this choice; rather, the `other`
operations on intercommunicators (like 'MPI_Intercomm_merge') do not
make sense if the groups are not disjoint.

.N fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM
.N MPI_ERR_TAG
.N MPI_ERR_ARG
.N MPI_ERR_EXHAUSTED
.N MPI_ERR_RANK

.seealso: MPI_Intercomm_merge, MPI_Comm_free, MPI_Comm_remote_group, 
          MPI_Comm_remote_size

.N ACK
@*/
int MPI_Intercomm_create(MPI_Comm lcomm, int lleader, 
			 MPI_Comm pcomm, int pleader, int tag, 
			 MPI_Comm *newcomm)
{
	MPI_Status	stat;			/* message status */
	MPI_Group	lgroup;			/* local group */
	MPI_Group	rgroup;			/* remote group */
	int		myrank;			/* my rank */
	int		outmsg[2];		/* output message buffer */
	int		inmsg[2];		/* input message buffer */
	int		mycid;			/* my context ID */
	int		cid;			/* global context ID */
	int		err;			/* error code */
	int		i;			/* favourite index */
	int		lgsize;			/* local group size */
	int		rgsize;			/* remote group size */
	int		lgbytes;		/* local group GPS size */
	int		rgbytes;		/* remote group GPS size */
	struct _gps	*lprocs;		/* local process GPS */
	struct _gps	*rprocs;		/* remote process GPS */
	struct _proc	*p;			/* favourite pointer */
	struct _proc	**pp;			/* another pointer */
	struct _gps	*pgp;			/* yet another pointer */

	lam_initerr();
	lam_setfunc(BLKMPIICOMMCREATE);
/*
 * Check the arguments.
 */
	if (lcomm == MPI_COMM_NULL) {
		return(lam_errfunc(MPI_COMM_WORLD,
			BLKMPIICOMMCREATE, lam_mkerr(MPI_ERR_COMM, 0)));
	}

	if (LAM_IS_INTER(lcomm)) {
		return(lam_errfunc(lcomm,
			BLKMPIICOMMCREATE, lam_mkerr(MPI_ERR_COMM, 0)));
	}

	if (lcomm->c_group->g_myrank == lleader) {
		if (pcomm == MPI_COMM_NULL) {
			return(lam_errfunc(lcomm, BLKMPIICOMMCREATE, 
					lam_mkerr(MPI_ERR_COMM, 0)));
		}

		rgroup = (LAM_IS_INTER(pcomm))
			? pcomm->c_rgroup : pcomm->c_group;
		
		if ((pleader < 0) || (pleader >= rgroup->g_nprocs)) {
			return(lam_errfunc(pcomm, BLKMPIICOMMCREATE, 
					lam_mkerr(MPI_ERR_RANK, 0)));
		}
	}

	if (tag < 0) {
		return(lam_errfunc(lcomm,
			BLKMPIICOMMCREATE, lam_mkerr(MPI_ERR_TAG, 0)));
	}

	if ((lleader < 0) || (lleader >= lcomm->c_group->g_nprocs)) {
		return(lam_errfunc(lcomm,
			BLKMPIICOMMCREATE, lam_mkerr(MPI_ERR_RANK, 0)));
	}

	if (newcomm == 0) {
		return(lam_errfunc(lcomm,
			BLKMPIICOMMCREATE, lam_mkerr(MPI_ERR_ARG, 0)));
	}



#if LAM_WANT_IMPI

	/* Remove this when IMPI collectives are implemented */

        if (LAM_IS_IMPI(lcomm)) {
	  return lam_err_comm(lcomm, MPI_ERR_COMM, 0, 
			      "Collectives not yet implemented on IMPI communicators");
	}
	if (LAM_IS_IMPI(pcomm)) {
	  return lam_err_comm(pcomm, MPI_ERR_COMM, 0, 
			      "Collectives not yet implemented on IMPI communicators");
	}
#endif

	LAM_TRACE(lam_tr_cffstart(BLKMPIICOMMCREATE));
/*
 * Create the new context ID: reduce-max to leader within the local group,
 * then find the max between the two leaders, then broadcast within group.
 * In the same message, the leaders exchange their local group sizes and
 * broadcast the received group size to their local group.
 */
	lgroup = lcomm->c_group;
	lgsize = lgroup->g_nprocs;
	myrank = lgroup->g_myrank;

	mycid = lam_getcid();
	if (mycid < 0) {
		return(lam_errfunc(lcomm, BLKMPIICOMMCREATE,
				lam_mkerr(MPI_ERR_INTERN, EFULL)));
	}

	err = MPI_Reduce(&mycid, &cid, 1, MPI_INT, MPI_MAX, lleader, lcomm);
	if (err != MPI_SUCCESS) {
		return(lam_errfunc(lcomm, BLKMPIICOMMCREATE, err));
	}

	if (lleader == myrank) {
		outmsg[0] = cid;
		outmsg[1] = lgsize;

		err = MPI_Sendrecv(outmsg, 2, MPI_INT, pleader, tag,
				inmsg, 2, MPI_INT, pleader, tag, pcomm, &stat);
		if (err != MPI_SUCCESS) {
			return(lam_errfunc(lcomm, BLKMPIICOMMCREATE, err));
		}

		if (inmsg[0] < cid) inmsg[0] = cid;
	}

	err = MPI_Bcast(inmsg, 2, MPI_INT, lleader, lcomm);
	if (err != MPI_SUCCESS) {
		return(lam_errfunc(lcomm, BLKMPIICOMMCREATE, err));
	}

	cid = inmsg[0];
	rgsize = inmsg[1];
/*
 * Allocate remote group process GPS array.
 */
	rgbytes = rgsize * sizeof(struct _gps);
	rprocs = (struct _gps *) malloc((unsigned) rgbytes);
	if (rprocs == 0) {
		return(lam_errfunc(lcomm, BLKMPIICOMMCREATE,
				lam_mkerr(MPI_ERR_OTHER, errno)));
	}
/*
 * Leaders exchange process GPS arrays and broadcast them to their group.
 */
	if (lleader == myrank) {

		lgbytes = lgsize * sizeof(struct _gps);
		lprocs = (struct _gps *) malloc((unsigned) lgbytes);
		if (lprocs == 0) {
			return(lam_errfunc(lcomm, BLKMPIICOMMCREATE,
					lam_mkerr(MPI_ERR_OTHER, errno)));
		}
/*
 * Fill local process GPS.
 */
		for (i = 0, pp = lgroup->g_procs; i < lgsize; ++i, ++pp) {
			lprocs[i] = (*pp)->p_gps;
		}

		err = MPI_Sendrecv(lprocs, lgbytes/sizeof(int), MPI_INT,
					pleader, tag, rprocs,
					rgbytes/sizeof(int), MPI_INT,
					pleader, tag, pcomm, &stat);

		free((char *) lprocs);

		if (err != MPI_SUCCESS) {
			free((char *) rprocs);
			return(lam_errfunc(lcomm, BLKMPIICOMMCREATE, err));
		}
	}

	err = MPI_Bcast(rprocs, rgbytes/sizeof(int), MPI_INT, lleader, lcomm);
	if (err != MPI_SUCCESS) {
		free((char *) rprocs);
		return(lam_errfunc(lcomm, BLKMPIICOMMCREATE, err));
	}
/*
 * Create the remote group.
 */
	rgroup = (MPI_Group) malloc((unsigned) sizeof(struct _group) +
					(rgsize * sizeof(struct _proc **)));
	if (rgroup == 0) {
		free((char *) rprocs);
		return(lam_errfunc(lcomm, BLKMPIICOMMCREATE,
				lam_mkerr(MPI_ERR_OTHER, errno)));
	}

	rgroup->g_nprocs = rgsize;
	rgroup->g_myrank = MPI_UNDEFINED;
	rgroup->g_refcount = 1;
	rgroup->g_f77handle = -1;
	rgroup->g_procs = (struct _proc **)
				((char *) rgroup + sizeof(struct _group));

	for (i = 0, pgp = rprocs; i < rgsize; ++i, ++pgp) {
		if ((p = lam_procadd(pgp)) == 0) {
			return(lam_errfunc(lcomm, BLKMPIICOMMCREATE,
					lam_mkerr(MPI_ERR_OTHER, errno)));
		}
		if (!(p->p_mode & LAM_PRPIINIT)) {
			p->p_mode |= LAM_PCLIENT;
		}
		p->p_refcount++;
		rgroup->g_procs[i] = p;
	}

	free((char *) rprocs);
/*
 * Create the new communicator.
 */
	*newcomm = 0;
	if (lam_comm_new(cid, lgroup, rgroup, LAM_CINTER, newcomm)) {
		return(lam_errfunc(lcomm, BLKMPIICOMMCREATE,
				lam_mkerr(MPI_ERR_OTHER, errno)));
	}

	lgroup->g_refcount++;
	(*newcomm)->c_errhdl = lcomm->c_errhdl;
	lcomm->c_errhdl->eh_refcount++;

	if (!al_insert(lam_comms, newcomm)) {
		return(lam_errfunc(lcomm, BLKMPIICOMMCREATE,
				lam_mkerr(MPI_ERR_INTERN, errno)));
	}
	
	if (lam_tr_comm(*newcomm)) {
		return(lam_errfunc(lcomm, BLKMPIICOMMCREATE,
				lam_mkerr(MPI_ERR_INTERN, errno)));
	}
	
	lam_setcid(cid);
/*
 * setup any new processes
 */
	if (RPI_SPLIT(_rpi_lamd_addprocs, _rpi_c2c_addprocs, ())) {
		return(lam_errfunc(lcomm, BLKMPIICOMMCREATE,
				lam_mkerr(MPI_ERR_OTHER, errno)));
	}

	LAM_TRACE(lam_tr_cffend(BLKMPIICOMMCREATE, lleader, lcomm, 0, 0));

	lam_resetfunc(BLKMPIICOMMCREATE);
	return(MPI_SUCCESS);
}
