/*
 * Copyright 1998-2001, University of Notre Dame.
 * Authors: Jeffrey M. Squyres and Arun Rodrigues with Brian Barrett,
 *          Kinis L. Meyer, M. D. McNally, and Andrew Lumsdaine
 * 
 * This file is part of the Notre Dame LAM implementation of MPI.
 * 
 * You should have received a copy of the License Agreement for the Notre
 * Dame LAM implementation of MPI along with the software; see the file
 * LICENSE.  If not, contact Office of Research, University of Notre
 * Dame, Notre Dame, IN 46556.
 * 
 * Permission to modify the code and to distribute modified code is
 * granted, provided the text of this NOTICE is retained, a notice that
 * the code was modified is included with the above COPYRIGHT NOTICE and
 * with the COPYRIGHT NOTICE in the LICENSE file, and that the LICENSE
 * file is distributed with the modified code.
 * 
 * LICENSOR MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED.
 * By way of example, but not limitation, Licensor MAKES NO
 * REPRESENTATIONS OR WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY
 * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE COMPONENTS
 * OR DOCUMENTATION WILL NOT INFRINGE ANY PATENTS, COPYRIGHTS, TRADEMARKS
 * OR OTHER RIGHTS.
 * 
 * Additional copyrights may follow.
 * 
 *	Ohio Trollius
 *	Copyright 1997 The Ohio State University
 *	RBD/NJN
 *
 *	$Id: reduce.c,v 6.7 2000/10/17 12:11:30 jsquyres Exp $
 *
 *	Function:	- reduce values to root
 *	Accepts:	- send buffer
 *			- receive buffer
 *			- count of elements
 *			- type of elements
 *			- operation to perform
 *			- which process gets the result
 *			- communicator
 *	Returns:	- MPI_SUCCESS or error code
 */

#include <stdlib.h>

#include <lam_config.h>
#include <app_mgmt.h>
#include <blktype.h>
#include <mpi.h>
#include <mpisys.h>
#include <rpisys.h>
#include <terror.h>
#include <impi.h>


/*
 * local functions
 */
static int reduce_lin(void *sbuf, void *rbuf, int count, MPI_Datatype dtype,
		      MPI_Op op, int root, MPI_Comm comm);
static int reduce_log(void *sbuf, void *rbuf, int count, MPI_Datatype dtype,
		      MPI_Op op, int root, MPI_Comm comm);


/*@

MPI_Reduce - Reduces values on all processes to a single value

Input Parameters:
+ sbuf - address of send buffer (choice) 
. count - number of elements in send buffer (integer) 
. dtype - data type of elements of send buffer (handle) 
. op - reduce operation (handle) 
. root - rank of root process (integer) 
- comm - communicator (handle) 

Output Parameter:
. rbuf - address of receive buffer (choice, 
significant only at 'root') 

.N IMPI_YES

Algorithm:

If there are 4 or less ranks involved, the root loops over receiving
from each rank, and then performs the final reduction locally.  

If there are more than 4 ranks involved, a tree-based algorithm is
used to collate the reduced data at the root (the data is reduced at
each parent in the tree so that the reduction operations are actaully
distributed).

.N fortran

.N collops

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM
.N MPI_ERR_INTERCOMM
.N MPI_ERR_IMPI
.N MPI_ERR_COUNT
.N MPI_ERR_TYPE
.N MPI_ERR_BUFFER
.N MPI_ERR_BUFFER_ALIAS
.N MPI_ERR_ROOT

.N ACK
@*/
int MPI_Reduce(void *sbuf, void* rbuf, int count, 
	       MPI_Datatype dtype, MPI_Op op, int root, 
	       MPI_Comm comm)
{
	int		size;			/* group size */
	int		err;			/* error code */
	struct _gps	*p;			/* favourite pointer */

	lam_initerr_m();
	lam_setfunc_m(BLKMPIREDUCE);
/*
 * Check for invalid arguments.
 */
	if ((comm == MPI_COMM_NULL) || LAM_IS_INTER(comm)) {
		return(lam_errfunc(comm, BLKMPIREDUCE,
				lam_mkerr(MPI_ERR_COMM, 0)));
	}

	if (dtype == MPI_DATATYPE_NULL) {
		return(lam_errfunc(comm, BLKMPIREDUCE, 
				lam_mkerr(MPI_ERR_TYPE, 0)));
	}

	if (count < 0) {
		return(lam_errfunc(comm, BLKMPIREDUCE,
				lam_mkerr(MPI_ERR_COUNT, 0)));
	}

	MPI_Comm_size(comm, &size);

	if ((root < 0) || (root >= size)) {
		return(lam_errfunc(comm, BLKMPIREDUCE,
				lam_mkerr(MPI_ERR_ROOT, 0)));
	}

	if (op == MPI_OP_NULL) {
		return(lam_errfunc(comm, BLKMPIREDUCE,
				lam_mkerr(MPI_ERR_OP, 0)));
	}

	LAM_TRACE(lam_tr_cffstart(BLKMPIREDUCE));
/*
 * Remember required parameters.
 */
	p = &(comm->c_group->g_procs[root]->p_gps);

	lam_setparam(BLKMPIREDUCE, root | (p->gps_grank << 16),
				(p->gps_node << 16) | p->gps_idx);

#if LAM_WANT_IMPI
	/* The IMPI case must be before the 0 count optimization -- if
           it's an IMPI communicator, we must do the whole IMPI
           algorithm */

        if (LAM_IS_IMPI(comm)) {
	  err = IMPI_Reduce(sbuf, rbuf, count, dtype, op, root, comm);
	  lam_resetfunc_m(BLKMPIREDUCE);
	  return err;
	}
#endif
/*
 * Check for zero count case.
 */
	if (count == 0) {
		LAM_TRACE(lam_tr_cffend(BLKMPIREDUCE,
					root, comm, dtype, count));

		lam_resetfunc_m(BLKMPIREDUCE);
		return(MPI_SUCCESS);
	}
/*
 * Decide which algorithm to use.
 */
	if (size <= LAM_COLLMAXLIN) {
		err = reduce_lin(sbuf, rbuf, count, dtype, op, root, comm);
	} else {
		err = reduce_log(sbuf, rbuf, count, dtype, op, root, comm);
	}

	return(err);
}

/*
 *	reduce_lin
 *
 *	Function:	- reduction using O(N) algorithm
 *	Accepts:	- same as MPI_Reduce()
 *	Returns:	- MPI_SUCCESS or error code
 */
static int
reduce_lin(void *sbuf, void *rbuf, int count, MPI_Datatype dtype,
	   MPI_Op op, int root, MPI_Comm comm)
{
	int		i;			/* favourite index */
	int		size;			/* group size */
	int		rank;			/* my rank */
	int		err;			/* error code */
	char		*buffer = 0;		/* incoming msg buffer */
	char		*origin = 0;		/* origin of data in buffer */
	char		*inbuf;			/* ptr incoming message */
	MPI_Status	status;			/* receive status */

	MPI_Comm_rank(comm, &rank);
	MPI_Comm_size(comm, &size);

	lam_mkcoll(comm);
/*
 * If not root, send data to the root.
 */
	if (rank != root) {
		err = MPI_Send(sbuf, count, dtype, root, BLKMPIREDUCE, comm);
		lam_mkpt(comm);

		if (err != MPI_SUCCESS) {
			return(lam_errfunc(comm, BLKMPIREDUCE, err));
		}

		LAM_TRACE(lam_tr_cffend(BLKMPIREDUCE,
						root, comm, dtype, count));

		lam_resetfunc_m(BLKMPIREDUCE);
		return(MPI_SUCCESS);
	}
/*
 * Root receives and reduces messages.
 * Allocate buffer to receive messages.
 */
	if (size > 1) {
		err = lam_dtbuffer(dtype, count, &buffer, &origin);
		if (err != MPI_SUCCESS) {
			return(lam_errfunc(comm, BLKMPIREDUCE, err));
		}
	}
/*
 * Initialize the receive buffer.
 */
	if (rank == (size - 1)) {
		err = lam_dtsndrcv(sbuf, count, dtype, rbuf, count,
					dtype, BLKMPIREDUCE, comm);
	} else {
		err = MPI_Recv(rbuf, count, dtype, size - 1,
					BLKMPIREDUCE, comm, &status);
	}

	if (err != MPI_SUCCESS) {
		if (buffer) free(buffer);
		lam_mkpt(comm);
		return(lam_errfunc(comm, BLKMPIREDUCE, err));
	}
/*
 * Loop receiving and calling reduction function (C or Fortran).
 */
	for (i = size - 2; i >= 0; --i) {

		if (rank == i) {
			inbuf = sbuf;
		}
		else {
			err = MPI_Recv(origin, count, dtype, i,
					BLKMPIREDUCE, comm, &status);

			if (err != MPI_SUCCESS) {
				if (buffer) free(buffer);
				lam_mkpt(comm);
				return(lam_errfunc(comm,
						BLKMPIREDUCE, err));
			}

			inbuf = origin;
		}
/*
 * Call reduction function.
 */
		if (op->op_flags & LAM_LANGF77) {
			(op->op_func)
				(inbuf, rbuf, &count, &dtype->dt_f77handle);
		} else {
			(op->op_func)(inbuf, rbuf, &count, &dtype);
		}
	}

	if (buffer) free(buffer);

	lam_mkpt(comm);
	LAM_TRACE(lam_tr_cffend(BLKMPIREDUCE, root, comm, dtype, count));
	lam_resetfunc_m(BLKMPIREDUCE);
	return(MPI_SUCCESS);
}

/*
 *	reduce_log
 *
 *	Function:	- reduction using O(log N) algorithm
 *	Accepts:	- same as MPI_Reduce()
 *	Returns:	- MPI_SUCCESS or error code
 */
static int
reduce_log(void *sbuf, void *rbuf, int count, MPI_Datatype dtype,
	   MPI_Op op, int root, MPI_Comm comm)
{
	int		i;			/* favourite index */
	int		size;			/* group size */
	int		rank;			/* my rank */
	int		vrank;			/* virtual rank */
	int		err;			/* error code */
	int		peer;			/* peer rank */
	int		dim;			/* cube dimension */
	int		mask;			/* rank bit mask */
	int		fl_recv;		/* received msg flag */
	char		*buf1;			/* 1st temp buffer */
	char		*buf2;			/* 2nd temp buffer */
	char		*origin1;		/* origin in 1st temp buffer */
	char		*origin2;		/* origin in 2nd temp buffer */
	void		*inmsg;			/* ptr incoming msg */
	void		*resmsg;		/* ptr result msg */
	MPI_Status	status;			/* receive status */
/*
 * Allocate the incoming and resulting message buffers.
 */
	err = lam_dtbuffer(dtype, count, &buf1, &origin1);
	if (err != MPI_SUCCESS) return(err);

	err = lam_dtbuffer(dtype, count, &buf2, &origin2);
	if (err != MPI_SUCCESS) {
		if (buf1) free(buf1);
		return(err);
	}

	MPI_Comm_size(comm, &size);
	MPI_Comm_rank(comm, &rank);

	vrank = (op->op_commute) ? (rank - root + size) % size : rank;

	lam_mkcoll(comm);

	dim = comm->c_cube_dim;
/*
 * Loop over cube dimensions.
 * High processes send to low ones in the dimension.
 */
	inmsg = origin1;
	resmsg = origin2;
	fl_recv = 0;

	for (i = 0, mask = 1; i < dim; ++i, mask <<= 1) {
/*
 * A high-proc sends to low-proc and stops.
 */
		if (vrank & mask) {

			peer = vrank & ~mask;
			if (op->op_commute) peer = (peer + root) % size;

			err = MPI_Send((fl_recv) ? resmsg : sbuf, count,
					dtype, peer, BLKMPIREDUCE, comm);
			if (err != MPI_SUCCESS) {
				if (buf1) free(buf1);
				if (buf2) free(buf2);
				lam_mkpt(comm);
				return(lam_errfunc(comm,
						BLKMPIREDUCE, err));
			}

			break;
		}
/*
 * A low-proc receives, reduces, and moves to a higher dimension.
 */
		else {
			peer = vrank | mask;
			if (peer >= size) continue;
			if (op->op_commute) peer = (peer + root) % size;

			fl_recv = 1;

			err = MPI_Recv(inmsg, count, dtype, peer,
						BLKMPIREDUCE, comm, &status);
			if (err != MPI_SUCCESS) {
				if (buf1) free(buf1);
				if (buf2) free(buf2);
				lam_mkpt(comm);
				return(lam_errfunc(comm,
						BLKMPIREDUCE, err));
			}

			if (op->op_flags & LAM_LANGF77) {
				(*op->op_func)((i > 0) ? resmsg : sbuf,
					inmsg, &count, &dtype->dt_f77handle);
			} else {
				(*op->op_func)((i > 0) ? resmsg : sbuf,
					inmsg, &count, &dtype);
			}

			if (inmsg == origin1) {
				resmsg = origin1;
				inmsg = origin2;
			} else {
				resmsg = origin2;
				inmsg = origin1;
			}
		}
	}
/*
 * Get the result to the root if needed.
 */
	err = MPI_SUCCESS;

	if (vrank == 0) {
		if (root == rank) {
			lam_dtcpy(rbuf, (i > 0) ? resmsg : sbuf,
						count, dtype);
		} else {
			err = MPI_Send((i > 0) ? resmsg : sbuf, count,
					dtype, root, BLKMPIREDUCE, comm);
		}
	}
	else if (rank == root) {
		err = MPI_Recv(rbuf, count, dtype,
				0, BLKMPIREDUCE, comm, &status);
	}

	if (buf1) free(buf1);
	if (buf2) free(buf2);
	lam_mkpt(comm);

	if (err != MPI_SUCCESS) {
		return(lam_errfunc(comm, BLKMPIREDUCE, err));
	}
/*
 * Generate run time trace.
 */
	LAM_TRACE(lam_tr_cffend(BLKMPIREDUCE, root, comm, dtype, count));

	lam_resetfunc_m(BLKMPIREDUCE);
	return(MPI_SUCCESS);
}
