/*
 * Copyright (c) 2001-2002 The Trustees of Indiana University.  
 *                         All rights reserved.
 * Copyright (c) 1998-2001 University of Notre Dame. 
 *                         All rights reserved.
 * Copyright (c) 1994-1998 The Ohio State University.  
 *                         All rights reserved.
 * 
 * This file is part of the LAM/MPI software package.  For license
 * information, see the LICENSE file in the top level directory of the
 * LAM/MPI source distribution.
 * 
 *
 *	$Id: igather.c,v 1.3.2.1 2002/10/09 19:48:56 brbarret Exp $
 *
 *	Function:	- IMPI gather collective
 *	Accepts:	- send buffer
 *			- send count
 *			- send datatype
 *			- recv buffer
 *			- recv count
 *			- recv datatype
 *			- root
 *			- communicator
 *	Returns:	- MPI_SUCCESS or an MPI error code
 */

#include "lam_config.h"

#include <mpi.h>
#include <mpisys.h>
#include <impi.h>
#include <impi-defs.h>
#include <blktype.h>


/*
 * Local functions
 */
/* Nonzero when the packed size of the send data is below
   IMPI_COLL_CROSSOVER, i.e. when IMPI_Gather picks gather_short */
static int gather_is_short(int count, MPI_Datatype dtype, MPI_Comm comm);
/* Gather used when gather_is_short() is false (currently a stub that
   returns LAMERROR) */
static int gather_long(void *sbuf, int scount, MPI_Datatype sdtype, 
		       void *rbuf, int rcount, MPI_Datatype rdtype,
		       int root, MPI_Comm comm);
/* Gather used when gather_is_short() is true (currently a stub that
   returns LAMERROR) */
static int gather_short(void *sbuf, int scount, MPI_Datatype sdtype, 
			void *rbuf, int rcount, MPI_Datatype rdtype,
			int root, MPI_Comm comm);


/*
 *
 * IMPORTANT NOTE:
 *
 * THIS FUNCTION IS NOT FINISHED!!!
 *
 * It was started, but never finished due to time constraints.
 * Hence, it just returns LAMERROR.
 *
 */


/*
 *	IMPI_Gather
 *
 *	Function:	- entry point of the IMPI gather collective
 *			- follows (almost exactly) the pseudocode in
 *			  section 4.12 of the IMPI standard
 *			- picks the short or the long algorithm from
 *			  the packed size of the send data
 *			- meant to be called only from the main
 *			  MPI_Gather function
 *	Accepts:	- send buffer
 *			- send count
 *			- send datatype
 *			- recv buffer
 *			- recv count
 *			- recv datatype
 *			- root
 *			- communicator
 *	Returns:	- whatever the selected algorithm returns
 */
int
IMPI_Gather(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf,
	    int rcount, MPI_Datatype rdtype, int root, MPI_Comm comm)
{
  int err;

  lam_setfunc_m(BLKIMPIGATHER);

  /* The send side alone decides which algorithm is used */
  err = gather_is_short(scount, sdtype, comm)
    ? gather_short(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm)
    : gather_long(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm);

  lam_resetfunc_m(BLKIMPIGATHER);

  return err;
}


/*
 *	gather_is_short
 *
 *	Function:	- compare the packed size of the send data
 *			  against IMPI_COLL_CROSSOVER
 *	Accepts:	- send count
 *			- send datatype
 *			- communicator
 *	Returns:	- 1 if the packed size is below the crossover,
 *			  0 otherwise
 */
static int
gather_is_short(int count, MPI_Datatype dtype, MPI_Comm comm)
{
  int packed_bytes;

  /* The status returned by MPI_Pack_size is not examined here */
  MPI_Pack_size(count, dtype, comm, &packed_bytes);

  if (packed_bytes < IMPI_COLL_CROSSOVER)
    return 1;

  return 0;
}


/*
 *	gather_long
 *
 *	Function:	- gather used by IMPI_Gather when the packed
 *			  send data is not below IMPI_COLL_CROSSOVER
 *			- NOT FINISHED/ENABLED: only the "#if 1" branch
 *			  is compiled, so this always returns LAMERROR
 *			- the disabled branch is a linear gather: every
 *			  non-root rank sends to the root, and the root
 *			  receives one block per rank, in rank order,
 *			  into consecutive slots of the receive buffer
 *	Accepts:	- send buffer
 *			- send count
 *			- send datatype
 *			- recv buffer
 *			- recv count
 *			- recv datatype
 *			- root
 *			- communicator
 *	Returns:	- LAMERROR (the disabled branch returns
 *			  MPI_SUCCESS or the code of the first MPI
 *			  call that fails)
 */
static int 
gather_long(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, 
	    int rcount, MPI_Datatype rdtype, int root, MPI_Comm comm)
{
#if 1
  return LAMERROR;
#else
  MPI_Aint extent, incr;
  int i, nprocs, myrank, err;
  char *p;

  err = MPI_Comm_rank(comm, &myrank);
  if (err != MPI_SUCCESS)
    return err;

  err = MPI_Comm_size(comm, &nprocs);
  if (err != MPI_SUCCESS)
    return err;

  /* Non-root ranks only ship their data to the root */

  if (myrank != root)
    return MPI_Send(sbuf, scount, sdtype, root, IMPI_GATHER_TAG, comm);

  err = MPI_Type_extent(rdtype, &extent);
  if (err != MPI_SUCCESS)
    return err;

  /* Keep the per-rank stride in MPI_Aint: this is the large-message
     path, so extent * rcount is not guaranteed to fit in an int */

  incr = extent * rcount;

  for (i = 0, p = (char *) rbuf; i < nprocs; i++, p += incr) {

    /* The root's own contribution goes through a send-to-self so the
       same datatype handling applies as for the remote blocks */

    if (i == myrank)
      err = MPI_Sendrecv(sbuf, scount, sdtype, i, IMPI_GATHER_TAG, 
			 p, rcount, rdtype, i, IMPI_GATHER_TAG, comm, 
			 MPI_STATUS_IGNORE);
    else
      err = MPI_Recv(p, rcount, rdtype, i, IMPI_GATHER_TAG, comm, 
		     MPI_STATUS_IGNORE);

    /* Report the first failure instead of claiming success */

    if (err != MPI_SUCCESS)
      return err;
  }

  return MPI_SUCCESS;
#endif
}


/*
 *	gather_short
 *
 *	Function:	- gather used by IMPI_Gather when the packed
 *			  send data is below IMPI_COLL_CROSSOVER
 *			- NOT FINISHED: only the "#if 1" branch is
 *			  compiled, so this always returns LAMERROR
 *			- the disabled "#else" branch is a partial
 *			  implementation in three steps:
 *			  1. local phase over comm->c_shadow
 *			  2. global phase between masters and the
 *			     root (linear or tree, selected by
 *			     IMPI_MAX_LINEAR_GATHER)
 *			  3. unpack phase at the root, which was
 *			     never written
 *	Accepts:	- send buffer
 *			- send count
 *			- send datatype
 *			- recv buffer
 *			- recv count
 *			- recv datatype
 *			- root
 *			- communicator
 *	Returns:	- LAMERROR (the disabled branch returns
 *			  MPI_SUCCESS, or LAMERROR when the request
 *			  array cannot be allocated)
 */
static int 
gather_short(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, 
	     int rcount, MPI_Datatype rdtype, int root, MPI_Comm comm)
{
#if 1
  /* The only code that is compiled: the function is a stub */
  return LAMERROR;
#else
  MPI_Status status;
  int myrank, packsize, vnum, rootnum, nmasters;
  int mask, nprocs, count, size;
  int mynum, peer, i;
  int shadowsize, shadowroot;
  char *tmpbuf;

  /* None of the MPI return codes below are checked */
  MPI_Comm_rank(comm, &myrank);
  MPI_Comm_size(comm, &nprocs);
  MPI_Pack_size(scount, sdtype, comm, &packsize);

  /* NOTE(review): tmpbuf is assigned only on masters and on the root
     and stays uninitialized on every other rank; the malloc result is
     not checked.  No <stdlib.h> include is visible in this file for
     malloc/free -- confirm a project header provides it before
     enabling this code. */
  if (IMPI_Is_master(myrank, comm) || myrank == root)
    /* allocate a temporary buffer tmpbuf of size nprocs*packsize */
    tmpbuf = (char*) malloc(nprocs * packsize);

  nmasters = IMPI_Num_masters(comm);

  /* local phase */
  /* NOTE(review): shadowsize and shadowroot are not read again in
     this function; the branch below obtains the same values as
     shadow_size and shadow_root_rank. */
  MPI_Comm_size(comm->c_shadow, &shadowsize);
  shadowroot = IMPI_Rank_impi2shadow(root, comm);
  if (IMPI_Are_local(myrank, root, comm)) {
    /* gather the send buffers of the local processes into the 
       root's receive buffer */

    int shadow_root_rank = IMPI_Rank_impi2shadow(root, comm);
    int shadow_size;
    int shadow_rank;

    MPI_Comm_size(comm->c_shadow, &shadow_size);
    MPI_Comm_rank(comm->c_shadow, &shadow_rank);

    /* If we're the root, setup and do the receives for all local
       processes.  Even do a send to myself, 'cause we have to copy
       the data anyway, so we might as well use the data translation
       of MPI_Send/MPI_Recv. */

    if (shadow_root_rank == shadow_rank) {
      /* One request slot per shadow rank for the receives, plus one
         extra slot (index shadow_size) for the send to myself */
      MPI_Request *reqs;
      reqs = malloc(sizeof(MPI_Request) * (shadow_size + 1));
      /* NOTE(review): this early return does not free tmpbuf, which
         may have been allocated above on this rank -- confirm. */
      if (reqs == NULL)
	return LAMERROR;

      /* Do a send to myself, just to make the logic easy */

      MPI_Isend(sbuf, scount, sdtype, shadow_rank, IMPI_GATHER_TAG, 
		comm->c_shadow, &reqs[shadow_size]);

      /* Setup the receives from all the other processes (including
         myself) */

      /* shadow_rank is overwritten in this loop with the shadow rank
         of each local process i.  Each receive lands at byte offset
         i * packsize of rbuf.
         NOTE(review): the offset uses the packed send size while the
         receive uses rcount/rdtype -- confirm these agree.
         NOTE(review): only slots of ranks for which IMPI_Are_local()
         holds are filled in, yet MPI_Waitall below waits on all
         shadow_size + 1 slots; presumably every shadow rank is local
         -- confirm. */
      for (i = 0; i < nprocs; i++)
	if (IMPI_Are_local(i, myrank, comm)) {
	  shadow_rank = IMPI_Rank_impi2shadow(i, comm);
	  MPI_Irecv(((char*) rbuf) + (i * packsize), rcount, rdtype, 
		    shadow_rank, IMPI_GATHER_TAG, comm->c_shadow, 
		    &reqs[shadow_rank]);
	}

      /* Wait for them all to finish */

      MPI_Waitall(shadow_size + 1, reqs, MPI_STATUSES_IGNORE);
      free(reqs);
    }

    /* Otherwise, send to the root */

    else
      MPI_Send(sbuf, scount, sdtype, shadow_root_rank, IMPI_GATHER_TAG,
	       comm->c_shadow);
  }    
  else {
    /* gather send buffers at the local master into tmpbuf */

    /* NOTE(review): the master is looked up with `root` as argument
       although this branch runs on ranks that are not local to the
       root -- possibly myrank was intended; confirm.
       NOTE(review): tmpbuf is passed on every rank taking this
       branch, but it was only allocated on masters and the root. */
    int impi_master_rank = IMPI_Local_master_rank(root, comm);
    int shadow_master_rank = IMPI_Rank_impi2shadow(impi_master_rank, comm);
    MPI_Gather(sbuf, scount, sdtype, tmpbuf, rcount, rdtype,
	       shadow_master_rank, comm->c_shadow);
  }

  /* At this point the master must have a buffer tmpbuf
   * containing a concatenation in rank order of the
   * local processes packed send buffers.
   */

  /* global phase */
  /* Only the root and the masters that are not local to the root
     take part */
  if ((myrank == root) || (IMPI_Is_master(myrank, comm) 
      && !IMPI_Are_local(myrank, root, comm))) {

    if (nmasters <= IMPI_MAX_LINEAR_GATHER) {
      /* linear gather to root */
      if (myrank == root) {
        /* Append each master's bytes to tmpbuf; `size` is the running
           byte offset, advanced by the count actually received */
        for (i = 0, size = 0; i < nmasters; i++) {
          if (i == IMPI_Local_master_num(root, comm))
            continue;            /* skip root's node */

          MPI_Recv(tmpbuf + size, nprocs * packsize, MPI_BYTE, 
                   IMPI_Master_rank(i, comm), IMPI_GATHER_TAG, comm, &status);
          MPI_Get_count(&status, MPI_BYTE, &count);
          size += count;
        }
      } else {
        /* A master sends IMPI_Num_local_to_rank() * packsize bytes of
           tmpbuf straight to the root */
        size = IMPI_Num_local_to_rank(myrank, comm) * packsize;
        MPI_Send(tmpbuf, size, MPI_BYTE, root, IMPI_GATHER_TAG, comm); 
      }
    } else {
      /* tree gather to root */
      /* vnum is this rank's master number rotated so that the root's
         master number maps to 0 */
      mynum = IMPI_Local_master_num(myrank, comm);
      rootnum = IMPI_Local_master_num(root, comm);
      vnum = (mynum - rootnum + nmasters) % nmasters;

      /* The root starts with nothing in tmpbuf; a master starts with
         its own local data */
      if (myrank == root)
          size = 0;
      else
        size = IMPI_Num_local_to_rank(myrank, comm) * packsize;

      /* NOTE(review): the loop is bounded by nprocs although vnum and
         the peer numbers range over nmasters -- confirm the intended
         bound. */
      for (mask = 1; mask < nprocs; mask <<= 1) {
        if (vnum & mask) {
          /* Send everything accumulated so far to the peer whose
             virtual number has this bit cleared, then stop */
          peer = IMPI_Master_rank(((vnum & ~mask) + rootnum) % nmasters, comm);
          if (IMPI_Are_local(peer, root, comm))
            peer = root;

          MPI_Send(tmpbuf, size, MPI_BYTE, peer, IMPI_GATHER_TAG, comm);
          break;
        }
        else {
          /* Receive from the peer whose virtual number has this bit
             set, if such a master exists, and append to tmpbuf */
          peer = vnum | mask;
          if (peer >= nmasters) continue;
          peer = IMPI_Master_rank((peer + rootnum) % nmasters, comm);
          if (IMPI_Are_local(peer, root, comm))
            peer = root;

          MPI_Recv(tmpbuf+size, nprocs*packsize, MPI_BYTE, peer, 
                   IMPI_GATHER_TAG, comm, &status);
          MPI_Get_count(&status, MPI_BYTE, &count);
          size += count;
        }
      }
    }
  }

  /* local phase */
  /* UNFINISHED: nothing below copies data out of tmpbuf yet */
  if (myrank == root) {
    /* unpack the data in tmpbuf into the receive buffer; */

    if (nmasters <= IMPI_MAX_LINEAR_GATHER) {
    /* For the linear gather to root, tmpbuf contains, concatenated in
       order of master rank, the concatenations of the process send
       buffers created in the first local phase.  */

      /* Traverse each of the masters */

      for (i = 0; i < nmasters; i++) {

	/* Is this master me (i.e., the root)?  If so, skip, because
           I've already received the data into the destination
           buffer. */
	/* master_rank is computed but not used yet; the loop body was
	   never written */
	int master_rank = IMPI_Master_rank(i, comm);
	



	/* Continue here....*/


      }
    } else {
      /* For the tree gather to root, the order of these send buffers
	 can be circularly rotated by master rank number (skipping
	 over the root, which has been put directly in the root's
	 receive buffer already).  */
      /* No unpacking code exists for this case either */
    }
  }

  /* Release the staging buffer under the same condition used to
     allocate it */
  if (IMPI_Is_master(myrank, comm) || myrank == root)
    free(tmpbuf);

  return MPI_SUCCESS;
#endif
}
