#ifndef lint
/* Source-control identification string for this file ("@(#)" marker);
   it is left out when lint is defined. */
static char SCCSid[] = "@(#) ./comm/global/gscat.c 07/23/93";
#endif

#include "comm/comm.h"
#include "comm/procset.h"
#include "comm/global/global.h"
#include <stdio.h>

/*
   gscattersetT - Distribute a buffer to every member of a process set by
   passing it down a binary tree.

   Parameters:
   buf      - holds the data on the source; filled in on every other process
   size     - length argument handed to the send/receive macros
   issrc    - non-zero on the process that owns the data
   procset  - process set to use; when null the default collective tree
              (_PIPARENT/_PILCHILD/_PIRCHILD) is used
   datatype - datatype argument handed to the send/receive macros

   If the source is not the root of the tree, the data is first shipped to
   the root and then distributed from there.
 */
void gscattersetT( buf, size, issrc, procset, datatype )
char    *buf;
int     size, issrc;
ProcSet *procset;
int     datatype;
{
int left, right, up, top, me, nproc;
int tagup, tagdn, curphase;

if (procset) {
    me    = procset->myid;
    nproc = procset->nump;
    left  = procset->l_child;
    right = procset->r_child;
    up    = procset->parent;
    top   = procset->root;
    GMSGPHASE(procset,curphase);
    /* Nothing to exchange in a set with at most one member.  The phase has
       already been fetched above, exactly as before. */
    if (procset->npset <= 1) return;
    }
else {
    /* Set up the default tree first when the parent marker is below -1 */
    if (_PIPARENT < -1)
	PISetupCollectiveTree( );
    left     = _PILCHILD;
    right    = _PIRCHILD;
    up       = _PIPARENT;
    /* Flip the global phase between 0 and 1 on every call */
    _PIPHASE = !_PIPHASE;
    curphase = _PIPHASE;
    top      = 0;
    me       = MYPROCID;
    nproc    = NUMNODES;
    }
tagup = GMSGTYPE(procset,MSG_UP|curphase);
tagdn = GMSGTYPE(procset,MSG_DN|curphase);

/* Step 1: make sure the root of the tree holds the data */
if (me != top) {
    if (issrc) {
	SENDSYNCNOMEM(tagup|nproc,buf,size,top,datatype);
	}
    }
else if (!issrc) {
    RECVSYNCNOMEM(tagup|nproc,buf,size,datatype);
    }

/* Step 2: pass the data down the tree.
   We probably want to pick the method based on some threshold (only use
   forcetypes on those systems that support them when the threshold size
   is exceeded). */
#if !defined(NO_FORCE)
/* The trailing "|| 1" keeps the forcetype variant below switched off */
if (!(_PIRRSIZE < 0 || size < _PIRRSIZE || 1)) {
    /* This isn't correct since the parent needs to know WHICH child
       sent the ready message */
    if (up >= 0) {
	SENDSYNCNOMEM(tagup,buf,0,up,MSG_OTHER);
	RECVSYNCNOMEMFORCE(tagdn,buf,size,datatype);
	}
    if (left >= 0) {
	int d;
	RECVSYNCNOMEM(tagup,&d,0,MSG_OTHER);
	SENDSYNCNOMEMFORCE(tagdn,buf,size,RECVFROM(),datatype);
	}
    if (right >= 0) {
	int d;
	RECVSYNCNOMEM(tagup,&d,0,MSG_OTHER);
	SENDSYNCNOMEMFORCE(tagdn,buf,size,RECVFROM(),datatype);
	}
    return;
    }
#endif

/* Ordinary tree: receive from the parent, then feed the left child and
   the right child, in that order */
if (up >= 0) {
    RECVSYNCNOMEM(tagdn,buf,size,datatype);
    }
if (left >= 0) {
    SENDSYNCNOMEM(tagdn,buf,size,left,datatype);
    }
if (right >= 0) {
    SENDSYNCNOMEM(tagdn,buf,size,right,datatype);
    }
}

#ifdef FOO
/* NOTE(review): this definition is compiled only when FOO is defined.  It
   has the same name and parameter list as the gscattersetT defined earlier
   in this file, it assigns to lphase without declaring it, and it leaves
   root, myid and np unassigned when procset is null.  It appears to be an
   earlier version kept for reference - confirm before enabling FOO. */
void gscattersetT( buf, size, issrc, procset, datatype )
char    *buf;
int     size, issrc;
ProcSet *procset;
int     datatype;
{
/* We will cheat here.  If src != the top node of the procset, send it there.
   then distribute it using the usual tree. */
int l_child, r_child, parent, root, myid, np;
int msgup, msgdn;

if (!procset) {
    /* No process set given: take the tree links from the global tree,
       setting it up first when the parent marker is below -1 */
    if (_PIPARENT < -1)
	PISetupCollectiveTree( );
    l_child = _PILCHILD;
    r_child = _PIRCHILD;
    parent  = _PIPARENT;
    _PIPHASE= _PIPHASE ? 0 : 1;
    lphase  = _PIPHASE;
    }
else {
    /* Take the tree links and ids from the process set */
    myid    = procset->myid;
    np      = procset->nump;
    l_child = procset->l_child;
    r_child = procset->r_child;
    parent  = procset->parent;
    root    = procset->root;
    }
/* Unlike the active version, the phase is not folded into the message types */
msgup = GMSGTYPE(procset,MSG_UP);
msgdn = GMSGTYPE(procset,MSG_DN);

/* Get the data to the root of the tree */
if (issrc && myid != root) {
    SENDSYNCNOMEM(msgup|np,buf,size,root,datatype);
    }
else if (myid == root && !issrc) {
    RECVSYNCNOMEM(msgup|np,buf,size,datatype);
    }

/* From here, we can send the data down */
/* Receive from the parent (if any), then forward to the left and right
   children (if any) */
if (parent >= 0) {
    RECVSYNCNOMEM(msgdn,buf,size,datatype);
    }
if (l_child >= 0)
    SENDSYNCNOMEM(msgdn,buf,size,l_child,datatype);
if (r_child >= 0)
    SENDSYNCNOMEM(msgdn,buf,size,r_child,datatype);
}
#endif

/* This is like gscatterset, but the source is known by everyone.
   It needs to be improved so that it really takes advantage of the 
   known source. */
void gscattersetsrcT( buf, size, src, procset, datatype )
char    *buf;
int     size, src;
ProcSet *procset;
int     datatype;
{
/* We will cheat here.  If src != the top node of the procset, send it there.
   then distribute it using the usual tree. */
int l_child, r_child, parent, root, myid, np;
int msgup, msgdn, lphase;

if (!procset) {
    if (_PIPARENT < -1)
	PISetupCollectiveTree( );
    l_child = _PILCHILD;
    r_child = _PIRCHILD;
    parent  = _PIPARENT;
    _PIPHASE= _PIPHASE ? 0 : 1;
    lphase  = _PIPHASE;
    root    = 0;
    }
else {
    myid    = procset->myid;
    np      = procset->nump;
    l_child = procset->l_child;
    r_child = procset->r_child;
    parent  = procset->parent;
    root    = procset->root;
    GMSGPHASE(procset,lphase);
    }
msgup = GMSGTYPE(procset,MSG_UP|lphase);
msgdn = GMSGTYPE(procset,MSG_DN|lphase);

/* Get the data to the root of the tree */
if (src == myid && myid != root) {
    SENDSYNCNOMEM(msgup|np,buf,size,root,datatype);
    }
else if (myid == root && src != myid) {
    RECVSYNCNOMEM(msgup|np,buf,size,datatype);
    }

/* From here, we can send the data down */
if (parent >= 0) {
    RECVSYNCNOMEM(msgdn,buf,size,datatype);
    }
if (l_child >= 0)
    SENDSYNCNOMEM(msgdn,buf,size,l_child,datatype);
if (r_child >= 0)
    SENDSYNCNOMEM(msgdn,buf,size,r_child,datatype);
}

/*
   gscattersetR - Recursive scatter: the data is handed from relative index
   0 to relative index 2^j for decreasing j, and every process that already
   holds the data does the same for the smaller strides.

   Parameters:
   buf      - holds the data on the source; filled in on every other process
   size     - length argument handed to the send/receive macros
   issrc    - non-zero on the process that owns the data
   procset  - process set to use; when null all NUMNODES processes take
              part and the relative index is MYPROCID
   datatype - datatype argument handed to the send/receive macros

   Partners whose relative index would be >= npset do not exist and are
   skipped, so the routine also works when npset is not a power of two.
 */
void gscattersetR( buf, size, issrc, procset, datatype )
char    *buf;
int     size, issrc;
ProcSet *procset;
int     datatype;
{
/* We will cheat here.  If src != the top node of the procset, send it there.
   then distribute it using recursive subdivision */
int lidx, npset, root, myid, np;
int msgup, msgdn, lphase, i0, mask, j0, phase, nbr;

if (!procset) {
    /* Flip the global phase between 0 and 1 on every call */
    _PIPHASE= _PIPHASE ? 0 : 1;
    lphase  = _PIPHASE;
    root    = 0;
    myid    = MYPROCID;
    np      = NUMNODES;
    npset   = np;
    lidx    = myid;
    }
else {
    myid    = procset->myid;
    np      = procset->nump;
    npset   = procset->npset;
    lidx    = procset->lidx;
    root    = procset->root;
    GMSGPHASE(procset,lphase);
    if (procset->npset <= 1) return;
    }
msgup = GMSGTYPE(procset,MSG_UP|lphase);
msgdn = GMSGTYPE(procset,MSG_DN|lphase);

/* Get the data to the root of the tree */
/* Eventually we can do this by rotating lidx to the node that is the root */
if (issrc && myid != root) {
    SENDSYNCNOMEM(msgup|np,buf,size,root,datatype);
    }
else if (myid == root && !issrc) {
    RECVSYNCNOMEM(msgup|np,buf,size,datatype);
    }

/* Find my I0 based on my relative index
   (eventually, this should be part of the procset structure).
   This is the method (all id's are relative):
   The initial node is node 0.
   At each step, we send to 2^j, where j is decreasing.  The
   "phase" of a node is simply the number of bits minus the index of the
   first non-zero bit, reading from the RIGHT.
 */

/* Compute the initial phase and I0.  After the loop i0 is one more than the
   number of bits scanned, and j0 is the 1-based position of the lowest set
   bit of lidx (0 when lidx is 0). */
i0   = 1;
mask = 1;
j0   = 0;
while (mask < npset) {
    if ((mask & lidx) && !j0 ) j0 = i0;
    i0++;
    mask <<= 1;
    }
/* Largest stride: the highest power of two below npset */
mask >>= 1;
phase = 0;
/* Turn j0 into the first phase in which this process sends; it receives in
   phase j0-1, where mask equals the lowest set bit of lidx.  Relative index
   0 keeps j0 == 0: it never receives and sends in every phase. */
if (j0 > 0)
    j0    = i0 - j0;
#if defined(NO_FORCE)
while (mask > 0) {
    if (phase == j0-1) {
	/* the sender is relative index lidx - mask */
	RECVSYNCNOMEM(msgdn,buf,size,datatype);
	}
    if (phase >= j0) {
	nbr = lidx + mask;
	/* Skip partners past the end of the set.  Wrapping the index around
	   would address a process that posts no matching receive, and
	   nbr == npset would index past the end of node_nums. */
	if (nbr < npset) {
	    if (procset) nbr = procset->node_nums[nbr];
	    SENDSYNCNOMEM(msgdn,buf,size,nbr,datatype);
	    }
	}
    phase++;
    mask >>= 1;
    }
#else
if (_PIRRSIZE < 0 || size < _PIRRSIZE) {
    /* Ordinary sends and receives */
    while (mask > 0) {
	if (phase == j0-1) {
	    /* the sender is relative index lidx - mask */
	    RECVSYNCNOMEM(msgdn,buf,size,datatype);
	    }
	if (phase >= j0) {
	    nbr = lidx + mask;
	    /* Skip partners past the end of the set (see the header comment) */
	    if (nbr < npset) {
		if (procset) nbr = procset->node_nums[nbr];
		SENDSYNCNOMEM(msgdn,buf,size,nbr,datatype);
		}
	    }
	phase++;
	mask >>= 1;
	}
    }
else {
    /* Forcetype variant: the receiver posts its receive, tells the sender
       it is ready with a zero-length message tagged msgdn|<its relative
       index>, and only then does the sender ship the data. */
    int rid, b, nidx;
    while (mask > 0) {
	if (phase == j0-1) {
	    nbr = lidx - mask;
	    if (nbr < 0) nbr += npset;
	    if (procset) nbr = procset->node_nums[nbr];
	    RECVASYNCNOMEMFORCE(msgdn,buf,size,datatype,rid);
	    SENDSYNCNOMEM(msgdn|lidx,buf,0,nbr,MSG_OTHER);
	    RECVWAITNOMEMFORCE(msgdn,buf,size,datatype,rid);
	    }
	if (phase >= j0) {
	    nbr  = lidx + mask;
	    /* Skip partners past the end of the set; such a partner would
	       never send the ready message we would otherwise wait for. */
	    if (nbr < npset) {
		nidx = nbr;
		if (procset) nbr = procset->node_nums[nbr];
		RECVSYNCNOMEM(msgdn|nidx,&b,sizeof(int),MSG_OTHER);
		SENDSYNCNOMEMFORCE(msgdn,buf,size,nbr,datatype);
		}
	    }
	phase++;
	mask >>= 1;
	}
    }
#endif
}

