
/*
 *           PVM 3.2:  Parallel Virtual Machine System 3.2
 *               University of Tennessee, Knoxville TN.
 *           Oak Ridge National Laboratory, Oak Ridge TN.
 *                   Emory University, Atlanta GA.
 *      Authors:  A. L. Beguelin, J. J. Dongarra, G. A. Geist,
 *    W. C. Jiang, R. J. Manchek, B. K. Moore, and V. S. Sunderam
 *                   (C) 1992 All Rights Reserved
 *
 *                              NOTICE
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby granted
 * provided that the above copyright notice appear in all copies and
 * that both the copyright notice and this permission notice appear in
 * supporting documentation.
 *
 * Neither the Institutions (Emory University, Oak Ridge National
 * Laboratory, and University of Tennessee) nor the Authors make any
 * representations about the suitability of this software for any
 * purpose.  This software is provided ``as is'' without express or
 * implied warranty.
 *
 * PVM 3.2 was funded in part by the U.S. Department of Energy, the
 * National Science Foundation and the State of Tennessee.
 */

/*
 *	pvmdmimd.c
 *
 *  MPP interface.
 *
 *		void mpp_init(int *argc, char **argv):
 *			Initialization. Create a table to keep track of active nodes.
 *			argc, argv: passed from main.
 *
 *		int mpp_load(int flags, char *name, char **argv, int count, int tids[],
 *					int ptid):
 *			Load executable onto nodes; create new entries in task table,
 *			encode node number and process type into task IDs, etc.
 *
 *				Construction of Task ID:
 *
 *				 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1
 *				+-+-+---------------------+-+-----------+---------------------+
 *				|s|g|		host index	  |n| instance  |	node # (2048)	  |
 *				+-+-+---------------------+-+-----------+---------------------+
 *
 *				The "n" bit is set for node task but clear for host task.
 *
 *			flags:	exec options;
 *			name:	executable to be loaded;
 *			argv:	command line argument for executable
 *			count:	number of tasks to be created;
 *			tids:	array to store new task IDs;
 *			ptid:	parent task ID.
 *
 *			mpp_new(int count, int ptid):		
 *				Allocate a set of nodes. (called by mpp_load())
 *				count: number of nodes;  ptid: parent task ID.
 *
 *		void mpp_output():	
 *			Send all pending packets to nodes via native send. Node number
 *			and process type are extracted from task ID.
 *
 *		int mpp_mcast(int src, struct pkt *pp, int tids[], int ntask):
 *			Global send.
 *			src:	source task ID;
 *			pp:		packet;
 *			tids:	list of destination task IDs;
 *			ntask:	how many.
 *
 *		int mpp_probe():	
 *			Probe for pending packets from nodes (non-blocking). Returns
 *			1 if packets are detected, otherwise 0.
 *
 *		void mpp_input():	
 *			Receive pending packets (from nodes) via native recv.
 *
 *		struct task *mpp_find(int pid):
 *			Find a task in task table by its Unix pid.
 *
 *		void mpp_free(int tid):
 *			Remove node/process-type from active list.
 *			tid: task ID.
 *
$Log: pvmdmimd.c,v $
 * Revision 1.1  1993/08/30  23:35:09  manchek
 * Initial revision
 *
 */
/*
 * Jul 10 16:50:52 1993
 * 		changed mcast tids[] array from int to short.
 *
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/stat.h>
#include <errno.h>
#include <stdio.h>
#include <signal.h>
#ifdef  SYSVSTR
#include <string.h>
#else
#include <strings.h>
#endif

#include "pvm3.h"
#include "global.h"
#include "ddpro.h"
#include "tdpro.h"
#include "protoglarp.h"
#include "pvmalloc.h"
#include "mesg.h"
#include "pkt.h"
#include "task.h"
#include "listmac.h"
#include "pvmmimd.h"
#include "bfunc.h"

#ifndef min
/* smaller of a and b; being a macro, it may evaluate each argument twice */
#define min(a,b)    ((a)<(b)?(a):(b))
#endif

/* Global */

/* Defined in other daemon source files; only referenced here. */
extern int debugmask;			/* from pvmd.c; tested against PDM* bits before logging */
extern char **epaths;			/* from pvmd.c */
extern int myhostpart;			/* from pvmd.c; host part added into tids built here */
extern int myndf;				/* from pvmd.c */
extern int tidhmask;			/* from pvmd.c; mask compared against myhostpart */
extern struct task *locltasks;	/* from task.c */
extern int ourudpmtu;			/* from pvmd.c; bounds fragment size in pkt_to_task() */

/* Defined here, with external linkage. */
int tidtmask = TIDPTYPE;		/* mask for ptype field of tids */
int tidnmask = TIDNODE;			/* mask for node field of tids */

/* private */

static char rcsid[] = "$Id: pvmdmimd.c,v 1.1 1993/08/30 23:35:09 manchek Exp $";
/*
 * Head of a circular, doubly linked list (n_link / n_rlink) of node sets;
 * the list is empty when the head links to itself.  Set up in mpp_init().
 */
static struct nodeset *busynodes;	/* active nodes; ordered by proc type */
/* Messages are formatted into etext with sprintf() and passed to pvmlogerror(). */
static char etext[512];			/* scratch for error log */
/* tidtmask shifted down so its lowest set bit is bit 0; computed in mpp_init(). */
static int ptypemask;			/* mask; we use these bits of ptype in tids */


/*
 * mpp_init()
 *
 * Set up the bookkeeping for active node sets.
 *
 * Allocates the head of the busynodes list, zeroes it and links it to
 * itself in both directions (i.e. an empty circular list), then derives
 * ptypemask by shifting tidtmask right until its lowest set bit is bit 0.
 *
 * argc, argv: not referenced by this function.
 *
 * An allocation failure is handled the same way as in mpp_new(): log
 * the error and call pvmbailout().
 */
void
mpp_init(argc, argv)
	int *argc;
	char **argv;
{
	if (!(busynodes = TALLOC(1, struct nodeset, "nsets"))) {
		pvmlogerror("mpp_init() can't get memory\n");
		pvmbailout(0);
		return;		/* in case pvmbailout() returns: never touch a null head */
	}
	BZERO((char*)busynodes, sizeof(struct nodeset));
	busynodes->n_link = busynodes;
	busynodes->n_rlink = busynodes;

	/* ffs() is 1-based, hence the -1 */
	ptypemask = tidtmask >> (ffs(tidtmask) - 1);
}

/*
 * mpp_new()
 *
 * Create a node set for `count' nodes and link it into the busynodes list.
 *
 * The set gets the lowest process type, counting up from 0, that no set
 * currently on the list is using.  These bits go into the type field of
 * the tids, so they must be unique.  At most NUMPTYPE - 1 values are tried.
 *
 * count: number of nodes requested
 * ptid:  parent's tid, stored in n_ptid
 *
 * Returns the new set, with n_size and n_alive both set to count.
 * If memory cannot be obtained the error is logged and pvmbailout()
 * is called.
 *
 * NOTE(review): when every candidate ptype is in use, the problem is
 * logged but a set is still created, using the next value without any
 * uniqueness check, and is linked in after the last set examined.
 */
struct nodeset *
mpp_new(count, ptid)
	int count;		/* number of nodes requested */
	int ptid;		/* parent's tid */
{
	struct nodeset *cur;
	struct nodeset *fresh;
	int firstnode = 0;		/* only reported in the debug message */
	int ptype = 0;
	int unused = 0;			/* set once a free ptype has been found */

	fresh = TALLOC(1, struct nodeset, "nsets");
	if (!fresh) {
		pvmlogerror("nodes_new() can't get memory\n");
		pvmbailout(0);
	}
	BZERO((char*)fresh, sizeof(struct nodeset));

	while (ptype + 1 < NUMPTYPE) {
		/* walk the list looking for a set that already owns this ptype */
		cur = busynodes->n_link;
		while (cur != busynodes && cur->n_ptype != ptype)
			cur = cur->n_link;
		if (cur == busynodes) {
			/* came back round to the head: nobody uses it */
			unused = 1;
			break;
		}
		ptype++;
	}
	if (!unused)
		pvmlogerror("mpp_new() out of descriptors: too many spawns\n");

	if (debugmask & PDMNODE) {
		sprintf(etext, "mpp_new() %d nodes %d ... ptype=%d ptid=%x\n",
			count, firstnode, ptype, ptid);
		pvmlogerror(etext);
	}

	fresh->n_ptype = ptype;
	fresh->n_ptid = ptid;
	fresh->n_size = count;
	fresh->n_alive = fresh->n_size;
	LISTPUTAFTER(cur, fresh, n_link, n_rlink);

	return fresh;
}

/*
 * mpp_free()
 *
 * Account for the exit of one node task and retire its node set once
 * the last member is gone.
 *
 * tid: task ID of the departing task.  Tids for which TIDISNODE() is
 *      false are ignored.
 *
 * The set is located by comparing the type field of the tid with the
 * low (ptypemask) bits of each set's n_ptype.  Its n_alive count is
 * decremented; when it reaches zero the task recorded in n_ptid (if it
 * can still be found) is flagged TF_CLOSE, and the set is unlinked from
 * busynodes and freed.  If no set matches, an error is logged.
 */
void
mpp_free(tid)
	int tid;
{
	struct nodeset *sp;
	int ptype;
	struct task *tp;

	if (!TIDISNODE(tid))
		return;

	ptype = TIDTOTYPE(tid);
	for (sp = busynodes->n_link; sp != busynodes; sp = sp->n_link) {
		if ((sp->n_ptype & ptypemask) != ptype)
			continue;

		if (debugmask & PDMNODE) {
			/* casts keep the arguments in step with the conversions */
			sprintf(etext, "mpp_free() t%x type=%ld alive=%d\n",
				tid, (long)sp->n_ptype, (int)sp->n_alive);
			pvmlogerror(etext);
		}
		if (--sp->n_alive == 0) {
			if ((tp = task_find(sp->n_ptid)) != 0)
				tp->t_flag |= TF_CLOSE;
			LISTDELETE(sp, n_link, n_rlink);
			PVM_FREE(sp);
		}
		return;
	}
	sprintf(etext, "mpp_free() t%x not active\n", tid);
	pvmlogerror(etext);
}

/* load executable onto the given set of nodes */
int
mpp_load(flags, name, argv, count, tids, ptid)
	int flags;              /* exec options */
	char *name;             /* executable */
	char **argv;            /* arg list (argv[-1] must be there) */
	int count;				/* how many */
	int tids[];				/* array to store new tids */
	int ptid;				/* parent task ID */
{
	int j;
	int ptypepart;
	struct task *tp;
	struct pkt *hosttxq;		/* out-going queue of pvmhost */
	int err = 0;
	struct nodeset *sp;
	char c[32];

	sp = mpp_new(count, ptid);
	ptypepart = (sp->n_ptype << (ffs(tidtmask) - 1)) | TIDONNODE;
	sprintf(c, "PVMENTASK=%d", ptypepart + count - 1);
	pvmputenv(c);

	if (err = forkexec(flags, name, argv, 0, (char **)0, &tp))
		goto done;
	tp->t_ptid = ptid;
	PVM_FREE(tp->t_a_out);
	sprintf(c, "%s.host", name);
	tp->t_a_out = STRALLOC(c);
	sp->n_ptid = tp->t_tid;			/* pvmhost's tid */
	hosttxq = tp->t_txq;

	if (debugmask & PDMTASK) {
		sprintf(etext, "mpp_load() %d type=%d ptid=%x t%x...\n",
			count, sp->n_ptype, ptid, myhostpart + ptypepart);
		pvmlogerror(etext);
	}

	/* create new task structs */

	for (j = 0; j < count; j++) {
		tp = task_new(myhostpart + ptypepart + j);
		tp->t_a_out = STRALLOC(name);
		tp->t_ptid = ptid;
		tp->t_flag |= TF_CONN;		/* no need for the auth crap */
		tids[j] = tp->t_tid;
		PVM_FREE(tp->t_txq);
		tp->t_txq = hosttxq;		/* node tasks share pvmhost's txq */
	}
	return 0;

done:
	for (j = 0; j < count; j++)
		tids[j] = err;
	return err;
}


/*
 * mpp_mcast()
 *
 * Multicast a packet to node tasks by way of their pvmhost task.
 *
 * The node set is selected from the type field of tids[0].  Of the
 * given tids, only those that carry the same type bits as tids[0] and
 * belong to this host are used; their node numbers are collected into
 * an array of shorts.  That array is queued as a separate packet
 * (FFSOM | FFEOM, source TIDPVMD) on the transmit queue of the task
 * named by the set's n_ptid, immediately followed by pp itself.
 *
 * src:   sender's tid (used in log messages only)
 * pp:    packet to send; it is queued only when at least one
 *        destination matched and the pvmhost task was found
 * tids:  target tasks
 * ntask: how many
 *
 * Returns 0 on success (including when there was nothing to send),
 * -1 if no node set matches, memory cannot be obtained, or the pvmhost
 * task cannot be found.
 */
int
mpp_mcast(src, pp, tids, ntask)
	int src;		/* sender's tid */
	struct pkt *pp;	/* packet to send */
	int tids[];		/* target tasks */
	int ntask;		/* how many */
{
	int i;
	int count = 0;
	struct nodeset *sp;
	int ptype;
	int ptypepart;
	short *dsts;
	struct pkt *pp2;
	struct task *tp;
	int len;
	int rc = 0;

	/* no destinations: do not look at tids[0] */
	if (ntask < 1)
		return 0;

	ptype = TIDTOTYPE(tids[0]);
	for (sp = busynodes->n_link; sp != busynodes; sp = sp->n_link)
		if ((sp->n_ptype & ptypemask) == ptype) {
			ptype = sp->n_ptype;
			break;
		}
	if (sp == busynodes) {
		sprintf(etext, "mpp_mcast() pkt from t%x scrapped (no ptype)\n", src);
		pvmlogerror(etext);
		return -1;
	}

	ptypepart = tids[0] & tidtmask;
	if (!(dsts = TALLOC(ntask, short, "mc_dsts"))) {
		pvmlogerror("mpp_mcast() can't get memory\n");
		return -1;
	}
	for (i = 0; i < ntask; i++) {
		if ((tids[i] & tidtmask) != ptypepart 
				|| (tids[i] & tidhmask) != myhostpart)
			continue;
		dsts[count++] = tids[i] & tidnmask;
	}

	if (count) {
		/* look the pvmhost up first so nothing is built just to be dropped */
		if (!(tp = task_find(sp->n_ptid))) {
			sprintf(etext, "mpp_mcast() pkt to pvmhost t%x scrapped\n",
				sp->n_ptid);
			pvmlogerror(etext);
			rc = -1;
		} else {
			len = count*sizeof(short);
			pp2 = pk_new(TDFRAGHDR + len);
			pp2->pk_src = TIDPVMD;
			pp2->pk_dst = sp->n_ptid;
			pp2->pk_dat += TDFRAGHDR;	/* leave room for the fragment header */
			BCOPY(dsts, pp2->pk_dat, len);
			pp2->pk_len = len;
			pp2->pk_flag = FFSOM | FFEOM;
			/* destination list goes first, the data packet right behind it */
			LISTPUTBEFORE(tp->t_txq, pp2, pk_link, pk_rlink);
			LISTPUTBEFORE(tp->t_txq, pp, pk_link, pk_rlink);
			if (debugmask & PDMMESSAGE) {
				sprintf(etext,
					"mpp_mcast() pkt from t%x len=%d to type=%d %d\n",
					src, pp->pk_len, ptype, count);
				pvmlogerror(etext);
			}
		}
	}

	/* the node numbers were copied into pp2 (if any); release on every path */
	PVM_FREE(dsts);
	return rc;
}


/*
 * mpp_kill()
 *
 * Deliver a signal to a task.
 *
 * tp:     the task
 * signum: signal number
 *
 * A task whose tid is not a node tid is signalled with kill() on its
 * pid.  For a node task only SIGKILL and SIGTERM have an effect: the
 * node is released with mpp_free() and the task struct is cleaned up
 * and freed.  Any other signal is logged and ignored.
 */
void
mpp_kill(tp, signum)
	struct task *tp;
	int signum;
{
	if (!TIDISNODE(tp->t_tid)) {
		(void)kill(tp->t_pid, signum);
		return;
	}

	switch (signum) {
	case SIGKILL:
	case SIGTERM:
		mpp_free(tp->t_tid);
		task_cleanup(tp);
		tp->t_txq = 0;		/* don't free pvmhost's txq */
		task_free(tp);
		break;

	default:
		sprintf(etext,"mpp_kill() signal %d to node t%x ignored\n",
			signum, tp->t_tid);
		pvmlogerror(etext);
		break;
	}
}


/*
 * pkt_to_task()
 *
 * Break a packet into smaller fragments and put them on a task's txq.
 *
 * tp: task whose transmit queue receives the fragments
 * pp: packet to split; it is released with pk_free() before returning
 *
 * Each fragment carries at most ourudpmtu - TDFRAGHDR bytes.  No data
 * is copied: every fragment points into pp's buffer and takes its own
 * reference on it with da_ref().  Of pp's flags only FFSOM and FFEOM
 * are carried over, FFSOM on the first fragment and FFEOM on the last.
 * A packet whose pk_len is not positive yields no fragments.
 *
 * Always returns 0.
 */
int
pkt_to_task(tp, pp)
	struct task *tp;
	struct pkt *pp;
{
	struct pkt *pp2;
	int lim = ourudpmtu - TDFRAGHDR;	/* most data bytes per fragment */
	char *cp = pp->pk_dat;				/* start of the next fragment's data */
	int togo;
	int n;
	int ff = pp->pk_flag & FFSOM;		/* flags for the next fragment */
	int fe = pp->pk_flag & FFEOM;

	for (togo = pp->pk_len; togo > 0; togo -= n) {
		n = min(togo, lim);
		pp2 = pk_new(0);
		pp2->pk_src = pp->pk_src;
		pp2->pk_dst = pp->pk_dst;
		if (n == togo)			/* this fragment takes the rest */
			ff |= fe;
		pp2->pk_flag = ff;
		ff = 0;					/* FFSOM belongs to the first fragment only */
		pp2->pk_buf = pp->pk_buf;
		pp2->pk_max = pp->pk_max;
		pp2->pk_dat = cp;
		pp2->pk_len = n;
		da_ref(pp->pk_buf);
		cp += n;
		LISTPUTBEFORE(tp->t_txq, pp2, pk_link, pk_rlink);
	}
	pk_free(pp);
	return 0;
}


/*
 * mpp_output()
 *
 * Node tasks have no sockets of their own; they share the packet queue
 * (txq) of their pvmhost, which forwards whatever it receives to the
 * appropriate node.  So for every active node set, look up the task
 * recorded in n_ptid and, if the first entry on its txq has a buffer
 * attached (i.e. something is waiting to go out), register that task's
 * socket through wrk_fds_add().
 *
 * dummy1, dummy2: not referenced.
 */

void
mpp_output(dummy1, dummy2)
	struct task *dummy1;
	struct pkt *dummy2;
{
	struct nodeset *set;
	struct task *host;

	set = busynodes->n_link;
	while (set != busynodes) {
		host = task_find(set->n_ptid);
		if (host && host->t_txq->pk_link->pk_buf)
			wrk_fds_add(host->t_sock, 2);
		set = set->n_link;
	}
}
