static const char file_id[] = "CGRecurScheduler.cc";

#ifdef __GNUG__
#pragma implementation
#endif

#include "CGRecurScheduler.h"
#include "Galaxy.h"
#include "DataStruct.h"
#include "CGDDFWormhole.h"
#include "distributions.h"
#include "CodeBlock.h"
#include <math.h>

/**************************************************************************
Version identification:
@(#)CGRecurScheduler.cc	1.5	11/25/92

Copyright (c) 1990, 1991, 1992 The Regents of the University of California.
All rights reserved.

Permission is hereby granted, without written agreement and without
license or royalty fees, to use, copy, modify, and distribute this
software and its documentation for any purpose, provided that the above
copyright notice and the following two paragraphs appear in all copies
of this software.

IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY 
FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 
ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 
THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 
SUCH DAMAGE.

THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
ENHANCEMENTS, OR MODIFICATIONS.
							COPYRIGHTENDKEY

 Programmer:  Soonhoi Ha

 Methods for CGRecurScheduler class.
**************************************************************************/

// Constructor.
// Allocates the four-entry table of wormhole profiles ("body") and then
// calls createSchedule().  The four entries are cleared to null so that
// a slot which closerExamine() never fills is a well-defined null
// pointer rather than an indeterminate value.
CGRecurScheduler :: CGRecurScheduler() {
	LOG_NEW; body = new Profile* [4];
	for (int i = 0; i < 4; i++)
		body[i] = 0;
	createSchedule();
}

// Destructor.
// "body" is allocated with the array form of new in the constructor,
// so it has to be released with the array form of delete.  Only the
// table itself is released here, not the profiles it points to.
CGRecurScheduler :: ~CGRecurScheduler() {
	LOG_DEL; delete [] body;
	body = 0;
}

// Read the statistics of the recursion depth from the target "tg".
// Returns FALSE if the distribution type cannot be set; otherwise
// returns the result of reading the distribution parameters.
int CGRecurScheduler :: getStatistics(Target* tg) {

	// the type of the distribution must be known before its
	// parameters can be read.
	if (dist.setType(tg)) {
		return (dist.myParam()->setParams(tg));
	}
	return FALSE;
}

int wrongTopology() {
	Error::abortRun("wrong Topology for recursion construct");
	return FALSE;
}

// Local to this file.  Cleared by examine() and set by downLoadCode()
// once examineProfile() has been run, so that the profile is examined
// only on the first downLoadCode() call after an examine().
static int downLoaded;

// First-pass check of the galaxy.
// Resets the "downLoaded" flag, clears the Recur schedules and checks
// the topology.  Aborts the run and returns FALSE if the topology
// check fails; returns TRUE otherwise.
int CGRecurScheduler :: examine(Galaxy& galaxy) {
	downLoaded = FALSE;
	Recur::clearSchedules();

	if (Recur::checkTopology(galaxy))
		return TRUE;

	Error::abortRun(galaxy, " has a wrong topology for Recur construct");
	return FALSE;
}


// Walk the three "schedules" lists and pick out the wormholes that
// make up the recursion construct.
//   schedules[0] : scanned from the front up to the first non-wormhole
//                  star.  The wormhole found is kept as wormA / body[0].
//   schedules[1] : must hold exactly one element, kept as wormB / body[1].
//   schedules[2] : non-wormhole stars are counted (the count becomes
//                  numSelf).  A wormhole seen before the first
//                  non-wormhole star is kept as wormC / body[2]; one
//                  seen after it is kept as wormD / body[3].
// pathLeng accumulates the total work of the wormholes found in
// schedules[0] and schedules[2] (wormB is not included).
// Returns TRUE on success, or the result of wrongTopology() (FALSE,
// after aborting the run) when the lists do not have the expected shape.
// NOTE(review): a body[] slot is left untouched when the corresponding
// wormhole is not present in its list - confirm that callers cannot
// reach setProfile() in that case.
int CGRecurScheduler :: closerExamine() {

	int num = 0;
	pathLeng = 0;
	CGDDFWormhole* worm;
	CGDDFStar* wormStar;

	// check up the "schedules" list one by one.
	ListIter top(schedules[0]);
	Star* s;
	while ((s = (Star*) top++) != 0) {
		if (s->isItWormhole()) {
			num++;
			// a second wormhole at the head of the list is an error.
			if (num > 1) {
				return wrongTopology();
			} else {
			   	wormStar = (CGDDFStar*) s;
			   	wormA = wormStar->myWormhole();
				body[0] = wormA->myProfile();
				pathLeng += wormA->getTotalWork();
			}
		} else break;	// stop at the first non-wormhole star.
	}
			

	// non-recursion arc: check whether there is only one wormhole or more.
	if (schedules[1].size() != 1) {
		return wrongTopology();
	} else {
		// NOTE(review): the single element is cast without an
		// isItWormhole() check - presumably guaranteed by
		// Recur::checkTopology(); confirm.
		wormStar = (CGDDFStar*) schedules[1].head();
		wormB = wormStar->myWormhole();
		body[1] = wormB->myProfile();
	}

	// recursion arc
	// "num" counts consecutive wormholes (reset at every non-wormhole
	// star); "temp" counts the non-wormhole stars seen so far.
	num = 0;
	int temp = 0;
	ListIter recur(schedules[2]);
	while ((s = (Star*) recur++) != 0) {
		if (s->isItWormhole()) {
			num++;
			// two wormholes in a row are an error.
			if (num > 1) {
				return wrongTopology();
			} else {
			   	wormStar = (CGDDFStar*) s;
			   	worm = wormStar->myWormhole();
				pathLeng += worm->getTotalWork();
				if (!temp) {
					// before the first non-wormhole star.
					body[2] = worm->myProfile();
					wormC = worm;
				} else {
					// after at least one non-wormhole star.
					body[3] = worm->myProfile();
					wormD = worm;
				}
			}
		} else {
			num = 0;
			temp++;
		}
	}

	// number of non-wormhole stars on the recursion arc.
	numSelf = temp;
	return TRUE;
}

	///////////////////////
	//  assumeExecTime
	///////////////////////

// Estimate the execution time of the construct from the assumed value
// of the distribution and the path length gathered by closerExamine().
// For numSelf <= 1 the assumed value itself is used as the multiplier;
// otherwise the multiplier is (numSelf^avg - 1) / (numSelf - 1).
int CGRecurScheduler :: assumeExecTime() {

	// assumed depth of recursion, as given by the distribution
	double avg = dist.myParam()->assumedValue();

	double depth = avg;
	if (numSelf > 1) {
		depth = pow(numSelf, avg) - 1;
		depth /= (numSelf - 1);
	}

	return int(depth * double(pathLeng));
}

// local to this file
// These are shared between setProfile(), addControlCodes(), the cost
// functions and adjustX(); bestK is also rewritten by examineProfile().
static int tau;			// sum of the makespans of bodies 0, 2 and 3
				// (set in setProfile).
static int tauO;		// sum of the makespans of bodies 0 and 1
				// (set in setProfile).
static int bestK;		// level of parallelism.
static int bestX;		// assumed depth of recursion.
static int satisfy;		// flag to be set if bestX >= bestK after
				// the normal calculation.

	///////////////////////
	//  setProfile
	///////////////////////

// Fill in "prof" for the case where each wormhole body uses "num"
// processors.
//   num     - number of processors per body; entry num-1 of each
//             body[] profile array is used.
//   resWork - passed through to calcCost().
//   prof    - profile that receives the start/finish times, the
//             effective number of processors and the total cost.
// Returns FALSE when bodies 0, 2 and 3 all report fewer than "num"
// effective processors; returns TRUE otherwise.
// Side effects: sets the file statics tau, tauO, bestK, bestX and
// satisfy.
// NOTE(review): all four body[] entries are dereferenced here
// unconditionally - confirm closerExamine() always fills them.
int CGRecurScheduler :: setProfile(int num, int resWork,
			Profile* prof) {

	// compute the optimal number of assigned processors.
	Profile& pa = body[0][num - 1];
	Profile& pb = body[1][num - 1];
	Profile& pc = body[2][num - 1];
	Profile& pd = body[3][num - 1];
	
	// check the effective number of processors
	if (pa.getEffP() < num && pc.getEffP() < num &&
		pd.getEffP() < num) return FALSE;

	// calculate best K value: the integer part of
	// log(numProcs/num) taken to the base numSelf.
	if (numSelf > 1) {
		double temp1 = double(numProcs) / double(num);
		double temp2 = log(temp1) / log(numSelf);
		bestK = int(temp2);
	} else
		bestK = 0;
		
	// calculate tau
	tau = pa.getMakespan() + pc.getMakespan() + pd.getMakespan();
	int preTau = pa.getMakespan() + pc.getMakespan();
	int postTau = pd.getMakespan();
	tauO = pa.getMakespan() + pb.getMakespan();

	// compute the profile
	// If the bestX is smaller than bestK, reduce bestK and repeat.
	// calcCost() sets bestX and may set "satisfy"; the (bestK, bestX)
	// pair with the lowest cost seen is remembered in tempK / saveX.
	double totalCost;
	satisfy = FALSE;
	int tempK = bestK;
	totalCost = calcCost(num, resWork);
	int saveX = bestX;
	while ((!satisfy) && bestK > 0) {
		bestK--;
		double tempCost = calcCost(num, resWork);
		if (tempCost < totalCost) {
			totalCost = tempCost;
			tempK = bestK;
			saveX = bestX;
		}
	} 
	bestK = tempK;
	bestX = saveX;

	// compute the makespan
	int span = 0;
	if (numSelf > 1) {
		float tmp1 = pow(numSelf,bestX-bestK);
		float tmp2 = (tmp1 - 1) / float(numSelf-1);
		span += tau * (int(tmp2) + bestK) + tauO * int(tmp1);
	} else {
		span += tau * bestX + tauO;
	}
	
	// record the profile: shrewd index manipulation.
	// Level i covers processor groups [numSelf^(i-1), numSelf^i)
	// (group 0 for i == 0); each group owns "num" consecutive
	// entries of the profile.
	int m = int(pow(numSelf, bestK));
	int stop = 0;
	for (int i = 0; i <= bestK; i++) {

		// compute the offset.
		int move = i;
		int start = stop;
		stop = int(pow(numSelf, i));
		for (int j = start; j < stop; j++) {

			// record the profile: groups at level i start
			// i*preTau later and finish i*postTau earlier.
			for (int k = 0; k < num; k++) {
				int index = k + j*num;
				prof->setStartTime(index, move*preTau);
				prof->setFinishTime(index,span - move*postTau);
			}
		}
	}
	
	// add control and synchronization code to the profile
	totalCost += addControlCodes(num, prof);

	prof->setEffP(m*num);
	prof->setTotalCost(totalCost);
	prof->summary();
	return TRUE;
}
	
        ///////////////////////
        //  addControlCodes
        ///////////////////////

// Stretch the profile to make room for control and synchronization
// code.  Every entry of a processor group at level i (0 <= i <= bestK)
// has its start time moved by i control times and its finish time
// moved by (control + sync) * bestK minus i sync times.
// Returns the sum, over all touched entries, of the amount by which
// the entry grew (finish shift minus start shift).
int CGRecurScheduler :: addControlCodes(int num, Profile* prof) {

	const int ctrl = myControlTime();
	const int sync = mySyncTime();
	const int fullShift = (ctrl + sync) * bestK;
	int total = 0;

	int groupEnd = 0;
	for (int level = 0; level <= bestK; level++) {

		const int frontShift = level * ctrl;
		const int backShift = fullShift - level * sync;

		// groups [groupBegin, groupEnd) belong to this level;
		// each group owns "num" consecutive profile entries.
		const int groupBegin = groupEnd;
		groupEnd = int(pow(numSelf, level));

		for (int index = groupBegin * num;
		     index < groupEnd * num; index++) {
			total += backShift - frontShift;
			prof->setStartTime(index,
			  prof->getStartTime(index) + frontShift);
			prof->setFinishTime(index,
			  prof->getFinishTime(index) + backShift);
		}
	}
	return total;
}

	///////////////////////
	//  calcCost
	///////////////////////

// Dispatch to the cost function that matches the type of the
// distribution.  The selected function also sets bestX.
// Returns FALSE (as a double) for an unrecognized distribution type.
double CGRecurScheduler :: calcCost(int num, int resWork) {

	double cost = FALSE;

	switch (dist.readType()) {
	case GEOMETRIC:
		cost = costInGeometric(num, resWork);
		break;
	case UNIFORM:
		cost = costInUniform(num, resWork);
		break;
	case GENERAL:
		cost = costInGeneral(num, resWork);
		break;
	default:
		break;
	}
	return cost;
}
		
// Geometric distribution
// Cost for a geometric distribution of the recursion depth.
//   num     - number of processors per body.
//   resWork - passed through to adjustX().
// Sets bestX (raised to bestK if it comes out smaller) and sets
// "satisfy" when bestX >= bestK without that correction.
// Aborts the run and returns 0 when p * numSelf >= 1.
// Returns the total cost for the current bestK otherwise.
double CGRecurScheduler :: costInGeometric(int num, int resWork) {

	// calculate x
	double procRatio = double(num) / double(numProcs);
	DistGeometric* geo = (DistGeometric*) dist.myParam();
	double p = geo->getP();
	int tmin = geo->getMin();

	// value checking: the cost formulas below divide by
	// (1 - p*numSelf).
	if (p * numSelf >= 1) {
		Error::abortRun("infinite time required for recursion");
		return 0;
	}

	// first guess of x (relative to tmin), then let adjustX()
	// reduce it if needed.
	double z = pow(numSelf,bestK);
	double ratio = log(procRatio * z) / log(p);
	bestX = adjustX(tmin, int(ratio), resWork, num);
	if (bestX < bestK) {
		bestX = bestK;
	} else {
		satisfy = TRUE;
	}
	
	// calculate total cost due to the non-deterministic part.
	// t1 is the term scaled by "num", t2 the term scaled by
	// "numProcs".
	double total;
	if (numSelf > 1) {
		double t11 = pow(numSelf, bestX);
		double t12 = (t11 - 1) / (numSelf - 1);
		double t1  = num*(tau*t12 + tauO*t11);
		double t21 = pow(p, bestX - tmin + 1)/(1 - p*numSelf);
		double t22 = pow(numSelf, bestX-bestK);
		double t23 = tau + tauO * (numSelf - 1);
		double t2  = numProcs * t23 * t22 * t21;
		total = t1 + t2;
	} else {
	        total = num * (tau * bestX + tauO) + numProcs*tau*
			pow(p, bestX - tmin + 1) / (1 - p);
	}
	return total;
}

// Uniform distribution
// Cost for a uniform distribution of the recursion depth over
// [getMin(), getMax()] of the distribution parameters.
//   num     - number of processors per body.
//   resWork - passed through to adjustX().
// Sets bestX (raised to bestK if it comes out smaller) and sets
// "satisfy" when bestX >= bestK without that correction.
// Returns the total cost for the current bestK.
double CGRecurScheduler :: costInUniform(int num, int resWork) {

	// calculate x
	double procRatio = double(num) / double(numProcs);
	DistUniform* uni = (DistUniform*) dist.myParam();
	int tmax = uni->getMax();
	int tmin = uni->getMin();
	double p = tmax - tmin + 1;	// number of possible depths
	double tsum = procRatio * p * pow(numSelf, bestK);
	int tx = tmax - int(floor(tsum)) - tmin;
	bestX = adjustX(tmin, tx, resWork, num);
	if (bestX < bestK) {
		bestX = bestK;
	} else {
		satisfy = TRUE;
	}
	
	// calculate total cost
	double total;
	// The branching formulas divide by (numSelf - 1), so they are
	// valid only for numSelf > 1, as in the other cost functions.
	if (numSelf > 1) {
		double dk = double (numSelf);
		double t11 = pow(numSelf, bestX);
		double t12 = (t11 - 1) / (dk - 1);
		// t12 counts recursion bodies and t11 counts
		// non-recursion bodies, so each is weighted by its time.
		double t1 = num*(tau*t12 + tauO * t11);
		double t21 = (dk-pow(numSelf,tmax-bestX+1))/(1 - dk)
			    - tmax + bestX;
		double t22 = pow(numSelf, bestX-bestK)/(dk - 1);
		double t23 = tau + tauO * (dk - 1);
		double t2  = numProcs * t23 * t22 * t21 / p;
		total = t1 + t2;
	} else {
		total = num * (tau * bestX + tauO) +
			tau*numProcs*(tmax - bestX)*(tmax - bestX + 1)/2/p;
	}

	return total;
}

// adjust "x" - if too much idle time, reduce "x".
// Turn the first guess "x" (relative to "min") into the depth to use.
//   min     - smallest depth allowed; added to "x" to form the guess.
//   x       - first guess, relative to "min".
//   resWork - work compared against the processors not used by the
//             construct.
//   num     - number of processors per body.
// When every processor is used by the construct (numProcs equals
// num * numSelf^bestK) the guess is returned unchanged.  Otherwise the
// guess is reduced if its span, scaled by tau, exceeds resWork spread
// over the remaining processors; the result is never below "min".
int CGRecurScheduler :: adjustX(int min, int x, int resWork, int num) {

	const int guess = x + min;

	// processors taken by the construct at this level of parallelism.
	double pk = pow(numSelf, bestK);
	const int used = num * int(pk);
	if (numProcs == used) return guess;

	// resWork spread over the processors that are left.
	const double make = double (resWork) / double (numProcs - used);

	// makespan of the guess, in units of tau.
	double span = guess;
	if (numSelf > 1) {
		span = (pow(numSelf,guess-bestK+1)-numSelf)/(numSelf-1)+bestK;
	}

	int newX = guess;
	if ( (tau*int(span)) > int(make)) {
		// too long: invert the span formula to get a smaller depth.
		double y = make / double(tau);
		if (numSelf > 1) {
			y = (y - bestK) * (numSelf - 1) + numSelf;
			y = log(y) / log(numSelf) + bestK;
		}
		newX = int(y);
	}

	return (newX < min) ? min : newX;
}
		

// General distribution
// Cost for a general (table-driven) distribution of the recursion
// depth.
//   num     - number of processors per body.
//   resWork - compared against the processors not used by the
//             construct, as in adjustX().
// Sets bestX (raised to bestK if it comes out smaller) and sets
// "satisfy" when bestX >= bestK without that correction.
// Returns the total cost for the current bestK.
// NOTE(review): the scans below appear to assume the table is ordered
// by decreasing index - confirm against DistGeneral.
double CGRecurScheduler :: costInGeneral(int num, int resWork) {

	// calculate x
	double procRatio = double(num) / double(numProcs);
	DistGeneral* geo = (DistGeneral*) dist.myParam();
	double ratio = procRatio * pow(numSelf, bestK);
	double psum = 0;
	int i = 0;
	// scan the table to get the first guess of "x"
	// NOTE(review): there is no bound on "i" here; the loop relies
	// on the table values summing to at least "ratio" - confirm.
	while (psum < ratio) {
		psum += geo->getTable(i).value;
		i++;
	}
	// "j" is the last entry that was summed; the guess is the index
	// of the entry after it.
	// NOTE(review): getTable(i) is one past the last summed entry -
	// confirm it always exists.
	int j = i - 1;
	int tempx = geo->getTable(i).index;

	// adjust "x" if too much idle time.
	double span;
	if (numSelf > 1) {
		span = (pow(numSelf,tempx-bestK+1)-numSelf)/(numSelf-1)+bestK;
	} else {
		span = tempx;
	}

	// compare with remaining work, the processor-time space in the
	// non-iteration processors (only when some processors are left).
	double pk = pow(numSelf, bestK);
	if (numProcs > num * int(pk)) {
		double make = double(resWork)/double(numProcs-num*int(pk));
		// compare with remaining work, the processor-time space in 
		// the non-iteration processors.
		if ( (tau*int(span)) > int(make)) {
			// adjust "x" - reduce it.
			double y = make / double(tau);
			if (numSelf > 1) {
				y = (y - bestK) * (numSelf - 1) + numSelf;
				y = log(y) / log(numSelf) + bestK;
			}
			tempx = int(y);
			// recompute "j" as the last entry whose index
			// is greater than the reduced "x".
			j = 0;
			while (geo->getTable(j).index > tempx)
				j++;
			j--;
		}
	}
	bestX = tempx;
	if (bestX < bestK) {
		bestX = bestK;
	} else {
		satisfy = TRUE;
	}

	// calculate total cost due to the non-deterministic part.
	double total;
	if (numSelf > 1) {
		double t1 = pow(numSelf,bestX);
		total = num * (tau * (t1 - 1)/(numSelf - 1) + tauO * t1);
	} else {
		total = num * (tau * bestX + tauO);
	}

	// contribution of the table entries 0..j, weighted by their
	// table values.
	double sub = 0;
	for (i = 0; i <= j; i++) {
		if (numSelf > 1) {
			double nu = pow(numSelf,geo->getTable(i).index-bestK)
				- pow(numSelf, bestX-bestK);
			sub += geo->getTable(i).value * nu / (numSelf - 1);
		} else {
			sub += geo->getTable(i).value *
				(geo->getTable(i).index - bestX);
		}
	}
	total += sub*numProcs*(tau + tauO * (numSelf - 1));

	return total;
}

// Local to this file.  Set by examineProfile(); downLoadCode() uses it
// to split a processor index into an index within a group
// (ix % assignNum) and a group number (ix / assignNum).
static int assignNum;
// Recovers "assignNum" and "bestK" from a finished profile.
void examineProfile(int, Profile*);

        ///////////////////////
        //  download the code
        ///////////////////////

// Marker texts that downLoadCode() adds around and between the
// sections of the generated code.
static CodeBlock Start("\n .... start of (Recur) ....\n");
static CodeBlock End("\n .... end of (Recur) ....\n");
static CodeBlock Preface("<PREFACE>\n");
static CodeBlock Recurrence("<RECURRENCE>\n");
static CodeBlock Appendix("<APPENDIX>\n");

void CGRecurScheduler :: downLoadCode(int ix, Target* tg, Profile* prof) {
	
	if (!downLoaded) {
		// examine the profile to figure out "assignNum" and  "bestK".
		examineProfile(numSelf, prof);
		downLoaded = TRUE;
	}

	/// processor index
	int pId = ix % assignNum;

        addCode(tg, Start.getText());

	int testFlag = (numSelf > 1) && (ix < pow(numSelf, bestK-1));
	if (testFlag) {
		addCode(tg, Preface.getText());
		// wormhole a,c
		wormA->downLoadCode(pId);
		wormC->downLoadCode(pId);

		// add control code
		int temp = ix / assignNum;
		int pgId;		// processor group index
		if (temp == 0) pgId = 0;
		else pgId = int (log(temp) / log(numSelf)) + 1;
		addCode(tg, controlCode(pgId));
	}

	addCode(tg, Recurrence.getText());
	wormA->downLoadCode(pId);
	addCode(tg, commonControlCode());
	wormB->downLoadCode(pId);
	wormC->downLoadCode(pId);
	addCode(tg, commonSyncCode());
	wormD->downLoadCode(pId);

	if (testFlag) {
		addCode(tg, Appendix.getText());
		// wormhole d
		wormD->downLoadCode(pId);

		// add control code
		int temp = ix / assignNum;
		int pgId;		// processor group index
		if (temp == 0) pgId = 0;
		else pgId = int (log(temp) / log(numSelf)) + 1;
		addCode(tg, syncCode(pgId));
	}

        addCode(tg, End.getText());
}

// Recover "assignNum" and "bestK" from a finished profile.
//   n    - value of numSelf for the construct.
//   prof - profile to examine.
// assignNum becomes the length of the leading run of profile entries
// whose finish times do not decrease.  When n > 1, bestK becomes the
// integer part of log(effP / assignNum) taken to the base n; for
// n <= 1 bestK is left as it is.
void examineProfile(int n, Profile* prof) {
	const int effP = prof->getEffP();
	int count = 0;
	int latest = 0;

	for (; count < effP && latest <= prof->getFinishTime(count);
	     count++) {
		latest = prof->getFinishTime(count);
	}

	assignNum = count;
	float groups = float(effP) / float(assignNum);
	if (n > 1) {
		bestK = int( log(groups) / log(float(n)));
	}
}
	
// virtual methods --- default

// Default texts returned by controlCode(), commonControlCode(),
// syncCode() and commonSyncCode() below.
static CodeBlock Control("-- control code of processor group\n ");
static CodeBlock CommonControl("-- common control code\n");
static CodeBlock Sync("-- sync code of processor group\n ");
static CodeBlock CommonSync("-- common sync code\n");

// Default time charged for control code; used by addControlCodes().
int CGRecurScheduler :: myControlTime() {
	return 2;
}
// Default time charged for synchronization code; used by
// addControlCodes().
int CGRecurScheduler :: mySyncTime() {
	return 2;
}

// Default control code of a processor group.  The group index is
// ignored; the same text is returned for every group.
const char* CGRecurScheduler :: controlCode(int) {
	const char* text = Control.getText();
	return text;
}

// Default control code that downLoadCode() adds in the recurrence
// section.
const char* CGRecurScheduler :: commonControlCode() {
	const char* text = CommonControl.getText();
	return text;
}

// Default synchronization code of a processor group.  The group index
// is ignored; the same text is returned for every group.
const char* CGRecurScheduler :: syncCode(int) {
	const char* text = Sync.getText();
	return text;
}

// Default synchronization code that downLoadCode() adds in the
// recurrence section.
const char* CGRecurScheduler :: commonSyncCode() {
	const char* text = CommonSync.getText();
	return text;
}

