/*
 * adjlog.c:   Compute a clock adjustment, given some synchronizing events.
*
 * Algorithm:
 *     Build a matrix of time events; solve it for the offset and skew for
 *     each clock.  For the first pass, this "matrix" will have just the 
 *     "synchronization" events.
 *
 * This version uses floating-point (IEEE-double) to do the computations,
 * since 53 bits is enough to hold most expected clock values
 *
 * This is the formula:
 * Processor 0 has the standard clock.
 * The time is formed as:
 * 
 * gtime = (ltime - ltime0) * (1 + dp) + gtime0
 *
 * where 1+dp is a clock skew (if the clocks all run at the exact same
 * rate, dp == 0).  The values that are computed are gtime0 and dp; we
 * define ltime0 as the time of the first synchronization observation.
 * To compute dp, we record the times at two points in the program
 * where the processors have been synchronized (note that synchronization
 * is not exact).  Let these two points be ls0 and ls1, with gs0 and gs1
 * the points for the global reference clock (processor 0).  Then we
 * have:
 * 
 *            gs1 - gs0   gs1 - ls1 + ls1 - (gs0 - ls0 + ls0)
 * (1 + dp) = --------- = -----------------------------------
 *            ls1 - ls0              ls1 - ls0
 *
 *                gs1 - ls1 - (gs0 - ls0)
 *          = 1 + -----------------------
 *                      ls1 - ls0
 * 
 * I'd really like to measure dp directly rather than by forming the
 * difference.  
 *
 * Now, to compute gtime0 for each processor, we can approximate it
 * by taking ltime0 (the time of the first sync).  However, this is
 * only approximate.  We can adjust this value by doing various 
 * pairwise exchanges.  More on this later (like when I implement it).
 * 
 */

#include <stdio.h>
#include <math.h>
#include "tools.h"

/* One round-trip timing exchange between two processors.  For now only
   a set of np-1 such exchanges is handled, each between processor i and
   i+1 (so processor 0 participates in only 1 of them). */
typedef struct {
    double a1;      /* originator: time the message was sent to the neighbor */
    double b1;      /* neighbor: time the message was received and sent back */
    double a2;      /* originator: time the reply was received */
    int    p0;      /* processors that participated in this time-exchange; */
    int    p1;      /*   BLOGComputeOffsets reads p1 as the partner index j */
    } OffsetEvents;

/* 
   Convert from local to global time:

       gtime = (time - mint) * sk + goff

   Input Parameters:
   time - local clock reading to convert
   mint - local time origin;   pass lt0[p] for processor p
   sk   - clock skew factor;   pass skew[p]
   goff - global time offset;  pass globaloffset[p]

   Returns the global time corresponding to the local reading.
 */
double BLOGGlobalTime( time, mint, sk, goff )
double time, mint, sk, goff;
{
double gtime;

/* Use the arguments themselves; skew and globaloffset name the caller's
   per-processor arrays and are not declared inside this routine. */
gtime = (time - mint) * sk + goff;
return gtime;
}

/*
    This routine takes offset events and solves for the offsets.  The
    approach is:

    Let the global time be given by (local_time - offset)*scale ,
    with a different offset and scale on each processor.  Each processor
    originates exactly one communication event (except processor 0),
    generating an a1 and a2 event.  A corresponding number of b1 events
    are generated, but note that one processor may have more than 1 b1
    event (if using Dunnigan's synchronization, there will be np-1 b1 events
    on processor 0, and none anywhere else).

    These events are:

   pi   a1 (send to nbr)                        (recv) a2
   pj                     (recv) b1 (send back)

    We base the analysis on the assumption that in the GLOBAL time
    representation, a2-a1 is twice the time to do a (send) and
    a (recv).  This is equivalent to assuming that global((a1+a2)/2) ==
    global(b1).  Then, with the unknowns the offsets (the scales
    are assumed known from the syncevent calculation), the matrix is

    1
    -s0 s1
       ....
       -sj ... si

    where si is the scale for the i'th processor (note s0 = 1).
    The right hand sides are (1/2)(a1(i)+a2(i)) *s(i) - b1(j)*s(j).
    Because of the triangular nature of the matrix, this reduces to

       o(i) = (a1(i)+a2(i))/2 - (s(j)/s(i)) * (b1(j)-o(j))

    Note that if s(i)==s(j) and b1 == (a1+a2)/2, this gives o(i)==o(j).

    This works with ANY triangular matrix; we can use a master-slave
    version (all exchange with processor 0), a log-tree version
    (everyone exchanges with binary tree parent), or a linear version
    (2p+1 exchanges with 2p).  Others are possible.    
 */
/*
   BLOGComputeOffsets - Fill in the per-processor skew and global offset.

   Input Parameters:
   np           - number of processors; skew, globaloffset, lt0 and lt1 are
                  each indexed 0..np-1
   offsetevents - timing exchanges; entries 1..np-1 are read (entry 0 is
                  never touched)
   lt0, lt1     - local times of the first and second synchronization on
                  each processor

   Output Parameters:
   skew         - skew[i] = (lt1[0]-lt0[0]) / (lt1[i]-lt0[i])
   globaloffset - lt0[i] by default; entries 1..np-1 are replaced by the
                  value derived from the offset events when the event count
                  is accepted

   Returns 1 if the offsets were refined from the offset events, 0 if only
   the defaults were stored (a message is written to stderr in that case).

   NOTE(review): noffsetevents is neither a parameter nor a local of this
   routine and is not declared anywhere in this file; presumably it is meant
   to be a file-scope count or an extra argument -- confirm, since the
   routine cannot compile without it.
   NOTE(review): the event count is required to be np-1, yet the loop below
   indexes offsetevents[1] .. offsetevents[np-1]; this looks like it expects
   an array of np entries indexed by originating processor -- confirm
   against the caller.
 */
int BLOGComputeOffsets( np, offsetevents, skew, globaloffset, lt0, lt1 )
int          np;
OffsetEvents *offsetevents;
double       *lt0, *lt1, *skew, *globaloffset;
{
int    i, j;
double d1, delta;

/* Set the defaults */
for (i=0; i<np; i++) {
    /* Skew is computed from the time differences between two syncs:
       processor 0's elapsed time divided by processor i's elapsed time.
       NOTE(review): nothing guards against lt1[i] == lt0[i], which makes
       the divisor zero. */
    skew[i]         = (lt1[0] - lt0[0]) / (lt1[i] - lt0[i]);
    /* Global offset is estimated as the first sync time */
    globaloffset[i] = lt0[i];  /*   - mintime ?? ; */
    }

/* If there aren't enough events, return; the defaults stored above are
   left in place and 0 tells the caller no refinement was done */
if (noffsetevents != np - 1) {
    if (noffsetevents != 0) 
	fprintf( stderr, 
	   "Incorrect number of offset events to compute clock offsets\n" );
    else
	fprintf( stderr, "No clock offset events\n" );
    return 0;
    }

/* Take globaloffset[0] from sync; every other processor's offset is
   derived from its exchange with partner j.  globaloffset[j] is read as it
   stands at that moment, so for j > i it is still the default lt0[j]
   rather than a refined value. */
for (i=1; i<np; i++) {
    /* o(i) = (a1(i)+a2(i))/2 - (s(j)/s(i)) * (b1(j)-o(j)) */
    j     = offsetevents[i].p1;
    /* Midpoint of the originator's send and receive times */
    d1    = (offsetevents[i].a2 + offsetevents[i].a1)/2;
    /* Partner's b1 time relative to its offset, rescaled from j's clock
       rate to i's */
    delta = (skew[j] / skew[i]) * (offsetevents[i].b1 - globaloffset[j] );

    globaloffset[i] = d1 - delta;
    }
return 1;
}

