/* dap6.c -- categorical models */

/*  Copyright (C) 2001  Susan Bassein
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "dap_make.h"
#include "externs.h"

extern dataobs dap_obs[];
extern FILE *dap_lst;
extern FILE *dap_err;
extern char *dap_dapname;

static double *allparam;
static char *sel;
static char *selred;
static double (*ex)();
static double **tab;
static int nc;

static double loglike(double *selparam)
{
int s;
int p;
double ll;
int c;
double expected;

for (s = 0, p = 0; sel[s]; s++)
	{
	if (sel[s] != '0')
		allparam[s] = selparam[p++];
	}
for (ll = 0.0, c = 0; c < nc; c++)
	{
	expected = ex(allparam, tab[c] + 1);	/* skip cell count */
	ll += tab[c][0] * log(expected) - expected;
	}
return ll;
}

static void categ1(double **tab, int ncell, int *varv, int nvar,
			double (*expect)(), double *param, char *select)
{
int sparam;	/* number of selected parameters */
int sparamr;	/* number of selected parameters in reduced model */
int nparam;	/* number of parameters */
int c;
double *x;
double *xch;
int p;
double step, tol;
double tmp;
int v;
double likerat;
double likered;
double pearson;
int typen;
double *infomem;
double **info;
int p1, p2;
double lpp, lpm, lmp, lmm;
double h, halfh;
int s;

sparamr = 0;
likered = 0.0;
if ((typen = dap_varnum("_type_")) < 0)
	{
	fputs("(categ1) missing _type_ variable\n", dap_err);
	exit(1);
	}
nc = ncell;
ex = expect;
x = (double *) dap_malloc(sizeof(double) * strlen(sel), "");
xch = (double *) dap_malloc(sizeof(double) * strlen(sel), "");
if (selred)
	{
	sel = selred;
	for (nparam = 0, sparamr = 0; sel[nparam]; nparam++)
		{
		if (sel[nparam] != '0')
			{
			allparam[nparam] = param[nparam];
			sparamr++;
			}
		else
			allparam[nparam] = 0.0;
		}
	for (p = 0, nparam = 0; sel[nparam]; nparam++)
		{
		if (sel[nparam] != '0')
			x[p++] = param[nparam];
		}
	for (step = 0.0, p = 0; p < sparamr; p++)
		{
		tmp = x[p];
		step += tmp * tmp;
		}
	if (step > 0.0)
		step = 0.1 * sqrt(step);
	else
		step = 0.01;
	tol = 0.0000005 * step;
	dap_maximize(&loglike, sparamr, x, step, tol, "");
	for (c = 0, likerat = 0.0; c < ncell; c++)
		{
		likered += (tab[c][0] + dap_addtozero) *
			log((tab[c][0] + dap_addtozero) / expect(allparam, tab[c] + 1));
		output();
		}
	likered *= 2.0;
	}
sel = select;
for (nparam = 0, sparam = 0; sel[nparam]; nparam++)
	{
	if (sel[nparam] != '0')
		{
		allparam[nparam] = param[nparam];
		sparam++;
		}
	else
		allparam[nparam] = 0.0;
	}
for (p = 0, nparam = 0; sel[nparam]; nparam++)
	{
	if (sel[nparam] != '0')
		x[p++] = param[nparam];
	}
for (step = 0.0, p = 0; p < sparam; p++)
	{
	tmp = x[p];
	step += tmp * tmp;
	}
if (step > 0.0)
	step = 0.1 * sqrt(step);
else
	step = 0.01;
tol = 0.0000005 * step;
dap_maximize(&loglike, sparam, x, step, tol, "");
for (c = 0, likerat = 0.0, pearson = 0.0; c < ncell; c++)
	{
	for (v = 0; v < nvar; v++)
		dap_obs[0].do_dbl[varv[v]] = tab[c][v];
	strcpy(dap_obs[0].do_str[typen], "OBS");
	output();
	strcpy(dap_obs[0].do_str[typen], "FIT");
	dap_obs[0].do_dbl[varv[0]] = expect(allparam, tab[c] + 1);
	likerat += (tab[c][0] + dap_addtozero) *
		log((tab[c][0] + dap_addtozero) / dap_obs[0].do_dbl[varv[0]]);
	tmp = dap_obs[0].do_dbl[varv[0]] - tab[c][0];
	pearson += tmp * tmp / dap_obs[0].do_dbl[varv[0]];
	output();
	}
likerat *= 2.0;
infomem = (double *) dap_malloc(sizeof(double) * sparam * sparam, "");
info = (double **) dap_malloc(sizeof(double *) * sparam, "");
for (p = 0; p < sparam; p++)
	info[p] = infomem + p * sparam;
h = 0.0001;
halfh = h / 2.0;
for (p1 = 0; p1 < sparam; p1++)
	{
	for (p = 0; p < sparam; p++)
		xch[p] = x[p];
	lpm = loglike(xch);
	xch[p1] += h;
	lpp = loglike(xch);
	xch[p1] = x[p1] - h;
	lmm = loglike(xch);
	info[p1][p1] = -(lpp - 2.0 * lpm + lmm) / (h * h);
	}
for (p1 = 0; p1 < sparam; p1++)
	for (p2 = 0; p2 < p1; p2++)
		{
		for (p = 0; p < sparam; p++)
			xch[p] = x[p];
		xch[p1] += halfh;
		xch[p2] += halfh;
		lpp = loglike(xch);
		xch[p1] = x[p1] - halfh;
		lmp = loglike(xch);
		xch[p2] = x[p2] - halfh;
		lmm = loglike(xch);
		xch[p1] = x[p1] + halfh;
		lpm = loglike(xch);
		info[p1][p2] = -(lpp - lpm - lmp + lmm) / (h * h);
		info[p2][p1] = info[p1][p2];
		}
if (!dap_invert(info, sparam))
	{
	fputs("(categ1) covariance matrix is singular\n", dap_err);
	exit(1);
	}
fputs("Maximum likelihood estimation\n\n", dap_lst);
fprintf(dap_lst, "Cell count: %s\n", dap_obs[0].do_nam[varv[0]]);
fputs("Class and aux variables:", dap_lst);
for (v = 1; v < nvar; v++)
	fprintf(dap_lst, " %s", dap_obs[0].do_nam[varv[v]]);
putc('\n', dap_lst);
fputs("\nStatistic              df      Prob\n", dap_lst);
fprintf(dap_lst, "G2[Model]   = %6.2f  %3d    %.4f\n",
	likerat, ncell - sparam,
	ceil(10000.0 * probchisq(likerat, ncell - sparam)) / 10000.0);
if (selred)
	{
	fprintf(dap_lst, "G2[Reduced] = %6.2f  %3d    %.4f\n",
		likered, ncell - sparamr,
		ceil(10000.0 * probchisq(likered, ncell - sparamr)) / 10000.0);
	fprintf(dap_lst, "G2[Diff]    = %6.2f  %3d    %.4f\n",
		likered - likerat, sparam - sparamr,
		ceil(10000.0 * probchisq(likered - likerat, sparam - sparamr)) / 10000.0);
	}
fprintf(dap_lst, "X2[Model]   = %6.2f  %3d    %.4f\n",
	pearson, ncell - sparam,
	ceil(10000.0 * probchisq(pearson, ncell - sparam)) / 10000.0);
putc('\n', dap_lst);
fputs("Parameters\n", dap_lst);
fputs("    Estimate         ASE\n", dap_lst);
for (p = 0; p < nparam; p++)
for (p = 0, s = 0; p < nparam; p++)
	{
	fprintf(dap_lst, "%12g ", allparam[p]);
	if (sel[p] != '0')
		{
		fprintf(dap_lst, "%12g ", sqrt(info[s][s]));
		s++;
		}
	putc('\n', dap_lst);
	}
dap_free(x);
dap_free(xch);
dap_free(infomem);
dap_free(info);
}

void categ(char *dataset, char *varlist, double (*expect)(),
			double *param, char *select, char *part)
{
int p;
char *catset;
int *varv;
int *partv;
int nvar;
int npart;
int more;
double *tabmem;
int v;
int ncell;
int s;

varv = (int *) dap_malloc(sizeof(int) * dap_maxvar, "dap_maxvar");
partv = (int *) dap_malloc(sizeof(int) * dap_maxvar, "dap_maxvar");
catset = dap_malloc(strlen(dataset) + 5, "");
strcpy(catset, dataset);
strcat(catset, ".cat");
inset(dataset);
outset(catset, "");
nvar = dap_list(varlist, varv, dap_maxvar);
npart = dap_list(part, partv, dap_maxvar);
tabmem = (double *) dap_malloc(sizeof(double) * nvar * dap_maxcell, "");
tab = (double **) dap_malloc (sizeof(double *) * dap_maxcell, "");
for (ncell = 0; ncell < dap_maxcell; ncell++)
	tab[ncell] = tabmem + ncell * nvar;
allparam = (double *) dap_malloc(sizeof(double) * strlen(select), "");
for (p = 0; p < strlen(select); p++)
	allparam[p] = param[p];
sel = select;
if (index(select, '?'))
	{
	selred = dap_malloc(strlen(select) + 1, "");
	for (s = 0; select[s]; s++)
		{
		if (select[s] == '?')
			selred[s] = '0';
		else
			selred[s] = select[s];
		}
	selred[s] = '\0';
	}
else
	selred = NULL;
for (ncell = 0, more = 1; more; )
	{
	more = step();
	if (dap_newpart(partv, npart))
		{
		dap_swap();
		dap_head(partv, npart);
		categ1(tab, ncell, varv, nvar, expect, param, select);
		dap_swap();
		ncell = 0;
		}
	for (v = 0; v < nvar; v++)
		tab[ncell][v] = dap_obs[0].do_dbl[varv[v]];
	ncell++;
	}
if (selred)
	{
	dap_free(selred);
	selred = NULL;
	}
dap_free(varv);
dap_free(partv);
dap_free(catset);
dap_free(tabmem);
dap_free(tab);
dap_free(allparam);
}
