/**
 * IMPLEMENTATION NOTES.
 * 
 * The sparse array implemented here is basically a hash table where the hash
 * is computed from the index associated with each stored element.
 * 
 * To evaluate the performance of the hash table, several indicators are
 * calculated.
 * 
 * The hash table contains N slots, each slot points to a
 * concatenated list of s(i) elements with i=1,...,N. The total number of
 * elements in the table is then
 * 
 *     sisum = sum s(i)  with  i=1,...,N
 * 
 * The max slot length is
 * 
 *     maxsi = max( s(i) with i=1,...,N )
 * 
 * The number of used slots is the number of slots containing at least one element:
 * 
 *     used = no. of slots for which s(i) >= 1
 * 
 * The average slot length calculated over the used slots is
 * 
 *     avg = sisum / used
 * 
 * The variance of the slot length calculated over the used slots is
 * 
 *     var = ( sum (s(i) - avg)^2 ) / used   i= over the s(i)>=1 only
 *         = ( sum s(i)^2 )/used - avg^2   i= over all the slots
 * 
 * Here we define the quality factor Q as the average number of comparisons
 * needed to find the elements of the hash table, assuming all the elements
 * to find are in the hash table and have the same probability. The number of
 * comparisons to find back the s(i) elements in the i-th slot of the hash table
 * is given by the well known Gauss formula
 * 
 *     s(i)*(s(i) + 1)/2
 * 
 * The total number of comparisons to find all the elements of the hash
 * table is the sum of this expression over all the entries of the hash table:
 * 
 *     totalcmp = sum s(i)*(s(i)+1)/2   for   i=1,...,N
 * 
 * Since sum s(i) is the total number of elements, the mean number of
 * comparisons is the ratio between the total number of comparisons and the
 * total number of elements:
 * 
 *     Q = totalcmp / sum s(i) = (sum s(i)^2 / sum s(i) + 1)/2
 * 
 * This number is then a good indicator of the hash table quality.
 * The minimum value is Q=1 when s(i) are all either 0 or 1.
 * The maximum value is Q=(sum s(i) + 1)/2 when all the elements fall into
 * a single slot of the hash table and the search degenerates into a sequential
 * search. A value of, say, Q=1.2 means that for a uniform distribution of
 * elements to search there will be an average of 1.2 comparisons to find the
 * given entry in the hash table.
 */

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>

#include "memory.h"

#define sparsearray_IMPORT
#include "sparsearray.h"
#undef sparsearray_IMPORT


/**
 * Node of a collision list: one stored element together with the index the
 * client associated with it.
 *
 * The struct tag is spelled without a leading underscore because identifiers
 * starting with an underscore followed by an uppercase letter are reserved.
 */
typedef struct Slot {
	/** Next slot of the same list, or NULL if this is the last one. */
	struct Slot * next;
	/** Client's provided index of the element. */
	int index;
	/** Client's provided element. */
	void * entry;
} Slot;

/**
 * State of a sparse array: a hash table of linked lists of slots, keyed by
 * the client's provided integer index.
 */
struct sparsearray_Type {
	
	/** If non-zero, the stored elements are never disposed by the sparse
	 * array, neither when an element is replaced nor when the array itself
	 * is released. */
	int keep_elements;
	
	/** Length of the hash table, that is the number of list heads in the
	 * slots array. */
	int size;
	
	/** Array of pointers to lists of slots; a NULL pointer marks an empty
	 * list. Each list of slots collects the elements whose hashes
	 * (calculated from the index) collide. */
	Slot ** slots;
};


/**
 * Destructor of the sparse array. Disposes every slot of every list, the
 * stored elements too unless keep_elements is set, and finally the table of
 * list heads. The sparsearray_Type structure itself is not disposed here.
 * @param p Sparse array to release; nothing is done if NULL.
 */
static void sparsearray_destruct(void *p)
{
	sparsearray_Type *array = (sparsearray_Type *) p;
	Slot *cur, *following;
	int bucket;

	if( array == NULL )
		return;

	/* Lists are visited from the last table entry down to the first one. */
	bucket = array->size;
	while( bucket-- > 0 ){
		for(cur = array->slots[bucket]; cur != NULL; cur = following){
			if( ! array->keep_elements )
				memory_dispose(cur->entry);
			/* Save the link before the slot itself is disposed. */
			following = cur->next;
			memory_dispose(cur);
		}
	}
	memory_dispose(array->slots);
}


/**
 * Creates a new, empty sparse array.
 * @param size Expected number of elements to store; only used to choose the
 * length of the hash table.
 * @param keep_elements If non-zero, the stored elements are never disposed
 * by the sparse array.
 * @return The new sparse array, with every list of slots empty.
 */
sparsearray_Type * sparsearray_new(int size, int keep_elements)
{
	sparsearray_Type *array;
	int capacity;
	size_t table_bytes;

	/*
	 * The table length is the smallest power of two minus 1 (and at least 3)
	 * not less than the expected number of elements to store. Experimentally
	 * this gave the smallest values of the mean number of comparisons (Q)
	 * for the purposes of this application.
	 */
	for(capacity = 3; capacity < size; capacity = (capacity << 1) + 1)
		;

	table_bytes = capacity * sizeof(Slot *);

	array = memory_allocate(sizeof(sparsearray_Type), sparsearray_destruct);
	array->keep_elements = keep_elements;
	array->size = capacity;
	array->slots = memory_allocate(table_bytes, NULL);
	/* Every list starts empty. */
	memset(array->slots, 0, table_bytes);
	return array;
}


/**
 * Calculates the index to the slots array given the index of the element.
 * Non-negative indices are taken as they are, negative indices are first
 * mapped to non-negative values (-1 to 0, -2 to 1, ...), then the result is
 * reduced modulo the length of the hash table.
 * @param sa Involved sparse array.
 * @param index Client's provided index of the element; any int value is
 * allowed, INT_MIN included.
 * @return Index to the slots array, in the range from 0 up to sa->size - 1.
 */
static int getIndexHash(sparsearray_Type *sa, int index)
{
	/*
	 * Written as -(index + 1) rather than -index - 1 because negating
	 * INT_MIN overflows; this form yields the same value for every other
	 * negative index and INT_MAX for INT_MIN.
	 */
	int folded = (index >= 0)? index : -(index + 1);

	return folded % sa->size;
}

/**
 * Looks for the slot holding the element at the given index by scanning the
 * list of slots that starts at the given entry of the hash table.
 * @param sa Involved sparse array.
 * @param index Client's provided index of the element.
 * @param hash Hash calculated from the index, that is the entry of the hash
 * table whose list has to be scanned.
 * @return Slot of the element at the given index, or NULL if there is no
 * such slot in the list.
 */
static Slot * getSlot(sparsearray_Type * sa, int index, int hash)
{
	Slot *candidate;

	for(candidate = sa->slots[hash]; candidate != NULL; candidate = candidate->next){
		if( candidate->index != index )
			continue;
		return candidate;
	}
	return NULL;
}


/**
 * Stores an element at the given index. If an element is already stored at
 * that index it is replaced, and the old element is disposed unless the
 * sparse array has keep_elements set. Storing again the very same pointer
 * at the same index leaves the sparse array unchanged.
 * @param sa Involved sparse array.
 * @param index Client's provided index of the element.
 * @param entry Element to store.
 */
void sparsearray_set(sparsearray_Type *sa, int index, void *entry)
{
	int hash;
	Slot *s;
	
	hash = getIndexHash(sa, index);
	s = getSlot(sa, index, hash);
	if( s != NULL ){
		/*
		 * The same pointer stored twice must not be disposed: the slot
		 * would otherwise keep referring to an element that has just been
		 * handed to memory_dispose().
		 */
		if( s->entry != entry ){
			if( ! sa->keep_elements )
				memory_dispose(s->entry);
			s->entry = entry;
		}
		return;
	}
	/* Index not stored yet: push a new slot on the head of its list. */
	s = memory_allocate(sizeof(Slot), NULL);
	s->next = sa->slots[hash];
	s->index = index;
	s->entry = entry;
	sa->slots[hash] = s;
}


/**
 * Retrieves the element stored at the given index.
 * @param sa Involved sparse array.
 * @param index Client's provided index of the element.
 * @return Element stored at that index, or NULL if no element is stored
 * there.
 */
void * sparsearray_get(sparsearray_Type * sa, int index)
{
	int hash = getIndexHash(sa, index);
	Slot *found = getSlot(sa, index, hash);

	return (found != NULL)? found->entry : NULL;
}


/**
 * Calculates the quality factor Q of the hash table, that is the mean number
 * of comparisons needed to find an element, assuming every stored element
 * is searched with the same probability:
 *
 *     Q = (sum s(i)^2 / sum s(i) + 1)/2
 *
 * where s(i) is the length of the i-th list of slots.
 * @param sa Involved sparse array.
 * @return Mean number of comparisons per search. An empty sparse array
 * gives 1.0, the same value of any table whose lists hold at most one
 * element, rather than the result of a division by zero.
 */
double sparsearray_getAverageComparisonsPerSearch(sparsearray_Type *sa)
{
	int i, si, sisum;
	double si2sum;
	Slot *s;
	
	sisum = 0;
	si2sum = 0.0;
	for(i = sa->size-1; i >= 0; i--){
		/* Length of the i-th list. */
		si = 0;
		for(s = sa->slots[i]; s != NULL; s = s->next)
			si++;
		sisum += si;
		/* Square evaluated as double so that it cannot overflow an int. */
		si2sum += (double) si * si;
	}
	if( sisum == 0 )
		return 1.0;
	return 0.5*(si2sum/sisum + 1.0);
}


/**
 * Prints on standard output a report about the usage of the hash table:
 * table length, number of occupied table entries, max list length, mean and
 * variance of the list length calculated over the occupied entries, and the
 * mean number of comparisons per search (Q).
 * For an empty sparse array the mean and the variance are reported as 0 and
 * Q as 1 rather than the result of a division by zero.
 * @param sa Involved sparse array.
 * @param title Text shown in the heading line of the report.
 */
void sparsearray_report(sparsearray_Type *sa, char * title)
{
	int i, si, simax, sisum, used;
	double si2sum, avg, var, Q;
	Slot *s;
	
	printf("Sparse array report about %s:\n", title);
	simax = 0;
	sisum = 0;
	si2sum = 0.0;
	used = 0;
	for(i = sa->size-1; i >= 0; i--){
		s = sa->slots[i];
		if( s != NULL )
			used++;
		/* Length of the i-th list. */
		si = 0;
		for( ; s != NULL; s = s->next)
			si++;
		if( si > simax )
			simax = si;
		sisum += si;
		/* Square evaluated as double so that it cannot overflow an int. */
		si2sum += (double) si * si;
	}
	if( used > 0 ){
		avg = (double) sisum / used; /* mean length of occupied slots */
		var = si2sum / used - avg*avg; /* variance of the occupied slot length */
		Q = 0.5*(si2sum/sisum + 1.0); /* mean no. of comparisons per search */
	} else {
		/* Empty table: no occupied entry to average over. */
		avg = 0.0;
		var = 0.0;
		Q = 1.0;
	}
	printf("    %d total table length\n", sa->size);
	printf("    %d occupied table entries\n", used);
	printf("    %d max table entry length\n", simax);
	printf("    %g average table entry length\n", avg);
	printf("    %g variance table entry length\n", var);
	printf("    %g mean no. of comparisons per search\n", Q);
}