40#ifndef __EST_SIMPLESTATS_H__
41#define __EST_SIMPLESTATS_H__
43#include "EST_String.h"
45#include "EST_StringTrie.h"
/// Alias for size_t.
/// NOTE(review): presumably used as an index/iteration type -- confirm against users.
typedef size_t int_iter;
64 EST_StrVector namevector;
71 EST_Discrete() {nametrie.
clear(); p_def_val = -1;}
73 EST_Discrete(
const EST_Discrete &d) { copy(d); }
75 EST_Discrete(
const EST_StrList &vocab);
79 void copy(
const EST_Discrete &d);
81 bool init(
const EST_StrList &vocab);
84 const int length(
void)
const {
return namevector.length(); }
90 return (((i=(
int*)nametrie.lookup(n)) != NULL) ? *i : p_def_val);
106 friend ostream& operator <<(ostream& s,
const EST_Discrete &d);
110 { copy(a);
return *
this; }
120 Discretes() {max=50;next_free=0;discretes=
new EST_Discrete*[max];}
122 const int def(
const EST_StrList &members);
123 EST_Discrete &discrete(
const int t)
const {
return *discretes[t-10];}
124 EST_Discrete &operator [] (
const int t)
const {
return *discretes[t-10];}
143 EST_SuffStats() {n = p_sum = p_sumx = 0.0;}
145 EST_SuffStats(
double in,
double isum,
double isumx)
146 {n = in; p_sum = isum; p_sumx = isumx;}
148 EST_SuffStats(
const EST_SuffStats &s) { copy(s); }
150 void copy(
const EST_SuffStats &s)
151 {n=s.n; p_sum = s.p_sum; p_sumx = s.p_sumx;}
153 void reset(
void) {n = p_sum = p_sumx = 0.0;}
154 void set(
double in,
double isum,
double isumx)
155 {n = in; p_sum = isum; p_sumx = isumx;}
157 double samples(
void) {
return n;}
159 double sum() {
return p_sum; }
161 double sumx() {
return p_sumx; }
163 double mean(
void)
const {
return (n==0)?0.0:(p_sum / n); }
165 double variance(
void)
const
166 {
return ((n*p_sumx)-(p_sum*p_sum))/((double)n*(n-1)); }
168 double stddev(
void)
const {
return sqrt(variance()); }
170 void cumulate(
double a,
double count=1.0)
171 { n+=count; p_sum+=a*count; p_sumx+=count*(a*a); }
174 EST_SuffStats &operator +=(
double a)
175 { cumulate(a,1.0);
return *
this;}
177 EST_SuffStats &operator + (
double a)
178 { cumulate(a,1.0);
return *
this;}
180 EST_SuffStats &operator = (
const EST_SuffStats &a)
181 { copy(a);
return *
this;}
/// Type tag for a discrete probability distribution (the default
/// EST_DiscreteProbDistribution constructor initialises its `type` member to
/// tprob_string). Enumerator values are implicit: 0, 1, 2.
enum EST_tprob_type {
    tprob_string,
    tprob_int,
    tprob_discrete
};
210class EST_DiscreteProbDistribution {
219 EST_StrD_KVL scounts;
221 EST_DiscreteProbDistribution() : type(tprob_string), discrete(NULL), icounts(0), scounts() {
init();}
223 EST_DiscreteProbDistribution(
const EST_DiscreteProbDistribution &b);
233 const double n_samples,
244 bool init(
const EST_StrList &vocab);
250 double samples(
void)
const {
return num_samples; }
254 void cumulate(EST_Litem *i,
double count=1);
255 void cumulate(
int i,
double count=1);
263 double probability(
const EST_String &s)
const;
265 double probability(
const int i)
const;
269 double frequency(
const int i)
const;
273 EST_Litem *
item_next(EST_Litem *idx)
const;
const EST_Discrete *const get_discrete() const
Returns discrete vocabulary of distribution.
EST_Litem * item_next(EST_Litem *idx) const
Used for iterating through members of the distribution.
void item_freq(EST_Litem *idx, EST_String &s, double &freq) const
During iteration returns name and frequency given index.
void set_num_samples(const double c)
EST_Litem * item_start() const
Used for iterating through members of the distribution.
EST_DiscreteProbDistribution(const EST_Discrete *d)
Create using the given EST_Discrete class as the vocabulary.
void item_prob(EST_Litem *idx, EST_String &s, double &prob) const
During iteration returns name and probability given index.
const EST_String & most_probable(double *prob=NULL) const
Return the most probable member of the distribution.
~EST_DiscreteProbDistribution()
Destructor function.
const EST_String & item_name(EST_Litem *idx) const
During iteration returns name given index.
double samples(void) const
Total number of examples found.
void clear(void)
Reset, clearing all counts and vocabulary.
EST_DiscreteProbDistribution(const EST_TList< EST_String > &vocab)
Create with given vocabulary.
void override_frequency(const EST_String &s, double c)
Sets the frequency of the named item, without modifying num_samples.
double entropy(void) const
void copy(const EST_DiscreteProbDistribution &b)
Copy all data from another DPD to this.
void cumulate(const EST_String &s, double count=1)
Add this observation; the number of occurrences may be specified.
void set_frequency(const EST_String &s, double c)
int item_end(EST_Litem *idx) const
Used for iterating through members of the distribution.
bool init(const EST_StrList &vocab)
(re-)initialise
const int index(const EST_String &n) const
void def_val(const EST_String &v)
Set the default value used when a name isn't found (-1 by default).
const EST_String & name(const int n) const
The name given the index.
const int length(void) const
The number of members in the discrete.
int name(const EST_String &n) const
An alternative method for getting the int from the name.
void clear(void)
Delete the tree.