43#include "EST_simplestats.h"
44#include "EST_rw_status.h"
56class EST_bracketed_string {
62 int find_num_nodes(LISP
string);
63 int set_leaf_indices(LISP
string,
int i,LISP *symbols);
64 int num_leafs(LISP l)
const;
65 void find_valid(
int i,LISP t)
const;
69 EST_bracketed_string();
71 EST_bracketed_string(LISP
string);
73 ~EST_bracketed_string();
76 void set_bracketed_string(LISP
string);
/// Accessor: returns the stored p_length member unchanged.
/// Const; does not modify the object.
78 int length()
const {
return p_length;}
/// Accessor: returns the stored LISP value bs as-is (no copy is made here).
80 LISP string()
const {
return bs; }
/// Returns, as an EST_String, the C string obtained from the car of
/// symbols[i].
/// @param i index into the symbols array; it is used directly, with no
///          range check in this function.
82 const EST_String symbol_at(
int i)
const
83 {
return EST_String(get_c_string(car(symbols[i]))); }
/// Returns the table entry valid_spans[i][k].
/// @param i first index into valid_spans (not range-checked here)
/// @param k second index into valid_spans (not range-checked here)
/// NOTE(review): presumably non-zero means the span i..k is a valid
/// bracketing — confirm against find_valid().
85 int valid(
int i,
int k)
const {
return valid_spans[i][k]; }
/// Inequality by object identity: the result is 0 only when `a` is this
/// very object (same address), and 1 otherwise. Contents are never compared,
/// so two distinct objects always compare unequal.
88 int operator !=(
const EST_bracketed_string &a)
const
89 {
return (!(
this == &a)); }
/// Equality by object identity: the result is 1 only when `a` is this very
/// object (same address). Contents are never compared, so two distinct
/// objects always compare unequal.
90 int operator ==(
const EST_bracketed_string &a)
const
91 {
return ((
this == &a)); }
/// Stream insertion placeholder: writes the fixed text
/// "[a bracketed string]" followed by endl and returns the stream.
/// The argument `a` is deliberately unused (hence the (void) cast), so the
/// output does not depend on the object's contents.
93 friend ostream& operator << (ostream &s,
const EST_bracketed_string &a)
94 { (void)a; s <<
"[a bracketed string]" << endl;
return s; }
/// Tag describing the kind of a rule; it is the type of EST_SCFG_Rule's
/// p_type member. est_scfg_unset is the value assigned by the
/// EST_SCFG_Rule default constructor. As an unscoped enum with no explicit
/// initialisers, the enumerators take the values 0, 1 and 2 in order.
/// NOTE(review): binary/unary presumably correspond to the four-argument
/// and three-argument set_rule() overloads respectively — confirm in the
/// implementation file.
101enum est_scfg_rtype {est_scfg_unset, est_scfg_binary_rule,
102 est_scfg_unary_rule};
126 est_scfg_rtype p_type;
/// Default constructor: marks the rule as est_scfg_unset and sets its
/// probability to 0. p_mother, p_daughter1 and p_daughter2 are not assigned
/// here.
130 EST_SCFG_Rule() {p_type=est_scfg_unset; p_prob=0;}
/// Copy constructor: copies p_mother, p_daughter1, p_daughter2, p_type and
/// p_prob from `r`, member by member.
132 EST_SCFG_Rule(
const EST_SCFG_Rule &r)
133 {p_mother = r.p_mother; p_daughter1 = r.p_daughter1;
134 p_daughter2 = r.p_daughter2; p_type=r.p_type; p_prob = r.p_prob;}
136 EST_SCFG_Rule(
double prob,
int p,
int m);
138 EST_SCFG_Rule(
double prob,
int p,
int q,
int r);
/// Accessor: returns the rule's stored probability, p_prob.
140 double prob()
const {
return p_prob;}
/// Overwrites the rule's stored probability with `p`.
/// The value is stored as given; no validation is done here.
142 void set_prob(
double p) { p_prob=p;}
/// Accessor: returns the rule's kind tag, p_type.
144 est_scfg_rtype type()
const {
return p_type; }
/// Accessor: returns p_mother.
/// NOTE(review): presumably the index of the rule's left-hand-side
/// nonterminal — confirm.
146 int mother()
const {
return p_mother;}
/// Accessor: returns p_daughter1.
/// NOTE(review): presumably the first right-hand-side symbol index —
/// confirm.
150 int daughter1()
const {
return p_daughter1;}
/// Accessor: returns p_daughter2.
/// NOTE(review): presumably only meaningful for binary rules; the default
/// constructor does not initialise it — confirm.
152 int daughter2()
const {
return p_daughter2;}
154 void set_rule(
double prob,
int p,
int m);
156 void set_rule(
double prob,
int p,
int q,
int r);
182 int p_distinguished_symbol;
188 void rule_prob_cache();
190 void delete_rule_prob_cache();
196 EST_SCFG(LISP
rules);
/// Accessor: returns the stored p_distinguished_symbol member.
208 int distinguished_symbol()
const {
return p_distinguished_symbol; }
/// The rule probability of the given binary rule: a direct lookup of
/// p_prob_B[p][q][r].
/// The three indices are used as-is; no range check is done here.
226 double prob_B(
int p,
int q,
int r)
const {
return p_prob_B[p][q][r]; }
/// The rule probability of the given unary rule: a direct lookup of
/// p_prob_U[p][m].
/// Both indices are used as-is; no range check is done here.
228 double prob_U(
int p,
int m)
const {
return p_prob_U[p][m]; }
254class EST_SCFG_traintest :
public EST_SCFG {
267 double f_I_cal(
int c,
int p,
int i,
int k);
/// Cached wrapper around f_I_cal(): reads inside[p][i][k] and returns it
/// unless it equals -1, in which case the result of f_I_cal(c,p,i,k) is
/// returned instead. Note that `c` does not index the inside table; it is
/// only forwarded to f_I_cal().
/// NOTE(review): -1 is presumably the "not yet computed" marker written
/// when the cache is initialised — confirm against init_io_cache().
/// NOTE(review): the line that opens the body and declares `r` is not
/// present in this view.
269 double f_I(
int c,
int p,
int i,
int k)
271 if ((r=inside[p][i][k]) != -1)
return r;
272 else return f_I_cal(c,p,i,k); }
274 double f_O_cal(
int c,
int p,
int i,
int k);
/// Cached wrapper around f_O_cal(): reads outside[p][i][k] and returns it
/// unless it equals -1, in which case the result of f_O_cal(c,p,i,k) is
/// returned instead. Note that `c` does not index the outside table; it is
/// only forwarded to f_O_cal().
/// NOTE(review): -1 is presumably the "not yet computed" marker written
/// when the cache is initialised — confirm against init_io_cache().
/// NOTE(review): the line that opens the body and declares `r` is not
/// present in this view.
276 double f_O(
int c,
int p,
int i,
int k)
278 if ((r=outside[p][i][k]) != -1)
return r;
279 else return f_O_cal(c,p,i,k); }
285 double f_P(
int c,
int p);
287 void reestimate_rule_prob_B(
int c,
int ri,
int p,
int q,
int r);
289 void reestimate_rule_prob_U(
int c,
int ri,
int p,
int m);
291 void reestimate_grammar_probs(
int passes,
295 const EST_String &outfile);
297 double cross_entropy();
299 void init_io_cache(
int c,
int nt);
301 void clear_io_cache(
int c);
303 EST_SCFG_traintest();
304 ~EST_SCFG_traintest();
316 void test_crossbrackets();
326 void load_corpus(
const EST_String &filename);
336 void train_inout(
int passes,
340 const EST_String &outfile);
345LISP scfg_bracketing_only(LISP parse);
348void count_bracket_crossing(
const EST_bracketed_string &ref,
349 const EST_bracketed_string &test,
int terminal(const EST_String &m) const
Convert terminal string to index.
EST_String nonterminal(int p) const
Convert nonterminal index to string form.
int num_nonterminals() const
Number of nonterminals.
int num_terminals() const
Number of terminals.
double prob_B(int p, int q, int r) const
The rule probability of given binary rule.
void find_terms_nonterms(EST_StrList &nt, EST_StrList &t, LISP rules)
EST_read_status load(const EST_String &filename)
Load grammar from named file.
LISP get_rules()
Return rules as LISP list.
void set_rule_prob_cache()
(Re-)set rule probability caches.
SCFGRuleList rules
The rules themselves.
EST_write_status save(const EST_String &filename)
Save current grammar to named file.
EST_String terminal(int m) const
Convert terminal index to string form.
void set_rules(LISP rules)
Set (or reset) rules from external source after construction.
double prob_U(int p, int m) const
The rule probability of given unary rule.
int nonterminal(const EST_String &p) const
Convert nonterminal string to index.