44#include "EST_SCFG_Chart.h"
46EST_SCFG_Chart_Edge::EST_SCFG_Chart_Edge()
50EST_SCFG_Chart_Edge::EST_SCFG_Chart_Edge(
double prob,
60EST_SCFG_Chart_Edge::~EST_SCFG_Chart_Edge()
64LISP EST_SCFG_Chart::print_edge(
int start,
int end,
int p,
73 else if (start+1 == end)
76 LISP r = cons(rintern(grammar->nonterminal(p)),
77 cons(flocons(e->
prob()),
80 cons(rintern(grammar->terminal(e->
d1())),
87 EST_SCFG_Chart_Edge *d1, *d2;
89 d1 = edges[start][e->
pos()][e->
d1()];
90 d2 = edges[e->
pos()][end][e->
d2()];
93 cons(print_edge(start,e->
pos(),e->
d1(),d1),
94 cons(print_edge(e->
pos(),end,e->
d2(),d2),
96 LISP r = cons(rintern(grammar->nonterminal(p)),
97 cons(flocons(e->
prob()),
105EST_SCFG_Chart::EST_SCFG_Chart()
111 grammar_local = TRUE;
112 grammar =
new EST_SCFG;
115EST_SCFG_Chart::~EST_SCFG_Chart()
128 grammar_local = FALSE;
129 grammar = &imported_grammar;
152 for (n_vertices=1,p=s; p != e; p=inext(p))
156 for (n=0,p=s; p != e; p=inext(p),n++)
158 int term = grammar->terminal(p->f(name).
string());
161 cerr <<
"SCFG_Chart: unknown terminal symbol \"" <<
162 p->f(name).
string() <<
"\"" << endl;
169void EST_SCFG_Chart::delete_edge_table()
173 if (wfst == 0)
return;
175 for (i=0; i < n_vertices; i++)
178 for (j=0; j < n_vertices; j++)
181 if (edges[i][j][k] != emptyedge)
182 delete edges[i][j][k];
183 delete [] edges[i][j];
196void EST_SCFG_Chart::setup_edge_table()
199 int nt = grammar->num_nonterminals();
201 wfst =
new EST_SCFG_Chart_Edge*[n_vertices];
202 edges =
new EST_SCFG_Chart_Edge ***[n_vertices];
203 emptyedge =
new EST_SCFG_Chart_Edge(0,0,0,0);
205 for (i=0; i < n_vertices; i++)
208 edges[i] =
new EST_SCFG_Chart_Edge **[n_vertices];
209 for (j=0; j < n_vertices; j++)
211 edges[i][j] =
new EST_SCFG_Chart_Edge *[nt];
212 for (k=0; k < nt; k++)
218double EST_SCFG_Chart::find_best_tree_cal(
int start,
int end,
int p)
222 int best_q = -1, best_r = -1;
223 double best_prob = 0;
227 best_prob = grammar->prob_U(p,wfst[start]->d1());
229 edges[start][end][p] =
230 new EST_SCFG_Chart_Edge(best_prob*wfst[start]->prob(),
231 wfst[start]->d1(),0,-1);
233 edges[start][end][p] = emptyedge;
239 double s=0,t_prob,left,right;
241 int nt = grammar->num_nonterminals();
243 for (q=0; q < nt; q++)
244 for (r=0; r < nt; r++)
246 double pBpqr = grammar->prob_B(p,q,r);
249 for (j=start+1; j < end; j++)
251 left=find_best_tree(start,j,q);
254 right = find_best_tree(j,end,r);
255 t_prob = pBpqr * left * right;
256 if (t_prob > best_prob)
270 edges[start][end][p] =
271 new EST_SCFG_Chart_Edge(s,best_q,best_r,best_j);
273 edges[start][end][p] = emptyedge;
281 if (n_vertices - 1 > 0)
282 find_best_tree(0,n_vertices-1,grammar->distinguished_symbol());
291 top = edges[0][n_vertices-1][grammar->distinguished_symbol()];
296 return print_edge(0,n_vertices-1,grammar->distinguished_symbol(),top);
318 for (num_words=0,p=s; p != e; p=inext(p))
321 if (num_words != (n_vertices-1))
323 cerr <<
"SCFG_Chart: extract_parse, number of items in link stream " <<
324 " different from those in parse tree" << endl;
331 top = edges[0][n_vertices-1][grammar->distinguished_symbol()];
339 extract_edge(0,n_vertices-1,grammar->distinguished_symbol(),top,
342 if ((force) && (!daughter1(ss)))
343 extract_forced_parse(0,n_vertices-1,ss,w);
348void EST_SCFG_Chart::extract_forced_parse(
int start,
int end,
356 s->append_daughter(w);
357 s->set_name(grammar->
nonterminal(grammar->distinguished_symbol()));
362 extract_forced_parse(start,start+1,s->append_daughter(),w);
363 EST_Item *st = s->append_daughter();
364 st->set_name(grammar->nonterminal(grammar->distinguished_symbol()));
366 EST_Item *nw = inext(w);
367 extract_forced_parse(start+1,end,st,nw);
371void EST_SCFG_Chart::extract_edge(
int start,
int end,
int p,
382 else if (start+1 == end)
385 s->append_daughter((*word));
386 s->set_name(grammar->nonterminal(p));
387 s->
set(
"prob",(
float)e->
prob());
388 *word = inext(*word);
394 EST_SCFG_Chart_Edge *d1, *d2;
396 d1 = edges[start][e->
pos()][e->
d1()];
397 d2 = edges[e->
pos()][end][e->
d2()];
400 s->append_daughter();
401 s->append_daughter();
403 extract_edge(start,e->
pos(),e->
d1(),d1,daughter1(s),word);
404 extract_edge(e->
pos(),end,e->
d2(),d2,daughter2(s),word);
406 s->set_name(grammar->nonterminal(p));
407 s->
set(
"prob",(
float)e->
prob());
413void EST_SCFG_chart_load_relation(
EST_Relation &s,LISP sent)
419 for (w=sent; w != NIL; w=cdr(w))
425 word->set_name(get_c_string(car(car(w))));
426 if (consp(car(cdr(car(w)))))
427 for (f=car(cdr(car(w))); f != NIL; f=cdr(f))
429 if (FLONUMP(car(cdr(car(f)))))
430 word->
set(get_c_string(car(car(f))),
431 get_c_float(car(cdr(car(f)))));
433 word->
set(get_c_string(car(car(f))),
434 get_c_string(car(cdr(car(f)))));
437 word->
set(
"name",get_c_string(car(cdr(car(w)))));
440 word->
set(
"name",get_c_string(car(w)));
460LISP scfg_parse(LISP
string, LISP grammar)
469 EST_SCFG_chart_load_relation(words,
string);
477LISP scfg_parse(LISP
string,
EST_SCFG &grammar)
486 EST_SCFG_chart_load_relation(words,
string);
494LISP scfg_bracketing_only(LISP parse)
496 if (consp(siod_nth(4,parse)))
500 for (d=cdr(cdr(cdr(cdr(parse)))),ds=NIL; d != NIL; d=cdr(d))
501 ds = cons(scfg_bracketing_only(car(d)),ds);
505 return siod_nth(4,parse);
509void EST_SCFG_traintest::test_crossbrackets()
519 int fully_contained=0;
521 for (c=0; c < corpus.length(); c++)
523 LISP flat = siod_flatten(corpus.a_no_check(c).string());
525 parse = scfg_parse(flat,*
this);
532 EST_bracketed_string parsed(scfg_bracketing_only(parse));
535 count_bracket_crossing(corpus.a_no_check(c),parsed,vs);
542 cout <<
"cross bracketing " << cb.mean()*100 <<
" (" << failed <<
543 " failed " << (float)(100.0*fully_contained)/corpus.length() <<
544 "% fully consistent from " << corpus.length()
545 <<
" sentences)" << endl;
549void count_bracket_crossing(
const EST_bracketed_string &ref,
550 const EST_bracketed_string &test,
555 if (ref.length() != test.length())
557 EST_error(
"bracket_crossing: sentences of different lengths");
560 for (i=0; i < ref.length(); i++)
561 for (j=i+1; j <= ref.length(); j++)
562 if (test.valid(i,j) == 1)
564 if (ref.valid(i,j) == 0)
void set(const EST_String &name, int ival)
int pos(void)
Postion, 0 1 or 2, where 0 is empty, 1 is incomplete 2 is complete.
double prob(void)
Edge probability.
int d1()
(Non)terminal of daughter 1
int d2()
(Non)terminal of daughter 2
void extract_parse(EST_Relation *syn, EST_Relation *word, int force=0)
Extract parse tree and add it to syn linking leafs to word.
void setup_wfst(EST_Relation *s, const EST_String &name="name")
void parse()
Parses the loaded WFST with the loaded grammar.
LISP find_parse()
Return the parse in full LISP form.
void set_grammar_rules(LISP r)
Initialize from LISP rules set.
EST_String nonterminal(int p) const
Convert nonterminal index to string form.
int num_nonterminals() const
Number of nonterminals.
void set_rules(LISP rules)
Set (or reset) rules from external source after construction.
const EST_String & string(void) const