summary | refs | log | tree | commit | diff
path: root/decoder
diff options
context:
space:
mode:
Diffstat (limited to 'decoder')
-rwxr-xr-x  decoder/apply_fsa_models.cc  | 63
-rwxr-xr-x  decoder/cfg.h  | 21
-rwxr-xr-x  decoder/hg_cfg.h  | 11
3 files changed, 79 insertions, 16 deletions
diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc
index 5493d29f..fd3e1289 100755
--- a/decoder/apply_fsa_models.cc
+++ b/decoder/apply_fsa_models.cc
@@ -24,6 +24,8 @@
#define DPFSA(x) x
//prefix trie
+#define DBUILDTRIE(x) x
+
#define PRINT_PREFIX 1
#if PRINT_PREFIX
# define IF_PRINT_PREFIX(x) x
@@ -146,6 +148,12 @@ struct PrefixTrieNode {
return ret;
}
+ unsigned size() const {
+ unsigned a=adj.size();
+ unsigned e=edge_for.size();
+ return a>e?a:e;
+ }
+
void print_back_str(std::ostream &o) const {
BPs back=back_vec();
unsigned i=back.size();
@@ -201,6 +209,7 @@ public:
m.clear();
for (int i=0;i<adj.size();++i) {
PrefixTrieEdge const& e=adj[i];
+ SHOWM2(DPFSA,"index_adj",i,e);
m[e.w]=e;
}
}
@@ -211,27 +220,49 @@ public:
// assert(e.p.is_1()); // actually, after done_building, e will have telescoped dest->p/p.
NTHandle n=e.w;
assert(n>=0);
+ SHOWM3(DPFSA,"index_lhs",i,e,n);
v[n]=e.dest;
}
}
+ template <class PV>
+ void done_root(PV &v) {
+ done_building_r();
+// index_adj(); // we want an index for the root node?. don't think so - index_lhs handles it.
+ index_lhs(v);
+ }
+
// call only once.
void done_building_r() {
done_building();
for (int i=0;i<adj.size();++i)
- adj[i].dest->done_building_r();
+ if (adj[i].dest) // skip final edge
+ adj[i].dest->done_building_r();
}
// for done_building; compute incremental (telescoped) edge p
- PrefixTrieEdge const& operator()(PrefixTrieEdgeFor::value_type const& pair) const {
- PrefixTrieEdge &e=const_cast<PrefixTrieEdge&>(pair.second);
+ PrefixTrieEdge /*const&*/ operator()(PrefixTrieEdgeFor::value_type & pair) const {
+ PrefixTrieEdge &e=pair.second;//const_cast<PrefixTrieEdge&>(pair.second);
e.p=(e.dest->p)/p;
return e;
}
// call only once.
void done_building() {
+ SHOWM3(DBUILDTRIE,"done_building",edge_for.size(),adj.size(),1);
+#if 0
adj.reinit_map(edge_for,*this);
+#else
+ adj.reinit(edge_for.size());
+ Adj::iterator o=adj.begin();
+ for (PrefixTrieEdgeFor::iterator i=edge_for.begin(),e=edge_for.end();i!=e;++i) {
+ SHOWM3(DBUILDTRIE,"edge_for",o-adj.begin(),i->first,i->second);
+ PrefixTrieEdge &edge=i->second;
+ edge.p=(edge.dest->p)/p;
+ *o++=edge;
+// (*this)(*i);
+ }
+#endif
// if (final) p_final/=p;
std::sort(adj.begin(),adj.end());
//TODO: store adjacent differences on edges (compared to
@@ -294,12 +325,22 @@ public:
~PrefixTrieNode() {
destroy_children();
}
+ void print(std::ostream &o) const {
+ o << lhs << "->" << p;
+ o << ',' << size() << ',';
+ print_back_str(o);
+ }
+ PRINT_SELF(PrefixTrieNode)
};
//Trie starts with lhs (nonneg index), then continues w/ rhs (mixed >0 word, else NT)
// trie ends with final edge, which points to a per-lhs prefix node
struct PrefixTrie {
+ void print(std::ostream &o) const {
+ o << cfgp << ' ' << root;
+ }
+ PRINT_SELF(PrefixTrie);
CFG *cfgp;
Rules const* rulesp;
Rules const& rules() const { return *rulesp; }
@@ -312,10 +353,9 @@ struct PrefixTrie {
PrefixTrie(CFG &cfg) : cfgp(&cfg),rulesp(&cfg.rules),lhs2(cfg.nts.size(),0),lhs2complete(cfg.nts.size()) {
// cfg.SortLocalBestFirst(); // instead we'll sort in done_building_r
print_cfg=cfgp;
+ SHOWM2(DBUILDTRIE,"PrefixTrie()",rulesp->size(),lhs2.size());
cfg.VisitRuleIds(*this);
- root.done_building_r();
- root.index_adj(); // maybe the index we use for building trie should be replaced by a much larger/faster table since we look up by lhs many times in parsing?
- root.index_lhs(lhs2);
+ root.done_root(lhs2);
}
void operator()(int ri) const {
@@ -323,12 +363,15 @@ struct PrefixTrie {
NTHandle lhs=r.lhs;
best_t p=r.p;
NodeP n=const_cast<PrefixTrieNode&>(root).build_lhs(lhs,p);
+ SHOWM3(DBUILDTRIE,"Prefixtrie rule id",ri,root,p);
for (RHS::const_iterator i=r.rhs.begin(),e=r.rhs.end();;++i) {
+ SHOWM2(DBUILDTRIE,"PrefixTrie build or final",i-r.rhs.begin(),*n);
if (i==e) {
n->set_final(lhs,p);
break;
}
n=n->build(*i,p);
+ SHOWM2(DBUILDTRIE,"PrefixTrie built",*i,*n);
}
// root.build(lhs,r.p)->build(r.rhs,r.p);
}
@@ -390,8 +433,7 @@ struct ItemKey {
}
}
NTHandle lhs() const { return dot->lhs; }
- typedef ItemKey self_type;
- SELF_TYPE_PRINT
+ PRINT_SELF(ItemKey)
};
inline ItemHash hash_value(ItemKey const& x) {
return x.hash();
@@ -424,8 +466,7 @@ struct Item : ItemPrio,ItemKey {
o<<" next="<<next;
o<< ']';
}
- typedef Item self_type;
- SELF_TYPE_PRINT
+ PRINT_SELF(Item)
};
struct GetItemKey {
@@ -553,6 +594,8 @@ template <class F>
void ApplyFsa<F>::ApplyEarley()
{
hgcfg.GiveCFG(cfg);
+ print_cfg=&cfg;
+ print_fsa=&fsa;
Chart<F> chart(cfg,smeta,fsa);
// don't need to uniq - option to do that already exists in cfg_options
//TODO:
diff --git a/decoder/cfg.h b/decoder/cfg.h
index 79ae6f33..95cb5fd7 100755
--- a/decoder/cfg.h
+++ b/decoder/cfg.h
@@ -1,6 +1,8 @@
#ifndef CDEC_CFG_H
#define CDEC_CFG_H
+#define DVISITRULEID(x)
+
// for now, debug means remembering and printing the TRule behind each CFG rule
#ifndef CFG_DEBUG
# define CFG_DEBUG 1
@@ -133,6 +135,10 @@ struct CFG {
swap(f,o.f);
IF_CFG_TRULE(swap(rule,o.rule);)
}
+ friend inline void swap(Rule &a,Rule &b) {
+ a.Swap(b);
+ }
+
template<class V>
void visit_rhs_nts(V &v) const {
for (RHS::const_iterator i=rhs.begin(),e=rhs.end();i!=e;++i) {
@@ -195,6 +201,9 @@ struct CFG {
swap(ruleids,o.ruleids);
swap(from,o.from);
}
+ friend inline void swap(NT &a,NT &b) {
+ a.Swap(b);
+ }
};
CFG() : hg_() { uninit=true; }
@@ -294,14 +303,19 @@ struct CFG {
// call after rules are indexed.
template <class V>
void VisitRuleIds(V &v) {
- for (int i=0,e=nts.size();i<e;++i)
- for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.begin();j!=jj;++j)
+ for (int i=0,e=nts.size();i<e;++i) {
+ SHOWM(DVISITRULEID,"VisitRuleIds nt",i);
+ for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.end();j!=jj;++j) {
+ SHOWM2(DVISITRULEID,"VisitRuleIds",i,*j);
v(*j);
+ }
+ }
+
}
template <class V>
void VisitRuleIds(V const& v) {
for (int i=0,e=nts.size();i<e;++i)
- for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.begin();j!=jj;++j)
+ for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.end();j!=jj;++j)
v(*j);
}
@@ -351,7 +365,6 @@ inline std::size_t hash_value(CFG::NT const& r) {
return r.hash_impl();
}
-
inline void swap(CFG &a,CFG &b) {
a.Swap(b);
}
diff --git a/decoder/hg_cfg.h b/decoder/hg_cfg.h
index ba936990..b90aca47 100755
--- a/decoder/hg_cfg.h
+++ b/decoder/hg_cfg.h
@@ -7,8 +7,12 @@ class Hypergraph;
// in case you might want the CFG whether or not you apply FSA models:
struct HgCFG {
- HgCFG(Hypergraph const& ih) : ih(ih) {
+ void set_defaults() {
have_cfg=binarized=have_features=uniqed=false;
+ want_features=true;
+ }
+ HgCFG(Hypergraph const& ih) : ih(ih) {
+ set_defaults();
}
Hypergraph const& ih;
CFG cfg;
@@ -32,11 +36,14 @@ struct HgCFG {
if (!have_cfg)
InitCFG(to);
else {
+ cfg.VisitRuleIds(*this);
have_cfg=false;
to.Clear();
- to.Swap(cfg);
+ swap(to,cfg);
}
}
+ void operator()(int ri) const {
+ }
CFG const& GetCFG() const {
assert(have_cfg);
return cfg;