summary refs log tree commit diff
path: root/decoder
diff options
context:
space:
mode:
Diffstat (limited to 'decoder')
-rwxr-xr-x decoder/apply_fsa_models.cc 86
-rwxr-xr-x decoder/cfg.h 10
2 files changed, 72 insertions, 24 deletions
diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc
index dddbddd9..4a928206 100755
--- a/decoder/apply_fsa_models.cc
+++ b/decoder/apply_fsa_models.cc
@@ -1,13 +1,16 @@
-#include <queue>
#include "apply_fsa_models.h"
+#include <stdexcept>
+#include <cassert>
+#include <queue>
+#include <stdint.h>
+
+#include "writer.h"
#include "hg.h"
#include "ff_fsa_dynamic.h"
#include "ff_from_fsa.h"
#include "feature_vector.h"
#include "stringlib.h"
#include "apply_models.h"
-#include <stdexcept>
-#include <cassert>
#include "cfg.h"
#include "hg_cfg.h"
#include "utoa.h"
@@ -16,7 +19,7 @@
#include "d_ary_heap.h"
#include "agenda.h"
#include "show.h"
-#include <stdint.h>
+#include "string_to.h"
#define DFSA(x) x
//fsa earley chart
@@ -24,7 +27,7 @@
#define DPFSA(x) x
//prefix trie
-#define DBUILDTRIE(x) x
+#define DBUILDTRIE(x)
#define PRINT_PREFIX 1
#if PRINT_PREFIX
@@ -101,23 +104,54 @@ struct TrieBackP {
FsaFeatureFunction const* print_fsa=0;
CFG const* print_cfg=0;
-inline void print_cfg_rhs(std::ostream &o,WordID w) {
- if (print_cfg)
- print_cfg->print_rhs_name(o,w);
+inline ostream& print_cfg_rhs(std::ostream &o,WordID w,CFG const*pcfg=print_cfg) {
+ if (pcfg)
+ pcfg->print_rhs_name(o,w);
else
CFG::static_print_rhs_name(o,w);
+ return o;
+}
+
+inline std::string nt_name(WordID n,CFG const*pcfg=print_cfg) { // name of nonterminal n; pcfg defaults to the file-level print_cfg pointer
+ if (pcfg) return pcfg->nt_name(n); // a grammar is available: ask it for the name
+ return CFG::static_nt_name(n); // no grammar: fall back to CFG's static (grammar-independent) naming
+}
+
+template <class V>
+ostream& print_by_nt(std::ostream &o,V const& v,CFG const*pcfg=print_cfg,char const* header="\nNT -> X\n") { // writes header, then one line per index i of v, labelled with nt_name(i,pcfg). NOTE(review): return type is unqualified ostream while the parameter is std::ostream - presumably relies on a using-declaration not visible here
+ o<<header;
+ for (int i=0;i<v.size();++i) // NOTE(review): int compared against v.size(); signed/unsigned mismatch if size() returns an unsigned type - confirm
+ o << nt_name(i,pcfg) << " -> "<<v[i]<<"\n"; // requires v[i] to be streamable
+ return o; // returns the stream so the call can be chained
+}
+
+template <class V>
+ostream& print_map_by_nt(std::ostream &o,V const& v,CFG const*pcfg=print_cfg,char const* header="\nNT -> X\n") { // writes header, then one line per (key,value) entry of the map-like container v
+ o<<header;
+ for (typename V::const_iterator i=v.begin(),e=v.end();i!=e;++i) { // entries are visited in the container's own iteration order
+ print_cfg_rhs(o,i->first,pcfg) << " -> "<<i->second<<"\n"; // key is rendered via print_cfg_rhs (which returns o), then the mapped value is streamed
+ }
+ return o; // returns the stream so the call can be chained
}
+
struct PrefixTrieEdge {
-// PrefixTrieEdge() { }
+ PrefixTrieEdge()
+ // : dest(0),w(TD::max_wordid)
+ {}
+ PrefixTrieEdge(WordID w,NodeP dest)
+ : dest(dest),w(w)
+ {}
// explicit PrefixTrieEdge(best_t p) : p(p),dest(0) { }
- best_t p;// viterbi additional prob, i.e. product over path incl. p_final = total rule prob
+
+ best_t p;// viterbi additional prob, i.e. product over path incl. p_final = total rule prob. note: for final edge, set this.
//DPFSA()
// we can probably just store deltas, but for debugging remember the full p
// best_t delta; //
NodeP dest;
bool is_final() const { return dest==0; }
- WordID w; // for lhs, this will be nonneg NTHandle instead. // not set if is_final() // actually, set to lhs nt index
+ best_t p_dest() const;
+ WordID w; // for root and is_final(), this will be (negated) NTHandle.
// for sorting most probable first in adj; actually >(p)
inline bool operator <(PrefixTrieEdge const& o) const {
@@ -218,7 +252,7 @@ public:
for (int i=0,e=adj.size();i!=e;++i) {
PrefixTrieEdge const& edge=adj[i];
// assert(edge.p.is_1()); // actually, after done_building, e will have telescoped dest->p/p.
- NTHandle n=edge.w;
+ NTHandle n=-edge.w;
assert(n>=0);
SHOWM3(DPFSA,"index_lhs",i,edge,n);
v[n]=edge.dest;
@@ -228,7 +262,10 @@ public:
template <class PV>
void done_root(PV &v) {
assert(is_root());
+ SHOWM1(DBUILDTRIE,"done_root",OSTRF1(print_map_by_nt,edge_for));
done_building_r(); //sets adj
+ SHOWM1(DBUILDTRIE,"done_root",OSTRF1(print_by_nt,adj));
+// SHOWM1(DBUILDTRIE,done_root,adj);
// index_adj(); // we want an index for the root node?. don't think so - index_lhs handles it. also we stopped clearing edge_for.
index_lhs(v); // uses adj
}
@@ -244,7 +281,7 @@ public:
// for done_building; compute incremental (telescoped) edge p
PrefixTrieEdge /*const&*/ operator()(PrefixTrieEdgeFor::value_type & pair) const {
PrefixTrieEdge &e=pair.second;//const_cast<PrefixTrieEdge&>(pair.second);
- e.p=(e.dest->p)/p;
+ e.p=e.p_dest()/p;
return e;
}
@@ -265,6 +302,7 @@ public:
// (*this)(*i);
}
#endif
+ SHOWM1(DBUILDTRIE,"done building adj",prange(adj.begin(),adj.end(),true));
assert(adj.size()==edge_for.size());
// if (final) p_final/=p;
std::sort(adj.begin(),adj.end());
@@ -287,18 +325,18 @@ public:
inline NodeP build(W w,best_t rulep) {
return build(lhs,w,rulep);
}
- inline NodeP build_lhs(NTHandle w,best_t rulep) {
- return build(w,w,rulep);
+ inline NodeP build_lhs(NTHandle n,best_t rulep) {
+ return build(n,-n,rulep);
}
NodeP build(NTHandle lhs_,W w,best_t rulep) {
PrefixTrieEdgeFor::iterator i=edge_for.find(w);
if (i!=edge_for.end())
return improve_edge(i->second,rulep);
- PrefixTrieEdge &e=i->second;
NodeP r=new PrefixTrieNode(lhs_,rulep);
IF_PRINT_PREFIX(r->backp=BP(w,this));
- e.dest=r;
+// edge_for.insert(i,PrefixTrieEdgeFor::value_type(w,PrefixTrieEdge(w,r)));
+ add(edge_for,w,PrefixTrieEdge(w,r));
SHOWM4(DBUILDTRIE,"built node",this,w,*r,r);
return r;
}
@@ -306,7 +344,7 @@ public:
void set_final(NTHandle lhs_,best_t pf) {
assert(no_adj());
final=true;
- PrefixTrieEdge &e=edge_for[-1];
+ PrefixTrieEdge &e=edge_for[null_wordid];
e.p=pf;
e.dest=0;
e.w=lhs_;
@@ -335,6 +373,10 @@ public:
PRINT_SELF(PrefixTrieNode)
};
+inline best_t PrefixTrieEdge::p_dest() const { // p associated with this edge's destination
+ return dest ? dest->p : p; // dest->p when a destination node exists; a final edge has dest==0 (no sentinel node), so its own p, which was set directly, is used
+}
+
//Trie starts with lhs (nonneg index), then continues w/ rhs (mixed >0 word, else NT)
// trie ends with final edge, which points to a per-lhs prefix node
@@ -358,7 +400,9 @@ struct PrefixTrie {
SHOWM2(DBUILDTRIE,"PrefixTrie()",rulesp->size(),lhs2.size());
cfg.VisitRuleIds(*this);
root.done_root(lhs2);
- SHOWM4(DBUILDTRIE,"done w/ PrefixTrie: ",root,root.adj.size(),lhs2.size(),lhs2[0]);
+ SHOWM3(DBUILDTRIE,"done w/ PrefixTrie: ",root,root.adj.size(),lhs2.size());
+ DBUILDTRIE(print_by_nt(cerr,lhs2,cfgp));
+ SHOWM1(DBUILDTRIE,"lhs2",OSTRF2(print_by_nt,lhs2,cfgp));
}
void operator()(int ri) {
@@ -526,12 +570,8 @@ struct Chart {
} else {
break;
}
-
}
-
-
}
-
}
Chart(CFG &cfg,SentenceMetadata const& smeta,FsaFF const& fsa,unsigned reserve=FSA_AGENDA_RESERVE)
diff --git a/decoder/cfg.h b/decoder/cfg.h
index 95cb5fd7..8cb29bb9 100755
--- a/decoder/cfg.h
+++ b/decoder/cfg.h
@@ -77,8 +77,16 @@ struct CFG {
if (w<=0) return nt_name(-w);
else return TD::Convert(w);
}
+ static void static_print_nt_name(std::ostream &o,NTHandle n) { // grammar-independent rendering of a nonterminal handle
+ o<<'['<<n<<']'; // the handle's number wrapped in square brackets
+ }
+ static std::string static_nt_name(NTHandle w) { // string form of what static_print_nt_name writes for w
+ std::ostringstream o; // in-memory stream used to capture the output
+ static_print_nt_name(o,w);
+ return o.str(); // copy of the accumulated text
+ }
static void static_print_rhs_name(std::ostream &o,WordID w) {
- if (w<=0) o<<'['<<-w<<']';
+ if (w<=0) static_print_nt_name(o,-w);
else o<<TD::Convert(w);
}
static std::string static_rhs_name(WordID w) {