-rwxr-xr-x  decoder/apply_fsa_models.cc  | 63
-rwxr-xr-x  decoder/cfg.h                | 21
-rwxr-xr-x  decoder/hg_cfg.h             | 11
-rw-r--r--  utils/d_ary_heap.h           |  2
-rwxr-xr-x  utils/value_array.h          | 90
5 files changed, 149 insertions(+), 38 deletions(-)
diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc
index 5493d29f..fd3e1289 100755
--- a/decoder/apply_fsa_models.cc
+++ b/decoder/apply_fsa_models.cc
@@ -24,6 +24,8 @@
#define DPFSA(x) x
//prefix trie
+#define DBUILDTRIE(x) x
+
#define PRINT_PREFIX 1
#if PRINT_PREFIX
# define IF_PRINT_PREFIX(x) x
@@ -146,6 +148,12 @@ struct PrefixTrieNode {
return ret;
}
+ unsigned size() const {
+ unsigned a=adj.size();
+ unsigned e=edge_for.size();
+ return a>e?a:e;
+ }
+
void print_back_str(std::ostream &o) const {
BPs back=back_vec();
unsigned i=back.size();
@@ -201,6 +209,7 @@ public:
m.clear();
for (int i=0;i<adj.size();++i) {
PrefixTrieEdge const& e=adj[i];
+ SHOWM2(DPFSA,"index_adj",i,e);
m[e.w]=e;
}
}
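
A note on what index_adj is for: after the trie is built, each node's adjacency vector is copied into a word-keyed map so parsing can find the outgoing edge for a given symbol with one lookup instead of a linear scan. A minimal sketch of the pattern, using std::map in place of whatever hash map PrefixTrieEdgeFor actually is:

```cpp
#include <cstddef>
#include <map>
#include <vector>

typedef int WordID;
struct Edge { WordID w; /* dest pointer, weight, ... */ };

// Rebuild a symbol-keyed index over an adjacency vector; subsequent
// "which edge leaves this node on word w?" queries become one map find.
void index_adj(std::vector<Edge> const& adj, std::map<WordID, Edge>& m) {
  m.clear();
  for (std::size_t i = 0; i < adj.size(); ++i)
    m[adj[i].w] = adj[i];
}
```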
@@ -211,27 +220,49 @@ public:
// assert(e.p.is_1()); // actually, after done_building, e will have telescoped dest->p/p.
NTHandle n=e.w;
assert(n>=0);
+ SHOWM3(DPFSA,"index_lhs",i,e,n);
v[n]=e.dest;
}
}
+ template <class PV>
+ void done_root(PV &v) {
+ done_building_r();
+// index_adj(); // we want an index for the root node? don't think so - index_lhs handles it.
+ index_lhs(v);
+ }
+
// call only once.
void done_building_r() {
done_building();
for (int i=0;i<adj.size();++i)
- adj[i].dest->done_building_r();
+ if (adj[i].dest) // skip final edge
+ adj[i].dest->done_building_r();
}
// for done_building; compute incremental (telescoped) edge p
- PrefixTrieEdge const& operator()(PrefixTrieEdgeFor::value_type const& pair) const {
- PrefixTrieEdge &e=const_cast<PrefixTrieEdge&>(pair.second);
+ PrefixTrieEdge /*const&*/ operator()(PrefixTrieEdgeFor::value_type & pair) const {
+ PrefixTrieEdge &e=pair.second;//const_cast<PrefixTrieEdge&>(pair.second);
e.p=(e.dest->p)/p;
return e;
}
// call only once.
void done_building() {
+ SHOWM3(DBUILDTRIE,"done_building",edge_for.size(),adj.size(),1);
+#if 0
adj.reinit_map(edge_for,*this);
+#else
+ adj.reinit(edge_for.size());
+ Adj::iterator o=adj.begin();
+ for (PrefixTrieEdgeFor::iterator i=edge_for.begin(),e=edge_for.end();i!=e;++i) {
+ SHOWM3(DBUILDTRIE,"edge_for",o-adj.begin(),i->first,i->second);
+ PrefixTrieEdge &edge=i->second;
+ edge.p=(edge.dest->p)/p;
+ *o++=edge;
+// (*this)(*i);
+ }
+#endif
// if (final) p_final/=p;
std::sort(adj.begin(),adj.end());
//TODO: store adjacent differences on edges (compared to
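
The "telescoped" probabilities computed here are worth spelling out: done_building rewrites each edge's weight as dest->p divided by the parent's p, so the product of edge weights along any root-to-node path collapses back to that node's absolute best-probability estimate, letting the search apply the heuristic incrementally edge by edge. A tiny numeric sketch (plain double stands in for best_t, whose operator/ the patch relies on):

```cpp
#include <cassert>
#include <cmath>

int main() {
  // Absolute best-probability estimates along one trie path root -> a -> b.
  double p_root = 1.0, p_a = 0.5, p_b = 0.2;

  // Telescoped (incremental) edge weights, as computed in done_building:
  // e.p = dest->p / parent->p.
  double e_root_a = p_a / p_root;  // 0.5
  double e_a_b    = p_b / p_a;     // 0.4

  // The product along the path telescopes back to the absolute estimate.
  assert(std::fabs(e_root_a * e_a_b - p_b) < 1e-12);
  return 0;
}
```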
@@ -294,12 +325,22 @@ public:
~PrefixTrieNode() {
destroy_children();
}
+ void print(std::ostream &o) const {
+ o << lhs << "->" << p;
+ o << ',' << size() << ',';
+ print_back_str(o);
+ }
+ PRINT_SELF(PrefixTrieNode)
};
//Trie starts with lhs (nonneg index), then continues w/ rhs (mixed >0 word, else NT)
// trie ends with final edge, which points to a per-lhs prefix node
struct PrefixTrie {
+ void print(std::ostream &o) const {
+ o << cfgp << ' ' << root;
+ }
+ PRINT_SELF(PrefixTrie);
CFG *cfgp;
Rules const* rulesp;
Rules const& rules() const { return *rulesp; }
@@ -312,10 +353,9 @@ struct PrefixTrie {
PrefixTrie(CFG &cfg) : cfgp(&cfg),rulesp(&cfg.rules),lhs2(cfg.nts.size(),0),lhs2complete(cfg.nts.size()) {
// cfg.SortLocalBestFirst(); // instead we'll sort in done_building_r
print_cfg=cfgp;
+ SHOWM2(DBUILDTRIE,"PrefixTrie()",rulesp->size(),lhs2.size());
cfg.VisitRuleIds(*this);
- root.done_building_r();
- root.index_adj(); // maybe the index we use for building trie should be replaced by a much larger/faster table since we look up by lhs many times in parsing?
- root.index_lhs(lhs2);
+ root.done_root(lhs2);
}
void operator()(int ri) const {
@@ -323,12 +363,15 @@ struct PrefixTrie {
NTHandle lhs=r.lhs;
best_t p=r.p;
NodeP n=const_cast<PrefixTrieNode&>(root).build_lhs(lhs,p);
+ SHOWM3(DBUILDTRIE,"Prefixtrie rule id",ri,root,p);
for (RHS::const_iterator i=r.rhs.begin(),e=r.rhs.end();;++i) {
+ SHOWM2(DBUILDTRIE,"PrefixTrie build or final",i-r.rhs.begin(),*n);
if (i==e) {
n->set_final(lhs,p);
break;
}
n=n->build(*i,p);
+ SHOWM2(DBUILDTRIE,"PrefixTrie built",*i,*n);
}
// root.build(lhs,r.p)->build(r.rhs,r.p);
}
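
operator()(int ri) threads one rule at a time through the trie: an lhs edge from the root, then one edge per RHS symbol, and finally set_final records the completed rule's lhs and probability. A stripped-down sketch of the same insertion loop on a hypothetical node type (PrefixTrieNode's real build also merges probabilities, and ownership/cleanup via destroy_children is omitted here):

```cpp
#include <cstddef>
#include <map>
#include <vector>

struct Node {
  std::map<int, Node*> kids;   // symbol -> child (operator[] null-inits)
  bool is_final; int final_lhs; double final_p;
  Node() : is_final(false), final_lhs(-1), final_p(0) {}
  Node* build(int sym) {       // find-or-create the child for one symbol
    Node*& c = kids[sym];
    if (!c) c = new Node();
    return c;
  }
  void set_final(int lhs, double p) { is_final = true; final_lhs = lhs; final_p = p; }
};

// Insert one rule lhs -> rhs[0..n) with probability p, as operator()(int ri) does.
void insert_rule(Node& root, int lhs, std::vector<int> const& rhs, double p) {
  Node* n = root.build(lhs);                 // lhs edge first
  for (std::size_t i = 0; i < rhs.size(); ++i)
    n = n->build(rhs[i]);                    // then one edge per RHS symbol
  n->set_final(lhs, p);                      // final edge marks a complete rule
}
```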
@@ -390,8 +433,7 @@ struct ItemKey {
}
}
NTHandle lhs() const { return dot->lhs; }
- typedef ItemKey self_type;
- SELF_TYPE_PRINT
+ PRINT_SELF(ItemKey)
};
inline ItemHash hash_value(ItemKey const& x) {
return x.hash();
@@ -424,8 +466,7 @@ struct Item : ItemPrio,ItemKey {
o<<" next="<<next;
o<< ']';
}
- typedef Item self_type;
- SELF_TYPE_PRINT
+ PRINT_SELF(Item)
};
struct GetItemKey {
@@ -553,6 +594,8 @@ template <class F>
void ApplyFsa<F>::ApplyEarley()
{
hgcfg.GiveCFG(cfg);
+ print_cfg=&cfg;
+ print_fsa=&fsa;
Chart<F> chart(cfg,smeta,fsa);
// don't need to uniq - option to do that already exists in cfg_options
//TODO:
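
Several classes in this file swap the two-line `typedef X self_type; SELF_TYPE_PRINT` pattern for a single PRINT_SELF(X). The macro's definition is outside this diff; a plausible reconstruction, assuming it wires a class's print(std::ostream&) method to operator<<, is:

```cpp
#include <iostream>

// Hypothetical reconstruction of the PRINT_SELF idiom: any class with a
// print(std::ostream&) const member gains a stream operator via one macro.
#define PRINT_SELF(self_type)                                                  \
  friend inline std::ostream& operator<<(std::ostream& o, self_type const& x) { \
    x.print(o);                                                                \
    return o;                                                                  \
  }

struct Point {
  int x, y;
  void print(std::ostream& o) const { o << '(' << x << ',' << y << ')'; }
  PRINT_SELF(Point)
};
```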
diff --git a/decoder/cfg.h b/decoder/cfg.h
index 79ae6f33..95cb5fd7 100755
--- a/decoder/cfg.h
+++ b/decoder/cfg.h
@@ -1,6 +1,8 @@
#ifndef CDEC_CFG_H
#define CDEC_CFG_H
+#define DVISITRULEID(x)
+
// for now, debug means remembering and printing the TRule behind each CFG rule
#ifndef CFG_DEBUG
# define CFG_DEBUG 1
@@ -133,6 +135,10 @@ struct CFG {
swap(f,o.f);
IF_CFG_TRULE(swap(rule,o.rule);)
}
+ friend inline void swap(Rule &a,Rule &b) {
+ a.Swap(b);
+ }
+
template<class V>
void visit_rhs_nts(V &v) const {
for (RHS::const_iterator i=rhs.begin(),e=rhs.end();i!=e;++i) {
@@ -195,6 +201,9 @@ struct CFG {
swap(ruleids,o.ruleids);
swap(from,o.from);
}
+ friend inline void swap(NT &a,NT &b) {
+ a.Swap(b);
+ }
};
CFG() : hg_() { uninit=true; }
@@ -294,14 +303,19 @@ struct CFG {
// call after rules are indexed.
template <class V>
void VisitRuleIds(V &v) {
- for (int i=0,e=nts.size();i<e;++i)
- for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.begin();j!=jj;++j)
+ for (int i=0,e=nts.size();i<e;++i) {
+ SHOWM(DVISITRULEID,"VisitRuleIds nt",i);
+ for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.end();j!=jj;++j) {
+ SHOWM2(DVISITRULEID,"VisitRuleIds",i,*j);
v(*j);
+ }
+ }
+
}
template <class V>
void VisitRuleIds(V const& v) {
for (int i=0,e=nts.size();i<e;++i)
- for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.begin();j!=jj;++j)
+ for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.end();j!=jj;++j)
v(*j);
}
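
The functional fix in this hunk is easy to miss among the debug macros: the inner loop's end iterator was initialized with begin() instead of end(), so the range was empty and v(*j) never ran for any rule id. The bug class in miniature:

```cpp
#include <vector>

int count_visited(std::vector<int> const& c) {
  int n = 0;
  // Buggy original: jj = c.begin() makes j != jj false immediately.
  //   for (std::vector<int>::const_iterator j=c.begin(), jj=c.begin(); j!=jj; ++j) ++n;
  for (std::vector<int>::const_iterator j = c.begin(), jj = c.end(); j != jj; ++j)
    ++n;  // with jj = end(), every element is actually visited
  return n;
}
```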
@@ -351,7 +365,6 @@ inline std::size_t hash_value(CFG::NT const& r) {
return r.hash_impl();
}
-
inline void swap(CFG &a,CFG &b) {
a.Swap(b);
}
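
The new friend swap overloads for Rule and NT (and the unqualified `swap(to,cfg)` in hg_cfg.h below) follow the standard ADL-swap idiom: generic code writes `using std::swap; swap(a,b);`, and argument-dependent lookup selects the member-delegating overload, while types without one fall back to std::swap's copy-based version. A minimal sketch:

```cpp
#include <algorithm>  // std::swap
#include <vector>

struct Rule {
  std::vector<int> rhs;
  void Swap(Rule& o) { rhs.swap(o.rhs); }               // O(1) pointer swap
  friend inline void swap(Rule& a, Rule& b) { a.Swap(b); }
};

template <class T>
void generic_swap(T& a, T& b) {
  using std::swap;  // fallback for types without their own swap
  swap(a, b);       // ADL picks Rule's friend swap here, avoiding three copies
}
```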
diff --git a/decoder/hg_cfg.h b/decoder/hg_cfg.h
index ba936990..b90aca47 100755
--- a/decoder/hg_cfg.h
+++ b/decoder/hg_cfg.h
@@ -7,8 +7,12 @@ class Hypergraph;
// in case you might want the CFG whether or not you apply FSA models:
struct HgCFG {
- HgCFG(Hypergraph const& ih) : ih(ih) {
+ void set_defaults() {
have_cfg=binarized=have_features=uniqed=false;
+ want_features=true;
+ }
+ HgCFG(Hypergraph const& ih) : ih(ih) {
+ set_defaults();
}
Hypergraph const& ih;
CFG cfg;
@@ -32,11 +36,14 @@ struct HgCFG {
if (!have_cfg)
InitCFG(to);
else {
+ cfg.VisitRuleIds(*this);
have_cfg=false;
to.Clear();
- to.Swap(cfg);
+ swap(to,cfg);
}
}
+ void operator()(int ri) const {
+ }
CFG const& GetCFG() const {
assert(have_cfg);
return cfg;
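
GiveCFG's change from `to.Swap(cfg)` to the unqualified `swap(to,cfg)` picks up the friend overload added in cfg.h; the overall shape is move-semantics-by-swap in pre-C++11 style: clear the destination, swap contents in O(1), and invalidate the cached copy. Sketched on a hypothetical holder:

```cpp
#include <vector>

struct MiniCFG {
  std::vector<int> rules;
  void Swap(MiniCFG& o) { rules.swap(o.rules); }
};
inline void swap(MiniCFG& a, MiniCFG& b) { a.Swap(b); }

struct Holder {
  MiniCFG cfg;
  bool have_cfg;
  Holder() : have_cfg(true) {}
  // Hand the cached object to the caller without copying: clear the
  // destination, swap contents in O(1), and mark our copy invalid.
  void Give(MiniCFG& to) {
    have_cfg = false;
    to.rules.clear();
    swap(to, cfg);   // unqualified call: ADL finds the overload above
  }
};
```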
diff --git a/utils/d_ary_heap.h b/utils/d_ary_heap.h
index da99902c..49e040d8 100644
--- a/utils/d_ary_heap.h
+++ b/utils/d_ary_heap.h
@@ -130,7 +130,7 @@
Equal const& equal = Equal()
)
: better(better), data(), distance(distance),
- index_in_heap(index_in_heap) {
+ index_in_heap(index_in_heap),equal(equal) {
data.reserve(container_reserve);
}
/* Implicit copy constructor */
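
The d_ary_heap fix is a classic initializer-list omission: the constructor took an `equal` parameter but never stored it, so the member kept its default-constructed (or, for a POD comparator, indeterminate) value. In miniature:

```cpp
struct Heap {
  int distance;
  int equal;
  // Buggy: the 'equal' parameter is silently dropped.
  //   Heap(int distance, int equal) : distance(distance) {}
  // Fixed: every stored parameter appears in the initializer list.
  Heap(int distance, int equal) : distance(distance), equal(equal) {}
};
```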
diff --git a/utils/value_array.h b/utils/value_array.h
index f1cc2a84..3a8b3292 100755
--- a/utils/value_array.h
+++ b/utils/value_array.h
@@ -3,6 +3,8 @@
//TODO: option for non-constructed version (type_traits pod?), option for small array optimization (if sz < N, store inline in union, see small_vector.h)
+#define DBVALUEARRAY(x) x
+
#include <cstdlib>
#include <algorithm>
#include <new>
@@ -83,6 +85,7 @@ public:
protected:
void destroy()
{
+ if (!array) return;
// it's cool that this destroys in reverse order of construction, but why bother?
for (size_type i = sz; i != 0;)
A::destroy(array + --i);
@@ -92,7 +95,7 @@ protected:
sz=0;
}
void alloc(size_type s) {
- array=A::allocate(sz);
+ array = s==0 ? 0 : A::allocate(s);
sz=s;
}
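
Both guards here handle the empty array uniformly: destroy() bails out when nothing was allocated, and alloc() records a null pointer for size zero rather than requesting a zero-length block (the allocation size must be the argument s, since the member sz still holds the previous size at that point). A condensed sketch of the pair, folding deallocation into destroy() for brevity:

```cpp
#include <cstddef>
#include <memory>

template <class T, class A = std::allocator<T> >
struct RawBuf {
  T* array; std::size_t sz; A a;
  RawBuf() : array(0), sz(0) {}
  void destroy() {
    if (!array) return;                 // nothing was ever allocated
    for (std::size_t i = sz; i != 0;)
      array[--i].~T();                  // destroy in reverse order
    a.deallocate(array, sz);
    array = 0; sz = 0;
  }
  void alloc(std::size_t s) {
    array = (s == 0) ? 0 : a.allocate(s);  // null pointer marks "empty"
    sz = s;
  }
};
```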
@@ -103,6 +106,16 @@ protected:
}
}
+ void reinit_noconstruct(size_type s) {
+ destroy();
+ realloc(s);
+ }
+
+ template <class C,class F>
+ inline void init_map(C & c,F const& f) {
+ alloc(c.size());
+ copy_construct_map(c.begin(),c.end(),array,f);
+ }
template <class C,class F>
inline void init_map(C const& c,F const& f) {
alloc(c.size());
@@ -119,22 +132,44 @@ protected:
alloc(std::distance(itr,end));
copy_construct(itr,end,array);
}
- inline void init(size_type s, const_reference t = T()) {
+ inline void fill(const_reference t) {
+ for (T *i=array,*e=array+sz;i!=e;++i)
+ new(i) T(t);
+ }
+ inline void fill() {
+ for (T *i=array,*e=array+sz;i!=e;++i)
+ new(i) T();
+ }
+
+ inline void init(size_type s) {
+ sz=s;
+ array=s ? A::allocate(s) : 0;
+ fill();
+ }
+ inline void init(size_type s, const_reference t) {
sz=s;
array=s ? A::allocate(s) : 0;
- for (size_type i = 0; i != sz; ++i) { A::construct(array + i,t); }
+ fill(t);
}
public:
- explicit ValueArray(size_type s, const_reference t = T())
+ ValueArray(size_type s, const_reference t)
{
init(s,t);
}
- void reinit(size_type s, const_reference t = T()) {
- clear();
- init(s,t);
+ explicit ValueArray(size_type s)
+ {
+ init(s);
+ }
+ void reinit(size_type s, const_reference t) {
+ reinit_noconstruct(s);
+ fill(t);
+ }
+ void reinit(size_type s) {
+ reinit_noconstruct(s);
+ fill();
}
- //copy any existing data like std::vector. not A::construct exception safe. try blah blah?
+ //copy any existing data like std::vector. not A::construct exception safe. try blah blah? swap?
void resize(size_type s, const_reference t = T()) {
pointer na=A::allocate(s);
size_type nc=s<sz ? s : sz;
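
Splitting the old `init(s, t = T())` into fill()/fill(t) plus separate init overloads avoids constructing a temporary T() just to copy from it when no fill value is given, and lets reinit build elements directly in raw storage with placement new. A minimal sketch of the two fills:

```cpp
#include <cstddef>
#include <new>      // placement new

// Construct every element of raw storage [array, array+sz) in place.
template <class T>
void fill_default(T* array, std::size_t sz) {
  for (T *i = array, *e = array + sz; i != e; ++i)
    new (i) T();        // default-construct, no temporary involved
}

template <class T>
void fill_copy(T* array, std::size_t sz, T const& t) {
  for (T *i = array, *e = array + sz; i != e; ++i)
    new (i) T(t);       // copy-construct from the fill value
}
```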
@@ -150,20 +185,27 @@ public:
template <class I>
void reinit(I itr, I end) {
- clear();
- init_range(itr,end);
+ reinit_noconstruct(std::distance(itr,end));
+ copy_construct(itr,end,array);
}
template <class I,class F>
- void reinit_map(I itr,I end,F const& map) {
- clear();
- init_range_map(itr,end,map);
+ void reinit_map(I itr,I end,F const& f) {
+ reinit_noconstruct(std::distance(itr,end));
+ copy_construct_map(itr,end,array,f);
}
+
// warning: std::distance is likely slow on maps/lists (anything other than random-access containers), so the container version below using size() will be better
template <class C,class F>
- void reinit_map(C const& c,F const& map) {
- clear();
- init_map(c,map);
+ void reinit_map(C const& c,F const& f) {
+ reinit_noconstruct(c.size());
+ copy_construct_map(c.begin(),c.end(),array,f);
+ }
+
+ template <class C,class F>
+ void reinit_map(C & c,F const& f) {
+ reinit_noconstruct(c.size());
+ copy_construct_map(c.begin(),c.end(),array,f);
}
template <class I>
@@ -260,16 +302,22 @@ public:
private:
- template <class I1, class I2>
- void copy_construct(I1 itr, I1 end, I2 into)
+ template <class I1>
+ void copy_construct(I1 itr, I1 end, T *into)
{
for (; itr != end; ++itr, ++into) A::construct(into,*itr);
}
- template <class I1, class I2,class F>
- void copy_construct_map(I1 itr, I1 end, I2 into,F const& f)
+ //TODO: valgrind doesn't think this works.
+ template <class I1,class F>
+ void copy_construct_map(I1 itr, I1 end, T *into,F const& f)
{
- for (; itr != end; ++itr, ++into) A::construct(into,f(*itr));
+ while ( itr != end) {
+ DBVALUEARRAY(assert(into<array+sz));
+ A::construct(into,f(*itr));
+ ++itr;++into;
+ }
+
}
//friend class boost::serialization::access;
public:
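
Tying the utils change back to the decoder: with these overloads, PrefixTrieNode::done_building above could call adj.reinit_map(edge_for,*this) directly (the diff temporarily hand-rolls that loop under its #if 0/#else while debugging). A usage sketch under this patch's API, where the pair-second extractor is hypothetical and ValueArray(0) uses the explicit size constructor shown above:

```cpp
#include <map>
#include <string>
#include "utils/value_array.h"  // the class this patch modifies

// Hypothetical functor projecting a map entry onto its mapped value.
struct second_of {
  template <class P>
  std::string const& operator()(P const& p) const { return p.second; }
};

int main() {
  std::map<int, std::string> edge_for;
  edge_for[1] = "a";
  edge_for[2] = "b";
  ValueArray<std::string> adj(0);          // explicit size ctor from this diff
  adj.reinit_map(edge_for, second_of());   // construct values in place, in key order
  return 0;
}
```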