diff options
author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-08-27 23:52:20 +0000 |
---|---|---|
committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-08-27 23:52:20 +0000 |
commit | 8ff664a63254c2e64dc296caffc3a0261e2473af (patch) | |
tree | b8b3d8ab07b6fc5228ff3104bc2339efbbf99d70 | |
parent | 1eab70e16f0e0d5531f3babfea2062c82f6362e1 (diff) |
still compiles
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@627 ec762483-ff6d-05da-a07a-a48fb63a330f
-rwxr-xr-x | decoder/apply_fsa_models.cc | 63 | ||||
-rwxr-xr-x | decoder/cfg.h | 21 | ||||
-rwxr-xr-x | decoder/hg_cfg.h | 11 | ||||
-rw-r--r-- | utils/d_ary_heap.h | 2 | ||||
-rwxr-xr-x | utils/value_array.h | 90 |
5 files changed, 149 insertions, 38 deletions
diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc index 5493d29f..fd3e1289 100755 --- a/decoder/apply_fsa_models.cc +++ b/decoder/apply_fsa_models.cc @@ -24,6 +24,8 @@ #define DPFSA(x) x //prefix trie +#define DBUILDTRIE(x) x + #define PRINT_PREFIX 1 #if PRINT_PREFIX # define IF_PRINT_PREFIX(x) x @@ -146,6 +148,12 @@ struct PrefixTrieNode { return ret; } + unsigned size() const { + unsigned a=adj.size(); + unsigned e=edge_for.size(); + return a>e?a:e; + } + void print_back_str(std::ostream &o) const { BPs back=back_vec(); unsigned i=back.size(); @@ -201,6 +209,7 @@ public: m.clear(); for (int i=0;i<adj.size();++i) { PrefixTrieEdge const& e=adj[i]; + SHOWM2(DPFSA,"index_adj",i,e); m[e.w]=e; } } @@ -211,27 +220,49 @@ public: // assert(e.p.is_1()); // actually, after done_building, e will have telescoped dest->p/p. NTHandle n=e.w; assert(n>=0); + SHOWM3(DPFSA,"index_lhs",i,e,n); v[n]=e.dest; } } + template <class PV> + void done_root(PV &v) { + done_building_r(); +// index_adj(); // we want an index for the root node?. don't think so - index_lhs handles it. + index_lhs(v); + } + // call only once. void done_building_r() { done_building(); for (int i=0;i<adj.size();++i) - adj[i].dest->done_building_r(); + if (adj[i].dest) // skip final edge + adj[i].dest->done_building_r(); } // for done_building; compute incremental (telescoped) edge p - PrefixTrieEdge const& operator()(PrefixTrieEdgeFor::value_type const& pair) const { - PrefixTrieEdge &e=const_cast<PrefixTrieEdge&>(pair.second); + PrefixTrieEdge /*const&*/ operator()(PrefixTrieEdgeFor::value_type & pair) const { + PrefixTrieEdge &e=pair.second;//const_cast<PrefixTrieEdge&>(pair.second); e.p=(e.dest->p)/p; return e; } // call only once. void done_building() { + SHOWM3(DBUILDTRIE,"done_building",edge_for.size(),adj.size(),1); +#if 0 adj.reinit_map(edge_for,*this); +#else + adj.reinit(edge_for.size()); + Adj::iterator o=adj.begin(); + for (PrefixTrieEdgeFor::iterator i=edge_for.begin(),e=edge_for.end();i!=e;++i) { + SHOWM3(DBUILDTRIE,"edge_for",o-adj.begin(),i->first,i->second); + PrefixTrieEdge &edge=i->second; + edge.p=(edge.dest->p)/p; + *o++=edge; +// (*this)(*i); + } +#endif // if (final) p_final/=p; std::sort(adj.begin(),adj.end()); //TODO: store adjacent differences on edges (compared to @@ -294,12 +325,22 @@ public: ~PrefixTrieNode() { destroy_children(); } + void print(std::ostream &o) const { + o << lhs << "->" << p; + o << ',' << size() << ','; + print_back_str(o); + } + PRINT_SELF(PrefixTrieNode) }; //Trie starts with lhs (nonneg index), then continues w/ rhs (mixed >0 word, else NT) // trie ends with final edge, which points to a per-lhs prefix node struct PrefixTrie { + void print(std::ostream &o) const { + o << cfgp << ' ' << root; + } + PRINT_SELF(PrefixTrie); CFG *cfgp; Rules const* rulesp; Rules const& rules() const { return *rulesp; } @@ -312,10 +353,9 @@ struct PrefixTrie { PrefixTrie(CFG &cfg) : cfgp(&cfg),rulesp(&cfg.rules),lhs2(cfg.nts.size(),0),lhs2complete(cfg.nts.size()) { // cfg.SortLocalBestFirst(); // instead we'll sort in done_building_r print_cfg=cfgp; + SHOWM2(DBUILDTRIE,"PrefixTrie()",rulesp->size(),lhs2.size()); cfg.VisitRuleIds(*this); - root.done_building_r(); - root.index_adj(); // maybe the index we use for building trie should be replaced by a much larger/faster table since we look up by lhs many times in parsing? - root.index_lhs(lhs2); + root.done_root(lhs2); } void operator()(int ri) const { @@ -323,12 +363,15 @@ struct PrefixTrie { NTHandle lhs=r.lhs; best_t p=r.p; NodeP n=const_cast<PrefixTrieNode&>(root).build_lhs(lhs,p); + SHOWM3(DBUILDTRIE,"Prefixtrie rule id",ri,root,p); for (RHS::const_iterator i=r.rhs.begin(),e=r.rhs.end();;++i) { + SHOWM2(DBUILDTRIE,"PrefixTrie build or final",i-r.rhs.begin(),*n); if (i==e) { n->set_final(lhs,p); break; } n=n->build(*i,p); + SHOWM2(DBUILDTRIE,"PrefixTrie built",*i,*n); } // root.build(lhs,r.p)->build(r.rhs,r.p); } @@ -390,8 +433,7 @@ struct ItemKey { } } NTHandle lhs() const { return dot->lhs; } - typedef ItemKey self_type; - SELF_TYPE_PRINT + PRINT_SELF(ItemKey) }; inline ItemHash hash_value(ItemKey const& x) { return x.hash(); @@ -424,8 +466,7 @@ struct Item : ItemPrio,ItemKey { o<<" next="<<next; o<< ']'; } - typedef Item self_type; - SELF_TYPE_PRINT + PRINT_SELF(Item) }; struct GetItemKey { @@ -553,6 +594,8 @@ template <class F> void ApplyFsa<F>::ApplyEarley() { hgcfg.GiveCFG(cfg); + print_cfg=&cfg; + print_fsa=&fsa; Chart<F> chart(cfg,smeta,fsa); // don't need to uniq - option to do that already exists in cfg_options //TODO: diff --git a/decoder/cfg.h b/decoder/cfg.h index 79ae6f33..95cb5fd7 100755 --- a/decoder/cfg.h +++ b/decoder/cfg.h @@ -1,6 +1,8 @@ #ifndef CDEC_CFG_H #define CDEC_CFG_H +#define DVISITRULEID(x) + // for now, debug means remembering and printing the TRule behind each CFG rule #ifndef CFG_DEBUG # define CFG_DEBUG 1 @@ -133,6 +135,10 @@ struct CFG { swap(f,o.f); IF_CFG_TRULE(swap(rule,o.rule);) } + friend inline void swap(Rule &a,Rule &b) { + a.Swap(b); + } + template<class V> void visit_rhs_nts(V &v) const { for (RHS::const_iterator i=rhs.begin(),e=rhs.end();i!=e;++i) { @@ -195,6 +201,9 @@ struct CFG { swap(ruleids,o.ruleids); swap(from,o.from); } + friend inline void swap(NT &a,NT &b) { + a.Swap(b); + } }; CFG() : hg_() { uninit=true; } @@ -294,14 +303,19 @@ struct CFG { // call after rules are indexed. template <class V> void VisitRuleIds(V &v) { - for (int i=0,e=nts.size();i<e;++i) - for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.begin();j!=jj;++j) + for (int i=0,e=nts.size();i<e;++i) { + SHOWM(DVISITRULEID,"VisitRuleIds nt",i); + for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.end();j!=jj;++j) { + SHOWM2(DVISITRULEID,"VisitRuleIds",i,*j); v(*j); + } + } + } template <class V> void VisitRuleIds(V const& v) { for (int i=0,e=nts.size();i<e;++i) - for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.begin();j!=jj;++j) + for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.end();j!=jj;++j) v(*j); } @@ -351,7 +365,6 @@ inline std::size_t hash_value(CFG::NT const& r) { return r.hash_impl(); } - inline void swap(CFG &a,CFG &b) { a.Swap(b); } diff --git a/decoder/hg_cfg.h b/decoder/hg_cfg.h index ba936990..b90aca47 100755 --- a/decoder/hg_cfg.h +++ b/decoder/hg_cfg.h @@ -7,8 +7,12 @@ class Hypergraph; // in case you might want the CFG whether or not you apply FSA models: struct HgCFG { - HgCFG(Hypergraph const& ih) : ih(ih) { + void set_defaults() { have_cfg=binarized=have_features=uniqed=false; + want_features=true; + } + HgCFG(Hypergraph const& ih) : ih(ih) { + set_defaults(); } Hypergraph const& ih; CFG cfg; @@ -32,11 +36,14 @@ struct HgCFG { if (!have_cfg) InitCFG(to); else { + cfg.VisitRuleIds(*this); have_cfg=false; to.Clear(); - to.Swap(cfg); + swap(to,cfg); } } + void operator()(int ri) const { + } CFG const& GetCFG() const { assert(have_cfg); return cfg; diff --git a/utils/d_ary_heap.h b/utils/d_ary_heap.h index da99902c..49e040d8 100644 --- a/utils/d_ary_heap.h +++ b/utils/d_ary_heap.h @@ -130,7 +130,7 @@ Equal const& equal = Equal() ) : better(better), data(), distance(distance), - index_in_heap(index_in_heap) { + index_in_heap(index_in_heap),equal(equal) { data.reserve(container_reserve); } /* Implicit copy constructor */ diff --git a/utils/value_array.h b/utils/value_array.h index f1cc2a84..3a8b3292 100755 --- a/utils/value_array.h +++ b/utils/value_array.h @@ -3,6 +3,8 @@ //TODO: option for non-constructed version (type_traits pod?), option for small array optimization (if sz < N, store inline in union, see small_vector.h) +#define DBVALUEARRAY(x) x + #include <cstdlib> #include <algorithm> #include <new> @@ -83,6 +85,7 @@ public: protected: void destroy() { + if (!array) return; // it's cool that this destroys in reverse order of construction, but why bother? for (size_type i = sz; i != 0;) A::destroy(array + --i); @@ -92,7 +95,7 @@ protected: sz=0; } void alloc(size_type s) { - array=A::allocate(sz); + array = s==0 ? 0 : A::allocate(sz); sz=s; } @@ -103,6 +106,16 @@ protected: } } + void reinit_noconstruct(size_type s) { + destroy(); + realloc(s); + } + + template <class C,class F> + inline void init_map(C & c,F const& f) { + alloc(c.size()); + copy_construct_map(c.begin(),c.end(),array,f); + } template <class C,class F> inline void init_map(C const& c,F const& f) { alloc(c.size()); @@ -119,22 +132,44 @@ protected: alloc(std::distance(itr,end)); copy_construct(itr,end,array); } - inline void init(size_type s, const_reference t = T()) { + inline void fill(const_reference t) { + for (T *i=array,*e=array+sz;i!=e;++i) + new(i) T(t); + } + inline void fill() { + for (T *i=array,*e=array+sz;i!=e;++i) + new(i) T(); + } + + inline void init(size_type s) { + sz=s; + array=s ? A::allocate(s) : 0; + fill(); + } + inline void init(size_type s, const_reference t) { sz=s; array=s ? A::allocate(s) : 0; - for (size_type i = 0; i != sz; ++i) { A::construct(array + i,t); } + fill(t); } public: - explicit ValueArray(size_type s, const_reference t = T()) + ValueArray(size_type s, const_reference t) { init(s,t); } - void reinit(size_type s, const_reference t = T()) { - clear(); - init(s,t); + explicit ValueArray(size_type s) + { + init(s); + } + void reinit(size_type s, const_reference t) { + reinit_noconstruct(s); + fill(t); + } + void reinit(size_type s) { + reinit_noconstruct(s); + fill(); } - //copy any existing data like std::vector. not A::construct exception safe. try blah blah? + //copy any existing data like std::vector. not A::construct exception safe. try blah blah? swap? void resize(size_type s, const_reference t = T()) { pointer na=A::allocate(s); size_type nc=s<sz ? s : sz; @@ -150,20 +185,27 @@ public: template <class I> void reinit(I itr, I end) { - clear(); - init_range(itr,end); + reinit_noconstruct(std::distance(itr,end)); + copy_construct(itr,end,array); } template <class I,class F> - void reinit_map(I itr,I end,F const& map) { - clear(); - init_range_map(itr,end,map); + void reinit_map(I itr,I end,F const& f) { + reinit_noconstruct(std::distance(itr,end)); + copy_construct_map(itr,end,array,f); } + // warning: std::distance is likely slow on maps,lists (anything other than random access containers. so container version below using size() will be better template <class C,class F> - void reinit_map(C const& c,F const& map) { - clear(); - init_map(c,map); + void reinit_map(C const& c,F const& f) { + reinit_noconstruct(c.size()); + copy_construct_map(c.begin(),c.end(),array,f); + } + + template <class C,class F> + void reinit_map(C & c,F const& f) { + reinit_noconstruct(c.size()); + copy_construct_map(c.begin(),c.end(),array,f); } template <class I> @@ -260,16 +302,22 @@ public: private: - template <class I1, class I2> - void copy_construct(I1 itr, I1 end, I2 into) + template <class I1> + void copy_construct(I1 itr, I1 end, T *into) { for (; itr != end; ++itr, ++into) A::construct(into,*itr); } - template <class I1, class I2,class F> - void copy_construct_map(I1 itr, I1 end, I2 into,F const& f) + //TODO: valgrind doesn't think this works. + template <class I1,class F> + void copy_construct_map(I1 itr, I1 end, T *into,F const& f) { - for (; itr != end; ++itr, ++into) A::construct(into,f(*itr)); + while ( itr != end) { + DBVALUEARRAY(assert(into<array+sz)); + A::construct(into,f(*itr)); + ++itr;++into; + } + } //friend class boost::serialization::access; public: |