From 171027795ba3a01ba2ed82d7036610ac397e1fe8 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 14 Oct 2011 11:51:12 +0100 Subject: remove FSA integration code. will have to be resurrected another day --- decoder/apply_fsa_models.cc | 798 -------------------------------------------- 1 file changed, 798 deletions(-) delete mode 100755 decoder/apply_fsa_models.cc (limited to 'decoder/apply_fsa_models.cc') diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc deleted file mode 100755 index 3e93cadd..00000000 --- a/decoder/apply_fsa_models.cc +++ /dev/null @@ -1,798 +0,0 @@ -//see apply_fsa_models.README for notes on the l2r earley fsa+cfg intersection -//implementation in this file (also some comments in this file) -#define SAFE_VALGRIND 1 - -#include "apply_fsa_models.h" -#include -#include -#include -#include - -#include "writer.h" -#include "hg.h" -#include "ff_fsa_dynamic.h" -#include "ff_from_fsa.h" -#include "feature_vector.h" -#include "stringlib.h" -#include "apply_models.h" -#include "cfg.h" -#include "hg_cfg.h" -#include "utoa.h" -#include "hash.h" -#include "value_array.h" -#include "d_ary_heap.h" -#include "agenda.h" -#include "show.h" -#include "string_to.h" - - -#define DFSA(x) x -//fsa earley chart - -#define DPFSA(x) x -//prefix trie - -#define DBUILDTRIE(x) - -#define PRINT_PREFIX 1 -#if PRINT_PREFIX -# define IF_PRINT_PREFIX(x) x -#else -# define IF_PRINT_PREFIX(x) -#endif -// keep backpointers in prefix trie so you can print a meaningful node id - -static const unsigned FSA_AGENDA_RESERVE=10; // TODO: increase to 1<<24 (16M) - -using namespace std; - -//impl details (not exported). flat namespace for my ease. - -typedef CFG::RHS RHS; -typedef CFG::BinRhs BinRhs; -typedef CFG::NTs NTs; -typedef CFG::NT NT; -typedef CFG::NTHandle NTHandle; -typedef CFG::Rules Rules; -typedef CFG::Rule Rule; -typedef CFG::RuleHandle RuleHandle; - -namespace { - -/* - -1) A -> x . * (trie) - -this is somewhat nice. cost pushed for best first, of course. similar benefit as left-branching binarization without the explicit predict/complete steps? - -vs. just - -2) * -> x . y - -here you have to potentially list out all A -> . x y as items * -> . x y immediately, and shared rhs seqs won't be shared except at the usual single-NT predict/complete. of course, the prediction of items -> . x y can occur lazy best-first. - -vs. - -3) * -> x . * - -with 3, we predict all sorts of useless items - that won't give us our goal A and may not partcipate in any parse. this is not a good option at all. - -I'm using option 1. -*/ - -// if we don't greedy-binarize, we want to encode recognized prefixes p (X -> p . rest) efficiently. if we're doing this, we may as well also push costs so we can best-first select rules in a lazy fashion. this is effectively left-branching binarization, of course. - -template -struct fsa_map_type { - typedef std::map type; // change to HASH_MAP ? -}; -//template typedef - and macro to make it less painful -#define FSA_MAP(k,v) fsa_map_type >::type - -struct PrefixTrieNode; -typedef PrefixTrieNode *NodeP; -typedef PrefixTrieNode const *NodePc; - -// for debugging prints only -struct TrieBackP { - WordID w; - NodePc from; - TrieBackP(WordID w=0,NodePc from=0) : w(w),from(from) { } -}; - -FsaFeatureFunction const* print_fsa=0; -CFG const* print_cfg=0; -inline ostream& print_cfg_rhs(std::ostream &o,WordID w,CFG const*pcfg=print_cfg) { - if (pcfg) - pcfg->print_rhs_name(o,w); - else - CFG::static_print_rhs_name(o,w); - return o; -} - -inline std::string nt_name(WordID n,CFG const*pcfg=print_cfg) { - if (pcfg) return pcfg->nt_name(n); - return CFG::static_nt_name(n); -} - -template -ostream& print_by_nt(std::ostream &o,V const& v,CFG const*pcfg=print_cfg,char const* header="\nNT -> X\n") { - o< "< -ostream& print_map_by_nt(std::ostream &o,V const& v,CFG const*pcfg=print_cfg,char const* header="\nNT -> X\n") { - o<first,pcfg) << " -> "<second<<"\n"; - } - return o; -} - -struct PrefixTrieEdge { - PrefixTrieEdge() - // : dest(0),w(TD::max_wordid) - {} - PrefixTrieEdge(WordID w,NodeP dest) - : dest(dest),w(w) - {} -// explicit PrefixTrieEdge(best_t p) : p(p),dest(0) { } - - best_t p;// viterbi additional prob, i.e. product over path incl. p_final = total rule prob. note: for final edge, set this. - //DPFSA() - // we can probably just store deltas, but for debugging remember the full p - // best_t delta; // - NodeP dest; - bool is_final() const { return dest==0; } - best_t p_dest() const; - WordID w; // for root and and is_final(), this will be (negated) NTHandle. - - // for sorting most probable first in adj; actually >(p) - inline bool operator <(PrefixTrieEdge const& o) const { - return o.p"< BPs; - void back_vec(BPs &ns) const { - IF_PRINT_PREFIX(if(backp.from) { ns.push_back(backp); backp.from->back_vec(ns); }) - } - - BPs back_vec() const { - BPs ret; - back_vec(ret); - return ret; - } - - unsigned size() const { - unsigned a=adj.size(); - unsigned e=edge_for.size(); - return a>e?a:e; - } - - void print_back_str(std::ostream &o) const { - BPs back=back_vec(); - unsigned i=back.size(); - if (!i) { - o<<"PrefixTrieNode@"<<(uintptr_t)this; - return; - } - bool first=true; - while (i--<=0) { - if (!first) o<<','; - first=false; - WordID w=back[i].w; - print_cfg_rhs(o,w); - } - } - std::string back_str() const { - std::ostringstream o; - print_back_str(o); - return o.str(); - } - -// best_t p_final; // additional prob beyond what we already paid. while building, this is the total prob -// instead of storing final, we'll say that an edge with a NULL dest is a final edge. this way it gets sorted into the list of adj. - - // instead of completed map, we have trie start w/ lhs. - NTHandle lhs; // nonneg. - instead of storing this in Item. - IF_PRINT_PREFIX(BP backp;) - - enum { ROOT=-1 }; - explicit PrefixTrieNode(NTHandle lhs=ROOT,best_t p=1) : p(p),lhs(lhs),IF_PRINT_PREFIX(backp()) { - //final=false; - } - bool is_root() const { return lhs==ROOT; } // means adj are the nonneg lhs indices, and we have the index edge_for still available - - // outgoing edges will be ordered highest p to worst p - - typedef FSA_MAP(WordID,PrefixTrieEdge) PrefixTrieEdgeFor; -public: - PrefixTrieEdgeFor edge_for; //TODO: move builder elsewhere? then need 2nd hash or edge include pointer to builder. just clear this later - bool have_adj() const { - return adj.size()>=edge_for.size(); - } - bool no_adj() const { - return adj.empty(); - } - - void index_adj() { - index_adj(edge_for); - } - template - void index_adj(M &m) { - assert(have_adj()); - m.clear(); - for (int i=0;i - void index_lhs(PV &v) { - for (int i=0,e=adj.size();i!=e;++i) { - PrefixTrieEdge const& edge=adj[i]; - // assert(edge.p.is_1()); // actually, after done_building, e will have telescoped dest->p/p. - NTHandle n=-edge.w; - assert(n>=0); -// SHOWM3(DPFSA,"index_lhs",i,edge,n); - v[n]=edge.dest; - } - } - - template - void done_root(PV &v) { - assert(is_root()); - SHOWM1(DBUILDTRIE,"done_root",OSTRF1(print_map_by_nt,edge_for)); - done_building_r(); //sets adj - SHOWM1(DBUILDTRIE,"done_root",OSTRF1(print_by_nt,adj)); -// SHOWM1(DBUILDTRIE,done_root,adj); -// index_adj(); // we want an index for the root node?. don't think so - index_lhs handles it. also we stopped clearing edge_for. - index_lhs(v); // uses adj - } - - // call only once. - void done_building_r() { - done_building(); - for (int i=0;idone_building_r(); - } - - // for done_building; compute incremental (telescoped) edge p - PrefixTrieEdge /*const&*/ operator()(PrefixTrieEdgeFor::value_type & pair) const { - PrefixTrieEdge &e=pair.second;//const_cast(pair.second); - e.p=e.p_dest()/p; - return e; - } - - // call only once. - void done_building() { - SHOWM3(DBUILDTRIE,"done_building",edge_for.size(),adj.size(),1); -#if 1 - adj.reinit_map(edge_for,*this); -#else - adj.reinit(edge_for.size()); - SHOWM3(DBUILDTRIE,"done_building_reinit",edge_for.size(),adj.size(),2); - Adj::iterator o=adj.begin(); - for (PrefixTrieEdgeFor::iterator i=edge_for.begin(),e=edge_for.end();i!=e;++i) { - SHOWM3(DBUILDTRIE,"edge_for",o-adj.begin(),i->first,i->second); - PrefixTrieEdge &edge=i->second; - edge.p=(edge.dest->p)/p; - *o++=edge; -// (*this)(*i); - } -#endif - SHOWM1(DBUILDTRIE,"done building adj",prange(adj.begin(),adj.end(),true)); - assert(adj.size()==edge_for.size()); -// if (final) p_final/=p; - std::sort(adj.begin(),adj.end()); - //TODO: store adjacent differences on edges (compared to - } - - typedef ValueArray Adj; -// typedef vector Adj; - Adj adj; - - typedef WordID W; - - // let's compute p_min so that every rule reachable from the created node has p at least this low. - NodeP improve_edge(PrefixTrieEdge const& e,best_t rulep) { - NodeP d=e.dest; - maybe_improve(d->p,rulep); - return d; - } - - inline NodeP build(W w,best_t rulep) { - return build(lhs,w,rulep); - } - inline NodeP build_lhs(NTHandle n,best_t rulep) { - return build(n,-n,rulep); - } - - NodeP build(NTHandle lhs_,W w,best_t rulep) { - PrefixTrieEdgeFor::iterator i=edge_for.find(w); - if (i!=edge_for.end()) - return improve_edge(i->second,rulep); - NodeP r=new PrefixTrieNode(lhs_,rulep); - IF_PRINT_PREFIX(r->backp=BP(w,this)); -// edge_for.insert(i,PrefixTrieEdgeFor::value_type(w,PrefixTrieEdge(w,r))); - add(edge_for,w,PrefixTrieEdge(w,r)); - SHOWM4(DBUILDTRIE,"built node",this,w,*r,r); - return r; - } - - void set_final(NTHandle lhs_,best_t pf) { - assert(no_adj()); -// final=true; - PrefixTrieEdge &e=edge_for[null_wordid]; - e.p=pf; - e.dest=0; - e.w=lhs_; - maybe_improve(p,pf); - } - -private: - void destroy_children() { - assert(adj.size()>=edge_for.size()); - for (int i=0,e=adj.size();i" << p; - o << ',' << size() << ','; - print_back_str(o); - } - PRINT_SELF(PrefixTrieNode) -}; - -inline best_t PrefixTrieEdge::p_dest() const { - return dest ? dest->p : p; // for final edge, p was set (no sentinel node) -} - - -//Trie starts with lhs (nonneg index), then continues w/ rhs (mixed >0 word, else NT) -// trie ends with final edge, which points to a per-lhs prefix node -struct PrefixTrie { - void print(std::ostream &o) const { - o << cfgp << ' ' << root; - } - PRINT_SELF(PrefixTrie); - CFG *cfgp; - Rules const* rulesp; - Rules const& rules() const { return *rulesp; } - CFG const& cfg() const { return *cfgp; } - PrefixTrieNode root; - typedef std::vector LhsToTrie; // will have to check lhs2[lhs].p for best cost of some rule with that lhs, then use edge deltas after? they're just caching a very cheap computation, really - LhsToTrie lhs2; // no reason to use a map or hash table; every NT in the CFG will have some rule rhses. lhs_to_trie[i]=root.edge_for[i], i.e. we still have a root trie node conceptually, we just access through this since it's faster. - typedef LhsToTrie LhsToComplete; - LhsToComplete lhs2complete; // the sentinel "we're completing" node (dot at end) for that lhs. special case of suffix-set=same trie minimization (aka right branching binarization) // these will be used to track kbest completions, along with a l state (r state will be in the list) - PrefixTrie(CFG &cfg) : cfgp(&cfg),rulesp(&cfg.rules),lhs2(cfg.nts.size(),0),lhs2complete(cfg.nts.size()) { -// cfg.SortLocalBestFirst(); // instead we'll sort in done_building_r - print_cfg=cfgp; - SHOWM2(DBUILDTRIE,"PrefixTrie()",rulesp->size(),lhs2.size()); - cfg.VisitRuleIds(*this); - root.done_root(lhs2); - SHOWM3(DBUILDTRIE,"done w/ PrefixTrie: ",root,root.adj.size(),lhs2.size()); - DBUILDTRIE(print_by_nt(cerr,lhs2,cfgp)); - SHOWM1(DBUILDTRIE,"lhs2",OSTRF2(print_by_nt,lhs2,cfgp)); - } - - void operator()(int ri) { - Rule const& r=rules()[ri]; - NTHandle lhs=r.lhs; - best_t p=r.p; -// NodeP n=const_cast(root).build_lhs(lhs,p); - NodeP n=root.build_lhs(lhs,p); - SHOWM4(DBUILDTRIE,"Prefixtrie rule id, root",ri,root,p,*n); - for (RHS::const_iterator i=r.rhs.begin(),e=r.rhs.end();;++i) { - SHOWM2(DBUILDTRIE,"PrefixTrie build or final",i-r.rhs.begin(),*n); - if (i==e) { - n->set_final(lhs,p); - break; - } - n=n->build(*i,p); - SHOWM2(DBUILDTRIE,"PrefixTrie built",*i,*n); - } -// root.build(lhs,r.p)->build(r.rhs,r.p); - } - inline NodeP lhs2_ex(NTHandle n) const { - NodeP r=lhs2[n]; - if (!r) throw std::runtime_error("PrefixTrie: no CFG rule w/ lhs "+cfgp->nt_name(n)); - return r; - } -private: - PrefixTrie(PrefixTrie const& o); -}; - - - -typedef std::size_t ItemHash; - - -struct ItemKey { - explicit ItemKey(NodeP start,Bytes const& start_state) : dot(start),q(start_state),r(start_state) { } - explicit ItemKey(NodeP dot) : dot(dot) { } - NodeP dot; // dot is a function of the stuff already recognized, and gives a set of suffixes y to complete to finish a rhs for lhs() -> dot y. for a lhs A -> . *, this will point to lh2[A] - Bytes q,r; // (q->r are the fsa states; if r is empty it means - bool operator==(ItemKey const& o) const { - return dot==o.dot && q==o.q && r==o.r; - } - inline ItemHash hash() const { - ItemHash h=GOLDEN_MEAN_FRACTION*(ItemHash)(dot-NULL); // i.e. lower order bits of ptr are nonrandom - using namespace boost; - hash_combine(h,q); - hash_combine(h,r); - return h; - } - template - void print(O &o) const { - o<<"lhs="<print_back_str(o); - if (print_fsa) { - o<<'/'; - print_fsa->print_state(o,&q[0]); - o<<"->"; - print_fsa->print_state(o,&r[0]); - } - } - NTHandle lhs() const { return dot->lhs; } - PRINT_SELF(ItemKey) -}; -inline ItemHash hash_value(ItemKey const& x) { - return x.hash(); -} -ItemKey null_item((PrefixTrieNode*)0); - -struct Item; -typedef Item *ItemP; - -/* we use a single type of item so it can live in a single best-first queue. we hold them by pointer so they can have mutable state, e.g. priority/location, but also lists of predictions and kbest completions (i.e. completions[L,r] = L -> * (r,s), by 1best for each possible s. we may discover more s later. we could use different subtypes since we hold by pointer, but for now everything will be packed as variants of Item */ -#undef INIT_LOCATION -#if D_ARY_TRACK_OUT_OF_HEAP -# define INIT_LOCATION , location(D_ARY_HEAP_NULL_INDEX) -#elif !defined(NDEBUG) || SAFE_VALGRIND - // avoid spurious valgrind warning - FIXME: still complains??? -# define INIT_LOCATION , location() -#else -# define INIT_LOCATION -#endif - -// these should go in a global best-first queue -struct ItemPrio { - // NOTE: sum = viterbi (max) - ItemPrio() : priority(init_0()),inner(init_0()) { } - explicit ItemPrio(best_t priority) : priority(priority),inner(init_0()) { } - best_t priority; // includes inner prob. (forward) - /* The forward probability alpha_i(X[k]->x.y) is the sum of the probabilities of all - constrained paths of length i that end in state X[k]->x.y*/ - best_t inner; - /* The inner probability beta_i(X[k]->x.y) is the sum of the probabilities of all - paths of length i-k that start in state X[k,k]->.xy and end in X[k,i]->x.y, and generate the input symbols x[k,...,i-1] */ - template - void print(O &o) const { - o<=0; - } - explicit Item(FFState const& state,NodeP dot,best_t prio,int next=0) : ItemPrio(prio),ItemKey(dot,state),trienext(next),from(0) - INIT_LOCATION - { -// t=ADJ; -// if (dot->adj.size()) - dot->p_delta(next,priority); -// SHOWM1(DFSA,"Item(state,dot,prio)",prio); - } - typedef std::queue Predicted; -// Predicted predicted; // this is empty, unless this is a predicted L -> .asdf item, or a to-complete L -> asdf . - int trienext; // index of dot->adj to complete (if dest==0), or predict (if NT), or scan (if word). note: we could store pointer inside adj since it and trie are @ fixed addrs. less pointer arith, more space. - ItemP from; //backpointer - 0 for L -> . asdf for the rest; L -> a .sdf, it's the L -> .asdf item. - ItemP predicted_from() const { - ItemP p=(ItemP)this; - while(p->from) p=p->from; - return p; - } - template - void print(O &o) const { - o<< '['; - o< -struct ApplyFsa { - ApplyFsa(HgCFG &i, - const SentenceMetadata& smeta, - const FsaFeatureFunction& fsa, - DenseWeightVector const& weights, - ApplyFsaBy const& by, - Hypergraph* oh - ) - :hgcfg(i),smeta(smeta),fsa(fsa),weights(weights),by(by),oh(oh) - { - stateless=!fsa.state_bytes(); - } - void Compute() { - if (by.IsBottomUp() || stateless) - ApplyBottomUp(); - else - ApplyEarley(); - } - void ApplyBottomUp(); - void ApplyEarley(); - CFG const& GetCFG(); -private: - CFG cfg; - HgCFG &hgcfg; - SentenceMetadata const& smeta; - FsaFF const& fsa; -// WeightVector weight_vector; - DenseWeightVector weights; - ApplyFsaBy by; - Hypergraph* oh; - std::string cfg_out; - bool stateless; -}; - -template -void ApplyFsa::ApplyBottomUp() -{ - assert(by.IsBottomUp()); - FeatureFunctionFromFsa buff(&fsa); - buff.Init(); // mandatory to call this (normally factory would do it) - vector ffs(1,&buff); - ModelSet models(weights, ffs); - IntersectionConfiguration i(stateless ? BU_FULL : by.BottomUpAlgorithm(),by.pop_limit); - ApplyModelSet(hgcfg.ih,smeta,models,i,oh); -} - -template -void ApplyFsa::ApplyEarley() -{ - hgcfg.GiveCFG(cfg); - print_cfg=&cfg; - print_fsa=&fsa; - Chart chart(cfg,smeta,fsa); - // don't need to uniq - option to do that already exists in cfg_options - //TODO: - chart.best_first(); - *oh=hgcfg.ih; -} - - -void ApplyFsaModels(HgCFG &i, - const SentenceMetadata& smeta, - const FsaFeatureFunction& fsa, - DenseWeightVector const& weight_vector, - ApplyFsaBy const& by, - Hypergraph* oh) -{ - ApplyFsa a(i,smeta,fsa,weight_vector,by,oh); - a.Compute(); -} - -/* -namespace { -char const* anames[]={ - "BU_CUBE", - "BU_FULL", - "EARLEY", - 0 -}; -} -*/ - -//TODO: named enum type in boost? - -std::string ApplyFsaBy::name() const { -// return anames[algorithm]; - return GetName(algorithm); -} - -std::string ApplyFsaBy::all_names() { - return FsaByNames(" "); - /* - std::ostringstream o; - for (int i=0;i=N_ALGORITHMS) - throw std::runtime_error("Unknown ApplyFsaBy type id: "+itos(i)+" - legal types: "+all_names()); -*/ - GetName(i); // checks validity - algorithm=i; -} - -int ApplyFsaBy::BottomUpAlgorithm() const { - assert(IsBottomUp()); - return algorithm==BU_CUBE ? - IntersectionConfiguration::CUBE - :IntersectionConfiguration::FULL; -} - -void ApplyFsaModels(Hypergraph const& ih, - const SentenceMetadata& smeta, - const FsaFeatureFunction& fsa, - DenseWeightVector const& weights, // pre: in is weighted by these (except with fsa featval=0 before this) - ApplyFsaBy const& cfg, - Hypergraph* out) -{ - HgCFG i(ih); - ApplyFsaModels(i,smeta,fsa,weights,cfg,out); -} -- cgit v1.2.3