diff options
Diffstat (limited to 'decoder')
| -rwxr-xr-x | decoder/apply_fsa_models.cc | 63 | ||||
| -rwxr-xr-x | decoder/cfg.h | 21 | ||||
| -rwxr-xr-x | decoder/hg_cfg.h | 11 | 
3 files changed, 79 insertions, 16 deletions
diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc index 5493d29f..fd3e1289 100755 --- a/decoder/apply_fsa_models.cc +++ b/decoder/apply_fsa_models.cc @@ -24,6 +24,8 @@  #define DPFSA(x) x  //prefix trie +#define DBUILDTRIE(x) x +  #define PRINT_PREFIX 1  #if PRINT_PREFIX  # define IF_PRINT_PREFIX(x) x @@ -146,6 +148,12 @@ struct PrefixTrieNode {      return ret;    } +  unsigned size() const { +    unsigned a=adj.size(); +    unsigned e=edge_for.size(); +    return a>e?a:e; +  } +    void print_back_str(std::ostream &o) const {      BPs back=back_vec();      unsigned i=back.size(); @@ -201,6 +209,7 @@ public:      m.clear();      for (int i=0;i<adj.size();++i) {        PrefixTrieEdge const& e=adj[i]; +      SHOWM2(DPFSA,"index_adj",i,e);        m[e.w]=e;      }    } @@ -211,27 +220,49 @@ public:        // assert(e.p.is_1());  // actually, after done_building, e will have telescoped dest->p/p.        NTHandle n=e.w;        assert(n>=0); +      SHOWM3(DPFSA,"index_lhs",i,e,n);        v[n]=e.dest;      }    } +  template <class PV> +  void done_root(PV &v) { +    done_building_r(); +//    index_adj(); // we want an index for the root node?.  don't think so - index_lhs handles it. +    index_lhs(v); +  } +    // call only once.    void done_building_r() {      done_building();      for (int i=0;i<adj.size();++i) -      adj[i].dest->done_building_r(); +      if (adj[i].dest) // skip final edge +        adj[i].dest->done_building_r();    }    // for done_building; compute incremental (telescoped) edge p -  PrefixTrieEdge const& operator()(PrefixTrieEdgeFor::value_type const& pair) const { -    PrefixTrieEdge &e=const_cast<PrefixTrieEdge&>(pair.second); +  PrefixTrieEdge /*const&*/ operator()(PrefixTrieEdgeFor::value_type & pair) const { +    PrefixTrieEdge &e=pair.second;//const_cast<PrefixTrieEdge&>(pair.second);      e.p=(e.dest->p)/p;      return e;    }    // call only once.    void done_building() { +    SHOWM3(DBUILDTRIE,"done_building",edge_for.size(),adj.size(),1); +#if 0      adj.reinit_map(edge_for,*this); +#else +    adj.reinit(edge_for.size()); +    Adj::iterator o=adj.begin(); +    for (PrefixTrieEdgeFor::iterator i=edge_for.begin(),e=edge_for.end();i!=e;++i) { +      SHOWM3(DBUILDTRIE,"edge_for",o-adj.begin(),i->first,i->second); +      PrefixTrieEdge &edge=i->second; +      edge.p=(edge.dest->p)/p; +      *o++=edge; +//      (*this)(*i); +    } +#endif  //    if (final) p_final/=p;      std::sort(adj.begin(),adj.end());      //TODO: store adjacent differences on edges (compared to @@ -294,12 +325,22 @@ public:    ~PrefixTrieNode() {      destroy_children();    } +  void print(std::ostream &o) const { +    o << lhs << "->" << p; +    o << ',' << size() << ','; +    print_back_str(o); +  } +  PRINT_SELF(PrefixTrieNode)  };  //Trie starts with lhs (nonneg index), then continues w/ rhs (mixed >0 word, else NT)  // trie ends with final edge, which points to a per-lhs prefix node  struct PrefixTrie { +  void print(std::ostream &o) const { +    o << cfgp << ' ' << root; +  } +  PRINT_SELF(PrefixTrie);    CFG *cfgp;    Rules const* rulesp;    Rules const& rules() const { return *rulesp; } @@ -312,10 +353,9 @@ struct PrefixTrie {    PrefixTrie(CFG &cfg) : cfgp(&cfg),rulesp(&cfg.rules),lhs2(cfg.nts.size(),0),lhs2complete(cfg.nts.size()) {  //    cfg.SortLocalBestFirst(); // instead we'll sort in done_building_r      print_cfg=cfgp; +    SHOWM2(DBUILDTRIE,"PrefixTrie()",rulesp->size(),lhs2.size());      cfg.VisitRuleIds(*this); -    root.done_building_r(); -    root.index_adj(); // maybe the index we use for building trie should be replaced by a much larger/faster table since we look up by lhs many times in parsing? -    root.index_lhs(lhs2); +    root.done_root(lhs2);    }    void operator()(int ri) const { @@ -323,12 +363,15 @@ struct PrefixTrie {      NTHandle lhs=r.lhs;      best_t p=r.p;      NodeP n=const_cast<PrefixTrieNode&>(root).build_lhs(lhs,p); +    SHOWM3(DBUILDTRIE,"Prefixtrie rule id",ri,root,p);      for (RHS::const_iterator i=r.rhs.begin(),e=r.rhs.end();;++i) { +      SHOWM2(DBUILDTRIE,"PrefixTrie build or final",i-r.rhs.begin(),*n);        if (i==e) {          n->set_final(lhs,p);          break;        }        n=n->build(*i,p); +      SHOWM2(DBUILDTRIE,"PrefixTrie built",*i,*n);      }  //    root.build(lhs,r.p)->build(r.rhs,r.p);    } @@ -390,8 +433,7 @@ struct ItemKey {      }    }    NTHandle lhs() const { return dot->lhs; } -  typedef ItemKey self_type; -  SELF_TYPE_PRINT +  PRINT_SELF(ItemKey)  };  inline ItemHash hash_value(ItemKey const& x) {    return x.hash(); @@ -424,8 +466,7 @@ struct Item : ItemPrio,ItemKey {      o<<" next="<<next;      o<< ']';    } -  typedef Item self_type; -  SELF_TYPE_PRINT +  PRINT_SELF(Item)  };  struct GetItemKey { @@ -553,6 +594,8 @@ template <class F>  void ApplyFsa<F>::ApplyEarley()  {    hgcfg.GiveCFG(cfg); +  print_cfg=&cfg; +  print_fsa=&fsa;    Chart<F> chart(cfg,smeta,fsa);    // don't need to uniq - option to do that already exists in cfg_options    //TODO: diff --git a/decoder/cfg.h b/decoder/cfg.h index 79ae6f33..95cb5fd7 100755 --- a/decoder/cfg.h +++ b/decoder/cfg.h @@ -1,6 +1,8 @@  #ifndef CDEC_CFG_H  #define CDEC_CFG_H +#define DVISITRULEID(x) +  // for now, debug means remembering and printing the TRule behind each CFG rule  #ifndef CFG_DEBUG  # define CFG_DEBUG 1 @@ -133,6 +135,10 @@ struct CFG {        swap(f,o.f);        IF_CFG_TRULE(swap(rule,o.rule);)      } +    friend inline void swap(Rule &a,Rule &b) { +      a.Swap(b); +    } +      template<class V>      void visit_rhs_nts(V &v) const {        for (RHS::const_iterator i=rhs.begin(),e=rhs.end();i!=e;++i) { @@ -195,6 +201,9 @@ struct CFG {        swap(ruleids,o.ruleids);        swap(from,o.from);      } +    friend inline void swap(NT &a,NT &b) { +      a.Swap(b); +    }    };    CFG() : hg_() { uninit=true; } @@ -294,14 +303,19 @@ struct CFG {    // call after rules are indexed.    template <class V>    void VisitRuleIds(V &v) { -    for (int i=0,e=nts.size();i<e;++i) -      for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.begin();j!=jj;++j) +    for (int i=0,e=nts.size();i<e;++i) { +      SHOWM(DVISITRULEID,"VisitRuleIds nt",i); +      for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.end();j!=jj;++j) { +        SHOWM2(DVISITRULEID,"VisitRuleIds",i,*j);          v(*j); +      } +    } +    }    template <class V>    void VisitRuleIds(V const& v) {      for (int i=0,e=nts.size();i<e;++i) -      for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.begin();j!=jj;++j) +      for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.end();j!=jj;++j)          v(*j);    } @@ -351,7 +365,6 @@ inline std::size_t hash_value(CFG::NT const& r) {    return r.hash_impl();  } -  inline void swap(CFG &a,CFG &b) {    a.Swap(b);  } diff --git a/decoder/hg_cfg.h b/decoder/hg_cfg.h index ba936990..b90aca47 100755 --- a/decoder/hg_cfg.h +++ b/decoder/hg_cfg.h @@ -7,8 +7,12 @@ class Hypergraph;  // in case you might want the CFG whether or not you apply FSA models:  struct HgCFG { -  HgCFG(Hypergraph const& ih) : ih(ih) { +  void set_defaults() {      have_cfg=binarized=have_features=uniqed=false; +    want_features=true; +  } +  HgCFG(Hypergraph const& ih) : ih(ih) { +    set_defaults();    }    Hypergraph const& ih;    CFG cfg; @@ -32,11 +36,14 @@ struct HgCFG {      if (!have_cfg)        InitCFG(to);      else { +      cfg.VisitRuleIds(*this);        have_cfg=false;        to.Clear(); -      to.Swap(cfg); +      swap(to,cfg);      }    } +  void operator()(int ri) const { +  }    CFG const& GetCFG() const {      assert(have_cfg);      return cfg;  | 
