diff options
| -rw-r--r-- | decoder/apply_models.cc | 2 | ||||
| -rwxr-xr-x | decoder/ff_from_fsa.h | 5 | ||||
| -rwxr-xr-x | decoder/ff_fsa.h | 11 | ||||
| -rw-r--r-- | decoder/hg.h | 55 | ||||
| -rwxr-xr-x | decoder/oracle_bleu.h | 2 | 
5 files changed, 59 insertions, 16 deletions
diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc index 11d43e93..77d35c92 100644 --- a/decoder/apply_models.cc +++ b/decoder/apply_models.cc @@ -336,7 +336,7 @@ struct NoPruningRescorer {        Hypergraph::TailNodeVector tail(arity);        for (int i = 0; i < arity; ++i)          tail[i] = nodemap[in_edge.tail_nodes_[i]][tail_iter[i]]; -      Hypergraph::Edge* new_edge = out.AddEdge(in_edge.rule_, tail); +      Hypergraph::Edge* new_edge = out.AddEdge(in_edge, tail);        new_edge->feature_values_ = in_edge.feature_values_;        new_edge->i_ = in_edge.i_;        new_edge->j_ = in_edge.j_; diff --git a/decoder/ff_from_fsa.h b/decoder/ff_from_fsa.h index adb704de..d32e90df 100755 --- a/decoder/ff_from_fsa.h +++ b/decoder/ff_from_fsa.h @@ -5,9 +5,8 @@  #define FSA_FF_DEBUG  #ifdef FSA_FF_DEBUG -# define FSAFFDBG(e,x) do { if (debug) { FSADBGae(e,x) }  } while(0) -# define FSAFFDBGnl(e) do { if (debug) { std::cerr<<std::endl; INFO_EDGE(e,"; "); } } while(0) - +# define FSAFFDBG(e,x) FSADBGif(debug,e,x) +# define FSAFFDBGnl(e) FSADBGif_nl(debug,e)  #else  # define FSAFFDBG(e,x)  # define FSAFFDBGnl(e) diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h index f48fac60..93e3bd5e 100755 --- a/decoder/ff_fsa.h +++ b/decoder/ff_fsa.h @@ -6,6 +6,7 @@    state is some fixed width byte array.  could actually be a void *, WordID sequence, whatever. +  TODO: fsa feature aggregator that presents itself as a single fsa; benefit: when wrapped in ff_from_fsa, only one set of left words is stored.  */  //SEE ALSO: ff_fsa_dynamic.h, ff_from_fsa.h @@ -13,12 +14,14 @@  //TODO: decide whether to use init_features / add_value vs. summing elsewhere + set_value once (or inefficient for from_fsa: sum distinct feature_vectors.  but L->R if we only scan 1 word at a time, that's fine  //#define FSA_DEBUG - -# define FSADBGae(e,x) std::cerr << x; INFO_EDGE(e,x); +#define FSA_DEBUG_CERR 0 +#define FSA_DEBUG_DEBUG 0 +# define FSADBGif(i,e,x) do { if (i) { if (FSA_DEBUG_CERR){std::cerr<<x;}  INFO_EDGE(e,x); if (FSA_DEBUG_DEBUG){std::cerr<<"FSADBGif edge.info "<<&e<<" = "<<e.info()<<std::endl;}} } while(0) +# define FSADBGif_nl(i,e) do { if (i) { if (FSA_DEBUG_CERR) std::cerr<<std::endl; INFO_EDGE(e,"; "); } } while(0)  #ifdef FSA_DEBUG  # include <iostream> -# define FSADBG(e,x) do { if (d().debug()) { FSADBGae(e,x) } } while(0) -# define FSADBGnl(e) do { if (d().debug) { std::cerr<<std::endl; INFO_EDGE(e,"; "); } } while(0) +# define FSADBG(e,x) FSADBGif(d().debug(),e,x) +# define FSADBGnl(e) FSADBGif_nl(d().debug(),e,x)  #else  # define FSADBG(e,x)  # define FSADBGnl(e) diff --git a/decoder/hg.h b/decoder/hg.h index 10a24910..95a6525a 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -70,6 +70,9 @@ public:    // product of the weight vector and the feature values)    struct Edge {      Edge() : i_(-1), j_(-1), prev_i_(-1), prev_j_(-1) {} +    Edge(int id,Edge const& copy_add_from) : id_(id) { copy_add(copy_add_from); } +    Edge(int id,Edge const& copy_add_from,TailNodeVector const& tail) +      : tail_nodes_(tail),id_(id) { copy_add(copy_add_from); }      inline int Arity() const { return tail_nodes_.size(); }      int head_node_;               // refers to a position in nodes_      TailNodeVector tail_nodes_;   // contents refer to positions in nodes_ @@ -91,15 +94,18 @@ public:      short int prev_i_;      short int prev_j_; -    void copy_fixed(Edge const& o) { +    void copy_add(Edge const& o) {        rule_=o.rule_;        feature_values_ = o.feature_values_; -      edge_prob_ = o.edge_prob_;        i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;  #if USE_INFO_EDGE -      info_.str(o.info_.str()); +      set_info(o.info_.str());  #endif      } +    void copy_fixed(Edge const& o) { +      copy_add(o); +      edge_prob_ = o.edge_prob_; +    }      void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) {        copy_fixed(o);        head_node_=n2[o.head_node_]; @@ -109,16 +115,23 @@ public:  #if USE_INFO_EDGE      std::ostringstream info_; - -    Edge(Edge const& o) : head_node_(o.head_node_),tail_nodes_(o.tail_nodes_),rule_(o.rule_),feature_values_(o.feature_values_),edge_prob_(o.edge_prob_),id_(o.id_),i_(o.i_),j_(o.j_),prev_i_(o.prev_i_),prev_j_(o.prev_j_), info_(o.info_.str()) {  } +    void set_info(std::string const& s) { +      info_.str(s); +      info_.seekp(0,std::ios_base::end); +    } +    Edge(Edge const& o) : head_node_(o.head_node_),tail_nodes_(o.tail_nodes_),rule_(o.rule_),feature_values_(o.feature_values_),edge_prob_(o.edge_prob_),id_(o.id_),i_(o.i_),j_(o.j_),prev_i_(o.prev_i_),prev_j_(o.prev_j_), info_(o.info_.str(),std::ios_base::ate) { +//      info_.seekp(0,std::ios_base::end); + }      void operator=(Edge const& o) { -      head_node_ = o.head_node_; tail_nodes_ = o.tail_nodes_; rule_ = o.rule_; feature_values_ = o.feature_values_; edge_prob_ = o.edge_prob_; id_ = o.id_; i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;  info_.str(o.info_.str()); +      head_node_ = o.head_node_; tail_nodes_ = o.tail_nodes_; rule_ = o.rule_; feature_values_ = o.feature_values_; edge_prob_ = o.edge_prob_; id_ = o.id_; i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_; +      set_info(o.info_.str());      }      std::string info() const { return info_.str(); }      void reset_info() { info_.str(""); info_.clear(); }  #else      std::string info() const { return std::string(); }      void reset_info() {  } +    void set_info(std::string const& s) {  }  #endif      void show(std::ostream &o,unsigned mask=SPAN|RULE) const {        o<<'{'; @@ -215,12 +228,40 @@ public:      if (e) edges_.reserve(e);    } +private: +  void index_tails(Edge const& edge) { +    for (int i = 0; i < edge.tail_nodes_.size(); ++i) +      nodes_[edge.tail_nodes_[i]].out_edges_.push_back(edge.id_); +  } +public: +  // the below AddEdge all are used mostly for apply_models scoring and so do not set prob_ + +  // tails are already set, copy_add members are already set. +  Edge* AddEdge(Edge const& nedge) { +    int eid=edges_.size(); +    edges_.push_back(nedge); +    Edge* edge = &edges_.back(); +    edge->id_ = eid; +    index_tails(*edge); +    return edge; +  } + +  Edge* AddEdge(Edge const& in_edge, const TailNodeVector& tail) { +    edges_.push_back(Edge(edges_.size(),in_edge)); +    Edge* edge = &edges_.back(); +    edge->tail_nodes_ = tail; // possibly faster than copying to Edge() constructed above then copying via push_back.  perhaps optimized it's the same. +    index_tails(*edge); +    return edge; +  } + +  // oldest method in use - requires much manual assignment from source edge:    Edge* AddEdge(const TRulePtr& rule, const TailNodeVector& tail) { +    int eid=edges_.size();      edges_.push_back(Edge());      Edge* edge = &edges_.back();      edge->rule_ = rule;      edge->tail_nodes_ = tail; -    edge->id_ = edges_.size() - 1; +    edge->id_ = eid;      for (int i = 0; i < edge->tail_nodes_.size(); ++i)        nodes_[edge->tail_nodes_[i]].out_edges_.push_back(edge->id_);      return edge; diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h index fbb681e0..56ff8b72 100755 --- a/decoder/oracle_bleu.h +++ b/decoder/oracle_bleu.h @@ -281,7 +281,7 @@ struct OracleBleu {    void DumpKBest(const int sent_id, const Hypergraph& forest, const int k, const bool unique, std::string const &kbest_out_filename_) {      WriteFile ko(kbest_out_filename_); -    std::cerr << "Output kbest to " << kbest_out_filename_; +    std::cerr << "Output kbest to " << kbest_out_filename_<<std::endl;      if (unique)        kbest<KBest::NoFilter>(sent_id,forest,k,ko.get(),std::cerr);  | 
