diff options
Diffstat (limited to 'decoder')
71 files changed, 1017 insertions, 812 deletions
| diff --git a/decoder/Jamfile b/decoder/Jamfile deleted file mode 100644 index 06c5bfda..00000000 --- a/decoder/Jamfile +++ /dev/null @@ -1,78 +0,0 @@ -import testing ; -import lex ; -import option ; - -if [ option.get "with-glc" ] { -  glc = ff_glc.cc string_util.cc feature-factory.cc ; -} - -lib decoder :  -  forest_writer.cc -  maxtrans_blunsom.cc -  cdec_ff.cc -  cfg.cc -  dwarf.cc -  ff_dwarf.cc -  rule_lexer.ll -  fst_translator.cc -  csplit.cc -  translator.cc -  scfg_translator.cc -  hg.cc -  hg_io.cc -  decoder.cc -  hg_intersect.cc -  hg_sampler.cc -  factored_lexicon_helper.cc -  viterbi.cc -  lattice.cc -  aligner.cc -  apply_models.cc -  earley_composer.cc -  phrasetable_fst.cc -  trule.cc -  ff.cc -  ff_rules.cc -  ff_wordset.cc -  ff_context.cc -  ff_charset.cc -  ff_lm.cc -  ff_klm.cc -  ff_ngrams.cc -  ff_spans.cc -  ff_ruleshape.cc -  ff_wordalign.cc -  ff_csplit.cc -  ff_tagger.cc -  ff_source_syntax.cc -  ff_bleu.cc -  ff_factory.cc -  lexalign.cc -  lextrans.cc -  tagger.cc -  bottom_up_parser.cc -  phrasebased_translator.cc -  JSON_parser.c -  json_parse.cc -  grammar.cc -  $(glc) -  ..//utils -  ..//mteval -  ../klm/lm//kenlm -  ..//boost_program_options -  : <include>. -  : : -  <library>..//utils -  <library>..//mteval -  <library>../klm/lm//kenlm -  <library>..//boost_program_options -  <include>. 
-  ; - -exe cdec : cdec.cc decoder ..//utils ..//mteval ../klm/lm//kenlm ..//boost_program_options ; - -all_tests [ glob *_test.cc : cfg_test.cc ] : decoder : <testing.arg>$(TOP)/decoder/test_data ; - -install legacy : cdec -  : <location>$(TOP)/cdec <install-type>EXE <install-dependencies>on <link>shared:<dll-path>$(TOP)/cdec <link>shared:<install-type>LIB ; - diff --git a/decoder/Makefile.am b/decoder/Makefile.am index 0a792549..f8f427d3 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -17,7 +17,7 @@ trule_test_SOURCES = trule_test.cc  trule_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz  cdec_SOURCES = cdec.cc -cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz +cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz  AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. 
-I../mteval -I../utils -I../klm @@ -33,6 +33,7 @@ libcdec_a_SOURCES = \    cfg.cc \    dwarf.cc \    ff_dwarf.cc \ +  ff_external.cc \    rule_lexer.cc \    fst_translator.cc \    csplit.cc \ @@ -44,6 +45,7 @@ libcdec_a_SOURCES = \    hg_remove_eps.cc \    decoder.cc \    hg_intersect.cc \ +  hg_union.cc \    hg_sampler.cc \    factored_lexicon_helper.cc \    viterbi.cc \ @@ -54,6 +56,8 @@ libcdec_a_SOURCES = \    phrasetable_fst.cc \    trule.cc \    ff.cc \ +  ffset.cc \ +  ff_basic.cc \    ff_rules.cc \    ff_wordset.cc \    ff_context.cc \ @@ -69,6 +73,7 @@ libcdec_a_SOURCES = \    ff_source_syntax.cc \    ff_bleu.cc \    ff_factory.cc \ +  incremental.cc \    lexalign.cc \    lextrans.cc \    tagger.cc \ diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc index 9ba59d1b..330de9e2 100644 --- a/decoder/apply_models.cc +++ b/decoder/apply_models.cc @@ -16,6 +16,7 @@  #include "verbose.h"  #include "hg.h"  #include "ff.h" +#include "ffset.h"  #define NORMAL_CP 1  #define FAST_CP 2 diff --git a/decoder/cdec.cc b/decoder/cdec.cc index c671af57..cc3fcff1 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -4,6 +4,8 @@  #include "decoder.h"  #include "ff_register.h"  #include "verbose.h" +#include "timing_stats.h" +#include "util/usage.hh"  using namespace std; @@ -27,6 +29,7 @@ int main(int argc, char** argv) {      if (buf.empty()) continue;      decoder.Decode(buf);    } +  Timer::Summarize();  #ifdef CP_TIME      cerr << "Time required for Cube Pruning execution: "      << CpTime::Get() @@ -38,6 +41,7 @@ int main(int argc, char** argv) {        cout << FD::Convert(i) << endl;      }    } +  util::PrintUsage(std::cerr);    return 0;  } diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index d64bdada..3ab0f9f6 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -1,6 +1,7 @@  #include <boost/shared_ptr.hpp>  #include "ff.h" +#include "ff_basic.h"  #include "ff_context.h"  #include "ff_spans.h"  #include "ff_lm.h" @@ -18,6 +19,7 @@  #include 
"ff_charset.h"  #include "ff_wordset.h"  #include "ff_dwarf.h" +#include "ff_external.h"  #ifdef HAVE_GLC  #include <cdec/ff_glc.h> @@ -70,6 +72,7 @@ void register_feature_functions() {    ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>);    ff_registry.Register("WordSet", new FFFactory<WordSet>);    ff_registry.Register("Dwarf", new FFFactory<Dwarf>); +  ff_registry.Register("External", new FFFactory<ExternalFeature>);  #ifdef HAVE_GLC    ff_registry.Register("ContextCRF", new FFFactory<Model1Features>);  #endif diff --git a/decoder/cfg.h b/decoder/cfg.h index 8cb29bb9..aeeacb83 100644 --- a/decoder/cfg.h +++ b/decoder/cfg.h @@ -130,7 +130,7 @@ struct CFG {      int lhs; // index into nts      RHS rhs;      prob_t p; // h unused for now (there's nothing admissable, and p is already using 1st pass inside as pushed toward top) -    FeatureVector f; // may be empty, unless copy_features on Init +    SparseVector<double> f; // may be empty, unless copy_features on Init      IF_CFG_TRULE(TRulePtr rule;)      int size() const { // for stats only        return rhs.size(); diff --git a/decoder/cfg_format.h b/decoder/cfg_format.h index 2f40d483..d12da261 100644 --- a/decoder/cfg_format.h +++ b/decoder/cfg_format.h @@ -100,7 +100,7 @@ struct CFGFormat {      }    } -  void print_features(std::ostream &o,prob_t p,FeatureVector const& fv=FeatureVector()) const { +  void print_features(std::ostream &o,prob_t p,SparseVector<double> const& fv=SparseVector<double>()) const {      bool logp=(logprob_feat && p!=prob_t::One());      if (features || logp) {        o << partsep; diff --git a/decoder/cfg_test.cc b/decoder/cfg_test.cc index b8f4cf11..316c6d16 100644 --- a/decoder/cfg_test.cc +++ b/decoder/cfg_test.cc @@ -25,9 +25,9 @@ struct CFGTest : public TestWithParam<HgW> {    Hypergraph hg;    CFG cfg;    CFGFormat form; -  FeatureVector weights; +  SparseVector<double> weights; -  static void JsonFN(Hypergraph &hg,CFG &cfg,FeatureVector &featw,std::string 
file +  static void JsonFN(Hypergraph &hg,CFG &cfg,SparseVector<double> &featw,std::string file                       ,std::string const& wts="Model_0 1 EgivenF 1 f1 1")    {      istringstream ws(wts); diff --git a/decoder/decoder.cc b/decoder/decoder.cc index a6f7b1ce..b5f4b9b6 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -4,6 +4,7 @@  #include <boost/program_options.hpp>  #include <boost/program_options/variables_map.hpp>  #include <boost/make_shared.hpp> +#include <boost/scoped_ptr.hpp>  #include "program_options.h"  #include "stringlib.h" @@ -24,10 +25,12 @@  #include "hg.h"  #include "sentence_metadata.h"  #include "hg_intersect.h" +#include "hg_union.h"  #include "oracle_bleu.h"  #include "apply_models.h"  #include "ff.h" +#include "ffset.h"  #include "ff_factory.h"  #include "viterbi.h"  #include "kbest.h" @@ -37,6 +40,7 @@  #include "sampler.h"  #include "forest_writer.h" // TODO this section should probably be handled by an Observer +#include "incremental.h"  #include "hg_io.h"  #include "aligner.h" @@ -89,11 +93,6 @@ inline void ShowBanner() {    cerr << "cdec v1.0 (c) 2009-2011 by Chris Dyer\n";  } -inline void show_models(po::variables_map const& conf,ModelSet &ms,char const* header) { -  cerr<<header<<": "; -  ms.show_features(cerr,cerr,conf.count("warn_0_weight")); -} -  inline string str(char const* name,po::variables_map const& conf) {    return conf[name].as<string>();  } @@ -131,7 +130,7 @@ inline boost::shared_ptr<FeatureFunction> make_ff(string const& ffp,bool verbose    }    boost::shared_ptr<FeatureFunction> pf = ff_registry.Create(ff, param);    if (!pf) exit(1); -  int nbyte=pf->NumBytesContext(); +  int nbyte=pf->StateSize();    if (verbose_feature_functions && !SILENT)      cerr<<"State is "<<nbyte<<" bytes for "<<pre<<"feature "<<ffp<<endl;    return pf; @@ -327,6 +326,8 @@ struct DecoderImpl {    bool feature_expectations; // TODO Observer    bool output_training_vector; // TODO Observer    bool 
remove_intersected_rule_annotations; +  boost::scoped_ptr<IncrementalBase> incremental; +    static void ConvertSV(const SparseVector<prob_t>& src, SparseVector<double>* trg) {      for (SparseVector<prob_t>::const_iterator it = src.begin(); it != src.end(); ++it) @@ -414,6 +415,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream          ("show_conditional_prob", "Output the conditional log prob to STDOUT instead of a translation")          ("show_cfg_search_space", "Show the search space as a CFG")          ("show_target_graph", po::value<string>(), "Directory to write the target hypergraphs to") +        ("incremental_search", po::value<string>(), "Run lazy search with this language model file")          ("coarse_to_fine_beam_prune", po::value<double>(), "Prune paths from coarse parse forest before fine parse, keeping paths within exp(alpha>=0)")          ("ctf_beam_widen", po::value<double>()->default_value(2.0), "Expand coarse pass beam by this factor if no fine parse is found")          ("ctf_num_widenings", po::value<int>()->default_value(2), "Widen coarse beam this many times before backing off to full parse") @@ -641,8 +643,6 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream        prev_weights = rp.weight_vector;      }      rp.models.reset(new ModelSet(*rp.weight_vector, rp.ffs)); -    string ps = "Pass1 "; ps[4] += pass; -    if (!SILENT) show_models(conf,*rp.models,ps.c_str());    }    // show configuration of rescoring passes @@ -730,6 +730,10 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream    sent_id = -1;    acc_obj = 0; // accumulate objective    g_count = 0;    // number of gradient pieces computed + +  if (conf.count("incremental_search")) { +    incremental.reset(IncrementalBase::Load(conf["incremental_search"].as<string>().c_str(), CurrentWeightVector())); +  }  }  Decoder::Decoder(istream* cfg) { pimpl_.reset(new DecoderImpl(conf,0,0,cfg)); 
} @@ -831,6 +835,12 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {    if (conf.count("show_target_graph"))      HypergraphIO::WriteTarget(conf["show_target_graph"].as<string>(), sent_id, forest); +  if (conf.count("incremental_search")) { +    incremental->Search(pop_limit, forest); +    o->NotifyDecodingComplete(smeta); +    return true; +  } +    for (int pass = 0; pass < rescoring_passes.size(); ++pass) {      const RescoringPass& rp = rescoring_passes[pass];      const vector<weight_t>& cur_weights = *rp.weight_vector; @@ -870,13 +880,13 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {      if (rp.fid_summary) {        if (summary_feature_type == kEDGE_PROB) {          const prob_t z = forest.PushWeightsToGoal(1.0); -        if (!isfinite(log(z)) || isnan(log(z))) { +        if (!std::isfinite(log(z)) || std::isnan(log(z))) {            cerr << "  " << passtr << " !!! Invalid partition detected, abandoning.\n";          } else {            for (int i = 0; i < forest.edges_.size(); ++i) {              const double log_prob_transition = log(forest.edges_[i].edge_prob_); // locally normalized by the edge                                                                                // head node by forest.PushWeightsToGoal -            if (!isfinite(log_prob_transition) || isnan(log_prob_transition)) { +            if (!std::isfinite(log_prob_transition) || std::isnan(log_prob_transition)) {                cerr << "Edge: i=" << i << " got bad inside prob: " << *forest.edges_[i].rule_ << endl;                abort();              } @@ -888,7 +898,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {        } else if (summary_feature_type == kNODE_RISK) {          Hypergraph::EdgeProbs posts;          const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts); -        if (!isfinite(log(z)) || isnan(log(z))) { +        if (!std::isfinite(log(z)) || std::isnan(log(z))) {            cerr << "  " << passtr 
<< " !!! Invalid partition detected, abandoning.\n";          } else {            for (int i = 0; i < forest.nodes_.size(); ++i) { @@ -897,7 +907,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {              for (int j = 0; j < in_edges.size(); ++j)                node_post += (posts[in_edges[j]] / z);              const double log_np = log(node_post); -            if (!isfinite(log_np) || isnan(log_np)) { +            if (!std::isfinite(log_np) || std::isnan(log_np)) {                cerr << "got bad posterior prob for node " << i << endl;                abort();              } @@ -912,13 +922,13 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {        } else if (summary_feature_type == kEDGE_RISK) {          Hypergraph::EdgeProbs posts;          const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts); -        if (!isfinite(log(z)) || isnan(log(z))) { +        if (!std::isfinite(log(z)) || std::isnan(log(z))) {            cerr << "  " << passtr << " !!! 
Invalid partition detected, abandoning.\n";          } else {            assert(posts.size() == forest.edges_.size());            for (int i = 0; i < posts.size(); ++i) {              const double log_np = log(posts[i] / z); -            if (!isfinite(log_np) || isnan(log_np)) { +            if (!std::isfinite(log_np) || std::isnan(log_np)) {                cerr << "got bad posterior prob for node " << i << endl;                abort();              } @@ -958,7 +968,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {    // Oracle Rescoring    if(get_oracle_forest) { -    assert(!"this is broken"); FeatureVector dummy; // = last_weights +    assert(!"this is broken"); SparseVector<double> dummy; // = last_weights      Oracle oc=oracle.ComputeOracle(smeta,&forest,dummy,10,conf["forest_output"].as<std::string>());      if (!SILENT) cerr << "  +Oracle BLEU forest (nodes/edges): " << forest.nodes_.size() << '/' << forest.edges_.size() << endl;      if (!SILENT) cerr << "  +Oracle BLEU (paths): " << forest.NumberOfPaths() << endl; @@ -980,7 +990,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {          bool succeeded = HypergraphIO::ReadFromJSON(rf.stream(), &new_hg);          if (!succeeded) abort();        } -      new_hg.Union(forest); +      HG::Union(forest, &new_hg);        bool succeeded = writer.Write(new_hg, false);        if (!succeeded) abort();      } else { @@ -1067,7 +1077,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {              bool succeeded = HypergraphIO::ReadFromJSON(rf.stream(), &new_hg);              if (!succeeded) abort();            } -          new_hg.Union(forest); +          HG::Union(forest, &new_hg);            bool succeeded = writer.Write(new_hg, false);            if (!succeeded) abort();          } else { @@ -1089,7 +1099,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {            cerr << "DIFF. ERR! 
log_z < log_ref_z: " << log_z << " " << log_ref_z << endl;            exit(1);          } -        assert(!isnan(log_ref_z)); +        assert(!std::isnan(log_ref_z));          ref_exp -= full_exp;          acc_vec += ref_exp;          acc_obj += (log_z - log_ref_z); diff --git a/decoder/decoder.h b/decoder/decoder.h index bef2ff5e..79c7a602 100644 --- a/decoder/decoder.h +++ b/decoder/decoder.h @@ -24,7 +24,7 @@ private:  #endif  class SentenceMetadata; -struct Hypergraph; +class Hypergraph;  struct DecoderImpl;  struct DecoderObserver { diff --git a/decoder/exp_semiring.h b/decoder/exp_semiring.h index 111eaaf1..2a9034bb 100644 --- a/decoder/exp_semiring.h +++ b/decoder/exp_semiring.h @@ -59,7 +59,7 @@ struct PRWeightFunction {    explicit PRWeightFunction(const PWeightFunction& pwf = PWeightFunction(),                              const RWeightFunction& rwf = RWeightFunction()) :      pweight(pwf), rweight(rwf) {} -  PRPair<P,R> operator()(const Hypergraph::Edge& e) const { +  PRPair<P,R> operator()(const HG::Edge& e) const {      const P p = pweight(e);      const R r = rweight(e);      return PRPair<P,R>(p, r * p); diff --git a/decoder/ff.cc b/decoder/ff.cc index 557e0b5f..a6a035b5 100644 --- a/decoder/ff.cc +++ b/decoder/ff.cc @@ -1,9 +1,3 @@ -//TODO: non-sparse vector for all feature functions?  modelset applymodels keeps track of who has what features?  it's nice having FF that could generate a handful out of 10000 possible feats, though. - -//TODO: actually score rule_feature()==true features once only, hash keyed on rule or modify TRule directly?  need to keep clear in forest which features come from models vs. 
rules; then rescoring could drop all the old models features at once - -#include "fast_lexical_cast.hpp" -#include <stdexcept>  #include "ff.h"  #include "tdict.h" @@ -16,8 +10,7 @@ FeatureFunction::~FeatureFunction() {}  void FeatureFunction::PrepareForInput(const SentenceMetadata&) {}  void FeatureFunction::FinalTraversalFeatures(const void* /* ant_state */, -                                             SparseVector<double>* /* features */) const { -} +                                             SparseVector<double>* /* features */) const {}  string FeatureFunction::usage_helper(std::string const& name,std::string const& params,std::string const& details,bool sp,bool sd) {    string r=name; @@ -32,188 +25,14 @@ string FeatureFunction::usage_helper(std::string const& name,std::string const&    return r;  } -Features FeatureFunction::single_feature(WordID feat) { -  return Features(1,feat); -} - -Features ModelSet::all_features(std::ostream *warn,bool warn0) { -  //return ::all_features(models_,weights_,warn,warn0); -} - -void show_features(Features const& ffs,DenseWeightVector const& weights_,std::ostream &out,std::ostream &warn,bool warn_zero_wt) { -  out << "Weight  Feature\n"; -  for (unsigned i=0;i<ffs.size();++i) { -    WordID fid=ffs[i]; -    string const& fname=FD::Convert(fid); -    double wt=weights_[fid]; -    if (warn_zero_wt && wt==0) -      warn<<"WARNING: "<<fname<<" has 0 weight."<<endl; -    out << wt << "  " << fname<<endl; -  } -} - -void ModelSet::show_features(std::ostream &out,std::ostream &warn,bool warn_zero_wt) -{ -//  ::show_features(all_features(),weights_,out,warn,warn_zero_wt); -  //show_all_features(models_,weights_,out,warn,warn_zero_wt,warn_zero_wt); -} - -// Hiero and Joshua use log_10(e) as the value, so I do to -WordPenalty::WordPenalty(const string& param) : -  fid_(FD::Convert("WordPenalty")), -    value_(-1.0 / log(10)) { -  if (!param.empty()) { -    cerr << "Warning WordPenalty ignoring parameter: " << param << endl; -  } -} 
- -void FeatureFunction::TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                        const Hypergraph::Edge& edge, -                                        const std::vector<const void*>& ant_states, -                                        SparseVector<double>* features, -                                        SparseVector<double>* estimated_features, -                                        void* state) const { -  throw std::runtime_error("TraversalFeaturesImpl not implemented - override it or TraversalFeaturesLog.\n"); +void FeatureFunction::TraversalFeaturesImpl(const SentenceMetadata&, +                                        const Hypergraph::Edge&, +                                        const std::vector<const void*>&, +                                        SparseVector<double>*, +                                        SparseVector<double>*, +                                        void*) const { +  cerr << "TraversalFeaturesImpl not implemented - override it or TraversalFeaturesLog\n";    abort();  } -void WordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                        const Hypergraph::Edge& edge, -                                        const std::vector<const void*>& ant_states, -                                        SparseVector<double>* features, -                                        SparseVector<double>* estimated_features, -                                        void* state) const { -  (void) smeta; -  (void) ant_states; -  (void) state; -  (void) estimated_features; -  features->set_value(fid_, edge.rule_->EWords() * value_); -} - -SourceWordPenalty::SourceWordPenalty(const string& param) : -    fid_(FD::Convert("SourceWordPenalty")), -    value_(-1.0 / log(10)) { -  if (!param.empty()) { -    cerr << "Warning SourceWordPenalty ignoring parameter: " << param << endl; -  } -} - -Features SourceWordPenalty::features() const { -  return single_feature(fid_); -} - 
-Features WordPenalty::features() const { -  return single_feature(fid_); -} - - -void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                        const Hypergraph::Edge& edge, -                                        const std::vector<const void*>& ant_states, -                                        SparseVector<double>* features, -                                        SparseVector<double>* estimated_features, -                                        void* state) const { -  (void) smeta; -  (void) ant_states; -  (void) state; -  (void) estimated_features; -  features->set_value(fid_, edge.rule_->FWords() * value_); -} - -ArityPenalty::ArityPenalty(const std::string& param) : -    value_(-1.0 / log(10)) { -  string fname = "Arity_"; -  unsigned MAX=DEFAULT_MAX_ARITY; -  using namespace boost; -  if (!param.empty()) -    MAX=lexical_cast<unsigned>(param); -  for (unsigned i = 0; i <= MAX; ++i) { -    WordID fid=FD::Convert(fname+lexical_cast<string>(i)); -    fids_.push_back(fid); -  } -  while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); // pretty up features vector in case FD was frozen.  
doesn't change anything -} - -Features ArityPenalty::features() const { -  return Features(fids_.begin(),fids_.end()); -} - -void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                         const Hypergraph::Edge& edge, -                                         const std::vector<const void*>& ant_states, -                                         SparseVector<double>* features, -                                         SparseVector<double>* estimated_features, -                                         void* state) const { -  (void) smeta; -  (void) ant_states; -  (void) state; -  (void) estimated_features; -  unsigned a=edge.Arity(); -  features->set_value(a<fids_.size()?fids_[a]:0, value_); -} - -ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>& models) : -    models_(models), -    weights_(w), -    state_size_(0), -    model_state_pos_(models.size()) { -  for (int i = 0; i < models_.size(); ++i) { -    model_state_pos_[i] = state_size_; -    state_size_ += models_[i]->NumBytesContext(); -  } -} - -void ModelSet::PrepareForInput(const SentenceMetadata& smeta) { -  for (int i = 0; i < models_.size(); ++i) -    const_cast<FeatureFunction*>(models_[i])->PrepareForInput(smeta); -} - -void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta, -                                 const Hypergraph& /* hg */, -                                 const FFStates& node_states, -                                 Hypergraph::Edge* edge, -                                 FFState* context, -                                 prob_t* combination_cost_estimate) const { -  edge->reset_info(); -  context->resize(state_size_); -  if (state_size_ > 0) { -    memset(&(*context)[0], 0, state_size_); -  } -  SparseVector<double> est_vals;  // only computed if combination_cost_estimate is non-NULL -  if (combination_cost_estimate) *combination_cost_estimate = prob_t::One(); -  for (int i = 0; i < models_.size(); 
++i) { -    const FeatureFunction& ff = *models_[i]; -    void* cur_ff_context = NULL; -    vector<const void*> ants(edge->tail_nodes_.size()); -    bool has_context = ff.NumBytesContext() > 0; -    if (has_context) { -      int spos = model_state_pos_[i]; -      cur_ff_context = &(*context)[spos]; -      for (int i = 0; i < ants.size(); ++i) { -        ants[i] = &node_states[edge->tail_nodes_[i]][spos]; -      } -    } -    ff.TraversalFeatures(smeta, *edge, ants, &edge->feature_values_, &est_vals, cur_ff_context); -  } -  if (combination_cost_estimate) -    combination_cost_estimate->logeq(est_vals.dot(weights_)); -  edge->edge_prob_.logeq(edge->feature_values_.dot(weights_)); -} - -void ModelSet::AddFinalFeatures(const FFState& state, Hypergraph::Edge* edge,SentenceMetadata const& smeta) const { -  assert(1 == edge->rule_->Arity()); -  edge->reset_info(); -  for (int i = 0; i < models_.size(); ++i) { -    const FeatureFunction& ff = *models_[i]; -    const void* ant_state = NULL; -    bool has_context = ff.NumBytesContext() > 0; -    if (has_context) { -      int spos = model_state_pos_[i]; -      ant_state = &state[spos]; -    } -    ff.FinalTraversalFeatures(smeta, *edge, ant_state, &edge->feature_values_); -  } -  edge->edge_prob_.logeq(edge->feature_values_.dot(weights_)); -} - diff --git a/decoder/ff.h b/decoder/ff.h index 6c22d39f..3280592e 100644 --- a/decoder/ff.h +++ b/decoder/ff.h @@ -1,79 +1,47 @@  #ifndef _FF_H_  #define _FF_H_ -#define DEBUG_INIT 0 -#if DEBUG_INIT -# include <iostream> -# define DBGINIT(a) do { std::cerr<<a<<"\n"; } while(0) -#else -# define DBGINIT(a) -#endif - -#include <stdint.h> +#include <string>  #include <vector> -#include <cstring> -#include "fdict.h" -#include "hg.h" -#include "feature_vector.h" -#include "value_array.h" +#include "sparse_vector.h" +namespace HG { struct Edge; struct Node; } +class Hypergraph;  class SentenceMetadata; -class FeatureFunction;  // see definition below - -typedef std::vector<WordID> Features; 
// set of features ids  // if you want to develop a new feature, inherit from this class and  // override TraversalFeaturesImpl(...).  If it's a feature that returns /  // depends on context, you may also need to implement  // FinalTraversalFeatures(...)  class FeatureFunction { +  friend class ExternalFeature;   public:    std::string name_; // set by FF factory using usage() -  bool debug_; // also set by FF factory checking param for immediate initial "debug" -  //called after constructor, but before name_ and debug_ have been set -  virtual void Init() { DBGINIT("default FF::Init name="<<name_); } -  virtual void init_name_debug(std::string const& n,bool debug) { -    name_=n; -    debug_=debug; -  } -  bool debug() const { return debug_; }    FeatureFunction() : state_size_() {}    explicit FeatureFunction(int state_size) : state_size_(state_size) {}    virtual ~FeatureFunction();    bool IsStateful() const { return state_size_ > 0; } +  int StateSize() const { return state_size_; }    // override this.  not virtual because we want to expose this to factory template for help before creating a FF    static std::string usage(bool show_params,bool show_details) {      return usage_helper("FIXME_feature_needs_name","[no parameters]","[no documentation yet]",show_params,show_details);    }    static std::string usage_helper(std::string const& name,std::string const& params,std::string const& details,bool show_params,bool show_details); -  static Features single_feature(int feat); -public: - -  // stateless feature that doesn't depend on source span: override and return true.  then your feature can be precomputed over rules. -  virtual bool rule_feature() const { return false; }    // called once, per input, before any feature calls to TraversalFeatures, etc.    
// used to initialize sentence-specific data structures    virtual void PrepareForInput(const SentenceMetadata& smeta); -  //OVERRIDE THIS: -  virtual Features features() const { return single_feature(FD::Convert(name_)); } -  // returns the number of bytes of context that this feature function will -  // (maximally) use.  By default, 0 ("stateless" models in Hiero/Joshua). -  // NOTE: this value is fixed for the instance of your class, you cannot -  // use different amounts of memory for different nodes in the forest.  this will be read as soon as you create a ModelSet, then fixed forever on -  inline int NumBytesContext() const { return state_size_; } -    // Compute the feature values and (if this applies) the estimates of the    // feature values when this edge is used incorporated into a larger context    inline void TraversalFeatures(const SentenceMetadata& smeta, -                                Hypergraph::Edge& edge, +                                const HG::Edge& edge,                                  const std::vector<const void*>& ant_contexts, -                                FeatureVector* features, -                                FeatureVector* estimated_features, +                                SparseVector<double>* features, +                                SparseVector<double>* estimated_features,                                  void* out_state) const { -    TraversalFeaturesLog(smeta, edge, ant_contexts, +    TraversalFeaturesImpl(smeta, edge, ant_contexts,                            features, estimated_features, out_state);      // TODO it's easy for careless feature function developers to overwrite      // the end of their state and clobber someone else's memory.  These bugs @@ -83,21 +51,10 @@ public:    }    // if there's some state left when you transition to the goal state, score -  // it here.  For example, the language model computes the cost of adding +  // it here.  
For example, a language model might the cost of adding    // <s> and </s>. - -protected:    virtual void FinalTraversalFeatures(const void* residual_state, -                                      FeatureVector* final_features) const; -public: -  //override either this or one of above. -  virtual void FinalTraversalFeatures(const SentenceMetadata& /* smeta */, -                                      Hypergraph::Edge& /* edge */, // so you can log() -                                      const void* residual_state, -                                      FeatureVector* final_features) const { -    FinalTraversalFeatures(residual_state,final_features); -  } - +                                      SparseVector<double>* final_features) const;   protected:    // context is a pointer to a buffer of size NumBytesContext() that the @@ -107,191 +64,19 @@ public:    // of the particular FeatureFunction class.  There is one exception:    // equality of the contents (i.e., memcmp) is required to determine whether    // two states can be combined. - -  // by Log, I mean that the edge is non-const only so you can log to it with INFO_EDGE(edge,msg<<"etc.").  most features don't use this so implement the below.  it has a different name to allow a default implementation without name hiding when inheriting + overriding just 1. -  virtual void TraversalFeaturesLog(const SentenceMetadata& smeta, -                                    Hypergraph::Edge& edge, // this is writable only so you can use log() -                                     const std::vector<const void*>& ant_contexts, -                                     FeatureVector* features, -                                     FeatureVector* estimated_features, -                                     void* context) const { -    TraversalFeaturesImpl(smeta,edge,ant_contexts,features,estimated_features,context); -  } - -  // override above or below.    
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     Hypergraph::Edge const& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts, -                                     FeatureVector* features, -                                     FeatureVector* estimated_features, +                                     SparseVector<double>* features, +                                     SparseVector<double>* estimated_features,                                       void* context) const;    // !!! ONLY call this from subclass *CONSTRUCTORS* !!!    void SetStateSize(size_t state_size) {      state_size_ = state_size;    } -  int StateSize() const { return state_size_; } - private: -  int state_size_; -}; - - -// word penalty feature, for each word on the E side of a rule, -// add value_ -class WordPenalty : public FeatureFunction { - public: -  Features features() const; -  WordPenalty(const std::string& param); -  static std::string usage(bool p,bool d) { -    return usage_helper("WordPenalty","","number of target words (local feature)",p,d); -  } -  bool rule_feature() const { return true; } - protected: -  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, -                                     const std::vector<const void*>& ant_contexts, -                                     FeatureVector* features, -                                     FeatureVector* estimated_features, -                                     void* context) const; - private: -  const int fid_; -  const double value_; -}; - -class SourceWordPenalty : public FeatureFunction { - public: -  bool rule_feature() const { return true; } -  Features features() const; -  SourceWordPenalty(const std::string& param); -  static std::string usage(bool p,bool d) { -    return 
usage_helper("SourceWordPenalty","","number of source words (local feature, and meaningless except when input has non-constant number of source words, e.g. segmentation/morphology/speech recognition lattice)",p,d); -  } - protected: -  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, -                                     const std::vector<const void*>& ant_contexts, -                                     FeatureVector* features, -                                     FeatureVector* estimated_features, -                                     void* context) const; - private: -  const int fid_; -  const double value_; -}; - -#define DEFAULT_MAX_ARITY 9 -#define DEFAULT_MAX_ARITY_STRINGIZE(x) #x -#define DEFAULT_MAX_ARITY_STRINGIZE_EVAL(x) DEFAULT_MAX_ARITY_STRINGIZE(x) -#define DEFAULT_MAX_ARITY_STR DEFAULT_MAX_ARITY_STRINGIZE_EVAL(DEFAULT_MAX_ARITY) - -class ArityPenalty : public FeatureFunction { - public: -  bool rule_feature() const { return true; } -  Features features() const; -  ArityPenalty(const std::string& param); -  static std::string usage(bool p,bool d) { -    return usage_helper("ArityPenalty","[MaxArity(default " DEFAULT_MAX_ARITY_STR ")]","Indicator feature Arity_N=1 for rule of arity N (local feature).  
0<=N<=MaxArity(default " DEFAULT_MAX_ARITY_STR ")",p,d); -  } - - protected: -  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, -                                     const std::vector<const void*>& ant_contexts, -                                     FeatureVector* features, -                                     FeatureVector* estimated_features, -                                     void* context) const; - private: -  std::vector<WordID> fids_; -  const double value_; -}; - -void show_features(Features const& features,DenseWeightVector const& weights,std::ostream &out,std::ostream &warn,bool warn_zero_wt=true); //show features and weights - -template <class FFp> -Features all_features(std::vector<FFp> const& models_,DenseWeightVector &weights_,std::ostream *warn=0,bool warn_fid_0=false) { -  using namespace std; -  Features ffs; -#define WARNFF(x) do { if (warn) { *warn << "WARNING: "<< x << endl; } } while(0) -  typedef map<WordID,string> FFM; -  FFM ff_from; -  for (unsigned i=0;i<models_.size();++i) { -    string const& ffname=models_[i]->name_; -    Features si=models_[i]->features(); -    if (si.empty()) { -      WARNFF(ffname<<" doesn't yet report any feature IDs - either supply feature weight, or use --no_freeze_feature_set, or implement features() method"); -    } -    unsigned n0=0; -    for (unsigned j=0;j<si.size();++j) { -      WordID fid=si[j]; -      if (!fid) ++n0; -      if (fid >= weights_.size()) -        weights_.resize(fid+1); -      if (warn_fid_0 || fid) { -        pair<FFM::iterator,bool> i_new=ff_from.insert(FFM::value_type(fid,ffname)); -        if (i_new.second) { -          if (fid) -            ffs.push_back(fid); -          else -            WARNFF("Feature id 0 for "<<ffname<<" (models["<<i<<"]) - probably no weight provided.  
Don't freeze feature ids to see the name"); -        } else { -          WARNFF(ffname<<" (models["<<i<<"]) tried to define feature "<<FD::Convert(fid)<<" already defined earlier by "<<i_new.first->second); -        } -      } -    } -    if (n0) -      WARNFF(ffname<<" (models["<<i<<"]) had "<<n0<<" unused features (--no_freeze_feature_set to see them)"); -  } -  return ffs; -#undef WARNFF -} - -template <class FFp> -void show_all_features(std::vector<FFp> const& models_,DenseWeightVector &weights_,std::ostream &out,std::ostream &warn,bool warn_fid_0=true,bool warn_zero_wt=true) { -  return show_features(all_features(models_,weights_,&warn,warn_fid_0),weights_,out,warn,warn_zero_wt); -} - -typedef ValueArray<uint8_t> FFState; // this is about 10% faster than string. -//typedef std::string FFState; - -//FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation.  use ValueArray instead? (higher performance perhaps, save a word due to fixed size) -typedef std::vector<FFState> FFStates; - -// this class is a set of FeatureFunctions that can be used to score, rescore, -// etc. a (translation?) forest -class ModelSet { - public: -  ModelSet(const std::vector<double>& weights, -           const std::vector<const FeatureFunction*>& models); - -  // sets edge->feature_values_ and edge->edge_prob_ -  // NOTE: edge must not necessarily be in hg.edges_ but its TAIL nodes -  // must be.  
edge features are supposed to be overwritten, not added to (possibly because rule features aren't in ModelSet so need to be left alone -  void AddFeaturesToEdge(const SentenceMetadata& smeta, -                         const Hypergraph& hg, -                         const FFStates& node_states, -                         Hypergraph::Edge* edge, -                         FFState* residual_context, -                         prob_t* combination_cost_estimate = NULL) const; - -  //this is called INSTEAD of above when result of edge is goal (must be a unary rule - i.e. one variable, but typically it's assumed that there are no target terminals either (e.g. for LM)) -  void AddFinalFeatures(const FFState& residual_context, -                        Hypergraph::Edge* edge, -                        SentenceMetadata const& smeta) const; - -  // this is called once before any feature functions apply to a hypergraph -  // it can be used to initialize sentence-specific data structures -  void PrepareForInput(const SentenceMetadata& smeta); - -  bool empty() const { return models_.empty(); } - -  bool stateless() const { return !state_size_; } -  Features all_features(std::ostream *warnings=0,bool warn_fid_zero=false); // this will warn about duplicate features as well (one function overwrites the feature of another).  also resizes weights_ so it is large enough to hold the (0) weight for the largest reported feature id.  since 0 is a NULL feature id, it's never included.  
if warn_fid_zero, then even the first 0 id is -  void show_features(std::ostream &out,std::ostream &warn,bool warn_zero_wt=true); -   private: -  std::vector<const FeatureFunction*> models_; -  const std::vector<double>& weights_;    int state_size_; -  std::vector<int> model_state_pos_;  };  #endif diff --git a/decoder/ff_basic.cc b/decoder/ff_basic.cc new file mode 100644 index 00000000..f9404d24 --- /dev/null +++ b/decoder/ff_basic.cc @@ -0,0 +1,80 @@ +#include "ff_basic.h" + +#include "fast_lexical_cast.hpp" +#include "hg.h" + +using namespace std; + +// Hiero and Joshua use log_10(e) as the value, so I do to +WordPenalty::WordPenalty(const string& param) : +  fid_(FD::Convert("WordPenalty")), +    value_(-1.0 / log(10)) { +  if (!param.empty()) { +    cerr << "Warning WordPenalty ignoring parameter: " << param << endl; +  } +} + +void WordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, +                                        const Hypergraph::Edge& edge, +                                        const std::vector<const void*>& ant_states, +                                        SparseVector<double>* features, +                                        SparseVector<double>* estimated_features, +                                        void* state) const { +  (void) smeta; +  (void) ant_states; +  (void) state; +  (void) estimated_features; +  features->set_value(fid_, edge.rule_->EWords() * value_); +} + + +SourceWordPenalty::SourceWordPenalty(const string& param) : +    fid_(FD::Convert("SourceWordPenalty")), +    value_(-1.0 / log(10)) { +  if (!param.empty()) { +    cerr << "Warning SourceWordPenalty ignoring parameter: " << param << endl; +  } +} + +void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, +                                        const Hypergraph::Edge& edge, +                                        const std::vector<const void*>& ant_states, +                                        SparseVector<double>* 
features, +                                        SparseVector<double>* estimated_features, +                                        void* state) const { +  (void) smeta; +  (void) ant_states; +  (void) state; +  (void) estimated_features; +  features->set_value(fid_, edge.rule_->FWords() * value_); +} + + +ArityPenalty::ArityPenalty(const std::string& param) : +    value_(-1.0 / log(10)) { +  string fname = "Arity_"; +  unsigned MAX=DEFAULT_MAX_ARITY; +  using namespace boost; +  if (!param.empty()) +    MAX=lexical_cast<unsigned>(param); +  for (unsigned i = 0; i <= MAX; ++i) { +    WordID fid=FD::Convert(fname+lexical_cast<string>(i)); +    fids_.push_back(fid); +  } +  while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); // pretty up features vector in case FD was frozen.  doesn't change anything +} + +void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, +                                         const Hypergraph::Edge& edge, +                                         const std::vector<const void*>& ant_states, +                                         SparseVector<double>* features, +                                         SparseVector<double>* estimated_features, +                                         void* state) const { +  (void) smeta; +  (void) ant_states; +  (void) state; +  (void) estimated_features; +  unsigned a=edge.Arity(); +  features->set_value(a<fids_.size()?fids_[a]:0, value_); +} + diff --git a/decoder/ff_basic.h b/decoder/ff_basic.h new file mode 100644 index 00000000..901c0110 --- /dev/null +++ b/decoder/ff_basic.h @@ -0,0 +1,68 @@ +#ifndef _FF_BASIC_H_ +#define _FF_BASIC_H_ + +#include "ff.h" + +// word penalty feature, for each word on the E side of a rule, +// add value_ +class WordPenalty : public FeatureFunction { + public: +  WordPenalty(const std::string& param); +  static std::string usage(bool p,bool d) { +    return usage_helper("WordPenalty","","number of target words (local feature)",p,d); +  } + 
protected: +  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, +                                     const HG::Edge& edge, +                                     const std::vector<const void*>& ant_contexts, +                                     SparseVector<double>* features, +                                     SparseVector<double>* estimated_features, +                                     void* context) const; + private: +  const int fid_; +  const double value_; +}; + +class SourceWordPenalty : public FeatureFunction { + public: +  SourceWordPenalty(const std::string& param); +  static std::string usage(bool p,bool d) { +    return usage_helper("SourceWordPenalty","","number of source words (local feature, and meaningless except when input has non-constant number of source words, e.g. segmentation/morphology/speech recognition lattice)",p,d); +  } + protected: +  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, +                                     const HG::Edge& edge, +                                     const std::vector<const void*>& ant_contexts, +                                     SparseVector<double>* features, +                                     SparseVector<double>* estimated_features, +                                     void* context) const; + private: +  const int fid_; +  const double value_; +}; + +#define DEFAULT_MAX_ARITY 9 +#define DEFAULT_MAX_ARITY_STRINGIZE(x) #x +#define DEFAULT_MAX_ARITY_STRINGIZE_EVAL(x) DEFAULT_MAX_ARITY_STRINGIZE(x) +#define DEFAULT_MAX_ARITY_STR DEFAULT_MAX_ARITY_STRINGIZE_EVAL(DEFAULT_MAX_ARITY) + +class ArityPenalty : public FeatureFunction { + public: +  ArityPenalty(const std::string& param); +  static std::string usage(bool p,bool d) { +    return usage_helper("ArityPenalty","[MaxArity(default " DEFAULT_MAX_ARITY_STR ")]","Indicator feature Arity_N=1 for rule of arity N (local feature).  
0<=N<=MaxArity(default " DEFAULT_MAX_ARITY_STR ")",p,d); +  } + + protected: +  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, +                                     const HG::Edge& edge, +                                     const std::vector<const void*>& ant_contexts, +                                     SparseVector<double>* features, +                                     SparseVector<double>* estimated_features, +                                     void* context) const; + private: +  std::vector<WordID> fids_; +  const double value_; +}; + +#endif diff --git a/decoder/ff_bleu.h b/decoder/ff_bleu.h index 5544920e..344dc788 100644 --- a/decoder/ff_bleu.h +++ b/decoder/ff_bleu.h @@ -20,7 +20,7 @@ class BLEUModel : public FeatureFunction {    static std::string usage(bool param,bool verbose);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, diff --git a/decoder/ff_charset.cc b/decoder/ff_charset.cc index 33afc1a7..6429088b 100644 --- a/decoder/ff_charset.cc +++ b/decoder/ff_charset.cc @@ -1,5 +1,7 @@  #include "ff_charset.h" +#include "tdict.h" +#include "hg.h"  #include "fdict.h"  #include "stringlib.h" @@ -7,9 +9,9 @@ using namespace std;  NonLatinCount::NonLatinCount(const string& param) : FeatureFunction(), fid_(FD::Convert("NonLatinCount")) {} -bool ContainsNonLatin(const char* word) { -  int cur = 0; -  while(word[cur]) { +bool ContainsNonLatin(const string& word) { +  unsigned cur = 0; +  while(cur < word.size()) {      const int size = UTF8Len(word[cur]);      if (size > 1) return true;      cur += size;   @@ -20,8 +22,8 @@ bool 
ContainsNonLatin(const char* word) {  void NonLatinCount::TraversalFeaturesImpl(const SentenceMetadata& smeta,                                            const Hypergraph::Edge& edge,                                            const std::vector<const void*>& ant_contexts, -                                          FeatureVector* features, -                                          FeatureVector* estimated_features, +                                          SparseVector<double>* features, +                                          SparseVector<double>* estimated_features,                                            void* context) const {    const vector<WordID>& e = edge.rule_->e();    int count = 0; diff --git a/decoder/ff_charset.h b/decoder/ff_charset.h index b1ad537e..267ef65d 100644 --- a/decoder/ff_charset.h +++ b/decoder/ff_charset.h @@ -13,10 +13,10 @@ class NonLatinCount : public FeatureFunction {    NonLatinCount(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts, -                                     FeatureVector* features, -                                     FeatureVector* estimated_features, +                                     SparseVector<double>* features, +                                     SparseVector<double>* estimated_features,                                       void* context) const;   private:    mutable std::map<WordID, bool> is_non_latin_; diff --git a/decoder/ff_context.cc b/decoder/ff_context.cc index 9de4d737..f2b0e67c 100644 --- a/decoder/ff_context.cc +++ b/decoder/ff_context.cc @@ -5,12 +5,14 @@  #include <cassert>  #include <cmath> +#include "hg.h"  #include "filelib.h"  #include "stringlib.h"  #include "sentence_metadata.h"  #include "lattice.h"  #include 
"fdict.h"  #include "verbose.h" +#include "tdict.h"  RuleContextFeatures::RuleContextFeatures(const string& param) {    //  cerr << "initializing RuleContextFeatures with parameters: " << param; diff --git a/decoder/ff_context.h b/decoder/ff_context.h index 89bcb557..19198ec3 100644 --- a/decoder/ff_context.h +++ b/decoder/ff_context.h @@ -14,7 +14,7 @@ class RuleContextFeatures : public FeatureFunction {    RuleContextFeatures(const string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc index c9ed996c..e6f78f84 100644 --- a/decoder/ff_csplit.cc +++ b/decoder/ff_csplit.cc @@ -5,6 +5,7 @@  #include "klm/lm/model.hh" +#include "hg.h"  #include "sentence_metadata.h"  #include "lattice.h"  #include "tdict.h" @@ -88,7 +89,7 @@ void BasicCSplitFeaturesImpl::TraversalFeaturesImpl(    features->set_value(letters_sq_, (edge.j_ - edge.i_) * (edge.j_ - edge.i_));    features->set_value(letters_sqrt_, sqrt(edge.j_ - edge.i_));    const WordID word = edge.rule_->e_[1]; -  const char* sword = TD::Convert(word); +  const char* sword = TD::Convert(word).c_str();    const int len = strlen(sword);    int cur = 0;    int chars = 0; diff --git a/decoder/ff_csplit.h b/decoder/ff_csplit.h index 38c0c5b8..64d42526 100644 --- a/decoder/ff_csplit.h +++ b/decoder/ff_csplit.h @@ -12,7 +12,7 @@ class BasicCSplitFeatures : public FeatureFunction {    BasicCSplitFeatures(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, + 
                                    const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -27,7 +27,7 @@ class ReverseCharLMCSplitFeature : public FeatureFunction {    ReverseCharLMCSplitFeature(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, diff --git a/decoder/ff_dwarf.cc b/decoder/ff_dwarf.cc index 43528405..fe7a472e 100644 --- a/decoder/ff_dwarf.cc +++ b/decoder/ff_dwarf.cc @@ -4,6 +4,7 @@  #include <string>  #include <iostream>  #include <map> +#include "hg.h"  #include "ff_dwarf.h"  #include "dwarf.h"  #include "wordid.h" diff --git a/decoder/ff_dwarf.h b/decoder/ff_dwarf.h index 083fcc7c..3d6a7da6 100644 --- a/decoder/ff_dwarf.h +++ b/decoder/ff_dwarf.h @@ -56,7 +56,7 @@ class Dwarf : public FeatureFunction {          function word alignments set by 3.    
*/     void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, diff --git a/decoder/ff_external.cc b/decoder/ff_external.cc new file mode 100644 index 00000000..dea0e20f --- /dev/null +++ b/decoder/ff_external.cc @@ -0,0 +1,60 @@ +#include "ff_external.h" + +#include <dlfcn.h> + +#include "stringlib.h" +#include "hg.h" + +using namespace std; + +ExternalFeature::ExternalFeature(const string& param) { +  size_t pos = param.find(' '); +  string nparam; +  string file = param; +  if (pos < param.size()) { +    nparam = Trim(param.substr(pos + 1)); +    file = param.substr(0, pos); +  } +  if (file.size() < 1) { +    cerr << "External requires a path to a dynamic library!\n"; +    abort(); +  } +  lib_handle = dlopen(file.c_str(), RTLD_LAZY); +  if (!lib_handle) { +    cerr << "dlopen reports: " << dlerror() << endl; +    cerr << "Did you provide a full path to the dynamic library?\n"; +    abort(); +  } +  FeatureFunction* (*fn)(const string&) = +    (FeatureFunction* (*)(const string&))(dlsym(lib_handle, "create_ff")); +  if (!fn) { +    cerr << "dlsym reports: " << dlerror() << endl; +    abort(); +  } +  ff_ext = (*fn)(nparam); +  SetStateSize(ff_ext->StateSize()); +} + +ExternalFeature::~ExternalFeature() { +  delete ff_ext; +  dlclose(lib_handle); +} + +void ExternalFeature::PrepareForInput(const SentenceMetadata& smeta) { +  ff_ext->PrepareForInput(smeta); +} + +void ExternalFeature::FinalTraversalFeatures(const void* context, +                                             SparseVector<double>* features) const { +  ff_ext->FinalTraversalFeatures(context, features); +} + +void 
ExternalFeature::TraversalFeaturesImpl(const SentenceMetadata& smeta, +                                     const Hypergraph::Edge& edge, +                                     const std::vector<const void*>& ant_contexts, +                                     SparseVector<double>* features, +                                     SparseVector<double>* estimated_features, +                                     void* context) const { +  ff_ext->TraversalFeaturesImpl(smeta, edge, ant_contexts, features, estimated_features, context); +} + diff --git a/decoder/ff_external.h b/decoder/ff_external.h new file mode 100644 index 00000000..3e2bee51 --- /dev/null +++ b/decoder/ff_external.h @@ -0,0 +1,26 @@ +#ifndef _FFEXTERNAL_H_ +#define _FFEXTERNAL_H_ + +#include "ff.h" + +// dynamically loaded feature function +class ExternalFeature : public FeatureFunction { + public: +  ExternalFeature(const std::string& param); +  ~ExternalFeature(); +  virtual void PrepareForInput(const SentenceMetadata& smeta); +  virtual void FinalTraversalFeatures(const void* context, +                                      SparseVector<double>* features) const; + protected: +  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, +                                     const HG::Edge& edge, +                                     const std::vector<const void*>& ant_contexts, +                                     SparseVector<double>* features, +                                     SparseVector<double>* estimated_features, +                                     void* context) const; + private: +  void* lib_handle; +  FeatureFunction* ff_ext; +}; + +#endif diff --git a/decoder/ff_factory.h b/decoder/ff_factory.h index 5eb68c8b..bfdd3257 100644 --- a/decoder/ff_factory.h +++ b/decoder/ff_factory.h @@ -43,7 +43,6 @@ template<class FF>  struct FFFactory : public FactoryBase<FeatureFunction> {    FP Create(std::string param) const {      FF *ret=new FF(param); -    ret->Init();      return FP(ret);    
}    virtual std::string usage(bool params,bool verbose) const { @@ -57,7 +56,6 @@ template<class FF>  struct FsaFactory : public FactoryBase<FsaFeatureFunction> {    FP Create(std::string param) const {      FF *ret=new FF(param); -    ret->Init();      return FP(ret);    }    virtual std::string usage(bool params,bool verbose) const { @@ -98,8 +96,6 @@ struct FactoryRegistry : public UntypedFactoryRegistry {      if (debug)        cerr<<"debug enabled for "<<ffname<< " - remaining options: '"<<param<<"'\n";      FP res = dynamic_cast<FB const&>(*it->second).Create(param); -    res->init_name_debug(ffname,debug); -    // could add a res->Init() here instead of in Create if we wanted feature id to potentially differ based on the registered name rather than static usage() - of course, specific feature ids can be computed on the basis of feature param as well; this only affects the default single feature id=name      return res;    }  }; diff --git a/decoder/ff_klm.cc b/decoder/ff_klm.cc index 09ef282c..fefa90bd 100644 --- a/decoder/ff_klm.cc +++ b/decoder/ff_klm.cc @@ -327,11 +327,6 @@ KLanguageModel<Model>::KLanguageModel(const string& param) {  }  template <class Model> -Features KLanguageModel<Model>::features() const { -  return single_feature(fid_); -} - -template <class Model>  KLanguageModel<Model>::~KLanguageModel() {    delete pimpl_;  } @@ -362,7 +357,6 @@ void KLanguageModel<Model>::FinalTraversalFeatures(const void* ant_state,  template <class Model> boost::shared_ptr<FeatureFunction> CreateModel(const std::string ¶m) {    KLanguageModel<Model> *ret = new KLanguageModel<Model>(param); -  ret->Init();    return boost::shared_ptr<FeatureFunction>(ret);  } diff --git a/decoder/ff_klm.h b/decoder/ff_klm.h index 6efe50f6..b5ceffd0 100644 --- a/decoder/ff_klm.h +++ b/decoder/ff_klm.h @@ -20,10 +20,9 @@ class KLanguageModel : public FeatureFunction {    virtual void FinalTraversalFeatures(const void* context,                                        
SparseVector<double>* features) const;    static std::string usage(bool param,bool verbose); -  Features features() const;   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc index 5e16d4e3..6ec7b4f3 100644 --- a/decoder/ff_lm.cc +++ b/decoder/ff_lm.cc @@ -519,10 +519,6 @@ LanguageModel::LanguageModel(const string& param) {    SetStateSize(LanguageModelImpl::OrderToStateSize(order));  } -Features LanguageModel::features() const { -  return single_feature(fid_); -} -  LanguageModel::~LanguageModel() {    delete pimpl_;  } diff --git a/decoder/ff_lm.h b/decoder/ff_lm.h index ccee4268..94e18f00 100644 --- a/decoder/ff_lm.h +++ b/decoder/ff_lm.h @@ -55,10 +55,9 @@ class LanguageModel : public FeatureFunction {                                        SparseVector<double>* features) const;    std::string DebugStateToString(const void* state) const;    static std::string usage(bool param,bool verbose); -  Features features() const;   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -81,7 +80,7 @@ class LanguageModelRandLM : public FeatureFunction {    std::string DebugStateToString(const void* state) const;   protected:    virtual void 
TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, diff --git a/decoder/ff_ngrams.h b/decoder/ff_ngrams.h index 064dbb49..4965d235 100644 --- a/decoder/ff_ngrams.h +++ b/decoder/ff_ngrams.h @@ -17,7 +17,7 @@ class NgramDetector : public FeatureFunction {                                        SparseVector<double>* features) const;   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc index 3d0e514a..6716d3da 100644 --- a/decoder/ff_rules.cc +++ b/decoder/ff_rules.cc @@ -10,6 +10,8 @@  #include "lattice.h"  #include "fdict.h"  #include "verbose.h" +#include "tdict.h" +#include "hg.h"  using namespace std; diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h index 08b168b0..dc9a15d5 100644 --- a/decoder/ff_rules.h +++ b/decoder/ff_rules.h @@ -3,6 +3,7 @@  #include <vector>  #include <map> +#include "trule.h"  #include "ff.h"  #include "array2d.h"  #include "wordid.h" @@ -12,7 +13,7 @@ class RuleIdentityFeatures : public FeatureFunction {    RuleIdentityFeatures(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     
const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -42,7 +43,7 @@ class RuleTargetBigramFeatures : public FeatureFunction {    RuleTargetBigramFeatures(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, diff --git a/decoder/ff_ruleshape.cc b/decoder/ff_ruleshape.cc index f56ccfa9..7bb548c4 100644 --- a/decoder/ff_ruleshape.cc +++ b/decoder/ff_ruleshape.cc @@ -1,5 +1,7 @@  #include "ff_ruleshape.h" +#include "trule.h" +#include "hg.h"  #include "fdict.h"  #include <sstream> diff --git a/decoder/ff_ruleshape.h b/decoder/ff_ruleshape.h index 23c9827e..9f20faf3 100644 --- a/decoder/ff_ruleshape.h +++ b/decoder/ff_ruleshape.h @@ -9,7 +9,7 @@ class RuleShapeFeatures : public FeatureFunction {    RuleShapeFeatures(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc index 035132b4..a1997695 100644 --- a/decoder/ff_source_syntax.cc +++ b/decoder/ff_source_syntax.cc @@ -3,6 +3,7 
@@  #include <sstream>  #include <stack> +#include "hg.h"  #include "sentence_metadata.h"  #include "array2d.h"  #include "filelib.h" diff --git a/decoder/ff_source_syntax.h b/decoder/ff_source_syntax.h index 279563e1..a8c7150a 100644 --- a/decoder/ff_source_syntax.h +++ b/decoder/ff_source_syntax.h @@ -11,7 +11,7 @@ class SourceSyntaxFeatures : public FeatureFunction {    ~SourceSyntaxFeatures();   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -28,7 +28,7 @@ class SourceSpanSizeFeatures : public FeatureFunction {    ~SourceSpanSizeFeatures();   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, diff --git a/decoder/ff_spans.cc b/decoder/ff_spans.cc index 0483517b..0ccac69b 100644 --- a/decoder/ff_spans.cc +++ b/decoder/ff_spans.cc @@ -4,6 +4,8 @@  #include <cassert>  #include <cmath> +#include "hg.h" +#include "tdict.h"  #include "filelib.h"  #include "stringlib.h"  #include "sentence_metadata.h" diff --git a/decoder/ff_spans.h b/decoder/ff_spans.h index 24e0dede..d2f5e84c 100644 --- a/decoder/ff_spans.h +++ b/decoder/ff_spans.h @@ -12,7 +12,7 @@ class SpanFeatures : public FeatureFunction {    SpanFeatures(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const 
SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -49,7 +49,7 @@ class CMR2008ReorderingFeatures : public FeatureFunction {    CMR2008ReorderingFeatures(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, diff --git a/decoder/ff_tagger.cc b/decoder/ff_tagger.cc index fd9210fa..7f9af9cd 100644 --- a/decoder/ff_tagger.cc +++ b/decoder/ff_tagger.cc @@ -2,6 +2,7 @@  #include <sstream> +#include "hg.h"  #include "tdict.h"  #include "sentence_metadata.h"  #include "stringlib.h" diff --git a/decoder/ff_tagger.h b/decoder/ff_tagger.h index bd5b62c0..46418b0c 100644 --- a/decoder/ff_tagger.h +++ b/decoder/ff_tagger.h @@ -18,7 +18,7 @@ class Tagger_BigramIndicator : public FeatureFunction {    Tagger_BigramIndicator(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -39,7 +39,7 @@ class LexicalPairIndicator : public 
FeatureFunction {    virtual void PrepareForInput(const SentenceMetadata& smeta);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -59,7 +59,7 @@ class OutputIndicator : public FeatureFunction {    OutputIndicator(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc index decdf9bc..1491819d 100644 --- a/decoder/ff_wordalign.cc +++ b/decoder/ff_wordalign.cc @@ -549,7 +549,7 @@ void IdentityCycleDetector::TraversalFeaturesImpl(const SentenceMetadata& smeta,        static map<WordID, bool> big_enough;        map<WordID,bool>::iterator it = big_enough_.find(word);        if (it == big_enough_.end()) { -        out_is_identity = big_enough_[word] = strlen(TD::Convert(word)) >= length_min_; +        out_is_identity = big_enough_[word] = TD::Convert(word).size() >= length_min_;        } else {          out_is_identity = it->second;        } diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h index d7a2dda8..ba3d0b9b 100644 --- a/decoder/ff_wordalign.h +++ b/decoder/ff_wordalign.h @@ -13,7 +13,7 @@ class RelativeSentencePosition : public FeatureFunction {    RelativeSentencePosition(const std::string& 
param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -36,7 +36,7 @@ class SourceBigram : public FeatureFunction {    void PrepareForInput(const SentenceMetadata& smeta);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -55,7 +55,7 @@ class LexNullJump : public FeatureFunction {    LexNullJump(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -72,7 +72,7 @@ class NewJump : public FeatureFunction {    NewJump(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       
SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -109,7 +109,7 @@ class LexicalTranslationTrigger : public FeatureFunction {    LexicalTranslationTrigger(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -132,14 +132,14 @@ class BlunsomSynchronousParseHack : public FeatureFunction {    BlunsomSynchronousParseHack(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features,                                       void* out_context) const;   private:    inline bool DoesNotBelong(const void* state) const { -    for (int i = 0; i < NumBytesContext(); ++i) { +    for (int i = 0; i < StateSize(); ++i) {        if (*(static_cast<const unsigned char*>(state) + i)) return false;      }      return true; @@ -148,9 +148,9 @@ class BlunsomSynchronousParseHack : public FeatureFunction {    inline void AppendAntecedentString(const void* state, std::vector<WordID>* yield) const {      int i = 0;      int ind = 0; -    while (i < NumBytesContext() && !(*(static_cast<const unsigned char*>(state) + i))) { ++i; ind += 8; } -    // std::cerr << i << " " << NumBytesContext() << std::endl; -    assert(i != 
NumBytesContext()); +    while (i < StateSize() && !(*(static_cast<const unsigned char*>(state) + i))) { ++i; ind += 8; } +    // std::cerr << i << " " << StateSize() << std::endl; +    assert(i != StateSize());      assert(ind < cur_ref_->size());      int cur = *(static_cast<const unsigned char*>(state) + i);      int comp = 1; @@ -171,7 +171,7 @@ class BlunsomSynchronousParseHack : public FeatureFunction {    }    inline void SetStateMask(int start, int end, void* state) const { -    assert((end / 8) < NumBytesContext()); +    assert((end / 8) < StateSize());      int i = 0;      int comp = 1;      for (int j = 0; j < start; ++j) { @@ -209,7 +209,7 @@ class WordPairFeatures : public FeatureFunction {    WordPairFeatures(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -226,7 +226,7 @@ class IdentityCycleDetector : public FeatureFunction {    IdentityCycleDetector(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -242,7 +242,7 @@ class InputIndicator : public FeatureFunction {    InputIndicator(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                             
        const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -258,7 +258,7 @@ class Fertility : public FeatureFunction {    Fertility(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, diff --git a/decoder/ff_wordset.cc b/decoder/ff_wordset.cc index 44468899..70cea7de 100644 --- a/decoder/ff_wordset.cc +++ b/decoder/ff_wordset.cc @@ -1,5 +1,6 @@  #include "ff_wordset.h" +#include "hg.h"  #include "fdict.h"  #include <sstream>  #include <iostream> diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h index 7c9a3fb7..639e1514 100644 --- a/decoder/ff_wordset.h +++ b/decoder/ff_wordset.h @@ -2,6 +2,7 @@  #define _FF_WORDSET_H_  #include "ff.h" +#include "tdict.h"  #include <tr1/unordered_set>  #include <boost/algorithm/string.hpp> @@ -32,11 +33,9 @@ class WordSet : public FeatureFunction {    ~WordSet() {    } -  Features features() const { return single_feature(fid_); } -   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       
SparseVector<double>* estimated_features, diff --git a/decoder/ffset.cc b/decoder/ffset.cc new file mode 100644 index 00000000..5820f421 --- /dev/null +++ b/decoder/ffset.cc @@ -0,0 +1,72 @@ +#include "ffset.h" + +#include "ff.h" +#include "tdict.h" +#include "hg.h" + +using namespace std; + +ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>& models) : +    models_(models), +    weights_(w), +    state_size_(0), +    model_state_pos_(models.size()) { +  for (int i = 0; i < models_.size(); ++i) { +    model_state_pos_[i] = state_size_; +    state_size_ += models_[i]->StateSize(); +  } +} + +void ModelSet::PrepareForInput(const SentenceMetadata& smeta) { +  for (int i = 0; i < models_.size(); ++i) +    const_cast<FeatureFunction*>(models_[i])->PrepareForInput(smeta); +} + +void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta, +                                 const Hypergraph& /* hg */, +                                 const FFStates& node_states, +                                 HG::Edge* edge, +                                 FFState* context, +                                 prob_t* combination_cost_estimate) const { +  //edge->reset_info(); +  context->resize(state_size_); +  if (state_size_ > 0) { +    memset(&(*context)[0], 0, state_size_); +  } +  SparseVector<double> est_vals;  // only computed if combination_cost_estimate is non-NULL +  if (combination_cost_estimate) *combination_cost_estimate = prob_t::One(); +  for (int i = 0; i < models_.size(); ++i) { +    const FeatureFunction& ff = *models_[i]; +    void* cur_ff_context = NULL; +    vector<const void*> ants(edge->tail_nodes_.size()); +    bool has_context = ff.StateSize() > 0; +    if (has_context) { +      int spos = model_state_pos_[i]; +      cur_ff_context = &(*context)[spos]; +      for (int i = 0; i < ants.size(); ++i) { +        ants[i] = &node_states[edge->tail_nodes_[i]][spos]; +      } +    } +    ff.TraversalFeatures(smeta, *edge, ants, 
&edge->feature_values_, &est_vals, cur_ff_context); +  } +  if (combination_cost_estimate) +    combination_cost_estimate->logeq(est_vals.dot(weights_)); +  edge->edge_prob_.logeq(edge->feature_values_.dot(weights_)); +} + +void ModelSet::AddFinalFeatures(const FFState& state, HG::Edge* edge,SentenceMetadata const& smeta) const { +  assert(1 == edge->rule_->Arity()); +  //edge->reset_info(); +  for (int i = 0; i < models_.size(); ++i) { +    const FeatureFunction& ff = *models_[i]; +    const void* ant_state = NULL; +    bool has_context = ff.StateSize() > 0; +    if (has_context) { +      int spos = model_state_pos_[i]; +      ant_state = &state[spos]; +    } +    ff.FinalTraversalFeatures(ant_state, &edge->feature_values_); +  } +  edge->edge_prob_.logeq(edge->feature_values_.dot(weights_)); +} + diff --git a/decoder/ffset.h b/decoder/ffset.h new file mode 100644 index 00000000..28aef667 --- /dev/null +++ b/decoder/ffset.h @@ -0,0 +1,57 @@ +#ifndef _FFSET_H_ +#define _FFSET_H_ + +#include <vector> +#include "value_array.h" +#include "prob.h" + +namespace HG { struct Edge; struct Node; } +class Hypergraph; +class FeatureFunction; +class SentenceMetadata; +class FeatureFunction;  // see definition below + +// TODO let states be dynamically sized +typedef ValueArray<uint8_t> FFState; // this is a fixed array, but about 10% faster than string + +//FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation.  use ValueArray instead? (higher performance perhaps, save a word due to fixed size) +typedef std::vector<FFState> FFStates; + +// this class is a set of FeatureFunctions that can be used to score, rescore, +// etc. a (translation?) 
forest +class ModelSet { + public: +  ModelSet(const std::vector<double>& weights, +           const std::vector<const FeatureFunction*>& models); + +  // sets edge->feature_values_ and edge->edge_prob_ +  // NOTE: edge must not necessarily be in hg.edges_ but its TAIL nodes +  // must be.  edge features are supposed to be overwritten, not added to (possibly because rule features aren't in ModelSet so need to be left alone +  void AddFeaturesToEdge(const SentenceMetadata& smeta, +                         const Hypergraph& hg, +                         const FFStates& node_states, +                         HG::Edge* edge, +                         FFState* residual_context, +                         prob_t* combination_cost_estimate = NULL) const; + +  //this is called INSTEAD of above when result of edge is goal (must be a unary rule - i.e. one variable, but typically it's assumed that there are no target terminals either (e.g. for LM)) +  void AddFinalFeatures(const FFState& residual_context, +                        HG::Edge* edge, +                        SentenceMetadata const& smeta) const; + +  // this is called once before any feature functions apply to a hypergraph +  // it can be used to initialize sentence-specific data structures +  void PrepareForInput(const SentenceMetadata& smeta); + +  bool empty() const { return models_.empty(); } + +  bool stateless() const { return !state_size_; } + + private: +  std::vector<const FeatureFunction*> models_; +  const std::vector<double>& weights_; +  int state_size_; +  std::vector<int> model_state_pos_; +}; + +#endif diff --git a/decoder/grammar.cc b/decoder/grammar.cc index d1fe53af..ee43f537 100644 --- a/decoder/grammar.cc +++ b/decoder/grammar.cc @@ -127,48 +127,3 @@ bool TextGrammar::HasRuleForSpan(int /* i */, int /* j */, int distance) const {    return (max_span_ >= distance);  } -GlueGrammar::GlueGrammar(const string& file) : TextGrammar(file) {} - -void RefineRule(TRulePtr pt, const unsigned int 
ctf_level){ -  for (unsigned int i=0; i<ctf_level; ++i){ -    TRulePtr r(new TRule(*pt)); -    pt->fine_rules_.reset(new vector<TRulePtr>); -    pt->fine_rules_->push_back(r); -    pt = r; -  } -} - -GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt, const unsigned int ctf_level) { -  TRulePtr stop_glue(new TRule("[" + goal_nt + "] ||| [" + default_nt + ",1] ||| [1]")); -  AddRule(stop_glue); -  RefineRule(stop_glue, ctf_level); -  TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] ["+ default_nt + ",2] ||| [1] [2] ||| Glue=1")); -  AddRule(glue); -  RefineRule(glue, ctf_level); -} - -bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const { -  return (i == 0); -} - -PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) { -  unordered_set<WordID> ss; -  for (int i = 0; i < input.size(); ++i) { -    const vector<LatticeArc>& alts = input[i]; -    for (int k = 0; k < alts.size(); ++k) { -      const int j = alts[k].dist2next + i; -      const string& src = TD::Convert(alts[k].label); -      if (ss.count(alts[k].label) == 0) { -        TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1")); -        pt->a_.push_back(AlignmentPoint(0,0)); -        AddRule(pt); -        RefineRule(pt, ctf_level); -        ss.insert(alts[k].label); -      } -    } -  } -} - -bool PassThroughGrammar::HasRuleForSpan(int, int, int distance) const { -  return (distance < 2); -} diff --git a/decoder/grammar.h b/decoder/grammar.h index e6a15a69..add1a235 100644 --- a/decoder/grammar.h +++ b/decoder/grammar.h @@ -81,18 +81,4 @@ struct TextGrammar : public Grammar {  }; -struct GlueGrammar : public TextGrammar { -  // read glue grammar from file -  explicit GlueGrammar(const std::string& file); -  GlueGrammar(const std::string& goal_nt, const std::string& default_nt, const unsigned int ctf_level=0);  // "S", "X" -  virtual bool 
HasRuleForSpan(int i, int j, int distance) const; -}; - -struct PassThroughGrammar : public TextGrammar { -  PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0); -  virtual bool HasRuleForSpan(int i, int j, int distance) const; -}; - -void RefineRule(TRulePtr pt, const unsigned int ctf_level); -  #endif diff --git a/decoder/grammar_test.cc b/decoder/grammar_test.cc index 4500490a..912f4f12 100644 --- a/decoder/grammar_test.cc +++ b/decoder/grammar_test.cc @@ -10,7 +10,9 @@  #include "tdict.h"  #include "grammar.h"  #include "bottom_up_parser.h" +#include "hg.h"  #include "ff.h" +#include "ffset.h"  #include "weights.h"  using namespace std; diff --git a/decoder/hg.h b/decoder/hg.h index 591e98ce..3d8cd9bc 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -33,47 +33,20 @@  // slow  #undef HG_EDGES_TOPO_SORTED -class Hypergraph; -typedef boost::shared_ptr<Hypergraph> HypergraphP; - -// class representing an acyclic hypergraph -//  - edges have 1 head, 0..n tails -class Hypergraph { -public: -  Hypergraph() : is_linear_chain_(false) {} +// SmallVector is a fast, small vector<int> implementation for sizes <= 2 +typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_ +typedef std::vector<int> EdgesVector; // indices in edges_ -  // SmallVector is a fast, small vector<int> implementation for sizes <= 2 -  typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_ -  typedef std::vector<int> EdgesVector; // indices in edges_ - -  // TODO get rid of cat_? -  // TODO keep cat_ and add span and/or state? :) -  struct Node { -    Node() : id_(), cat_() {} -    int id_; // equal to this object's position in the nodes_ vector -    WordID cat_;  // non-terminal category if <0, 0 if not set -    WordID NT() const { return -cat_; } -    EdgesVector in_edges_;   // an in edge is an edge with this node as its head.  
(in edges come from the bottom up to us)  indices in edges_ -    EdgesVector out_edges_;  // an out edge is an edge with this node as its tail.  (out edges leave us up toward the top/goal). indices in edges_ -    void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting -      cat_=o.cat_; -    } -    void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) { -      copy_fixed(o); -      id_=n2[id_]; -      e2.reindex_push_back(o.in_edges_,in_edges_); -      e2.reindex_push_back(o.out_edges_,out_edges_); -    } -  }; +enum { +  NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF +}; +namespace HG { -  // TODO get rid of edge_prob_? (can be computed on the fly as the dot -  // product of the weight vector and the feature values)    struct Edge { -//    int poplimit; //TODO: cube pruning per edge limit?  per node didn't work well at all.  also, inside cost + outside(node) is the same information i'd use to set a per-edge limit anyway - and nonmonotonicity in cube pruning may mean it's good to favor edge (in same node) w/ relatively worse score      Edge() : i_(-1), j_(-1), prev_i_(-1), prev_j_(-1) {}      Edge(int id,Edge const& copy_pod_from) : id_(id) { copy_pod(copy_pod_from); } // call copy_features yourself later. 
-    Edge(int id,Edge const& copy_from,TailNodeVector const& tail) // fully inits - probably more expensive when push_back(Edge(...)) than setting after +    Edge(int id,Edge const& copy_from,TailNodeVector const& tail) // fully inits - probably more expensive when push_back(Edge(...)) than sett        : tail_nodes_(tail),id_(id) { copy_pod(copy_from);copy_features(copy_from); }      inline int Arity() const { return tail_nodes_.size(); }      int head_node_;               // refers to a position in nodes_ @@ -83,8 +56,6 @@ public:      prob_t edge_prob_;             // dot product of weights and feat_values      int id_;   // equal to this object's position in the edges_ vector -    //FIXME: these span ids belong in Node, not Edge, right?  every node should have the same spans. -      // span info. typically, i_ and j_ refer to indices in the source sentence.      // In synchronous parsing, i_ and j_ will refer to target sentence/lattice indices      // while prev_i_ prev_j_ will refer to positions in the source. @@ -97,54 +68,6 @@ public:      short int j_;      short int prev_i_;      short int prev_j_; - -    void copy_info(Edge const& o) { -#if USE_INFO_EDGE -      set_info(o.info_.str()); // by convention, each person putting info here starts with a separator (e.g. space).  it's empty if nobody put any info there. 
-#else -      (void) o; -#endif -    } -    void copy_pod(Edge const& o) { -      rule_=o.rule_; -      i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_; -    } -    void copy_features(Edge const& o) { -      feature_values_=o.feature_values_; -      copy_info(o); -    } -    void copy_fixed(Edge const& o) { -      copy_pod(o); -      copy_features(o); -      edge_prob_ = o.edge_prob_; -    } -    void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) { -      copy_fixed(o); -      head_node_=n2[o.head_node_]; -      id_=e2[o.id_]; -      n2.reindex_push_back(o.tail_nodes_,tail_nodes_); -    } - -#if USE_INFO_EDGE -    std::ostringstream info_; -    void set_info(std::string const& s) { -      info_.str(s); -      info_.seekp(0,std::ios_base::end); -    } -    Edge(Edge const& o) : head_node_(o.head_node_),tail_nodes_(o.tail_nodes_),rule_(o.rule_),feature_values_(o.feature_values_),edge_prob_(o.edge_prob_),id_(o.id_),i_(o.i_),j_(o.j_),prev_i_(o.prev_i_),prev_j_(o.prev_j_), info_(o.info_.str(),std::ios_base::ate) { -//      info_.seekp(0,std::ios_base::end); - } -    void operator=(Edge const& o) { -      head_node_ = o.head_node_; tail_nodes_ = o.tail_nodes_; rule_ = o.rule_; feature_values_ = o.feature_values_; edge_prob_ = o.edge_prob_; id_ = o.id_; i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_; -      set_info(o.info_.str()); -    } -    std::string info() const { return info_.str(); } -    void reset_info() { info_.str(""); info_.clear(); } -#else -    std::string info() const { return std::string(); } -    void reset_info() {  } -    void set_info(std::string const& ) {  } -#endif      void show(std::ostream &o,unsigned mask=SPAN|RULE) const {        o<<'{';        if (mask&CATEGORY) @@ -159,10 +82,6 @@ public:          o<<' '<<feature_values_;        if (mask&RULE)          o<<' '<<rule_->AsString(mask&RULE_LHS); -      if (USE_INFO_EDGE) { -        std::string const& i=info(); -        if 
(mask&&!i.empty()) o << " |||"<<i; // remember, the initial space is expected as part of i -      }        o<<'}';      }      std::string show(unsigned mask=SPAN|RULE) const { @@ -170,12 +89,28 @@ public:        show(o,mask);        return o.str();      } -    /* generic recursion re: child_handle=re(tail_nodes_[i],i,parent_handle) - -       FIXME: make kbest create a simple derivation-tree structure (could be a -       hg), and replace the list-of-edges viterbi.h with a tree-structured one. -       CreateViterbiHypergraph can do for 1best, though. -    */ +    void copy_pod(Edge const& o) { +      rule_=o.rule_; +      i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_; +    } +    void copy_features(Edge const& o) { +      feature_values_=o.feature_values_; +    } +    void copy_fixed(Edge const& o) { +      copy_pod(o); +      copy_features(o); +      edge_prob_ = o.edge_prob_; +    } +    void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) { +      copy_fixed(o); +      head_node_=n2[o.head_node_]; +      id_=e2[o.id_]; +      n2.reindex_push_back(o.tail_nodes_,tail_nodes_); +    } +    // generic recursion re: child_handle=re(tail_nodes_[i],i,parent_handle) +    //   FIXME: make kbest create a simple derivation-tree structure (could be a +    //   hg), and replace the list-of-edges viterbi.h with a tree-structured one. +    //   CreateViterbiHypergraph can do for 1best, though.      template <class EdgeRecurse,class TEdgeHandle>      std::string derivation_tree(EdgeRecurse const& re,TEdgeHandle const& eh,bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const {        std::ostringstream o; @@ -203,7 +138,43 @@ public:      }    }; -  // all this info ought to live in Node, but for some reason it's on Edges. +  // TODO get rid of cat_? +  // TODO keep cat_ and add span and/or state? 
:) +  struct Node { +    Node() : id_(), cat_() {} +    int id_; // equal to this object's position in the nodes_ vector +    WordID cat_;  // non-terminal category if <0, 0 if not set +    WordID NT() const { return -cat_; } +    EdgesVector in_edges_;   // an in edge is an edge with this node as its head.  (in edges come from the bottom up to us)  indices in edges_ +    EdgesVector out_edges_;  // an out edge is an edge with this node as its tail.  (out edges leave us up toward the top/goal). indices in edges_ +    void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting +      cat_=o.cat_; +    } +    void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) { +      copy_fixed(o); +      id_=n2[id_]; +      e2.reindex_push_back(o.in_edges_,in_edges_); +      e2.reindex_push_back(o.out_edges_,out_edges_); +    } +  }; + +} // namespace HG + +class Hypergraph; +typedef boost::shared_ptr<Hypergraph> HypergraphP; +// class representing an acyclic hypergraph +//  - edges have 1 head, 0..n tails +class Hypergraph { +public: +  Hypergraph() : is_linear_chain_(false) {} +  typedef HG::Node Node; +  typedef HG::Edge Edge; +  typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_ +  typedef std::vector<int> EdgesVector; // indices in edges_ +  enum { +    NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF +  }; +    // except for stateful models that have split nt,span, this should identify the node    void SetNodeOrigin(int nodeid,NTSpan &r) const {      Node const &n=nodes_[nodeid]; @@ -230,18 +201,9 @@ public:      }      return s;    } -  // 0 if none, -TD index otherwise (just like in rule)    WordID NodeLHS(int nodeid) const {      Node const &n=nodes_[nodeid];      return n.NT(); -    /* -    if (!n.in_edges_.empty()) { -      Edge const& e=edges_[n.in_edges_.front()]; -      if (e.rule_) -        return -e.rule_->lhs_; -    } - 
   return 0; -    */    }    typedef std::vector<prob_t> EdgeProbs; @@ -250,14 +212,8 @@ public:    typedef std::vector<bool> NodeMask;    std::string show_viterbi_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const; -// builds viterbi hg and returns it formatted as a pretty string - -  enum { -    NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF -  };    std::string show_first_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const; -  // same as above, but takes in_edges_[0] all the way down - to make it viterbi cost (1-best), call ViterbiSortInEdges() first    typedef Edge const* EdgeHandle;    EdgeHandle operator()(int tailn,int /*taili*/,EdgeHandle /*parent*/) const { @@ -334,7 +290,7 @@ public:    Edge* AddEdge(Edge const& in_edge, const TailNodeVector& tail) {      edges_.push_back(Edge(edges_.size(),in_edge));      Edge* edge = &edges_.back(); -    edge->copy_features(in_edge); +    edge->feature_values_ = in_edge.feature_values_;      edge->tail_nodes_ = tail; // possibly faster than copying to Edge() constructed above then copying via push_back.  perhaps optimized it's the same.      
index_tails(*edge);      return edge; @@ -503,9 +459,9 @@ public:    template <class V>    void visit_edges_topo(V &v) { -    for (int i = 0; i < nodes_.size(); ++i) { +    for (unsigned i = 0; i < nodes_.size(); ++i) {        EdgesVector const& in=nodes_[i].in_edges_; -      for (int j=0;j<in.size();++j) { +      for (unsigned j=0;j<in.size();++j) {          int e=in[j];          v(i,e,edges_[e]);        } @@ -534,14 +490,14 @@ private:  // for generic Viterbi/Inside algorithms  struct EdgeProb {    typedef prob_t Weight; -  inline const prob_t& operator()(const Hypergraph::Edge& e) const { return e.edge_prob_; } +  inline const prob_t& operator()(const HG::Edge& e) const { return e.edge_prob_; }  };  struct EdgeSelectEdgeWeightFunction {    typedef prob_t Weight;    typedef std::vector<bool> EdgeMask;    EdgeSelectEdgeWeightFunction(const EdgeMask& v) : v_(v) {} -  inline prob_t operator()(const Hypergraph::Edge& e) const { +  inline prob_t operator()(const HG::Edge& e) const {      if (v_[e.id_]) return prob_t::One();      else return prob_t::Zero();    } @@ -551,7 +507,7 @@ private:  struct ScaledEdgeProb {    ScaledEdgeProb(const double& alpha) : alpha_(alpha) {} -  inline prob_t operator()(const Hypergraph::Edge& e) const { return e.edge_prob_.pow(alpha_); } +  inline prob_t operator()(const HG::Edge& e) const { return e.edge_prob_.pow(alpha_); }    const double alpha_;    typedef prob_t Weight;  }; @@ -560,7 +516,7 @@ struct ScaledEdgeProb {  struct EdgeFeaturesAndProbWeightFunction {    typedef SparseVector<prob_t> Weight;    typedef Weight Result; //TODO: change Result->Weight everywhere? 
-  inline const Weight operator()(const Hypergraph::Edge& e) const { +  inline const Weight operator()(const HG::Edge& e) const {      SparseVector<prob_t> res;      for (SparseVector<double>::const_iterator it = e.feature_values_.begin();           it != e.feature_values_.end(); ++it) @@ -571,7 +527,7 @@ struct EdgeFeaturesAndProbWeightFunction {  struct TransitionCountWeightFunction {    typedef double Weight; -  inline double operator()(const Hypergraph::Edge& e) const { (void)e; return 1.0; } +  inline double operator()(const HG::Edge& e) const { (void)e; return 1.0; }  };  #endif diff --git a/decoder/hg_intersect.cc b/decoder/hg_intersect.cc index 6e3bfee6..ad5b701a 100644 --- a/decoder/hg_intersect.cc +++ b/decoder/hg_intersect.cc @@ -79,7 +79,9 @@ static bool FastLinearIntersect(const Lattice& target, Hypergraph* hg) {    return (cov.size() == target.size());  } -bool HG::Intersect(const Lattice& target, Hypergraph* hg) { +namespace HG { + +bool Intersect(const Lattice& target, Hypergraph* hg) {    // there are a number of faster algorithms available for restricted    // classes of hypergraph and/or target.    
if (hg->IsLinearChain() && target.IsSentence()) @@ -101,7 +103,7 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) {    // grammar, create the labels here    const string kSEP = "_";    for (unsigned i = 0; i < nnodes; ++i) { -    const char* pstr = "CAT"; +    string pstr = "CAT";      if (hg->nodes_[i].cat_ < 0)        pstr = TD::Convert(-hg->nodes_[i].cat_);      cats[i] = TD::Convert(pstr + kSEP + lexical_cast<string>(i)) * -1; @@ -160,3 +162,5 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) {    return true;  } +} + diff --git a/decoder/hg_intersect.h b/decoder/hg_intersect.h index 826bdaae..29a5ea2a 100644 --- a/decoder/hg_intersect.h +++ b/decoder/hg_intersect.h @@ -1,13 +1,11 @@  #ifndef _HG_INTERSECT_H_  #define _HG_INTERSECT_H_ -#include <vector> -  #include "lattice.h"  class Hypergraph; -struct HG { -  static bool Intersect(const Lattice& target, Hypergraph* hg); +namespace HG { +  bool Intersect(const Lattice& target, Hypergraph* hg);  };  #endif diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc index 8bd40387..64c6663e 100644 --- a/decoder/hg_io.cc +++ b/decoder/hg_io.cc @@ -28,7 +28,7 @@ struct HGReader : public JSONParser {        hg.ConnectEdgeToHeadNode(&hg.edges_[in_edges[i]], node);      }    } -  void CreateEdge(const TRulePtr& rule, FeatureVector* feats, const SmallVectorUnsigned& tail) { +  void CreateEdge(const TRulePtr& rule, SparseVector<double>* feats, const SmallVectorUnsigned& tail) {      Hypergraph::Edge* edge = hg.AddEdge(rule, tail);      feats->swap(edge->feature_values_);      edge->i_ = spans[0]; @@ -392,8 +392,8 @@ string HypergraphIO::AsPLF(const Hypergraph& hg, bool include_global_parentheses          const Hypergraph::Edge& e = hg.edges_[hg.nodes_[i].out_edges_[j]];          const string output = e.rule_->e_.size() ==2 ? 
Escape(TD::Convert(e.rule_->e_[1])) : EPS;          double prob = log(e.edge_prob_); -        if (isinf(prob)) { prob = -9e20; } -        if (isnan(prob)) { prob = 0; } +        if (std::isinf(prob)) { prob = -9e20; } +        if (std::isnan(prob)) { prob = 0; }          os << "('" << output << "'," << prob << "," << e.head_node_ - i << "),";        }        os << "),"; @@ -600,7 +600,7 @@ void HypergraphIO::WriteAsCFG(const Hypergraph& hg) {    // grammar, create the labels here    const string kSEP = "_";    for (int i = 0; i < hg.nodes_.size(); ++i) { -    const char* pstr = "CAT"; +    string pstr = "CAT";      if (hg.nodes_[i].cat_ < 0)        pstr = TD::Convert(-hg.nodes_[i].cat_);      cats[i] = TD::Convert(pstr + kSEP + boost::lexical_cast<string>(i)) * -1; diff --git a/decoder/hg_sampler.cc b/decoder/hg_sampler.cc index cdf0ec3c..8e520871 100644 --- a/decoder/hg_sampler.cc +++ b/decoder/hg_sampler.cc @@ -71,3 +71,58 @@ void HypergraphSampler::sample_hypotheses(const Hypergraph& hg,      Viterbi(hg, &hyp.words, ESentenceTraversal(), SampledDerivationWeightFunction(sampled_edges));    }  } + +void HypergraphSampler::sample_trees(const Hypergraph& hg, +                                     unsigned n, +                                     MT19937* rng, +                                     vector<string>* trees) { +  trees->clear(); +  trees->resize(n); + +  // compute inside probabilities +  vector<prob_t> node_probs; +  Inside<prob_t, EdgeProb>(hg, &node_probs, EdgeProb()); + +  vector<bool> sampled_edges(hg.edges_.size()); +  queue<unsigned> q; +  SampleSet<prob_t> ss; +  for (unsigned i = 0; i < n; ++i) { +    fill(sampled_edges.begin(), sampled_edges.end(), false); +    // sample derivation top down +    assert(q.empty()); +    q.push(hg.nodes_.size() - 1); +    prob_t model_score = prob_t::One(); +    while(!q.empty()) { +      unsigned cur_node_id = q.front(); +      q.pop(); +      const Hypergraph::Node& node = hg.nodes_[cur_node_id]; +      const 
unsigned num_in_edges = node.in_edges_.size(); +      unsigned sampled_edge_idx = 0; +      if (num_in_edges == 1) { +        sampled_edge_idx = node.in_edges_[0]; +      } else { +        assert(num_in_edges > 1); +        ss.clear(); +        for (unsigned j = 0; j < num_in_edges; ++j) { +          const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; +          prob_t p = edge.edge_prob_;   // edge weight +          for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) +            p *= node_probs[edge.tail_nodes_[k]];  // tail node inside weight +          ss.add(p); +        } +        sampled_edge_idx = node.in_edges_[rng->SelectSample(ss)]; +      } +      sampled_edges[sampled_edge_idx] = true; +      const Hypergraph::Edge& sampled_edge = hg.edges_[sampled_edge_idx]; +      model_score *= sampled_edge.edge_prob_; +      //sampled_deriv->push_back(sampled_edge_idx); +      for (unsigned j = 0; j < sampled_edge.tail_nodes_.size(); ++j) { +        q.push(sampled_edge.tail_nodes_[j]); +      } +    } +    vector<WordID> tmp; +    Viterbi(hg, &tmp, ETreeTraversal(), SampledDerivationWeightFunction(sampled_edges)); +    (*trees)[i] = TD::GetString(tmp); +  } +} + diff --git a/decoder/hg_sampler.h b/decoder/hg_sampler.h index bf4e1eb0..6ac39a20 100644 --- a/decoder/hg_sampler.h +++ b/decoder/hg_sampler.h @@ -3,6 +3,7 @@  #include <vector> +#include <string>  #include "sparse_vector.h"  #include "sampler.h"  #include "wordid.h" @@ -22,6 +23,12 @@ struct HypergraphSampler {                      unsigned n,   // how many samples to draw                      MT19937* rng,                      std::vector<Hypothesis>* hypos); + +  static void +  sample_trees(const Hypergraph& hg, +               unsigned n, +               MT19937* rng, +               std::vector<std::string>* trees);  };  #endif diff --git a/decoder/hg_test.cc b/decoder/hg_test.cc index 92ed98b2..37469748 100644 --- a/decoder/hg_test.cc +++ b/decoder/hg_test.cc @@ -6,6 +6,7 @@  #include 
"json_parse.h"  #include "hg_intersect.h" +#include "hg_union.h"  #include "viterbi.h"  #include "kbest.h"  #include "inside_outside.h" @@ -52,7 +53,7 @@ BOOST_AUTO_TEST_CASE(Union) {    int l2 = ViterbiPathLength(hg2);    cerr << c1 << "\t" << TD::GetString(t1) << endl;    cerr << c2 << "\t" << TD::GetString(t2) << endl; -  hg1.Union(hg2); +  HG::Union(hg2, &hg1);    hg1.Reweight(wts);    c3 = ViterbiESentence(hg1, &t3);    int l3 = ViterbiPathLength(hg1); @@ -121,8 +122,8 @@ BOOST_AUTO_TEST_CASE(InsideScore) {    vector<prob_t> post;    inside = hg.ComputeBestPathThroughEdges(&post);    BOOST_CHECK_CLOSE(-0.3, log(inside), 1e-4);  // computed by hand -  BOOST_CHECK_EQUAL(post.size(), 4); -  for (int i = 0; i < 4; ++i) { +  BOOST_CHECK_EQUAL(post.size(), 5); +  for (int i = 0; i < 5; ++i) {      cerr << "edge post: " << log(post[i]) << '\t' << hg.edges_[i].rule_->AsString() << endl;    }  } @@ -139,12 +140,15 @@ BOOST_AUTO_TEST_CASE(PruneInsideOutside) {    cerr << TD::GetString(trans) << "\n";    cerr << "cost: " << cost << "\n";    hg.PrintGraphviz(); +#if 0    hg.DensityPruneInsideOutside(0.5, false, 2.0);    hg.BeamPruneInsideOutside(0.5, false, 0.5);    cost = ViterbiESentence(hg, &trans);    cerr << "Ncst: " << cost << endl;    cerr << TD::GetString(trans) << "\n";    hg.PrintGraphviz(); +#endif +  cerr << "FIX PLEASE\n";  }  BOOST_AUTO_TEST_CASE(TestPruneEdges) { diff --git a/decoder/hg_test.h b/decoder/hg_test.h index 2e308c37..e96cb0b1 100644 --- a/decoder/hg_test.h +++ b/decoder/hg_test.h @@ -64,12 +64,21 @@ Name HGjsons[]= {  } +void AddNullEdge(Hypergraph* hg) { +  TRule x; +  x.arity_ = 0; +  hg->nodes_[0].in_edges_.push_back(hg->AddEdge(TRulePtr(new TRule(x)), Hypergraph::TailNodeVector())->id_); +  hg->edges_.back().head_node_ = 0; +} +  void HGSetup::CreateTinyLatticeHG(Hypergraph* hg) {      Json(hg,HGjsons[TinyLatticeHG]); +  AddNullEdge(hg);  }  void HGSetup::CreateLatticeHG(Hypergraph* hg) {    Json(hg,HGjsons[LatticeHG]); +  AddNullEdge(hg);  
}  void HGSetup::CreateHG_tiny(Hypergraph* hg) { diff --git a/decoder/hg_union.cc b/decoder/hg_union.cc new file mode 100644 index 00000000..37082976 --- /dev/null +++ b/decoder/hg_union.cc @@ -0,0 +1,58 @@ +#include "hg_union.h" + +#include "hg.h" + +using namespace std; + +namespace HG { + +void Union(const Hypergraph& in, Hypergraph* out) { +  if (&in == out) return; +  if (out->nodes_.empty()) { +    out->nodes_ = in.nodes_; +    out->edges_ = in.edges_; return; +  } +  unsigned noff = out->nodes_.size(); +  unsigned eoff = out->edges_.size(); +  int ogoal = in.nodes_.size() - 1; +  int cgoal = noff - 1; +  // keep a single goal node, so add nodes.size - 1 +  out->nodes_.resize(out->nodes_.size() + ogoal); +  // add all edges +  out->edges_.resize(out->edges_.size() + in.edges_.size()); + +  for (int i = 0; i < ogoal; ++i) { +    const Hypergraph::Node& on = in.nodes_[i]; +    Hypergraph::Node& cn = out->nodes_[i + noff]; +    cn.id_ = i + noff; +    cn.in_edges_.resize(on.in_edges_.size()); +    for (unsigned j = 0; j < on.in_edges_.size(); ++j) +      cn.in_edges_[j] = on.in_edges_[j] + eoff; + +    cn.out_edges_.resize(on.out_edges_.size()); +    for (unsigned j = 0; j < on.out_edges_.size(); ++j) +      cn.out_edges_[j] = on.out_edges_[j] + eoff; +  } + +  for (unsigned i = 0; i < in.edges_.size(); ++i) { +    const Hypergraph::Edge& oe = in.edges_[i]; +    Hypergraph::Edge& ce = out->edges_[i + eoff]; +    ce.id_ = i + eoff; +    ce.rule_ = oe.rule_; +    ce.feature_values_ = oe.feature_values_; +    if (oe.head_node_ == ogoal) { +      ce.head_node_ = cgoal; +      out->nodes_[cgoal].in_edges_.push_back(ce.id_); +    } else { +      ce.head_node_ = oe.head_node_ + noff; +    } +    ce.tail_nodes_.resize(oe.tail_nodes_.size()); +    for (unsigned j = 0; j < oe.tail_nodes_.size(); ++j) +      ce.tail_nodes_[j] = oe.tail_nodes_[j] + noff; +  } + +  out->TopologicallySortNodesAndEdges(cgoal); +} + +} + diff --git a/decoder/hg_union.h b/decoder/hg_union.h new 
file mode 100644 index 00000000..34624246 --- /dev/null +++ b/decoder/hg_union.h @@ -0,0 +1,9 @@ +#ifndef _HG_UNION_H_ +#define _HG_UNION_H_ + +class Hypergraph; +namespace HG { +  void Union(const Hypergraph& in, Hypergraph* out); +}; + +#endif diff --git a/decoder/incremental.cc b/decoder/incremental.cc new file mode 100644 index 00000000..46615b0b --- /dev/null +++ b/decoder/incremental.cc @@ -0,0 +1,167 @@ +#include "incremental.h" + +#include "hg.h" +#include "fdict.h" +#include "tdict.h" + +#include "lm/enumerate_vocab.hh" +#include "lm/model.hh" +#include "search/config.hh" +#include "search/context.hh" +#include "search/edge.hh" +#include "search/edge_generator.hh" +#include "search/rule.hh" +#include "search/vertex.hh" +#include "search/vertex_generator.hh" +#include "util/exception.hh" + +#include <boost/scoped_ptr.hpp> +#include <boost/scoped_array.hpp> + +#include <iostream> +#include <vector> + +namespace { + +struct MapVocab : public lm::EnumerateVocab { +  public: +    MapVocab() {} + +    // Do not call after Lookup.   +    void Add(lm::WordIndex index, const StringPiece &str) { +      const WordID cdec_id = TD::Convert(str.as_string()); +      if (cdec_id >= out_.size()) out_.resize(cdec_id + 1); +      out_[cdec_id] = index; +    } + +    // Assumes Add has been called and will never be called again.   +    lm::WordIndex FromCDec(WordID id) const { +      return out_[out_.size() > id ? 
id : 0]; +    } + +  private: +    std::vector<lm::WordIndex> out_; +}; + +template <class Model> class Incremental : public IncrementalBase { +  public: +    Incremental(const char *model_file, const std::vector<weight_t> &weights) : +      IncrementalBase(weights),  +      m_(model_file, GetConfig()), +      weights_( +          weights[FD::Convert("KLanguageModel")], +          weights[FD::Convert("KLanguageModel_OOV")], +          weights[FD::Convert("WordPenalty")]) { +      std::cerr << "Weights KLanguageModel " << weights_.LM() << " KLanguageModel_OOV " << weights_.OOV() << " WordPenalty " << weights_.WordPenalty() << std::endl; +    } +    void Search(unsigned int pop_limit, const Hypergraph &hg) const; + +  private: +    void ConvertEdge(const search::Context<Model> &context, bool final, search::Vertex *vertices, const Hypergraph::Edge &in, search::EdgeGenerator &gen) const; + +    lm::ngram::Config GetConfig() { +      lm::ngram::Config ret; +      ret.enumerate_vocab = &vocab_; +      return ret; +    } + +    MapVocab vocab_; + +    const Model m_; + +    const search::Weights weights_; +}; + +void PrintFinal(const Hypergraph &hg, const search::Final final) { +  const std::vector<WordID> &words = static_cast<const Hypergraph::Edge*>(final.GetNote().vp)->rule_->e(); +  const search::Final *child(final.Children()); +  for (std::vector<WordID>::const_iterator i = words.begin(); i != words.end(); ++i) { +    if (*i > 0) { +      std::cout << TD::Convert(*i) << ' '; +    } else { +      PrintFinal(hg, *child++); +    } +  } +} + +template <class Model> void Incremental<Model>::Search(unsigned int pop_limit, const Hypergraph &hg) const { +  boost::scoped_array<search::Vertex> out_vertices(new search::Vertex[hg.nodes_.size()]); +  search::Config config(weights_, pop_limit); +  search::Context<Model> context(config, m_); + +  for (unsigned int i = 0; i < hg.nodes_.size() - 1; ++i) { +    search::EdgeGenerator gen; +    const Hypergraph::EdgesVector &down_edges 
= hg.nodes_[i].in_edges_; +    for (unsigned int j = 0; j < down_edges.size(); ++j) { +      unsigned int edge_index = down_edges[j]; +      ConvertEdge(context, i == hg.nodes_.size() - 2, out_vertices.get(), hg.edges_[edge_index], gen); +    } +    search::VertexGenerator vertex_gen(context, out_vertices[i]); +    gen.Search(context, vertex_gen); +  } +  const search::Final top = out_vertices[hg.nodes_.size() - 2].BestChild(); +  if (!top.Valid()) { +    std::cout << "NO PATH FOUND" << std::endl; +  } else { +    PrintFinal(hg, top); +    std::cout << "||| " << top.GetScore() << std::endl; +  } +} + +template <class Model> void Incremental<Model>::ConvertEdge(const search::Context<Model> &context, bool final, search::Vertex *vertices, const Hypergraph::Edge &in, search::EdgeGenerator &gen) const { +  const std::vector<WordID> &e = in.rule_->e(); +  std::vector<lm::WordIndex> words; +  words.reserve(e.size()); +  std::vector<search::PartialVertex> nts; +  unsigned int terminals = 0; +  float score = 0.0; +  for (std::vector<WordID>::const_iterator word = e.begin(); word != e.end(); ++word) { +    if (*word <= 0) { +      nts.push_back(vertices[in.tail_nodes_[-*word]].RootPartial()); +      if (nts.back().Empty()) return; +      score += nts.back().Bound(); +      words.push_back(lm::kMaxWordIndex); +    } else { +      ++terminals; +      words.push_back(vocab_.FromCDec(*word)); +    } +  } + +  if (final) { +    words.push_back(m_.GetVocabulary().EndSentence()); +  } + +  search::PartialEdge out(gen.AllocateEdge(nts.size())); + +  memcpy(out.NT(), &nts[0], sizeof(search::PartialVertex) * nts.size()); + +  search::Note note; +  note.vp = &in; +  out.SetNote(note); + +  score += in.rule_->GetFeatureValues().dot(cdec_weights_); +  score -= static_cast<float>(terminals) * context.GetWeights().WordPenalty() / M_LN10; +  score += search::ScoreRule(context, words, final, out.Between()); +  out.SetScore(score); + +  gen.AddEdge(out); +} + +} // namespace + +IncrementalBase 
*IncrementalBase::Load(const char *model_file, const std::vector<weight_t> &weights) { +  lm::ngram::ModelType model_type; +  if (!lm::ngram::RecognizeBinary(model_file, model_type)) model_type = lm::ngram::PROBING; +  switch (model_type) { +    case lm::ngram::PROBING: +      return new Incremental<lm::ngram::ProbingModel>(model_file, weights); +    case lm::ngram::REST_PROBING: +      return new Incremental<lm::ngram::RestProbingModel>(model_file, weights); +    default: +      UTIL_THROW(util::Exception, "Sorry this lm type isn't supported yet."); +  } +} + +IncrementalBase::~IncrementalBase() {} + +IncrementalBase::IncrementalBase(const std::vector<weight_t> &weights) : cdec_weights_(weights) {} diff --git a/decoder/incremental.h b/decoder/incremental.h new file mode 100644 index 00000000..f791a626 --- /dev/null +++ b/decoder/incremental.h @@ -0,0 +1,23 @@ +#ifndef _INCREMENTAL_H_ +#define _INCREMENTAL_H_ + +#include "weights.h" +#include <vector> + +class Hypergraph; + +class IncrementalBase { +  public: +    static IncrementalBase *Load(const char *model_file, const std::vector<weight_t> &weights); + +    virtual ~IncrementalBase(); + +    virtual void Search(unsigned int pop_limit, const Hypergraph &hg) const = 0; + +  protected: +    IncrementalBase(const std::vector<weight_t> &weights); + +    const std::vector<weight_t> &cdec_weights_; +}; + +#endif // _INCREMENTAL_H_ diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h index f73a1d3f..c0377fe8 100644 --- a/decoder/inside_outside.h +++ b/decoder/inside_outside.h @@ -42,7 +42,7 @@ WeightType Inside(const Hypergraph& hg,      Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_;      const unsigned num_in_edges = in.size();      for (unsigned j = 0; j < num_in_edges; ++j) { -      const Hypergraph::Edge& edge = hg.edges_[in[j]]; +      const HG::Edge& edge = hg.edges_[in[j]];        WeightType score = weight(edge);        for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) {          
const int tail_node_index = edge.tail_nodes_[k]; @@ -74,7 +74,7 @@ void Outside(const Hypergraph& hg,      Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_;      const int num_in_edges = in.size();      for (int j = 0; j < num_in_edges; ++j) { -      const Hypergraph::Edge& edge = hg.edges_[in[j]]; +      const HG::Edge& edge = hg.edges_[in[j]];        WeightType head_and_edge_weight = weight(edge);        head_and_edge_weight *= head_node_outside_score;        const int num_tail_nodes = edge.tail_nodes_.size(); @@ -138,7 +138,7 @@ struct InsideOutsides {        Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_;        const int num_in_edges = in.size();        for (int j = 0; j < num_in_edges; ++j) { -        const Hypergraph::Edge& edge = hg.edges_[in[j]]; +        const HG::Edge& edge = hg.edges_[in[j]];          KType kbar_e = outside[i];          const int num_tail_nodes = edge.tail_nodes_.size();          for (int k = 0; k < num_tail_nodes; ++k) @@ -156,7 +156,7 @@ struct InsideOutsides {        const int num_in_edges = in.size();        for (int j = 0; j < num_in_edges; ++j) {          int edgei=in[j]; -        const Hypergraph::Edge& edge = hg.edges_[edgei]; +        const HG::Edge& edge = hg.edges_[edgei];          V x=weight(edge)*outside[i];          const int num_tail_nodes = edge.tail_nodes_.size();          for (int k = 0; k < num_tail_nodes; ++k) diff --git a/decoder/kbest.h b/decoder/kbest.h index 9af3a20e..9a55f653 100644 --- a/decoder/kbest.h +++ b/decoder/kbest.h @@ -48,7 +48,7 @@ namespace KBest {      }      struct Derivation { -      Derivation(const Hypergraph::Edge& e, +      Derivation(const HG::Edge& e,                   const SmallVectorInt& jv,                   const WeightType& w,                   const SparseVector<double>& f) : @@ -58,11 +58,11 @@ namespace KBest {          feature_values(f) {}        // dummy constructor, just for query -      Derivation(const Hypergraph::Edge& e, +      Derivation(const HG::Edge& e, 
                  const SmallVectorInt& jv) : edge(&e), j(jv) {}        T yield; -      const Hypergraph::Edge* const edge; +      const HG::Edge* const edge;        const SmallVectorInt j;        const WeightType score;        const SparseVector<double> feature_values; @@ -82,8 +82,8 @@ namespace KBest {        Derivation const* d;        explicit EdgeHandle(Derivation const* d) : d(d) {  }  //      operator bool() const { return d->edge; } -      operator Hypergraph::Edge const* () const { return d->edge; } -//      Hypergraph::Edge const * operator ->() const { return d->edge; } +      operator HG::Edge const* () const { return d->edge; } +//      HG::Edge const * operator ->() const { return d->edge; }      };      EdgeHandle operator()(unsigned t,unsigned taili,EdgeHandle const& parent) const { @@ -158,7 +158,7 @@ namespace KBest {      // the yield is computed in LazyKthBest before the derivation is added to D      // returns NULL if j refers to derivation numbers larger than the      // antecedent structure define -    Derivation* CreateDerivation(const Hypergraph::Edge& e, const SmallVectorInt& j) { +    Derivation* CreateDerivation(const HG::Edge& e, const SmallVectorInt& j) {        WeightType score = w(e);        SparseVector<double> feats = e.feature_values_;        for (int i = 0; i < e.Arity(); ++i) { @@ -177,7 +177,7 @@ namespace KBest {        const Hypergraph::Node& node = g.nodes_[v];        for (unsigned i = 0; i < node.in_edges_.size(); ++i) { -        const Hypergraph::Edge& edge = g.edges_[node.in_edges_[i]]; +        const HG::Edge& edge = g.edges_[node.in_edges_[i]];          SmallVectorInt jv(edge.Arity(), 0);          Derivation* d = CreateDerivation(edge, jv);          assert(d); diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h index b603e27a..d2c4715c 100644 --- a/decoder/oracle_bleu.h +++ b/decoder/oracle_bleu.h @@ -12,6 +12,7 @@  #include "scorer.h"  #include "hg.h"  #include "ff_factory.h" +#include "ffset.h"  #include 
"ff_bleu.h"  #include "sparse_vector.h"  #include "viterbi.h" @@ -26,7 +27,7 @@  struct Translation {    typedef std::vector<WordID> Sentence;    Sentence sentence; -  FeatureVector features; +  SparseVector<double> features;    Translation() {  }    Translation(Hypergraph const& hg,WeightVector *feature_weights=0)    { @@ -57,14 +58,14 @@ struct Oracle {    }    // feature 0 will be the error rate in fear and hope    // move toward hope -  FeatureVector ModelHopeGradient() const { -    FeatureVector r=hope.features-model.features; +  SparseVector<double> ModelHopeGradient() const { +    SparseVector<double> r=hope.features-model.features;      r.set_value(0,0);      return r;    }    // move toward hope from fear -  FeatureVector FearHopeGradient() const { -    FeatureVector r=hope.features-fear.features; +  SparseVector<double> FearHopeGradient() const { +    SparseVector<double> r=hope.features-fear.features;      r.set_value(0,0);      return r;    } diff --git a/decoder/program_options.h b/decoder/program_options.h index 87afb320..3cd7649a 100644 --- a/decoder/program_options.h +++ b/decoder/program_options.h @@ -94,7 +94,7 @@ struct any_printer  : public boost::function<void (Ostream &,boost::any const&)>    {}    template <class T> -  explicit any_printer(T const* tag) : F(typed_print<T>()) { +  explicit any_printer(T const*) : F(typed_print<T>()) {    }    template <class T> diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index a978cfc2..3b43b586 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -15,13 +15,73 @@  #include "tdict.h"  #include "viterbi.h"  #include "verbose.h" +#include <tr1/unordered_map>  #define foreach         BOOST_FOREACH  #define reverse_foreach BOOST_REVERSE_FOREACH  using namespace std; +using namespace std::tr1;  static bool printGrammarsUsed = false; +struct GlueGrammar : public TextGrammar { +  // read glue grammar from file +  explicit GlueGrammar(const std::string& file); +  
GlueGrammar(const std::string& goal_nt, const std::string& default_nt, const unsigned int ctf_level=0);  // "S", "X" +  virtual bool HasRuleForSpan(int i, int j, int distance) const; +}; + +struct PassThroughGrammar : public TextGrammar { +  PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0); +  virtual bool HasRuleForSpan(int i, int j, int distance) const; +}; + +GlueGrammar::GlueGrammar(const string& file) : TextGrammar(file) {} + +static void RefineRule(TRulePtr pt, const unsigned int ctf_level){ +  for (unsigned int i=0; i<ctf_level; ++i){ +    TRulePtr r(new TRule(*pt)); +    pt->fine_rules_.reset(new vector<TRulePtr>); +    pt->fine_rules_->push_back(r); +    pt = r; +  } +} + +GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt, const unsigned int ctf_level) { +  TRulePtr stop_glue(new TRule("[" + goal_nt + "] ||| [" + default_nt + ",1] ||| [1]")); +  AddRule(stop_glue); +  RefineRule(stop_glue, ctf_level); +  TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] ["+ default_nt + ",2] ||| [1] [2] ||| Glue=1")); +  AddRule(glue); +  RefineRule(glue, ctf_level); +} + +bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const { +  return (i == 0); +} + +PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) { +  unordered_set<WordID> ss; +  for (int i = 0; i < input.size(); ++i) { +    const vector<LatticeArc>& alts = input[i]; +    for (int k = 0; k < alts.size(); ++k) { +      const int j = alts[k].dist2next + i; +      const string& src = TD::Convert(alts[k].label); +      if (ss.count(alts[k].label) == 0) { +        TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1")); +        pt->a_.push_back(AlignmentPoint(0,0)); +        AddRule(pt); +        RefineRule(pt, ctf_level); +        ss.insert(alts[k].label); +      } +    } +  } +} + +bool 
PassThroughGrammar::HasRuleForSpan(int, int, int distance) const { +  return (distance < 2); +} +  struct SCFGTranslatorImpl {    SCFGTranslatorImpl(const boost::program_options::variables_map& conf) :        max_span_limit(conf["scfg_max_span_limit"].as<int>()), diff --git a/decoder/tromble_loss.h b/decoder/tromble_loss.h index 599a2d54..fde33100 100644 --- a/decoder/tromble_loss.h +++ b/decoder/tromble_loss.h @@ -28,7 +28,7 @@ class TrombleLossComputer : private boost::base_from_member<boost::scoped_ptr<Tr   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, +                                     const HG::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc index 1b9c6665..9e381ac6 100644 --- a/decoder/viterbi.cc +++ b/decoder/viterbi.cc @@ -139,8 +139,8 @@ inline bool close_enough(double a,double b,double epsilon)      return diff<=epsilon*fabs(a) || diff<=epsilon*fabs(b);  } -FeatureVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights,bool fatal_dotprod_disagreement) { -  FeatureVector r; +SparseVector<double> ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights,bool fatal_dotprod_disagreement) { +  SparseVector<double> r;    const prob_t p = Viterbi<FeatureVectorTraversal>(hg, &r);    if (weights) {      double logp=log(p); diff --git a/decoder/viterbi.h b/decoder/viterbi.h index 03e961a2..a8a0ea7f 100644 --- a/decoder/viterbi.h +++ b/decoder/viterbi.h @@ -14,10 +14,10 @@ std::string viterbi_stats(Hypergraph const& hg, std::string const& name="forest"  //TODO: make T a typename inside Traversal and WeightType a typename inside WeightFunction?  
// Traversal must implement:  //  typedef T Result; -//  void operator()(Hypergraph::Edge const& e,const vector<const Result*>& ants, Result* result) const; +//  void operator()(HG::Edge const& e,const vector<const Result*>& ants, Result* result) const;  // WeightFunction must implement:  //  typedef prob_t Weight; -//  Weight operator()(Hypergraph::Edge const& e) const; +//  Weight operator()(HG::Edge const& e) const;  template<class Traversal,class WeightFunction>  typename WeightFunction::Weight Viterbi(const Hypergraph& hg,                     typename Traversal::Result* result, @@ -39,9 +39,9 @@ typename WeightFunction::Weight Viterbi(const Hypergraph& hg,        *cur_node_best_weight = WeightType(1);        continue;      } -    Hypergraph::Edge const* edge_best=0; +    HG::Edge const* edge_best=0;      for (unsigned j = 0; j < num_in_edges; ++j) { -      const Hypergraph::Edge& edge = hg.edges_[cur_node.in_edges_[j]]; +      const HG::Edge& edge = hg.edges_[cur_node.in_edges_[j]];        WeightType score = weight(edge);        for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k)          score *= vit_weight[edge.tail_nodes_[k]]; @@ -51,7 +51,7 @@ typename WeightFunction::Weight Viterbi(const Hypergraph& hg,        }      }      assert(edge_best); -    Hypergraph::Edge const& edgeb=*edge_best; +    HG::Edge const& edgeb=*edge_best;      std::vector<const T*> antsb(edgeb.tail_nodes_.size());      for (unsigned k = 0; k < edgeb.tail_nodes_.size(); ++k)        antsb[k] = &vit_result[edgeb.tail_nodes_[k]]; @@ -98,7 +98,7 @@ prob_t Viterbi(const Hypergraph& hg,  struct PathLengthTraversal {    typedef int Result; -  void operator()(const Hypergraph::Edge& edge, +  void operator()(const HG::Edge& edge,                    const std::vector<const int*>& ants,                    int* result) const {      (void) edge; @@ -109,7 +109,7 @@ struct PathLengthTraversal {  struct ESentenceTraversal {    typedef std::vector<WordID> Result; -  void operator()(const 
Hypergraph::Edge& edge, +  void operator()(const HG::Edge& edge,                    const std::vector<const Result*>& ants,                    Result* result) const {      edge.rule_->ESubstitute(ants, result); @@ -118,7 +118,7 @@ struct ESentenceTraversal {  struct ELengthTraversal {    typedef int Result; -  void operator()(const Hypergraph::Edge& edge, +  void operator()(const HG::Edge& edge,                    const std::vector<const int*>& ants,                    int* result) const {      *result = edge.rule_->ELength() - edge.rule_->Arity(); @@ -128,7 +128,7 @@ struct ELengthTraversal {  struct FSentenceTraversal {    typedef std::vector<WordID> Result; -  void operator()(const Hypergraph::Edge& edge, +  void operator()(const HG::Edge& edge,                    const std::vector<const Result*>& ants,                    Result* result) const {      edge.rule_->FSubstitute(ants, result); @@ -142,7 +142,7 @@ struct ETreeTraversal {    const std::string space;    const std::string right;    typedef std::vector<WordID> Result; -  void operator()(const Hypergraph::Edge& edge, +  void operator()(const HG::Edge& edge,                    const std::vector<const Result*>& ants,                    Result* result) const {      Result tmp; @@ -162,7 +162,7 @@ struct FTreeTraversal {    const std::string space;    const std::string right;    typedef std::vector<WordID> Result; -  void operator()(const Hypergraph::Edge& edge, +  void operator()(const HG::Edge& edge,                    const std::vector<const Result*>& ants,                    Result* result) const {      Result tmp; @@ -177,8 +177,8 @@ struct FTreeTraversal {  };  struct ViterbiPathTraversal { -  typedef std::vector<Hypergraph::Edge const*> Result; -  void operator()(const Hypergraph::Edge& edge, +  typedef std::vector<HG::Edge const*> Result; +  void operator()(const HG::Edge& edge,                    std::vector<Result const*> const& ants,                    Result* result) const {      for (unsigned i = 
0; i < ants.size(); ++i) @@ -189,8 +189,8 @@ struct ViterbiPathTraversal {  };  struct FeatureVectorTraversal { -  typedef FeatureVector Result; -  void operator()(Hypergraph::Edge const& edge, +  typedef SparseVector<double> Result; +  void operator()(HG::Edge const& edge,                    std::vector<Result const*> const& ants,                    Result* result) const {      for (unsigned i = 0; i < ants.size(); ++i) @@ -210,6 +210,6 @@ int ViterbiELength(const Hypergraph& hg);  int ViterbiPathLength(const Hypergraph& hg);  /// if weights supplied, assert viterbi prob = features.dot(*weights) (exception if fatal, cerr warn if not).  return features (sum over all edges in viterbi derivation) -FeatureVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights=0,bool fatal_dotprod_disagreement=false); +SparseVector<double> ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights=0,bool fatal_dotprod_disagreement=false);  #endif | 
