summary | refs | log | tree | commit | diff
path: root/decoder
diff options
context:
space:
mode:
author Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-11-05 15:29:46 +0100
committer Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-11-05 15:29:46 +0100
commit 6f29f345dc06c1a1033475eac1d1340781d1d603 (patch)
tree 6fa4cdd7aefd7d54c9585c2c6274db61bb8b159a /decoder
parent b510da2e562c695c90d565eb295c749569c59be8 (diff)
parent c615c37501fa8576584a510a9d2bfe2fdd5bace7 (diff)
merge upstream/master
Diffstat (limited to 'decoder')
-rw-r--r-- decoder/Jamfile 78
-rw-r--r-- decoder/Makefile.am 7
-rw-r--r-- decoder/apply_models.cc 1
-rw-r--r-- decoder/cdec.cc 4
-rw-r--r-- decoder/cdec_ff.cc 3
-rw-r--r-- decoder/cfg.h 2
-rw-r--r-- decoder/cfg_format.h 2
-rw-r--r-- decoder/cfg_test.cc 4
-rw-r--r-- decoder/decoder.cc 46
-rw-r--r-- decoder/decoder.h 2
-rw-r--r-- decoder/exp_semiring.h 2
-rw-r--r-- decoder/ff.cc 197
-rw-r--r-- decoder/ff.h 245
-rw-r--r-- decoder/ff_basic.cc 80
-rw-r--r-- decoder/ff_basic.h 68
-rw-r--r-- decoder/ff_bleu.h 2
-rw-r--r-- decoder/ff_charset.cc 12
-rw-r--r-- decoder/ff_charset.h 6
-rw-r--r-- decoder/ff_context.cc 2
-rw-r--r-- decoder/ff_context.h 2
-rw-r--r-- decoder/ff_csplit.cc 3
-rw-r--r-- decoder/ff_csplit.h 4
-rw-r--r-- decoder/ff_dwarf.cc 1
-rw-r--r-- decoder/ff_dwarf.h 2
-rw-r--r-- decoder/ff_external.cc 60
-rw-r--r-- decoder/ff_external.h 26
-rw-r--r-- decoder/ff_factory.h 4
-rw-r--r-- decoder/ff_klm.cc 6
-rw-r--r-- decoder/ff_klm.h 3
-rw-r--r-- decoder/ff_lm.cc 4
-rw-r--r-- decoder/ff_lm.h 5
-rw-r--r-- decoder/ff_ngrams.h 2
-rw-r--r-- decoder/ff_rules.cc 2
-rw-r--r-- decoder/ff_rules.h 5
-rw-r--r-- decoder/ff_ruleshape.cc 2
-rw-r--r-- decoder/ff_ruleshape.h 2
-rw-r--r-- decoder/ff_source_syntax.cc 1
-rw-r--r-- decoder/ff_source_syntax.h 4
-rw-r--r-- decoder/ff_spans.cc 2
-rw-r--r-- decoder/ff_spans.h 4
-rw-r--r-- decoder/ff_tagger.cc 1
-rw-r--r-- decoder/ff_tagger.h 6
-rw-r--r-- decoder/ff_wordalign.cc 2
-rw-r--r-- decoder/ff_wordalign.h 30
-rw-r--r-- decoder/ff_wordset.cc 1
-rw-r--r-- decoder/ff_wordset.h 5
-rw-r--r-- decoder/ffset.cc 72
-rw-r--r-- decoder/ffset.h 57
-rw-r--r-- decoder/grammar.cc 45
-rw-r--r-- decoder/grammar.h 14
-rw-r--r-- decoder/grammar_test.cc 2
-rw-r--r-- decoder/hg.h 194
-rw-r--r-- decoder/hg_intersect.cc 8
-rw-r--r-- decoder/hg_intersect.h 6
-rw-r--r-- decoder/hg_io.cc 8
-rw-r--r-- decoder/hg_sampler.cc 55
-rw-r--r-- decoder/hg_sampler.h 7
-rw-r--r-- decoder/hg_test.cc 10
-rw-r--r-- decoder/hg_test.h 9
-rw-r--r-- decoder/hg_union.cc 58
-rw-r--r-- decoder/hg_union.h 9
-rw-r--r-- decoder/incremental.cc 167
-rw-r--r-- decoder/incremental.h 23
-rw-r--r-- decoder/inside_outside.h 8
-rw-r--r-- decoder/kbest.h 14
-rw-r--r-- decoder/oracle_bleu.h 11
-rw-r--r-- decoder/program_options.h 2
-rw-r--r-- decoder/scfg_translator.cc 60
-rw-r--r-- decoder/tromble_loss.h 2
-rw-r--r-- decoder/viterbi.cc 4
-rw-r--r-- decoder/viterbi.h 32
71 files changed, 1017 insertions, 812 deletions
diff --git a/decoder/Jamfile b/decoder/Jamfile
deleted file mode 100644
index 06c5bfda..00000000
--- a/decoder/Jamfile
+++ /dev/null
@@ -1,78 +0,0 @@
-import testing ;
-import lex ;
-import option ;
-
-if [ option.get "with-glc" ] {
- glc = ff_glc.cc string_util.cc feature-factory.cc ;
-}
-
-lib decoder :
- forest_writer.cc
- maxtrans_blunsom.cc
- cdec_ff.cc
- cfg.cc
- dwarf.cc
- ff_dwarf.cc
- rule_lexer.ll
- fst_translator.cc
- csplit.cc
- translator.cc
- scfg_translator.cc
- hg.cc
- hg_io.cc
- decoder.cc
- hg_intersect.cc
- hg_sampler.cc
- factored_lexicon_helper.cc
- viterbi.cc
- lattice.cc
- aligner.cc
- apply_models.cc
- earley_composer.cc
- phrasetable_fst.cc
- trule.cc
- ff.cc
- ff_rules.cc
- ff_wordset.cc
- ff_context.cc
- ff_charset.cc
- ff_lm.cc
- ff_klm.cc
- ff_ngrams.cc
- ff_spans.cc
- ff_ruleshape.cc
- ff_wordalign.cc
- ff_csplit.cc
- ff_tagger.cc
- ff_source_syntax.cc
- ff_bleu.cc
- ff_factory.cc
- lexalign.cc
- lextrans.cc
- tagger.cc
- bottom_up_parser.cc
- phrasebased_translator.cc
- JSON_parser.c
- json_parse.cc
- grammar.cc
- $(glc)
- ..//utils
- ..//mteval
- ../klm/lm//kenlm
- ..//boost_program_options
- : <include>.
- : :
- <library>..//utils
- <library>..//mteval
- <library>../klm/lm//kenlm
- <library>..//boost_program_options
- <include>.
- ;
-
-exe cdec : cdec.cc decoder ..//utils ..//mteval ../klm/lm//kenlm ..//boost_program_options ;
-
-all_tests [ glob *_test.cc : cfg_test.cc ] : decoder : <testing.arg>$(TOP)/decoder/test_data ;
-
-install legacy : cdec
- : <location>$(TOP)/cdec <install-type>EXE <install-dependencies>on <link>shared:<dll-path>$(TOP)/cdec <link>shared:<install-type>LIB ;
-
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 0a792549..f8f427d3 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -17,7 +17,7 @@ trule_test_SOURCES = trule_test.cc
trule_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz
cdec_SOURCES = cdec.cc
-cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
+cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm
@@ -33,6 +33,7 @@ libcdec_a_SOURCES = \
cfg.cc \
dwarf.cc \
ff_dwarf.cc \
+ ff_external.cc \
rule_lexer.cc \
fst_translator.cc \
csplit.cc \
@@ -44,6 +45,7 @@ libcdec_a_SOURCES = \
hg_remove_eps.cc \
decoder.cc \
hg_intersect.cc \
+ hg_union.cc \
hg_sampler.cc \
factored_lexicon_helper.cc \
viterbi.cc \
@@ -54,6 +56,8 @@ libcdec_a_SOURCES = \
phrasetable_fst.cc \
trule.cc \
ff.cc \
+ ffset.cc \
+ ff_basic.cc \
ff_rules.cc \
ff_wordset.cc \
ff_context.cc \
@@ -69,6 +73,7 @@ libcdec_a_SOURCES = \
ff_source_syntax.cc \
ff_bleu.cc \
ff_factory.cc \
+ incremental.cc \
lexalign.cc \
lextrans.cc \
tagger.cc \
diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc
index 9ba59d1b..330de9e2 100644
--- a/decoder/apply_models.cc
+++ b/decoder/apply_models.cc
@@ -16,6 +16,7 @@
#include "verbose.h"
#include "hg.h"
#include "ff.h"
+#include "ffset.h"
#define NORMAL_CP 1
#define FAST_CP 2
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index c671af57..cc3fcff1 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -4,6 +4,8 @@
#include "decoder.h"
#include "ff_register.h"
#include "verbose.h"
+#include "timing_stats.h"
+#include "util/usage.hh"
using namespace std;
@@ -27,6 +29,7 @@ int main(int argc, char** argv) {
if (buf.empty()) continue;
decoder.Decode(buf);
}
+ Timer::Summarize();
#ifdef CP_TIME
cerr << "Time required for Cube Pruning execution: "
<< CpTime::Get()
@@ -38,6 +41,7 @@ int main(int argc, char** argv) {
cout << FD::Convert(i) << endl;
}
}
+ util::PrintUsage(std::cerr);
return 0;
}
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index d64bdada..3ab0f9f6 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -1,6 +1,7 @@
#include <boost/shared_ptr.hpp>
#include "ff.h"
+#include "ff_basic.h"
#include "ff_context.h"
#include "ff_spans.h"
#include "ff_lm.h"
@@ -18,6 +19,7 @@
#include "ff_charset.h"
#include "ff_wordset.h"
#include "ff_dwarf.h"
+#include "ff_external.h"
#ifdef HAVE_GLC
#include <cdec/ff_glc.h>
@@ -70,6 +72,7 @@ void register_feature_functions() {
ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>);
ff_registry.Register("WordSet", new FFFactory<WordSet>);
ff_registry.Register("Dwarf", new FFFactory<Dwarf>);
+ ff_registry.Register("External", new FFFactory<ExternalFeature>);
#ifdef HAVE_GLC
ff_registry.Register("ContextCRF", new FFFactory<Model1Features>);
#endif
diff --git a/decoder/cfg.h b/decoder/cfg.h
index 8cb29bb9..aeeacb83 100644
--- a/decoder/cfg.h
+++ b/decoder/cfg.h
@@ -130,7 +130,7 @@ struct CFG {
int lhs; // index into nts
RHS rhs;
prob_t p; // h unused for now (there's nothing admissable, and p is already using 1st pass inside as pushed toward top)
- FeatureVector f; // may be empty, unless copy_features on Init
+ SparseVector<double> f; // may be empty, unless copy_features on Init
IF_CFG_TRULE(TRulePtr rule;)
int size() const { // for stats only
return rhs.size();
diff --git a/decoder/cfg_format.h b/decoder/cfg_format.h
index 2f40d483..d12da261 100644
--- a/decoder/cfg_format.h
+++ b/decoder/cfg_format.h
@@ -100,7 +100,7 @@ struct CFGFormat {
}
}
- void print_features(std::ostream &o,prob_t p,FeatureVector const& fv=FeatureVector()) const {
+ void print_features(std::ostream &o,prob_t p,SparseVector<double> const& fv=SparseVector<double>()) const {
bool logp=(logprob_feat && p!=prob_t::One());
if (features || logp) {
o << partsep;
diff --git a/decoder/cfg_test.cc b/decoder/cfg_test.cc
index b8f4cf11..316c6d16 100644
--- a/decoder/cfg_test.cc
+++ b/decoder/cfg_test.cc
@@ -25,9 +25,9 @@ struct CFGTest : public TestWithParam<HgW> {
Hypergraph hg;
CFG cfg;
CFGFormat form;
- FeatureVector weights;
+ SparseVector<double> weights;
- static void JsonFN(Hypergraph &hg,CFG &cfg,FeatureVector &featw,std::string file
+ static void JsonFN(Hypergraph &hg,CFG &cfg,SparseVector<double> &featw,std::string file
,std::string const& wts="Model_0 1 EgivenF 1 f1 1")
{
istringstream ws(wts);
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index a6f7b1ce..b5f4b9b6 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -4,6 +4,7 @@
#include <boost/program_options.hpp>
#include <boost/program_options/variables_map.hpp>
#include <boost/make_shared.hpp>
+#include <boost/scoped_ptr.hpp>
#include "program_options.h"
#include "stringlib.h"
@@ -24,10 +25,12 @@
#include "hg.h"
#include "sentence_metadata.h"
#include "hg_intersect.h"
+#include "hg_union.h"
#include "oracle_bleu.h"
#include "apply_models.h"
#include "ff.h"
+#include "ffset.h"
#include "ff_factory.h"
#include "viterbi.h"
#include "kbest.h"
@@ -37,6 +40,7 @@
#include "sampler.h"
#include "forest_writer.h" // TODO this section should probably be handled by an Observer
+#include "incremental.h"
#include "hg_io.h"
#include "aligner.h"
@@ -89,11 +93,6 @@ inline void ShowBanner() {
cerr << "cdec v1.0 (c) 2009-2011 by Chris Dyer\n";
}
-inline void show_models(po::variables_map const& conf,ModelSet &ms,char const* header) {
- cerr<<header<<": ";
- ms.show_features(cerr,cerr,conf.count("warn_0_weight"));
-}
-
inline string str(char const* name,po::variables_map const& conf) {
return conf[name].as<string>();
}
@@ -131,7 +130,7 @@ inline boost::shared_ptr<FeatureFunction> make_ff(string const& ffp,bool verbose
}
boost::shared_ptr<FeatureFunction> pf = ff_registry.Create(ff, param);
if (!pf) exit(1);
- int nbyte=pf->NumBytesContext();
+ int nbyte=pf->StateSize();
if (verbose_feature_functions && !SILENT)
cerr<<"State is "<<nbyte<<" bytes for "<<pre<<"feature "<<ffp<<endl;
return pf;
@@ -327,6 +326,8 @@ struct DecoderImpl {
bool feature_expectations; // TODO Observer
bool output_training_vector; // TODO Observer
bool remove_intersected_rule_annotations;
+ boost::scoped_ptr<IncrementalBase> incremental;
+
static void ConvertSV(const SparseVector<prob_t>& src, SparseVector<double>* trg) {
for (SparseVector<prob_t>::const_iterator it = src.begin(); it != src.end(); ++it)
@@ -414,6 +415,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
("show_conditional_prob", "Output the conditional log prob to STDOUT instead of a translation")
("show_cfg_search_space", "Show the search space as a CFG")
("show_target_graph", po::value<string>(), "Directory to write the target hypergraphs to")
+ ("incremental_search", po::value<string>(), "Run lazy search with this language model file")
("coarse_to_fine_beam_prune", po::value<double>(), "Prune paths from coarse parse forest before fine parse, keeping paths within exp(alpha>=0)")
("ctf_beam_widen", po::value<double>()->default_value(2.0), "Expand coarse pass beam by this factor if no fine parse is found")
("ctf_num_widenings", po::value<int>()->default_value(2), "Widen coarse beam this many times before backing off to full parse")
@@ -641,8 +643,6 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
prev_weights = rp.weight_vector;
}
rp.models.reset(new ModelSet(*rp.weight_vector, rp.ffs));
- string ps = "Pass1 "; ps[4] += pass;
- if (!SILENT) show_models(conf,*rp.models,ps.c_str());
}
// show configuration of rescoring passes
@@ -730,6 +730,10 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
sent_id = -1;
acc_obj = 0; // accumulate objective
g_count = 0; // number of gradient pieces computed
+
+ if (conf.count("incremental_search")) {
+ incremental.reset(IncrementalBase::Load(conf["incremental_search"].as<string>().c_str(), CurrentWeightVector()));
+ }
}
Decoder::Decoder(istream* cfg) { pimpl_.reset(new DecoderImpl(conf,0,0,cfg)); }
@@ -831,6 +835,12 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
if (conf.count("show_target_graph"))
HypergraphIO::WriteTarget(conf["show_target_graph"].as<string>(), sent_id, forest);
+ if (conf.count("incremental_search")) {
+ incremental->Search(pop_limit, forest);
+ o->NotifyDecodingComplete(smeta);
+ return true;
+ }
+
for (int pass = 0; pass < rescoring_passes.size(); ++pass) {
const RescoringPass& rp = rescoring_passes[pass];
const vector<weight_t>& cur_weights = *rp.weight_vector;
@@ -870,13 +880,13 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
if (rp.fid_summary) {
if (summary_feature_type == kEDGE_PROB) {
const prob_t z = forest.PushWeightsToGoal(1.0);
- if (!isfinite(log(z)) || isnan(log(z))) {
+ if (!std::isfinite(log(z)) || std::isnan(log(z))) {
cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n";
} else {
for (int i = 0; i < forest.edges_.size(); ++i) {
const double log_prob_transition = log(forest.edges_[i].edge_prob_); // locally normalized by the edge
// head node by forest.PushWeightsToGoal
- if (!isfinite(log_prob_transition) || isnan(log_prob_transition)) {
+ if (!std::isfinite(log_prob_transition) || std::isnan(log_prob_transition)) {
cerr << "Edge: i=" << i << " got bad inside prob: " << *forest.edges_[i].rule_ << endl;
abort();
}
@@ -888,7 +898,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
} else if (summary_feature_type == kNODE_RISK) {
Hypergraph::EdgeProbs posts;
const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts);
- if (!isfinite(log(z)) || isnan(log(z))) {
+ if (!std::isfinite(log(z)) || std::isnan(log(z))) {
cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n";
} else {
for (int i = 0; i < forest.nodes_.size(); ++i) {
@@ -897,7 +907,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
for (int j = 0; j < in_edges.size(); ++j)
node_post += (posts[in_edges[j]] / z);
const double log_np = log(node_post);
- if (!isfinite(log_np) || isnan(log_np)) {
+ if (!std::isfinite(log_np) || std::isnan(log_np)) {
cerr << "got bad posterior prob for node " << i << endl;
abort();
}
@@ -912,13 +922,13 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
} else if (summary_feature_type == kEDGE_RISK) {
Hypergraph::EdgeProbs posts;
const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts);
- if (!isfinite(log(z)) || isnan(log(z))) {
+ if (!std::isfinite(log(z)) || std::isnan(log(z))) {
cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n";
} else {
assert(posts.size() == forest.edges_.size());
for (int i = 0; i < posts.size(); ++i) {
const double log_np = log(posts[i] / z);
- if (!isfinite(log_np) || isnan(log_np)) {
+ if (!std::isfinite(log_np) || std::isnan(log_np)) {
cerr << "got bad posterior prob for node " << i << endl;
abort();
}
@@ -958,7 +968,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
// Oracle Rescoring
if(get_oracle_forest) {
- assert(!"this is broken"); FeatureVector dummy; // = last_weights
+ assert(!"this is broken"); SparseVector<double> dummy; // = last_weights
Oracle oc=oracle.ComputeOracle(smeta,&forest,dummy,10,conf["forest_output"].as<std::string>());
if (!SILENT) cerr << " +Oracle BLEU forest (nodes/edges): " << forest.nodes_.size() << '/' << forest.edges_.size() << endl;
if (!SILENT) cerr << " +Oracle BLEU (paths): " << forest.NumberOfPaths() << endl;
@@ -980,7 +990,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
bool succeeded = HypergraphIO::ReadFromJSON(rf.stream(), &new_hg);
if (!succeeded) abort();
}
- new_hg.Union(forest);
+ HG::Union(forest, &new_hg);
bool succeeded = writer.Write(new_hg, false);
if (!succeeded) abort();
} else {
@@ -1067,7 +1077,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
bool succeeded = HypergraphIO::ReadFromJSON(rf.stream(), &new_hg);
if (!succeeded) abort();
}
- new_hg.Union(forest);
+ HG::Union(forest, &new_hg);
bool succeeded = writer.Write(new_hg, false);
if (!succeeded) abort();
} else {
@@ -1089,7 +1099,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
cerr << "DIFF. ERR! log_z < log_ref_z: " << log_z << " " << log_ref_z << endl;
exit(1);
}
- assert(!isnan(log_ref_z));
+ assert(!std::isnan(log_ref_z));
ref_exp -= full_exp;
acc_vec += ref_exp;
acc_obj += (log_z - log_ref_z);
diff --git a/decoder/decoder.h b/decoder/decoder.h
index bef2ff5e..79c7a602 100644
--- a/decoder/decoder.h
+++ b/decoder/decoder.h
@@ -24,7 +24,7 @@ private:
#endif
class SentenceMetadata;
-struct Hypergraph;
+class Hypergraph;
struct DecoderImpl;
struct DecoderObserver {
diff --git a/decoder/exp_semiring.h b/decoder/exp_semiring.h
index 111eaaf1..2a9034bb 100644
--- a/decoder/exp_semiring.h
+++ b/decoder/exp_semiring.h
@@ -59,7 +59,7 @@ struct PRWeightFunction {
explicit PRWeightFunction(const PWeightFunction& pwf = PWeightFunction(),
const RWeightFunction& rwf = RWeightFunction()) :
pweight(pwf), rweight(rwf) {}
- PRPair<P,R> operator()(const Hypergraph::Edge& e) const {
+ PRPair<P,R> operator()(const HG::Edge& e) const {
const P p = pweight(e);
const R r = rweight(e);
return PRPair<P,R>(p, r * p);
diff --git a/decoder/ff.cc b/decoder/ff.cc
index 557e0b5f..a6a035b5 100644
--- a/decoder/ff.cc
+++ b/decoder/ff.cc
@@ -1,9 +1,3 @@
-//TODO: non-sparse vector for all feature functions? modelset applymodels keeps track of who has what features? it's nice having FF that could generate a handful out of 10000 possible feats, though.
-
-//TODO: actually score rule_feature()==true features once only, hash keyed on rule or modify TRule directly? need to keep clear in forest which features come from models vs. rules; then rescoring could drop all the old models features at once
-
-#include "fast_lexical_cast.hpp"
-#include <stdexcept>
#include "ff.h"
#include "tdict.h"
@@ -16,8 +10,7 @@ FeatureFunction::~FeatureFunction() {}
void FeatureFunction::PrepareForInput(const SentenceMetadata&) {}
void FeatureFunction::FinalTraversalFeatures(const void* /* ant_state */,
- SparseVector<double>* /* features */) const {
-}
+ SparseVector<double>* /* features */) const {}
string FeatureFunction::usage_helper(std::string const& name,std::string const& params,std::string const& details,bool sp,bool sd) {
string r=name;
@@ -32,188 +25,14 @@ string FeatureFunction::usage_helper(std::string const& name,std::string const&
return r;
}
-Features FeatureFunction::single_feature(WordID feat) {
- return Features(1,feat);
-}
-
-Features ModelSet::all_features(std::ostream *warn,bool warn0) {
- //return ::all_features(models_,weights_,warn,warn0);
-}
-
-void show_features(Features const& ffs,DenseWeightVector const& weights_,std::ostream &out,std::ostream &warn,bool warn_zero_wt) {
- out << "Weight Feature\n";
- for (unsigned i=0;i<ffs.size();++i) {
- WordID fid=ffs[i];
- string const& fname=FD::Convert(fid);
- double wt=weights_[fid];
- if (warn_zero_wt && wt==0)
- warn<<"WARNING: "<<fname<<" has 0 weight."<<endl;
- out << wt << " " << fname<<endl;
- }
-}
-
-void ModelSet::show_features(std::ostream &out,std::ostream &warn,bool warn_zero_wt)
-{
-// ::show_features(all_features(),weights_,out,warn,warn_zero_wt);
- //show_all_features(models_,weights_,out,warn,warn_zero_wt,warn_zero_wt);
-}
-
-// Hiero and Joshua use log_10(e) as the value, so I do to
-WordPenalty::WordPenalty(const string& param) :
- fid_(FD::Convert("WordPenalty")),
- value_(-1.0 / log(10)) {
- if (!param.empty()) {
- cerr << "Warning WordPenalty ignoring parameter: " << param << endl;
- }
-}
-
-void FeatureFunction::TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const std::vector<const void*>& ant_states,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* state) const {
- throw std::runtime_error("TraversalFeaturesImpl not implemented - override it or TraversalFeaturesLog.\n");
+void FeatureFunction::TraversalFeaturesImpl(const SentenceMetadata&,
+ const Hypergraph::Edge&,
+ const std::vector<const void*>&,
+ SparseVector<double>*,
+ SparseVector<double>*,
+ void*) const {
+ cerr << "TraversalFeaturesImpl not implemented - override it or TraversalFeaturesLog\n";
abort();
}
-void WordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const std::vector<const void*>& ant_states,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* state) const {
- (void) smeta;
- (void) ant_states;
- (void) state;
- (void) estimated_features;
- features->set_value(fid_, edge.rule_->EWords() * value_);
-}
-
-SourceWordPenalty::SourceWordPenalty(const string& param) :
- fid_(FD::Convert("SourceWordPenalty")),
- value_(-1.0 / log(10)) {
- if (!param.empty()) {
- cerr << "Warning SourceWordPenalty ignoring parameter: " << param << endl;
- }
-}
-
-Features SourceWordPenalty::features() const {
- return single_feature(fid_);
-}
-
-Features WordPenalty::features() const {
- return single_feature(fid_);
-}
-
-
-void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const std::vector<const void*>& ant_states,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* state) const {
- (void) smeta;
- (void) ant_states;
- (void) state;
- (void) estimated_features;
- features->set_value(fid_, edge.rule_->FWords() * value_);
-}
-
-ArityPenalty::ArityPenalty(const std::string& param) :
- value_(-1.0 / log(10)) {
- string fname = "Arity_";
- unsigned MAX=DEFAULT_MAX_ARITY;
- using namespace boost;
- if (!param.empty())
- MAX=lexical_cast<unsigned>(param);
- for (unsigned i = 0; i <= MAX; ++i) {
- WordID fid=FD::Convert(fname+lexical_cast<string>(i));
- fids_.push_back(fid);
- }
- while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); // pretty up features vector in case FD was frozen. doesn't change anything
-}
-
-Features ArityPenalty::features() const {
- return Features(fids_.begin(),fids_.end());
-}
-
-void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const std::vector<const void*>& ant_states,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* state) const {
- (void) smeta;
- (void) ant_states;
- (void) state;
- (void) estimated_features;
- unsigned a=edge.Arity();
- features->set_value(a<fids_.size()?fids_[a]:0, value_);
-}
-
-ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>& models) :
- models_(models),
- weights_(w),
- state_size_(0),
- model_state_pos_(models.size()) {
- for (int i = 0; i < models_.size(); ++i) {
- model_state_pos_[i] = state_size_;
- state_size_ += models_[i]->NumBytesContext();
- }
-}
-
-void ModelSet::PrepareForInput(const SentenceMetadata& smeta) {
- for (int i = 0; i < models_.size(); ++i)
- const_cast<FeatureFunction*>(models_[i])->PrepareForInput(smeta);
-}
-
-void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta,
- const Hypergraph& /* hg */,
- const FFStates& node_states,
- Hypergraph::Edge* edge,
- FFState* context,
- prob_t* combination_cost_estimate) const {
- edge->reset_info();
- context->resize(state_size_);
- if (state_size_ > 0) {
- memset(&(*context)[0], 0, state_size_);
- }
- SparseVector<double> est_vals; // only computed if combination_cost_estimate is non-NULL
- if (combination_cost_estimate) *combination_cost_estimate = prob_t::One();
- for (int i = 0; i < models_.size(); ++i) {
- const FeatureFunction& ff = *models_[i];
- void* cur_ff_context = NULL;
- vector<const void*> ants(edge->tail_nodes_.size());
- bool has_context = ff.NumBytesContext() > 0;
- if (has_context) {
- int spos = model_state_pos_[i];
- cur_ff_context = &(*context)[spos];
- for (int i = 0; i < ants.size(); ++i) {
- ants[i] = &node_states[edge->tail_nodes_[i]][spos];
- }
- }
- ff.TraversalFeatures(smeta, *edge, ants, &edge->feature_values_, &est_vals, cur_ff_context);
- }
- if (combination_cost_estimate)
- combination_cost_estimate->logeq(est_vals.dot(weights_));
- edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));
-}
-
-void ModelSet::AddFinalFeatures(const FFState& state, Hypergraph::Edge* edge,SentenceMetadata const& smeta) const {
- assert(1 == edge->rule_->Arity());
- edge->reset_info();
- for (int i = 0; i < models_.size(); ++i) {
- const FeatureFunction& ff = *models_[i];
- const void* ant_state = NULL;
- bool has_context = ff.NumBytesContext() > 0;
- if (has_context) {
- int spos = model_state_pos_[i];
- ant_state = &state[spos];
- }
- ff.FinalTraversalFeatures(smeta, *edge, ant_state, &edge->feature_values_);
- }
- edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));
-}
-
diff --git a/decoder/ff.h b/decoder/ff.h
index 6c22d39f..3280592e 100644
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ -1,79 +1,47 @@
#ifndef _FF_H_
#define _FF_H_
-#define DEBUG_INIT 0
-#if DEBUG_INIT
-# include <iostream>
-# define DBGINIT(a) do { std::cerr<<a<<"\n"; } while(0)
-#else
-# define DBGINIT(a)
-#endif
-
-#include <stdint.h>
+#include <string>
#include <vector>
-#include <cstring>
-#include "fdict.h"
-#include "hg.h"
-#include "feature_vector.h"
-#include "value_array.h"
+#include "sparse_vector.h"
+namespace HG { struct Edge; struct Node; }
+class Hypergraph;
class SentenceMetadata;
-class FeatureFunction; // see definition below
-
-typedef std::vector<WordID> Features; // set of features ids
// if you want to develop a new feature, inherit from this class and
// override TraversalFeaturesImpl(...). If it's a feature that returns /
// depends on context, you may also need to implement
// FinalTraversalFeatures(...)
class FeatureFunction {
+ friend class ExternalFeature;
public:
std::string name_; // set by FF factory using usage()
- bool debug_; // also set by FF factory checking param for immediate initial "debug"
- //called after constructor, but before name_ and debug_ have been set
- virtual void Init() { DBGINIT("default FF::Init name="<<name_); }
- virtual void init_name_debug(std::string const& n,bool debug) {
- name_=n;
- debug_=debug;
- }
- bool debug() const { return debug_; }
FeatureFunction() : state_size_() {}
explicit FeatureFunction(int state_size) : state_size_(state_size) {}
virtual ~FeatureFunction();
bool IsStateful() const { return state_size_ > 0; }
+ int StateSize() const { return state_size_; }
// override this. not virtual because we want to expose this to factory template for help before creating a FF
static std::string usage(bool show_params,bool show_details) {
return usage_helper("FIXME_feature_needs_name","[no parameters]","[no documentation yet]",show_params,show_details);
}
static std::string usage_helper(std::string const& name,std::string const& params,std::string const& details,bool show_params,bool show_details);
- static Features single_feature(int feat);
-public:
-
- // stateless feature that doesn't depend on source span: override and return true. then your feature can be precomputed over rules.
- virtual bool rule_feature() const { return false; }
// called once, per input, before any feature calls to TraversalFeatures, etc.
// used to initialize sentence-specific data structures
virtual void PrepareForInput(const SentenceMetadata& smeta);
- //OVERRIDE THIS:
- virtual Features features() const { return single_feature(FD::Convert(name_)); }
- // returns the number of bytes of context that this feature function will
- // (maximally) use. By default, 0 ("stateless" models in Hiero/Joshua).
- // NOTE: this value is fixed for the instance of your class, you cannot
- // use different amounts of memory for different nodes in the forest. this will be read as soon as you create a ModelSet, then fixed forever on
- inline int NumBytesContext() const { return state_size_; }
-
// Compute the feature values and (if this applies) the estimates of the
// feature values when this edge is used incorporated into a larger context
inline void TraversalFeatures(const SentenceMetadata& smeta,
- Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
void* out_state) const {
- TraversalFeaturesLog(smeta, edge, ant_contexts,
+ TraversalFeaturesImpl(smeta, edge, ant_contexts,
features, estimated_features, out_state);
// TODO it's easy for careless feature function developers to overwrite
// the end of their state and clobber someone else's memory. These bugs
@@ -83,21 +51,10 @@ public:
}
// if there's some state left when you transition to the goal state, score
- // it here. For example, the language model computes the cost of adding
+ // it here. For example, a language model might compute the cost of adding
// <s> and </s>.
-
-protected:
virtual void FinalTraversalFeatures(const void* residual_state,
- FeatureVector* final_features) const;
-public:
- //override either this or one of above.
- virtual void FinalTraversalFeatures(const SentenceMetadata& /* smeta */,
- Hypergraph::Edge& /* edge */, // so you can log()
- const void* residual_state,
- FeatureVector* final_features) const {
- FinalTraversalFeatures(residual_state,final_features);
- }
-
+ SparseVector<double>* final_features) const;
protected:
// context is a pointer to a buffer of size NumBytesContext() that the
@@ -107,191 +64,19 @@ public:
// of the particular FeatureFunction class. There is one exception:
// equality of the contents (i.e., memcmp) is required to determine whether
// two states can be combined.
-
- // by Log, I mean that the edge is non-const only so you can log to it with INFO_EDGE(edge,msg<<"etc."). most features don't use this so implement the below. it has a different name to allow a default implementation without name hiding when inheriting + overriding just 1.
- virtual void TraversalFeaturesLog(const SentenceMetadata& smeta,
- Hypergraph::Edge& edge, // this is writable only so you can use log()
- const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
- void* context) const {
- TraversalFeaturesImpl(smeta,edge,ant_contexts,features,estimated_features,context);
- }
-
- // override above or below.
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- Hypergraph::Edge const& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
void* context) const;
// !!! ONLY call this from subclass *CONSTRUCTORS* !!!
void SetStateSize(size_t state_size) {
state_size_ = state_size;
}
- int StateSize() const { return state_size_; }
- private:
- int state_size_;
-};
-
-
-// word penalty feature, for each word on the E side of a rule,
-// add value_
-class WordPenalty : public FeatureFunction {
- public:
- Features features() const;
- WordPenalty(const std::string& param);
- static std::string usage(bool p,bool d) {
- return usage_helper("WordPenalty","","number of target words (local feature)",p,d);
- }
- bool rule_feature() const { return true; }
- protected:
- virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
- void* context) const;
- private:
- const int fid_;
- const double value_;
-};
-
-class SourceWordPenalty : public FeatureFunction {
- public:
- bool rule_feature() const { return true; }
- Features features() const;
- SourceWordPenalty(const std::string& param);
- static std::string usage(bool p,bool d) {
- return usage_helper("SourceWordPenalty","","number of source words (local feature, and meaningless except when input has non-constant number of source words, e.g. segmentation/morphology/speech recognition lattice)",p,d);
- }
- protected:
- virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
- void* context) const;
- private:
- const int fid_;
- const double value_;
-};
-
-#define DEFAULT_MAX_ARITY 9
-#define DEFAULT_MAX_ARITY_STRINGIZE(x) #x
-#define DEFAULT_MAX_ARITY_STRINGIZE_EVAL(x) DEFAULT_MAX_ARITY_STRINGIZE(x)
-#define DEFAULT_MAX_ARITY_STR DEFAULT_MAX_ARITY_STRINGIZE_EVAL(DEFAULT_MAX_ARITY)
-
-class ArityPenalty : public FeatureFunction {
- public:
- bool rule_feature() const { return true; }
- Features features() const;
- ArityPenalty(const std::string& param);
- static std::string usage(bool p,bool d) {
- return usage_helper("ArityPenalty","[MaxArity(default " DEFAULT_MAX_ARITY_STR ")]","Indicator feature Arity_N=1 for rule of arity N (local feature). 0<=N<=MaxArity(default " DEFAULT_MAX_ARITY_STR ")",p,d);
- }
-
- protected:
- virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
- void* context) const;
- private:
- std::vector<WordID> fids_;
- const double value_;
-};
-
-void show_features(Features const& features,DenseWeightVector const& weights,std::ostream &out,std::ostream &warn,bool warn_zero_wt=true); //show features and weights
-
-template <class FFp>
-Features all_features(std::vector<FFp> const& models_,DenseWeightVector &weights_,std::ostream *warn=0,bool warn_fid_0=false) {
- using namespace std;
- Features ffs;
-#define WARNFF(x) do { if (warn) { *warn << "WARNING: "<< x << endl; } } while(0)
- typedef map<WordID,string> FFM;
- FFM ff_from;
- for (unsigned i=0;i<models_.size();++i) {
- string const& ffname=models_[i]->name_;
- Features si=models_[i]->features();
- if (si.empty()) {
- WARNFF(ffname<<" doesn't yet report any feature IDs - either supply feature weight, or use --no_freeze_feature_set, or implement features() method");
- }
- unsigned n0=0;
- for (unsigned j=0;j<si.size();++j) {
- WordID fid=si[j];
- if (!fid) ++n0;
- if (fid >= weights_.size())
- weights_.resize(fid+1);
- if (warn_fid_0 || fid) {
- pair<FFM::iterator,bool> i_new=ff_from.insert(FFM::value_type(fid,ffname));
- if (i_new.second) {
- if (fid)
- ffs.push_back(fid);
- else
- WARNFF("Feature id 0 for "<<ffname<<" (models["<<i<<"]) - probably no weight provided. Don't freeze feature ids to see the name");
- } else {
- WARNFF(ffname<<" (models["<<i<<"]) tried to define feature "<<FD::Convert(fid)<<" already defined earlier by "<<i_new.first->second);
- }
- }
- }
- if (n0)
- WARNFF(ffname<<" (models["<<i<<"]) had "<<n0<<" unused features (--no_freeze_feature_set to see them)");
- }
- return ffs;
-#undef WARNFF
-}
-
-template <class FFp>
-void show_all_features(std::vector<FFp> const& models_,DenseWeightVector &weights_,std::ostream &out,std::ostream &warn,bool warn_fid_0=true,bool warn_zero_wt=true) {
- return show_features(all_features(models_,weights_,&warn,warn_fid_0),weights_,out,warn,warn_zero_wt);
-}
-
-typedef ValueArray<uint8_t> FFState; // this is about 10% faster than string.
-//typedef std::string FFState;
-
-//FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size)
-typedef std::vector<FFState> FFStates;
-
-// this class is a set of FeatureFunctions that can be used to score, rescore,
-// etc. a (translation?) forest
-class ModelSet {
- public:
- ModelSet(const std::vector<double>& weights,
- const std::vector<const FeatureFunction*>& models);
-
- // sets edge->feature_values_ and edge->edge_prob_
- // NOTE: edge must not necessarily be in hg.edges_ but its TAIL nodes
- // must be. edge features are supposed to be overwritten, not added to (possibly because rule features aren't in ModelSet so need to be left alone
- void AddFeaturesToEdge(const SentenceMetadata& smeta,
- const Hypergraph& hg,
- const FFStates& node_states,
- Hypergraph::Edge* edge,
- FFState* residual_context,
- prob_t* combination_cost_estimate = NULL) const;
-
- //this is called INSTEAD of above when result of edge is goal (must be a unary rule - i.e. one variable, but typically it's assumed that there are no target terminals either (e.g. for LM))
- void AddFinalFeatures(const FFState& residual_context,
- Hypergraph::Edge* edge,
- SentenceMetadata const& smeta) const;
-
- // this is called once before any feature functions apply to a hypergraph
- // it can be used to initialize sentence-specific data structures
- void PrepareForInput(const SentenceMetadata& smeta);
-
- bool empty() const { return models_.empty(); }
-
- bool stateless() const { return !state_size_; }
- Features all_features(std::ostream *warnings=0,bool warn_fid_zero=false); // this will warn about duplicate features as well (one function overwrites the feature of another). also resizes weights_ so it is large enough to hold the (0) weight for the largest reported feature id. since 0 is a NULL feature id, it's never included. if warn_fid_zero, then even the first 0 id is
- void show_features(std::ostream &out,std::ostream &warn,bool warn_zero_wt=true);
-
private:
- std::vector<const FeatureFunction*> models_;
- const std::vector<double>& weights_;
int state_size_;
- std::vector<int> model_state_pos_;
};
#endif
diff --git a/decoder/ff_basic.cc b/decoder/ff_basic.cc
new file mode 100644
index 00000000..f9404d24
--- /dev/null
+++ b/decoder/ff_basic.cc
@@ -0,0 +1,80 @@
+#include "ff_basic.h"
+
+#include "fast_lexical_cast.hpp"
+#include "hg.h"
+
+using namespace std;
+
+// Hiero and Joshua use log_10(e) as the value, so I do too (negated here: -1/ln(10) = -log_10(e))
+WordPenalty::WordPenalty(const string& param) :
+ fid_(FD::Convert("WordPenalty")),
+ value_(-1.0 / log(10)) {
+ if (!param.empty()) {
+ cerr << "Warning WordPenalty ignoring parameter: " << param << endl;
+ }
+}
+
+void WordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_states,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* state) const {
+ (void) smeta;
+ (void) ant_states;
+ (void) state;
+ (void) estimated_features;
+ features->set_value(fid_, edge.rule_->EWords() * value_);
+}
+
+
+SourceWordPenalty::SourceWordPenalty(const string& param) :
+ fid_(FD::Convert("SourceWordPenalty")),
+ value_(-1.0 / log(10)) {
+ if (!param.empty()) {
+ cerr << "Warning SourceWordPenalty ignoring parameter: " << param << endl;
+ }
+}
+
+void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_states,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* state) const {
+ (void) smeta;
+ (void) ant_states;
+ (void) state;
+ (void) estimated_features;
+ features->set_value(fid_, edge.rule_->FWords() * value_);
+}
+
+
+ArityPenalty::ArityPenalty(const std::string& param) :
+ value_(-1.0 / log(10)) {
+ string fname = "Arity_";
+ unsigned MAX=DEFAULT_MAX_ARITY;
+ using namespace boost;
+ if (!param.empty())
+ MAX=lexical_cast<unsigned>(param);
+ for (unsigned i = 0; i <= MAX; ++i) {
+ WordID fid=FD::Convert(fname+lexical_cast<string>(i));
+ fids_.push_back(fid);
+ }
+ while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); // pretty up features vector in case FD was frozen. doesn't change anything
+}
+
+void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_states,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* state) const {
+ (void) smeta;
+ (void) ant_states;
+ (void) state;
+ (void) estimated_features;
+ unsigned a=edge.Arity();
+ features->set_value(a<fids_.size()?fids_[a]:0, value_);
+}
+
diff --git a/decoder/ff_basic.h b/decoder/ff_basic.h
new file mode 100644
index 00000000..901c0110
--- /dev/null
+++ b/decoder/ff_basic.h
@@ -0,0 +1,68 @@
+#ifndef _FF_BASIC_H_
+#define _FF_BASIC_H_
+
+#include "ff.h"
+
+// word penalty feature, for each word on the E side of a rule,
+// add value_
+class WordPenalty : public FeatureFunction {
+ public:
+ WordPenalty(const std::string& param);
+ static std::string usage(bool p,bool d) {
+ return usage_helper("WordPenalty","","number of target words (local feature)",p,d);
+ }
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const HG::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ private:
+ const int fid_;
+ const double value_;
+};
+
+class SourceWordPenalty : public FeatureFunction {
+ public:
+ SourceWordPenalty(const std::string& param);
+ static std::string usage(bool p,bool d) {
+ return usage_helper("SourceWordPenalty","","number of source words (local feature, and meaningless except when input has non-constant number of source words, e.g. segmentation/morphology/speech recognition lattice)",p,d);
+ }
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const HG::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ private:
+ const int fid_;
+ const double value_;
+};
+
+#define DEFAULT_MAX_ARITY 9
+#define DEFAULT_MAX_ARITY_STRINGIZE(x) #x
+#define DEFAULT_MAX_ARITY_STRINGIZE_EVAL(x) DEFAULT_MAX_ARITY_STRINGIZE(x)
+#define DEFAULT_MAX_ARITY_STR DEFAULT_MAX_ARITY_STRINGIZE_EVAL(DEFAULT_MAX_ARITY)
+
+class ArityPenalty : public FeatureFunction {
+ public:
+ ArityPenalty(const std::string& param);
+ static std::string usage(bool p,bool d) {
+ return usage_helper("ArityPenalty","[MaxArity(default " DEFAULT_MAX_ARITY_STR ")]","Indicator feature Arity_N=1 for rule of arity N (local feature). 0<=N<=MaxArity(default " DEFAULT_MAX_ARITY_STR ")",p,d);
+ }
+
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const HG::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ private:
+ std::vector<WordID> fids_;
+ const double value_;
+};
+
+#endif
diff --git a/decoder/ff_bleu.h b/decoder/ff_bleu.h
index 5544920e..344dc788 100644
--- a/decoder/ff_bleu.h
+++ b/decoder/ff_bleu.h
@@ -20,7 +20,7 @@ class BLEUModel : public FeatureFunction {
static std::string usage(bool param,bool verbose);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_charset.cc b/decoder/ff_charset.cc
index 33afc1a7..6429088b 100644
--- a/decoder/ff_charset.cc
+++ b/decoder/ff_charset.cc
@@ -1,5 +1,7 @@
#include "ff_charset.h"
+#include "tdict.h"
+#include "hg.h"
#include "fdict.h"
#include "stringlib.h"
@@ -7,9 +9,9 @@ using namespace std;
NonLatinCount::NonLatinCount(const string& param) : FeatureFunction(), fid_(FD::Convert("NonLatinCount")) {}
-bool ContainsNonLatin(const char* word) {
- int cur = 0;
- while(word[cur]) {
+bool ContainsNonLatin(const string& word) {
+ unsigned cur = 0;
+ while(cur < word.size()) {
const int size = UTF8Len(word[cur]);
if (size > 1) return true;
cur += size;
@@ -20,8 +22,8 @@ bool ContainsNonLatin(const char* word) {
void NonLatinCount::TraversalFeaturesImpl(const SentenceMetadata& smeta,
const Hypergraph::Edge& edge,
const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
void* context) const {
const vector<WordID>& e = edge.rule_->e();
int count = 0;
diff --git a/decoder/ff_charset.h b/decoder/ff_charset.h
index b1ad537e..267ef65d 100644
--- a/decoder/ff_charset.h
+++ b/decoder/ff_charset.h
@@ -13,10 +13,10 @@ class NonLatinCount : public FeatureFunction {
NonLatinCount(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
- FeatureVector* features,
- FeatureVector* estimated_features,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
void* context) const;
private:
mutable std::map<WordID, bool> is_non_latin_;
diff --git a/decoder/ff_context.cc b/decoder/ff_context.cc
index 9de4d737..f2b0e67c 100644
--- a/decoder/ff_context.cc
+++ b/decoder/ff_context.cc
@@ -5,12 +5,14 @@
#include <cassert>
#include <cmath>
+#include "hg.h"
#include "filelib.h"
#include "stringlib.h"
#include "sentence_metadata.h"
#include "lattice.h"
#include "fdict.h"
#include "verbose.h"
+#include "tdict.h"
RuleContextFeatures::RuleContextFeatures(const string& param) {
// cerr << "initializing RuleContextFeatures with parameters: " << param;
diff --git a/decoder/ff_context.h b/decoder/ff_context.h
index 89bcb557..19198ec3 100644
--- a/decoder/ff_context.h
+++ b/decoder/ff_context.h
@@ -14,7 +14,7 @@ class RuleContextFeatures : public FeatureFunction {
RuleContextFeatures(const string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc
index c9ed996c..e6f78f84 100644
--- a/decoder/ff_csplit.cc
+++ b/decoder/ff_csplit.cc
@@ -5,6 +5,7 @@
#include "klm/lm/model.hh"
+#include "hg.h"
#include "sentence_metadata.h"
#include "lattice.h"
#include "tdict.h"
@@ -88,7 +89,7 @@ void BasicCSplitFeaturesImpl::TraversalFeaturesImpl(
features->set_value(letters_sq_, (edge.j_ - edge.i_) * (edge.j_ - edge.i_));
features->set_value(letters_sqrt_, sqrt(edge.j_ - edge.i_));
const WordID word = edge.rule_->e_[1];
- const char* sword = TD::Convert(word);
+ const char* sword = TD::Convert(word).c_str();
const int len = strlen(sword);
int cur = 0;
int chars = 0;
diff --git a/decoder/ff_csplit.h b/decoder/ff_csplit.h
index 38c0c5b8..64d42526 100644
--- a/decoder/ff_csplit.h
+++ b/decoder/ff_csplit.h
@@ -12,7 +12,7 @@ class BasicCSplitFeatures : public FeatureFunction {
BasicCSplitFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -27,7 +27,7 @@ class ReverseCharLMCSplitFeature : public FeatureFunction {
ReverseCharLMCSplitFeature(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_dwarf.cc b/decoder/ff_dwarf.cc
index 43528405..fe7a472e 100644
--- a/decoder/ff_dwarf.cc
+++ b/decoder/ff_dwarf.cc
@@ -4,6 +4,7 @@
#include <string>
#include <iostream>
#include <map>
+#include "hg.h"
#include "ff_dwarf.h"
#include "dwarf.h"
#include "wordid.h"
diff --git a/decoder/ff_dwarf.h b/decoder/ff_dwarf.h
index 083fcc7c..3d6a7da6 100644
--- a/decoder/ff_dwarf.h
+++ b/decoder/ff_dwarf.h
@@ -56,7 +56,7 @@ class Dwarf : public FeatureFunction {
function word alignments set by 3.
*/
void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_external.cc b/decoder/ff_external.cc
new file mode 100644
index 00000000..dea0e20f
--- /dev/null
+++ b/decoder/ff_external.cc
@@ -0,0 +1,60 @@
+#include "ff_external.h"
+
+#include <dlfcn.h>
+
+#include "stringlib.h"
+#include "hg.h"
+
+using namespace std;
+
+ExternalFeature::ExternalFeature(const string& param) {
+ size_t pos = param.find(' ');
+ string nparam;
+ string file = param;
+ if (pos < param.size()) {
+ nparam = Trim(param.substr(pos + 1));
+ file = param.substr(0, pos);
+ }
+ if (file.size() < 1) {
+ cerr << "External requires a path to a dynamic library!\n";
+ abort();
+ }
+ lib_handle = dlopen(file.c_str(), RTLD_LAZY);
+ if (!lib_handle) {
+ cerr << "dlopen reports: " << dlerror() << endl;
+ cerr << "Did you provide a full path to the dynamic library?\n";
+ abort();
+ }
+ FeatureFunction* (*fn)(const string&) =
+ (FeatureFunction* (*)(const string&))(dlsym(lib_handle, "create_ff"));
+ if (!fn) {
+ cerr << "dlsym reports: " << dlerror() << endl;
+ abort();
+ }
+ ff_ext = (*fn)(nparam);
+ SetStateSize(ff_ext->StateSize());
+}
+
+ExternalFeature::~ExternalFeature() {
+ delete ff_ext;
+ dlclose(lib_handle);
+}
+
+void ExternalFeature::PrepareForInput(const SentenceMetadata& smeta) {
+ ff_ext->PrepareForInput(smeta);
+}
+
+void ExternalFeature::FinalTraversalFeatures(const void* context,
+ SparseVector<double>* features) const {
+ ff_ext->FinalTraversalFeatures(context, features);
+}
+
+void ExternalFeature::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const {
+ ff_ext->TraversalFeaturesImpl(smeta, edge, ant_contexts, features, estimated_features, context);
+}
+
diff --git a/decoder/ff_external.h b/decoder/ff_external.h
new file mode 100644
index 00000000..3e2bee51
--- /dev/null
+++ b/decoder/ff_external.h
@@ -0,0 +1,26 @@
+#ifndef _FFEXTERNAL_H_
+#define _FFEXTERNAL_H_
+
+#include "ff.h"
+
+// dynamically loaded feature function
+class ExternalFeature : public FeatureFunction {
+ public:
+ ExternalFeature(const std::string& param);
+ ~ExternalFeature();
+ virtual void PrepareForInput(const SentenceMetadata& smeta);
+ virtual void FinalTraversalFeatures(const void* context,
+ SparseVector<double>* features) const;
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const HG::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ private:
+ void* lib_handle;
+ FeatureFunction* ff_ext;
+};
+
+#endif
diff --git a/decoder/ff_factory.h b/decoder/ff_factory.h
index 5eb68c8b..bfdd3257 100644
--- a/decoder/ff_factory.h
+++ b/decoder/ff_factory.h
@@ -43,7 +43,6 @@ template<class FF>
struct FFFactory : public FactoryBase<FeatureFunction> {
FP Create(std::string param) const {
FF *ret=new FF(param);
- ret->Init();
return FP(ret);
}
virtual std::string usage(bool params,bool verbose) const {
@@ -57,7 +56,6 @@ template<class FF>
struct FsaFactory : public FactoryBase<FsaFeatureFunction> {
FP Create(std::string param) const {
FF *ret=new FF(param);
- ret->Init();
return FP(ret);
}
virtual std::string usage(bool params,bool verbose) const {
@@ -98,8 +96,6 @@ struct FactoryRegistry : public UntypedFactoryRegistry {
if (debug)
cerr<<"debug enabled for "<<ffname<< " - remaining options: '"<<param<<"'\n";
FP res = dynamic_cast<FB const&>(*it->second).Create(param);
- res->init_name_debug(ffname,debug);
- // could add a res->Init() here instead of in Create if we wanted feature id to potentially differ based on the registered name rather than static usage() - of course, specific feature ids can be computed on the basis of feature param as well; this only affects the default single feature id=name
return res;
}
};
diff --git a/decoder/ff_klm.cc b/decoder/ff_klm.cc
index 09ef282c..fefa90bd 100644
--- a/decoder/ff_klm.cc
+++ b/decoder/ff_klm.cc
@@ -327,11 +327,6 @@ KLanguageModel<Model>::KLanguageModel(const string& param) {
}
template <class Model>
-Features KLanguageModel<Model>::features() const {
- return single_feature(fid_);
-}
-
-template <class Model>
KLanguageModel<Model>::~KLanguageModel() {
delete pimpl_;
}
@@ -362,7 +357,6 @@ void KLanguageModel<Model>::FinalTraversalFeatures(const void* ant_state,
template <class Model> boost::shared_ptr<FeatureFunction> CreateModel(const std::string &param) {
KLanguageModel<Model> *ret = new KLanguageModel<Model>(param);
- ret->Init();
return boost::shared_ptr<FeatureFunction>(ret);
}
diff --git a/decoder/ff_klm.h b/decoder/ff_klm.h
index 6efe50f6..b5ceffd0 100644
--- a/decoder/ff_klm.h
+++ b/decoder/ff_klm.h
@@ -20,10 +20,9 @@ class KLanguageModel : public FeatureFunction {
virtual void FinalTraversalFeatures(const void* context,
SparseVector<double>* features) const;
static std::string usage(bool param,bool verbose);
- Features features() const;
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index 5e16d4e3..6ec7b4f3 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -519,10 +519,6 @@ LanguageModel::LanguageModel(const string& param) {
SetStateSize(LanguageModelImpl::OrderToStateSize(order));
}
-Features LanguageModel::features() const {
- return single_feature(fid_);
-}
-
LanguageModel::~LanguageModel() {
delete pimpl_;
}
diff --git a/decoder/ff_lm.h b/decoder/ff_lm.h
index ccee4268..94e18f00 100644
--- a/decoder/ff_lm.h
+++ b/decoder/ff_lm.h
@@ -55,10 +55,9 @@ class LanguageModel : public FeatureFunction {
SparseVector<double>* features) const;
std::string DebugStateToString(const void* state) const;
static std::string usage(bool param,bool verbose);
- Features features() const;
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -81,7 +80,7 @@ class LanguageModelRandLM : public FeatureFunction {
std::string DebugStateToString(const void* state) const;
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_ngrams.h b/decoder/ff_ngrams.h
index 064dbb49..4965d235 100644
--- a/decoder/ff_ngrams.h
+++ b/decoder/ff_ngrams.h
@@ -17,7 +17,7 @@ class NgramDetector : public FeatureFunction {
SparseVector<double>* features) const;
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc
index 3d0e514a..6716d3da 100644
--- a/decoder/ff_rules.cc
+++ b/decoder/ff_rules.cc
@@ -10,6 +10,8 @@
#include "lattice.h"
#include "fdict.h"
#include "verbose.h"
+#include "tdict.h"
+#include "hg.h"
using namespace std;
diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h
index 08b168b0..dc9a15d5 100644
--- a/decoder/ff_rules.h
+++ b/decoder/ff_rules.h
@@ -3,6 +3,7 @@
#include <vector>
#include <map>
+#include "trule.h"
#include "ff.h"
#include "array2d.h"
#include "wordid.h"
@@ -12,7 +13,7 @@ class RuleIdentityFeatures : public FeatureFunction {
RuleIdentityFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -42,7 +43,7 @@ class RuleTargetBigramFeatures : public FeatureFunction {
RuleTargetBigramFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_ruleshape.cc b/decoder/ff_ruleshape.cc
index f56ccfa9..7bb548c4 100644
--- a/decoder/ff_ruleshape.cc
+++ b/decoder/ff_ruleshape.cc
@@ -1,5 +1,7 @@
#include "ff_ruleshape.h"
+#include "trule.h"
+#include "hg.h"
#include "fdict.h"
#include <sstream>
diff --git a/decoder/ff_ruleshape.h b/decoder/ff_ruleshape.h
index 23c9827e..9f20faf3 100644
--- a/decoder/ff_ruleshape.h
+++ b/decoder/ff_ruleshape.h
@@ -9,7 +9,7 @@ class RuleShapeFeatures : public FeatureFunction {
RuleShapeFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc
index 035132b4..a1997695 100644
--- a/decoder/ff_source_syntax.cc
+++ b/decoder/ff_source_syntax.cc
@@ -3,6 +3,7 @@
#include <sstream>
#include <stack>
+#include "hg.h"
#include "sentence_metadata.h"
#include "array2d.h"
#include "filelib.h"
diff --git a/decoder/ff_source_syntax.h b/decoder/ff_source_syntax.h
index 279563e1..a8c7150a 100644
--- a/decoder/ff_source_syntax.h
+++ b/decoder/ff_source_syntax.h
@@ -11,7 +11,7 @@ class SourceSyntaxFeatures : public FeatureFunction {
~SourceSyntaxFeatures();
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -28,7 +28,7 @@ class SourceSpanSizeFeatures : public FeatureFunction {
~SourceSpanSizeFeatures();
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_spans.cc b/decoder/ff_spans.cc
index 0483517b..0ccac69b 100644
--- a/decoder/ff_spans.cc
+++ b/decoder/ff_spans.cc
@@ -4,6 +4,8 @@
#include <cassert>
#include <cmath>
+#include "hg.h"
+#include "tdict.h"
#include "filelib.h"
#include "stringlib.h"
#include "sentence_metadata.h"
diff --git a/decoder/ff_spans.h b/decoder/ff_spans.h
index 24e0dede..d2f5e84c 100644
--- a/decoder/ff_spans.h
+++ b/decoder/ff_spans.h
@@ -12,7 +12,7 @@ class SpanFeatures : public FeatureFunction {
SpanFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -49,7 +49,7 @@ class CMR2008ReorderingFeatures : public FeatureFunction {
CMR2008ReorderingFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_tagger.cc b/decoder/ff_tagger.cc
index fd9210fa..7f9af9cd 100644
--- a/decoder/ff_tagger.cc
+++ b/decoder/ff_tagger.cc
@@ -2,6 +2,7 @@
#include <sstream>
+#include "hg.h"
#include "tdict.h"
#include "sentence_metadata.h"
#include "stringlib.h"
diff --git a/decoder/ff_tagger.h b/decoder/ff_tagger.h
index bd5b62c0..46418b0c 100644
--- a/decoder/ff_tagger.h
+++ b/decoder/ff_tagger.h
@@ -18,7 +18,7 @@ class Tagger_BigramIndicator : public FeatureFunction {
Tagger_BigramIndicator(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -39,7 +39,7 @@ class LexicalPairIndicator : public FeatureFunction {
virtual void PrepareForInput(const SentenceMetadata& smeta);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -59,7 +59,7 @@ class OutputIndicator : public FeatureFunction {
OutputIndicator(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc
index decdf9bc..1491819d 100644
--- a/decoder/ff_wordalign.cc
+++ b/decoder/ff_wordalign.cc
@@ -549,7 +549,7 @@ void IdentityCycleDetector::TraversalFeaturesImpl(const SentenceMetadata& smeta,
static map<WordID, bool> big_enough;
map<WordID,bool>::iterator it = big_enough_.find(word);
if (it == big_enough_.end()) {
- out_is_identity = big_enough_[word] = strlen(TD::Convert(word)) >= length_min_;
+ out_is_identity = big_enough_[word] = TD::Convert(word).size() >= length_min_;
} else {
out_is_identity = it->second;
}
diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h
index d7a2dda8..ba3d0b9b 100644
--- a/decoder/ff_wordalign.h
+++ b/decoder/ff_wordalign.h
@@ -13,7 +13,7 @@ class RelativeSentencePosition : public FeatureFunction {
RelativeSentencePosition(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -36,7 +36,7 @@ class SourceBigram : public FeatureFunction {
void PrepareForInput(const SentenceMetadata& smeta);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -55,7 +55,7 @@ class LexNullJump : public FeatureFunction {
LexNullJump(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -72,7 +72,7 @@ class NewJump : public FeatureFunction {
NewJump(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -109,7 +109,7 @@ class LexicalTranslationTrigger : public FeatureFunction {
LexicalTranslationTrigger(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -132,14 +132,14 @@ class BlunsomSynchronousParseHack : public FeatureFunction {
BlunsomSynchronousParseHack(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
void* out_context) const;
private:
inline bool DoesNotBelong(const void* state) const {
- for (int i = 0; i < NumBytesContext(); ++i) {
+ for (int i = 0; i < StateSize(); ++i) {
if (*(static_cast<const unsigned char*>(state) + i)) return false;
}
return true;
@@ -148,9 +148,9 @@ class BlunsomSynchronousParseHack : public FeatureFunction {
inline void AppendAntecedentString(const void* state, std::vector<WordID>* yield) const {
int i = 0;
int ind = 0;
- while (i < NumBytesContext() && !(*(static_cast<const unsigned char*>(state) + i))) { ++i; ind += 8; }
- // std::cerr << i << " " << NumBytesContext() << std::endl;
- assert(i != NumBytesContext());
+ while (i < StateSize() && !(*(static_cast<const unsigned char*>(state) + i))) { ++i; ind += 8; }
+ // std::cerr << i << " " << StateSize() << std::endl;
+ assert(i != StateSize());
assert(ind < cur_ref_->size());
int cur = *(static_cast<const unsigned char*>(state) + i);
int comp = 1;
@@ -171,7 +171,7 @@ class BlunsomSynchronousParseHack : public FeatureFunction {
}
inline void SetStateMask(int start, int end, void* state) const {
- assert((end / 8) < NumBytesContext());
+ assert((end / 8) < StateSize());
int i = 0;
int comp = 1;
for (int j = 0; j < start; ++j) {
@@ -209,7 +209,7 @@ class WordPairFeatures : public FeatureFunction {
WordPairFeatures(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -226,7 +226,7 @@ class IdentityCycleDetector : public FeatureFunction {
IdentityCycleDetector(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -242,7 +242,7 @@ class InputIndicator : public FeatureFunction {
InputIndicator(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
@@ -258,7 +258,7 @@ class Fertility : public FeatureFunction {
Fertility(const std::string& param);
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ff_wordset.cc b/decoder/ff_wordset.cc
index 44468899..70cea7de 100644
--- a/decoder/ff_wordset.cc
+++ b/decoder/ff_wordset.cc
@@ -1,5 +1,6 @@
#include "ff_wordset.h"
+#include "hg.h"
#include "fdict.h"
#include <sstream>
#include <iostream>
diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h
index 7c9a3fb7..639e1514 100644
--- a/decoder/ff_wordset.h
+++ b/decoder/ff_wordset.h
@@ -2,6 +2,7 @@
#define _FF_WORDSET_H_
#include "ff.h"
+#include "tdict.h"
#include <tr1/unordered_set>
#include <boost/algorithm/string.hpp>
@@ -32,11 +33,9 @@ class WordSet : public FeatureFunction {
~WordSet() {
}
- Features features() const { return single_feature(fid_); }
-
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/ffset.cc b/decoder/ffset.cc
new file mode 100644
index 00000000..5820f421
--- /dev/null
+++ b/decoder/ffset.cc
@@ -0,0 +1,72 @@
+#include "ffset.h"
+
+#include "ff.h"
+#include "tdict.h"
+#include "hg.h"
+
+using namespace std;
+
+ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>& models) :
+    models_(models),
+    weights_(w),  // bound to the caller's vector (weights_ is a reference member), not copied
+    state_size_(0),
+    model_state_pos_(models.size()) {
+  for (unsigned m = 0; m < models_.size(); ++m) {
+    model_state_pos_[m] = state_size_;  // model m's state begins where the earlier models' states end
+    state_size_ += models_[m]->StateSize();
+  }
+}
+
+void ModelSet::PrepareForInput(const SentenceMetadata& smeta) {
+  for (vector<const FeatureFunction*>::const_iterator ff = models_.begin(); ff != models_.end(); ++ff)
+    const_cast<FeatureFunction*>(*ff)->PrepareForInput(smeta);  // models_ stores const pointers; constness is cast away for this call
+}
+
+void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta,
+                                 const Hypergraph& /* hg */,
+                                 const FFStates& node_states,
+                                 HG::Edge* edge,
+                                 FFState* context,
+                                 prob_t* combination_cost_estimate) const {
+  // Zero-fills *context, runs every model's TraversalFeatures on *edge, then sets edge_prob_ from the features.
+  context->resize(state_size_);
+  if (state_size_ > 0) {
+    memset(&(*context)[0], 0, state_size_);
+  }
+  SparseVector<double> est_vals;  // always passed to the models; only consumed if combination_cost_estimate is non-NULL
+  if (combination_cost_estimate) *combination_cost_estimate = prob_t::One();
+  const unsigned num_tails = edge->tail_nodes_.size();
+  for (unsigned m = 0; m < models_.size(); ++m) {
+    const FeatureFunction& ff = *models_[m];
+    void* cur_ff_context = NULL;
+    vector<const void*> ants(num_tails);  // rebuilt per model, so it stays all-NULL for stateless models
+    if (ff.StateSize() > 0) {
+      const int spos = model_state_pos_[m];  // offset of this model's slice within each state array
+      cur_ff_context = &(*context)[spos];
+      for (unsigned t = 0; t < num_tails; ++t) {  // t, not m: tail index kept separate from the model index
+        ants[t] = &node_states[edge->tail_nodes_[t]][spos];
+      }
+    }
+    ff.TraversalFeatures(smeta, *edge, ants, &edge->feature_values_, &est_vals, cur_ff_context);
+  }
+  if (combination_cost_estimate)
+    combination_cost_estimate->logeq(est_vals.dot(weights_));
+  edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));
+}
+
+void ModelSet::AddFinalFeatures(const FFState& state, HG::Edge* edge,SentenceMetadata const& smeta) const {
+  assert(1 == edge->rule_->Arity());
+  // NOTE: smeta is not used by this function.
+  for (unsigned m = 0; m < models_.size(); ++m) {
+    const FeatureFunction& ff = *models_[m];
+    // Stateless models are handed NULL; stateful ones get a pointer to
+    // their own slice of the antecedent state.
+    const void* ant_state = NULL;
+    if (ff.StateSize() > 0)
+      ant_state = &state[model_state_pos_[m]];
+    ff.FinalTraversalFeatures(ant_state, &edge->feature_values_);
+  }
+  // recompute edge_prob_ from feature_values_ and weights_
+  edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));
+}
+
diff --git a/decoder/ffset.h b/decoder/ffset.h
new file mode 100644
index 00000000..28aef667
--- /dev/null
+++ b/decoder/ffset.h
@@ -0,0 +1,57 @@
+#ifndef _FFSET_H_
+#define _FFSET_H_
+
+#include <vector>
+#include "value_array.h"
+#include "prob.h"
+
+namespace HG { struct Edge; struct Node; }
+class Hypergraph;
+class FeatureFunction;
+class SentenceMetadata;
+class FeatureFunction; // repeated forward declaration (harmless); the class is not defined in this header - presumably in ff.h
+
+// TODO let states be dynamically sized
+typedef ValueArray<uint8_t> FFState; // byte buffer holding the states of all models back to back; this is a fixed array, but about 10% faster than string
+
+//FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size) -- NOTE(review): FFState above is already a ValueArray, so this note looks stale; confirm and remove
+typedef std::vector<FFState> FFStates;
+
+// this class is a set of FeatureFunctions that can be used to score, rescore,
+// etc. a (translation?) forest
+class ModelSet {
+ public:
+ ModelSet(const std::vector<double>& weights,
+ const std::vector<const FeatureFunction*>& models); // weights is kept by reference; models (the pointers) is copied
+
+ // sets edge->feature_values_ and edge->edge_prob_
+ // NOTE: edge must not necessarily be in hg.edges_ but its TAIL nodes
+ // must be. edge features are supposed to be overwritten, not added to (possibly because rule features aren't in ModelSet so need to be left alone)
+ void AddFeaturesToEdge(const SentenceMetadata& smeta,
+ const Hypergraph& hg,
+ const FFStates& node_states,
+ HG::Edge* edge,
+ FFState* residual_context,
+ prob_t* combination_cost_estimate = NULL) const;
+
+ //this is called INSTEAD of above when result of edge is goal (must be a unary rule - i.e. one variable, but typically it's assumed that there are no target terminals either (e.g. for LM))
+ void AddFinalFeatures(const FFState& residual_context,
+ HG::Edge* edge,
+ SentenceMetadata const& smeta) const;
+
+ // this is called once before any feature functions apply to a hypergraph
+ // it can be used to initialize sentence-specific data structures
+ void PrepareForInput(const SentenceMetadata& smeta);
+
+ bool empty() const { return models_.empty(); } // true when no feature functions were supplied
+
+ bool stateless() const { return !state_size_; } // true when the summed StateSize() of all models is zero
+
+ private:
+ std::vector<const FeatureFunction*> models_; // raw pointers; nothing in this class deletes them
+ const std::vector<double>& weights_; // reference, not a copy: the vector passed to the constructor must outlive this ModelSet
+ int state_size_; // sum of StateSize() over models_, i.e. the number of bytes in one FFState
+ std::vector<int> model_state_pos_; // model_state_pos_[i] is the offset of models_[i]'s state inside an FFState
+};
+
+#endif
diff --git a/decoder/grammar.cc b/decoder/grammar.cc
index d1fe53af..ee43f537 100644
--- a/decoder/grammar.cc
+++ b/decoder/grammar.cc
@@ -127,48 +127,3 @@ bool TextGrammar::HasRuleForSpan(int /* i */, int /* j */, int distance) const {
return (max_span_ >= distance);
}
-GlueGrammar::GlueGrammar(const string& file) : TextGrammar(file) {}
-
-void RefineRule(TRulePtr pt, const unsigned int ctf_level){
- for (unsigned int i=0; i<ctf_level; ++i){
- TRulePtr r(new TRule(*pt));
- pt->fine_rules_.reset(new vector<TRulePtr>);
- pt->fine_rules_->push_back(r);
- pt = r;
- }
-}
-
-GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt, const unsigned int ctf_level) {
- TRulePtr stop_glue(new TRule("[" + goal_nt + "] ||| [" + default_nt + ",1] ||| [1]"));
- AddRule(stop_glue);
- RefineRule(stop_glue, ctf_level);
- TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] ["+ default_nt + ",2] ||| [1] [2] ||| Glue=1"));
- AddRule(glue);
- RefineRule(glue, ctf_level);
-}
-
-bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const {
- return (i == 0);
-}
-
-PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) {
- unordered_set<WordID> ss;
- for (int i = 0; i < input.size(); ++i) {
- const vector<LatticeArc>& alts = input[i];
- for (int k = 0; k < alts.size(); ++k) {
- const int j = alts[k].dist2next + i;
- const string& src = TD::Convert(alts[k].label);
- if (ss.count(alts[k].label) == 0) {
- TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1"));
- pt->a_.push_back(AlignmentPoint(0,0));
- AddRule(pt);
- RefineRule(pt, ctf_level);
- ss.insert(alts[k].label);
- }
- }
- }
-}
-
-bool PassThroughGrammar::HasRuleForSpan(int, int, int distance) const {
- return (distance < 2);
-}
diff --git a/decoder/grammar.h b/decoder/grammar.h
index e6a15a69..add1a235 100644
--- a/decoder/grammar.h
+++ b/decoder/grammar.h
@@ -81,18 +81,4 @@ struct TextGrammar : public Grammar {
};
-struct GlueGrammar : public TextGrammar {
- // read glue grammar from file
- explicit GlueGrammar(const std::string& file);
- GlueGrammar(const std::string& goal_nt, const std::string& default_nt, const unsigned int ctf_level=0); // "S", "X"
- virtual bool HasRuleForSpan(int i, int j, int distance) const;
-};
-
-struct PassThroughGrammar : public TextGrammar {
- PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0);
- virtual bool HasRuleForSpan(int i, int j, int distance) const;
-};
-
-void RefineRule(TRulePtr pt, const unsigned int ctf_level);
-
#endif
diff --git a/decoder/grammar_test.cc b/decoder/grammar_test.cc
index 4500490a..912f4f12 100644
--- a/decoder/grammar_test.cc
+++ b/decoder/grammar_test.cc
@@ -10,7 +10,9 @@
#include "tdict.h"
#include "grammar.h"
#include "bottom_up_parser.h"
+#include "hg.h"
#include "ff.h"
+#include "ffset.h"
#include "weights.h"
using namespace std;
diff --git a/decoder/hg.h b/decoder/hg.h
index 591e98ce..3d8cd9bc 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -33,47 +33,20 @@
// slow
#undef HG_EDGES_TOPO_SORTED
-class Hypergraph;
-typedef boost::shared_ptr<Hypergraph> HypergraphP;
-
-// class representing an acyclic hypergraph
-// - edges have 1 head, 0..n tails
-class Hypergraph {
-public:
- Hypergraph() : is_linear_chain_(false) {}
+// SmallVector is a fast, small vector<int> implementation for sizes <= 2
+typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_
+typedef std::vector<int> EdgesVector; // indices in edges_
- // SmallVector is a fast, small vector<int> implementation for sizes <= 2
- typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_
- typedef std::vector<int> EdgesVector; // indices in edges_
-
- // TODO get rid of cat_?
- // TODO keep cat_ and add span and/or state? :)
- struct Node {
- Node() : id_(), cat_() {}
- int id_; // equal to this object's position in the nodes_ vector
- WordID cat_; // non-terminal category if <0, 0 if not set
- WordID NT() const { return -cat_; }
- EdgesVector in_edges_; // an in edge is an edge with this node as its head. (in edges come from the bottom up to us) indices in edges_
- EdgesVector out_edges_; // an out edge is an edge with this node as its tail. (out edges leave us up toward the top/goal). indices in edges_
- void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting
- cat_=o.cat_;
- }
- void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) {
- copy_fixed(o);
- id_=n2[id_];
- e2.reindex_push_back(o.in_edges_,in_edges_);
- e2.reindex_push_back(o.out_edges_,out_edges_);
- }
- };
+enum {
+ NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
+};
+namespace HG {
- // TODO get rid of edge_prob_? (can be computed on the fly as the dot
- // product of the weight vector and the feature values)
struct Edge {
-// int poplimit; //TODO: cube pruning per edge limit? per node didn't work well at all. also, inside cost + outside(node) is the same information i'd use to set a per-edge limit anyway - and nonmonotonicity in cube pruning may mean it's good to favor edge (in same node) w/ relatively worse score
Edge() : i_(-1), j_(-1), prev_i_(-1), prev_j_(-1) {}
Edge(int id,Edge const& copy_pod_from) : id_(id) { copy_pod(copy_pod_from); } // call copy_features yourself later.
- Edge(int id,Edge const& copy_from,TailNodeVector const& tail) // fully inits - probably more expensive when push_back(Edge(...)) than setting after
+ Edge(int id,Edge const& copy_from,TailNodeVector const& tail) // fully inits - probably more expensive when push_back(Edge(...)) than sett
: tail_nodes_(tail),id_(id) { copy_pod(copy_from);copy_features(copy_from); }
inline int Arity() const { return tail_nodes_.size(); }
int head_node_; // refers to a position in nodes_
@@ -83,8 +56,6 @@ public:
prob_t edge_prob_; // dot product of weights and feat_values
int id_; // equal to this object's position in the edges_ vector
- //FIXME: these span ids belong in Node, not Edge, right? every node should have the same spans.
-
// span info. typically, i_ and j_ refer to indices in the source sentence.
// In synchronous parsing, i_ and j_ will refer to target sentence/lattice indices
// while prev_i_ prev_j_ will refer to positions in the source.
@@ -97,54 +68,6 @@ public:
short int j_;
short int prev_i_;
short int prev_j_;
-
- void copy_info(Edge const& o) {
-#if USE_INFO_EDGE
- set_info(o.info_.str()); // by convention, each person putting info here starts with a separator (e.g. space). it's empty if nobody put any info there.
-#else
- (void) o;
-#endif
- }
- void copy_pod(Edge const& o) {
- rule_=o.rule_;
- i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;
- }
- void copy_features(Edge const& o) {
- feature_values_=o.feature_values_;
- copy_info(o);
- }
- void copy_fixed(Edge const& o) {
- copy_pod(o);
- copy_features(o);
- edge_prob_ = o.edge_prob_;
- }
- void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) {
- copy_fixed(o);
- head_node_=n2[o.head_node_];
- id_=e2[o.id_];
- n2.reindex_push_back(o.tail_nodes_,tail_nodes_);
- }
-
-#if USE_INFO_EDGE
- std::ostringstream info_;
- void set_info(std::string const& s) {
- info_.str(s);
- info_.seekp(0,std::ios_base::end);
- }
- Edge(Edge const& o) : head_node_(o.head_node_),tail_nodes_(o.tail_nodes_),rule_(o.rule_),feature_values_(o.feature_values_),edge_prob_(o.edge_prob_),id_(o.id_),i_(o.i_),j_(o.j_),prev_i_(o.prev_i_),prev_j_(o.prev_j_), info_(o.info_.str(),std::ios_base::ate) {
-// info_.seekp(0,std::ios_base::end);
- }
- void operator=(Edge const& o) {
- head_node_ = o.head_node_; tail_nodes_ = o.tail_nodes_; rule_ = o.rule_; feature_values_ = o.feature_values_; edge_prob_ = o.edge_prob_; id_ = o.id_; i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;
- set_info(o.info_.str());
- }
- std::string info() const { return info_.str(); }
- void reset_info() { info_.str(""); info_.clear(); }
-#else
- std::string info() const { return std::string(); }
- void reset_info() { }
- void set_info(std::string const& ) { }
-#endif
void show(std::ostream &o,unsigned mask=SPAN|RULE) const {
o<<'{';
if (mask&CATEGORY)
@@ -159,10 +82,6 @@ public:
o<<' '<<feature_values_;
if (mask&RULE)
o<<' '<<rule_->AsString(mask&RULE_LHS);
- if (USE_INFO_EDGE) {
- std::string const& i=info();
- if (mask&&!i.empty()) o << " |||"<<i; // remember, the initial space is expected as part of i
- }
o<<'}';
}
std::string show(unsigned mask=SPAN|RULE) const {
@@ -170,12 +89,28 @@ public:
show(o,mask);
return o.str();
}
- /* generic recursion re: child_handle=re(tail_nodes_[i],i,parent_handle)
-
- FIXME: make kbest create a simple derivation-tree structure (could be a
- hg), and replace the list-of-edges viterbi.h with a tree-structured one.
- CreateViterbiHypergraph can do for 1best, though.
- */
+ void copy_pod(Edge const& o) {
+ rule_=o.rule_;
+ i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;
+ }
+ void copy_features(Edge const& o) {
+ feature_values_=o.feature_values_;
+ }
+ void copy_fixed(Edge const& o) {
+ copy_pod(o);
+ copy_features(o);
+ edge_prob_ = o.edge_prob_;
+ }
+ void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) {
+ copy_fixed(o);
+ head_node_=n2[o.head_node_];
+ id_=e2[o.id_];
+ n2.reindex_push_back(o.tail_nodes_,tail_nodes_);
+ }
+ // generic recursion re: child_handle=re(tail_nodes_[i],i,parent_handle)
+ // FIXME: make kbest create a simple derivation-tree structure (could be a
+ // hg), and replace the list-of-edges viterbi.h with a tree-structured one.
+ // CreateViterbiHypergraph can do for 1best, though.
template <class EdgeRecurse,class TEdgeHandle>
std::string derivation_tree(EdgeRecurse const& re,TEdgeHandle const& eh,bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const {
std::ostringstream o;
@@ -203,7 +138,43 @@ public:
}
};
- // all this info ought to live in Node, but for some reason it's on Edges.
+ // TODO get rid of cat_?
+ // TODO keep cat_ and add span and/or state? :)
+ struct Node {
+ Node() : id_(), cat_() {} // value-initializes id_ and cat_ (both become 0)
+ int id_; // equal to this object's position in the nodes_ vector
+ WordID cat_; // non-terminal category if <0, 0 if not set
+ WordID NT() const { return -cat_; } // negated cat_
+ EdgesVector in_edges_; // an in edge is an edge with this node as its head. (in edges come from the bottom up to us) indices in edges_
+ EdgesVector out_edges_; // an out edge is an edge with this node as its tail. (out edges leave us up toward the top/goal). indices in edges_
+ void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting
+ cat_=o.cat_;
+ }
+ void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) { // copies o, remapping ids through n2 (nodes) and e2 (edges)
+ copy_fixed(o);
+ id_=n2[id_]; // NOTE(review): remaps this->id_, not o.id_, whereas Edge::copy_reindex uses e2[o.id_] - confirm this is intended
+ e2.reindex_push_back(o.in_edges_,in_edges_);
+ e2.reindex_push_back(o.out_edges_,out_edges_);
+ }
+ };
+
+} // namespace HG
+
+class Hypergraph;
+typedef boost::shared_ptr<Hypergraph> HypergraphP;
+// class representing an acyclic hypergraph
+// - edges have 1 head, 0..n tails
+class Hypergraph {
+public:
+ Hypergraph() : is_linear_chain_(false) {}
+ typedef HG::Node Node;
+ typedef HG::Edge Edge;
+ typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_
+ typedef std::vector<int> EdgesVector; // indices in edges_
+ enum {
+ NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
+ };
+
// except for stateful models that have split nt,span, this should identify the node
void SetNodeOrigin(int nodeid,NTSpan &r) const {
Node const &n=nodes_[nodeid];
@@ -230,18 +201,9 @@ public:
}
return s;
}
- // 0 if none, -TD index otherwise (just like in rule)
WordID NodeLHS(int nodeid) const {
Node const &n=nodes_[nodeid];
return n.NT();
- /*
- if (!n.in_edges_.empty()) {
- Edge const& e=edges_[n.in_edges_.front()];
- if (e.rule_)
- return -e.rule_->lhs_;
- }
- return 0;
- */
}
typedef std::vector<prob_t> EdgeProbs;
@@ -250,14 +212,8 @@ public:
typedef std::vector<bool> NodeMask;
std::string show_viterbi_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const;
-// builds viterbi hg and returns it formatted as a pretty string
-
- enum {
- NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
- };
std::string show_first_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const;
- // same as above, but takes in_edges_[0] all the way down - to make it viterbi cost (1-best), call ViterbiSortInEdges() first
typedef Edge const* EdgeHandle;
EdgeHandle operator()(int tailn,int /*taili*/,EdgeHandle /*parent*/) const {
@@ -334,7 +290,7 @@ public:
Edge* AddEdge(Edge const& in_edge, const TailNodeVector& tail) {
edges_.push_back(Edge(edges_.size(),in_edge));
Edge* edge = &edges_.back();
- edge->copy_features(in_edge);
+ edge->feature_values_ = in_edge.feature_values_;
edge->tail_nodes_ = tail; // possibly faster than copying to Edge() constructed above then copying via push_back. perhaps optimized it's the same.
index_tails(*edge);
return edge;
@@ -503,9 +459,9 @@ public:
template <class V>
void visit_edges_topo(V &v) {
- for (int i = 0; i < nodes_.size(); ++i) {
+ for (unsigned i = 0; i < nodes_.size(); ++i) {
EdgesVector const& in=nodes_[i].in_edges_;
- for (int j=0;j<in.size();++j) {
+ for (unsigned j=0;j<in.size();++j) {
int e=in[j];
v(i,e,edges_[e]);
}
@@ -534,14 +490,14 @@ private:
// for generic Viterbi/Inside algorithms
struct EdgeProb {
typedef prob_t Weight;
- inline const prob_t& operator()(const Hypergraph::Edge& e) const { return e.edge_prob_; }
+ inline const prob_t& operator()(const HG::Edge& e) const { return e.edge_prob_; }
};
struct EdgeSelectEdgeWeightFunction {
typedef prob_t Weight;
typedef std::vector<bool> EdgeMask;
EdgeSelectEdgeWeightFunction(const EdgeMask& v) : v_(v) {}
- inline prob_t operator()(const Hypergraph::Edge& e) const {
+ inline prob_t operator()(const HG::Edge& e) const {
if (v_[e.id_]) return prob_t::One();
else return prob_t::Zero();
}
@@ -551,7 +507,7 @@ private:
struct ScaledEdgeProb {
ScaledEdgeProb(const double& alpha) : alpha_(alpha) {}
- inline prob_t operator()(const Hypergraph::Edge& e) const { return e.edge_prob_.pow(alpha_); }
+ inline prob_t operator()(const HG::Edge& e) const { return e.edge_prob_.pow(alpha_); }
const double alpha_;
typedef prob_t Weight;
};
@@ -560,7 +516,7 @@ struct ScaledEdgeProb {
struct EdgeFeaturesAndProbWeightFunction {
typedef SparseVector<prob_t> Weight;
typedef Weight Result; //TODO: change Result->Weight everywhere?
- inline const Weight operator()(const Hypergraph::Edge& e) const {
+ inline const Weight operator()(const HG::Edge& e) const {
SparseVector<prob_t> res;
for (SparseVector<double>::const_iterator it = e.feature_values_.begin();
it != e.feature_values_.end(); ++it)
@@ -571,7 +527,7 @@ struct EdgeFeaturesAndProbWeightFunction {
struct TransitionCountWeightFunction {
typedef double Weight;
- inline double operator()(const Hypergraph::Edge& e) const { (void)e; return 1.0; }
+ inline double operator()(const HG::Edge& e) const { (void)e; return 1.0; }
};
#endif
diff --git a/decoder/hg_intersect.cc b/decoder/hg_intersect.cc
index 6e3bfee6..ad5b701a 100644
--- a/decoder/hg_intersect.cc
+++ b/decoder/hg_intersect.cc
@@ -79,7 +79,9 @@ static bool FastLinearIntersect(const Lattice& target, Hypergraph* hg) {
return (cov.size() == target.size());
}
-bool HG::Intersect(const Lattice& target, Hypergraph* hg) {
+namespace HG {
+
+bool Intersect(const Lattice& target, Hypergraph* hg) {
// there are a number of faster algorithms available for restricted
// classes of hypergraph and/or target.
if (hg->IsLinearChain() && target.IsSentence())
@@ -101,7 +103,7 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) {
// grammar, create the labels here
const string kSEP = "_";
for (unsigned i = 0; i < nnodes; ++i) {
- const char* pstr = "CAT";
+ string pstr = "CAT";
if (hg->nodes_[i].cat_ < 0)
pstr = TD::Convert(-hg->nodes_[i].cat_);
cats[i] = TD::Convert(pstr + kSEP + lexical_cast<string>(i)) * -1;
@@ -160,3 +162,5 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) {
return true;
}
+}
+
diff --git a/decoder/hg_intersect.h b/decoder/hg_intersect.h
index 826bdaae..29a5ea2a 100644
--- a/decoder/hg_intersect.h
+++ b/decoder/hg_intersect.h
@@ -1,13 +1,11 @@
#ifndef _HG_INTERSECT_H_
#define _HG_INTERSECT_H_
-#include <vector>
-
#include "lattice.h"
class Hypergraph;
-struct HG {
- static bool Intersect(const Lattice& target, Hypergraph* hg);
+namespace HG {
+ bool Intersect(const Lattice& target, Hypergraph* hg);
};
#endif
diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc
index 8bd40387..64c6663e 100644
--- a/decoder/hg_io.cc
+++ b/decoder/hg_io.cc
@@ -28,7 +28,7 @@ struct HGReader : public JSONParser {
hg.ConnectEdgeToHeadNode(&hg.edges_[in_edges[i]], node);
}
}
- void CreateEdge(const TRulePtr& rule, FeatureVector* feats, const SmallVectorUnsigned& tail) {
+ void CreateEdge(const TRulePtr& rule, SparseVector<double>* feats, const SmallVectorUnsigned& tail) {
Hypergraph::Edge* edge = hg.AddEdge(rule, tail);
feats->swap(edge->feature_values_);
edge->i_ = spans[0];
@@ -392,8 +392,8 @@ string HypergraphIO::AsPLF(const Hypergraph& hg, bool include_global_parentheses
const Hypergraph::Edge& e = hg.edges_[hg.nodes_[i].out_edges_[j]];
const string output = e.rule_->e_.size() ==2 ? Escape(TD::Convert(e.rule_->e_[1])) : EPS;
double prob = log(e.edge_prob_);
- if (isinf(prob)) { prob = -9e20; }
- if (isnan(prob)) { prob = 0; }
+ if (std::isinf(prob)) { prob = -9e20; }
+ if (std::isnan(prob)) { prob = 0; }
os << "('" << output << "'," << prob << "," << e.head_node_ - i << "),";
}
os << "),";
@@ -600,7 +600,7 @@ void HypergraphIO::WriteAsCFG(const Hypergraph& hg) {
// grammar, create the labels here
const string kSEP = "_";
for (int i = 0; i < hg.nodes_.size(); ++i) {
- const char* pstr = "CAT";
+ string pstr = "CAT";
if (hg.nodes_[i].cat_ < 0)
pstr = TD::Convert(-hg.nodes_[i].cat_);
cats[i] = TD::Convert(pstr + kSEP + boost::lexical_cast<string>(i)) * -1;
diff --git a/decoder/hg_sampler.cc b/decoder/hg_sampler.cc
index cdf0ec3c..8e520871 100644
--- a/decoder/hg_sampler.cc
+++ b/decoder/hg_sampler.cc
@@ -71,3 +71,58 @@ void HypergraphSampler::sample_hypotheses(const Hypergraph& hg,
Viterbi(hg, &hyp.words, ESentenceTraversal(), SampledDerivationWeightFunction(sampled_edges));
}
}
+
+// Samples n derivations from hg top-down and stores one tree string per sample in *trees.
+// At each node an incoming edge is picked via rng->SelectSample, weighted by
+// edge_prob_ times the inside scores of the edge's tail nodes.
+void HypergraphSampler::sample_trees(const Hypergraph& hg,
+                                     unsigned n,
+                                     MT19937* rng,
+                                     vector<string>* trees) {
+  trees->clear();
+  trees->resize(n);
+
+  // compute inside probabilities
+  vector<prob_t> node_probs;
+  Inside<prob_t, EdgeProb>(hg, &node_probs, EdgeProb());
+
+  vector<bool> sampled_edges(hg.edges_.size());
+  queue<unsigned> q;
+  SampleSet<prob_t> ss;
+  for (unsigned i = 0; i < n; ++i) {
+    fill(sampled_edges.begin(), sampled_edges.end(), false);
+    // sample derivation top down
+    assert(q.empty());
+    q.push(hg.nodes_.size() - 1);  // start at the last node (presumably the goal); assumes hg has at least one node
+    while(!q.empty()) {
+      unsigned cur_node_id = q.front();
+      q.pop();
+      const Hypergraph::Node& node = hg.nodes_[cur_node_id];
+      const unsigned num_in_edges = node.in_edges_.size();
+      unsigned sampled_edge_idx = 0;
+      if (num_in_edges == 1) {
+        sampled_edge_idx = node.in_edges_[0];  // only one choice: no need to consult the rng
+      } else {
+        assert(num_in_edges > 1);
+        ss.clear();
+        for (unsigned j = 0; j < num_in_edges; ++j) {
+          const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]];
+          prob_t p = edge.edge_prob_;  // edge weight
+          for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k)
+            p *= node_probs[edge.tail_nodes_[k]];  // tail node inside weight
+          ss.add(p);
+        }
+        sampled_edge_idx = node.in_edges_[rng->SelectSample(ss)];
+      }
+      sampled_edges[sampled_edge_idx] = true;
+      const Hypergraph::Edge& sampled_edge = hg.edges_[sampled_edge_idx];
+      for (unsigned j = 0; j < sampled_edge.tail_nodes_.size(); ++j) {
+        q.push(sampled_edge.tail_nodes_[j]);
+      }
+    }
+    vector<WordID> tmp;
+    Viterbi(hg, &tmp, ETreeTraversal(), SampledDerivationWeightFunction(sampled_edges));
+    (*trees)[i] = TD::GetString(tmp);
+  }
+}
+
diff --git a/decoder/hg_sampler.h b/decoder/hg_sampler.h
index bf4e1eb0..6ac39a20 100644
--- a/decoder/hg_sampler.h
+++ b/decoder/hg_sampler.h
@@ -3,6 +3,7 @@
#include <vector>
+#include <string>
#include "sparse_vector.h"
#include "sampler.h"
#include "wordid.h"
@@ -22,6 +23,12 @@ struct HypergraphSampler {
unsigned n, // how many samples to draw
MT19937* rng,
std::vector<Hypothesis>* hypos);
+
+ static void
+ sample_trees(const Hypergraph& hg,
+ unsigned n,
+ MT19937* rng,
+ std::vector<std::string>* trees);
};
#endif
diff --git a/decoder/hg_test.cc b/decoder/hg_test.cc
index 92ed98b2..37469748 100644
--- a/decoder/hg_test.cc
+++ b/decoder/hg_test.cc
@@ -6,6 +6,7 @@
#include "json_parse.h"
#include "hg_intersect.h"
+#include "hg_union.h"
#include "viterbi.h"
#include "kbest.h"
#include "inside_outside.h"
@@ -52,7 +53,7 @@ BOOST_AUTO_TEST_CASE(Union) {
int l2 = ViterbiPathLength(hg2);
cerr << c1 << "\t" << TD::GetString(t1) << endl;
cerr << c2 << "\t" << TD::GetString(t2) << endl;
- hg1.Union(hg2);
+ HG::Union(hg2, &hg1);
hg1.Reweight(wts);
c3 = ViterbiESentence(hg1, &t3);
int l3 = ViterbiPathLength(hg1);
@@ -121,8 +122,8 @@ BOOST_AUTO_TEST_CASE(InsideScore) {
vector<prob_t> post;
inside = hg.ComputeBestPathThroughEdges(&post);
BOOST_CHECK_CLOSE(-0.3, log(inside), 1e-4); // computed by hand
- BOOST_CHECK_EQUAL(post.size(), 4);
- for (int i = 0; i < 4; ++i) {
+ BOOST_CHECK_EQUAL(post.size(), 5);
+ for (int i = 0; i < 5; ++i) {
cerr << "edge post: " << log(post[i]) << '\t' << hg.edges_[i].rule_->AsString() << endl;
}
}
@@ -139,12 +140,15 @@ BOOST_AUTO_TEST_CASE(PruneInsideOutside) {
cerr << TD::GetString(trans) << "\n";
cerr << "cost: " << cost << "\n";
hg.PrintGraphviz();
+#if 0
hg.DensityPruneInsideOutside(0.5, false, 2.0);
hg.BeamPruneInsideOutside(0.5, false, 0.5);
cost = ViterbiESentence(hg, &trans);
cerr << "Ncst: " << cost << endl;
cerr << TD::GetString(trans) << "\n";
hg.PrintGraphviz();
+#endif
+ cerr << "FIX PLEASE\n";
}
BOOST_AUTO_TEST_CASE(TestPruneEdges) {
diff --git a/decoder/hg_test.h b/decoder/hg_test.h
index 2e308c37..e96cb0b1 100644
--- a/decoder/hg_test.h
+++ b/decoder/hg_test.h
@@ -64,12 +64,21 @@ Name HGjsons[]= {
}
+// Test helper: adds an arity-0 edge without tail nodes to `hg` and registers
+// it as an incoming edge of node 0.
+void AddNullEdge(Hypergraph* hg) {
+  TRule null_rule;
+  null_rule.arity_ = 0;
+  const TRulePtr rule_copy(new TRule(null_rule));
+  const Hypergraph::TailNodeVector no_tails;
+  const int null_edge_id = hg->AddEdge(rule_copy, no_tails)->id_;
+  hg->nodes_[0].in_edges_.push_back(null_edge_id);
+  // The edge just added is addressed as edges_.back(); point its head at node 0.
+  hg->edges_.back().head_node_ = 0;
+}
+
void HGSetup::CreateTinyLatticeHG(Hypergraph* hg) {
Json(hg,HGjsons[TinyLatticeHG]);
+ AddNullEdge(hg);
}
void HGSetup::CreateLatticeHG(Hypergraph* hg) {
Json(hg,HGjsons[LatticeHG]);
+ AddNullEdge(hg);
}
void HGSetup::CreateHG_tiny(Hypergraph* hg) {
diff --git a/decoder/hg_union.cc b/decoder/hg_union.cc
new file mode 100644
index 00000000..37082976
--- /dev/null
+++ b/decoder/hg_union.cc
@@ -0,0 +1,58 @@
+#include "hg_union.h"
+
+#include "hg.h"
+
+using namespace std;
+
+namespace HG {
+
+// Merges hypergraph `in` into `*out` so that both share out's goal node. The
+// last node of each hypergraph is taken to be its goal.
+//
+//  - Unioning a hypergraph with itself, or with an empty `in`, is a no-op.
+//  - If `*out` has no nodes it simply receives copies of in's nodes and edges.
+//  - Otherwise every non-goal node and every edge of `in` is appended to
+//    `*out` with its ids shifted by out's previous node/edge counts. Edges
+//    headed at in's goal are re-headed onto out's goal. For appended edges
+//    only the id, rule, feature values, head and tail nodes are set.
+//    Finally `*out` is topologically re-sorted around its goal.
+void Union(const Hypergraph& in, Hypergraph* out) {
+  if (&in == out) return;
+  // Nothing to merge. Without this guard `in.nodes_.size() - 1` below would be
+  // -1, the resize would drop out's goal node, and the sort would run on an
+  // out-of-range goal index.
+  if (in.nodes_.empty()) return;
+  if (out->nodes_.empty()) {
+    out->nodes_ = in.nodes_;
+    out->edges_ = in.edges_;
+    return;
+  }
+  unsigned noff = out->nodes_.size();   // id offset for nodes copied from `in`
+  unsigned eoff = out->edges_.size();   // id offset for edges copied from `in`
+  int ogoal = in.nodes_.size() - 1;     // goal node of `in`
+  int cgoal = noff - 1;                 // goal node of `*out`
+  // keep a single goal node, so add nodes.size - 1
+  out->nodes_.resize(out->nodes_.size() + ogoal);
+  // add all edges
+  out->edges_.resize(out->edges_.size() + in.edges_.size());
+
+  // Copy every non-goal node, shifting the edge ids it refers to.
+  for (int i = 0; i < ogoal; ++i) {
+    const Hypergraph::Node& on = in.nodes_[i];
+    Hypergraph::Node& cn = out->nodes_[i + noff];
+    cn.id_ = i + noff;
+    cn.in_edges_.resize(on.in_edges_.size());
+    for (unsigned j = 0; j < on.in_edges_.size(); ++j)
+      cn.in_edges_[j] = on.in_edges_[j] + eoff;
+
+    cn.out_edges_.resize(on.out_edges_.size());
+    for (unsigned j = 0; j < on.out_edges_.size(); ++j)
+      cn.out_edges_[j] = on.out_edges_[j] + eoff;
+  }
+
+  // Copy every edge, shifting node ids and redirecting goal-headed edges.
+  for (unsigned i = 0; i < in.edges_.size(); ++i) {
+    const Hypergraph::Edge& oe = in.edges_[i];
+    Hypergraph::Edge& ce = out->edges_[i + eoff];
+    ce.id_ = i + eoff;
+    ce.rule_ = oe.rule_;
+    ce.feature_values_ = oe.feature_values_;
+    if (oe.head_node_ == ogoal) {
+      // in's goal node was not copied; attach the edge to out's goal instead.
+      ce.head_node_ = cgoal;
+      out->nodes_[cgoal].in_edges_.push_back(ce.id_);
+    } else {
+      ce.head_node_ = oe.head_node_ + noff;
+    }
+    ce.tail_nodes_.resize(oe.tail_nodes_.size());
+    for (unsigned j = 0; j < oe.tail_nodes_.size(); ++j)
+      ce.tail_nodes_[j] = oe.tail_nodes_[j] + noff;
+  }
+
+  out->TopologicallySortNodesAndEdges(cgoal);
+}
+
+}
+
diff --git a/decoder/hg_union.h b/decoder/hg_union.h
new file mode 100644
index 00000000..34624246
--- /dev/null
+++ b/decoder/hg_union.h
@@ -0,0 +1,9 @@
+#ifndef _HG_UNION_H_
+#define _HG_UNION_H_
+
+class Hypergraph;
+namespace HG {
+  // Merges hypergraph `in` into `*out`, keeping a single goal node. Does
+  // nothing when `in` and `*out` are the same object.
+  void Union(const Hypergraph& in, Hypergraph* out);
+}
+
+#endif
diff --git a/decoder/incremental.cc b/decoder/incremental.cc
new file mode 100644
index 00000000..46615b0b
--- /dev/null
+++ b/decoder/incremental.cc
@@ -0,0 +1,167 @@
+#include "incremental.h"
+
+#include "hg.h"
+#include "fdict.h"
+#include "tdict.h"
+
+#include "lm/enumerate_vocab.hh"
+#include "lm/model.hh"
+#include "search/config.hh"
+#include "search/context.hh"
+#include "search/edge.hh"
+#include "search/edge_generator.hh"
+#include "search/rule.hh"
+#include "search/vertex.hh"
+#include "search/vertex_generator.hh"
+#include "util/exception.hh"
+
+#include <boost/scoped_ptr.hpp>
+#include <boost/scoped_array.hpp>
+
+#include <iostream>
+#include <vector>
+
+namespace {
+
+// Vocabulary enumeration hook that remembers, for each cdec WordID, the
+// language model index reported for the same string.
+struct MapVocab : public lm::EnumerateVocab {
+  public:
+    MapVocab() {}
+
+    // Stores `index` as the LM id of the word `str`, growing the table when
+    // the word's cdec id lies past its end. Do not call after lookups begin.
+    void Add(lm::WordIndex index, const StringPiece &str) {
+      const WordID cdec_id = TD::Convert(str.as_string());
+      const bool past_end = cdec_id >= out_.size();
+      if (past_end) {
+        out_.resize(cdec_id + 1);
+      }
+      out_[cdec_id] = index;
+    }
+
+    // Returns the LM index recorded for `id`; ids beyond the table fall back
+    // to slot 0. Assumes Add has been called and will never be called again.
+    lm::WordIndex FromCDec(WordID id) const {
+      if (out_.size() > id) {
+        return out_[id];
+      }
+      return out_[0];
+    }
+
+  private:
+    // Indexed by cdec WordID; holds the matching lm::WordIndex.
+    std::vector<lm::WordIndex> out_;
+};
+
+// Incremental search over a hypergraph, scored with a language model of type
+// `Model`.
+template <class Model> class Incremental : public IncrementalBase {
+  public:
+    // Loads the language model from `model_file` and reads the
+    // KLanguageModel, KLanguageModel_OOV and WordPenalty weights out of
+    // `weights`, which is indexed by feature id. A feature whose id lies past
+    // the end of `weights` gets weight 0 instead of an out-of-bounds read.
+    Incremental(const char *model_file, const std::vector<weight_t> &weights) :
+      IncrementalBase(weights),
+      m_(model_file, GetConfig()),
+      weights_(
+          WeightOrZero(weights, "KLanguageModel"),
+          WeightOrZero(weights, "KLanguageModel_OOV"),
+          WeightOrZero(weights, "WordPenalty")) {
+      std::cerr << "Weights KLanguageModel " << weights_.LM() << " KLanguageModel_OOV " << weights_.OOV() << " WordPenalty " << weights_.WordPenalty() << std::endl;
+    }
+    void Search(unsigned int pop_limit, const Hypergraph &hg) const;
+
+  private:
+    void ConvertEdge(const search::Context<Model> &context, bool final, search::Vertex *vertices, const Hypergraph::Edge &in, search::EdgeGenerator &gen) const;
+
+    // Bounds-checked lookup of the weight for feature `name`.
+    static weight_t WeightOrZero(const std::vector<weight_t> &weights, const char *name) {
+      const std::vector<weight_t>::size_type id = FD::Convert(name);
+      return id < weights.size() ? weights[id] : weight_t();
+    }
+
+    // Builds the model configuration used to construct m_. It stores the
+    // address of vocab_, so vocab_ must be declared before m_ (members are
+    // initialized in declaration order).
+    lm::ngram::Config GetConfig() {
+      lm::ngram::Config ret;
+      ret.enumerate_vocab = &vocab_;
+      return ret;
+    }
+
+    MapVocab vocab_;
+
+    const Model m_;
+
+    const search::Weights weights_;
+};
+
+// Writes the target side of the derivation rooted at `final` to std::cout.
+// Each positive symbol in the applied rule's e() is printed followed by a
+// space; every other symbol is expanded by recursing into the next child.
+void PrintFinal(const Hypergraph &hg, const search::Final final) {
+  const Hypergraph::Edge *applied = static_cast<const Hypergraph::Edge*>(final.GetNote().vp);
+  const std::vector<WordID> &target = applied->rule_->e();
+  const search::Final *next_child(final.Children());
+  for (std::vector<WordID>::size_type pos = 0; pos < target.size(); ++pos) {
+    const WordID sym = target[pos];
+    if (sym <= 0) {
+      PrintFinal(hg, *next_child);
+      ++next_child;
+      continue;
+    }
+    std::cout << TD::Convert(sym) << ' ';
+  }
+}
+
+// Runs the incremental search over `hg` bottom-up and prints the best
+// hypothesis and its score to std::cout, or "NO PATH FOUND" when there is none.
+// The last node of `hg` is never searched; the node before it is treated as
+// the final vertex, and its edges are converted with `final` set.
+template <class Model> void Incremental<Model>::Search(unsigned int pop_limit, const Hypergraph &hg) const {
+  const unsigned int num_nodes = hg.nodes_.size();
+  // With fewer than two nodes the unsigned `size - 1` / `size - 2` below would
+  // wrap around and index far outside out_vertices.
+  if (num_nodes < 2) {
+    std::cout << "NO PATH FOUND" << std::endl;
+    return;
+  }
+  const unsigned int final_index = num_nodes - 2;
+
+  boost::scoped_array<search::Vertex> out_vertices(new search::Vertex[num_nodes]);
+  search::Config config(weights_, pop_limit);
+  search::Context<Model> context(config, m_);
+
+  for (unsigned int i = 0; i < num_nodes - 1; ++i) {
+    search::EdgeGenerator gen;
+    const Hypergraph::EdgesVector &down_edges = hg.nodes_[i].in_edges_;
+    for (unsigned int j = 0; j < down_edges.size(); ++j) {
+      unsigned int edge_index = down_edges[j];
+      ConvertEdge(context, i == final_index, out_vertices.get(), hg.edges_[edge_index], gen);
+    }
+    search::VertexGenerator vertex_gen(context, out_vertices[i]);
+    gen.Search(context, vertex_gen);
+  }
+  const search::Final top = out_vertices[final_index].BestChild();
+  if (!top.Valid()) {
+    std::cout << "NO PATH FOUND" << std::endl;
+  } else {
+    PrintFinal(hg, top);
+    std::cout << "||| " << top.GetScore() << std::endl;
+  }
+}
+
+// Translates hypergraph edge `in` into a search::PartialEdge and adds it to
+// `gen`. Symbols <= 0 in the rule's e() are nonterminals whose vertex is found
+// through in.tail_nodes_; positive symbols are terminals mapped to LM indices.
+// If any nonterminal's vertex is empty the edge is silently skipped. When
+// `final` is true the end-of-sentence word is appended before scoring.
+template <class Model> void Incremental<Model>::ConvertEdge(const search::Context<Model> &context, bool final, search::Vertex *vertices, const Hypergraph::Edge &in, search::EdgeGenerator &gen) const {
+  const std::vector<WordID> &e = in.rule_->e();
+  std::vector<lm::WordIndex> words;
+  words.reserve(e.size());
+  std::vector<search::PartialVertex> nts;
+  unsigned int terminals = 0;
+  float score = 0.0;
+  for (std::vector<WordID>::const_iterator word = e.begin(); word != e.end(); ++word) {
+    if (*word <= 0) {
+      // Nonterminal: -*word selects the tail node it refers to.
+      nts.push_back(vertices[in.tail_nodes_[-*word]].RootPartial());
+      // An empty tail vertex: give up on this edge.
+      if (nts.back().Empty()) return;
+      score += nts.back().Bound();
+      // kMaxWordIndex stands in for the nonterminal position in `words`.
+      words.push_back(lm::kMaxWordIndex);
+    } else {
+      ++terminals;
+      words.push_back(vocab_.FromCDec(*word));
+    }
+  }
+
+  if (final) {
+    words.push_back(m_.GetVocabulary().EndSentence());
+  }
+
+  search::PartialEdge out(gen.AllocateEdge(nts.size()));
+
+  // &nts[0] is undefined on an empty vector, so only copy when the rule
+  // actually has nonterminals.
+  if (!nts.empty()) {
+    memcpy(out.NT(), &nts[0], sizeof(search::PartialVertex) * nts.size());
+  }
+
+  // Remember the originating hypergraph edge so PrintFinal can recover it.
+  search::Note note;
+  note.vp = &in;
+  out.SetNote(note);
+
+  score += in.rule_->GetFeatureValues().dot(cdec_weights_);
+  score -= static_cast<float>(terminals) * context.GetWeights().WordPenalty() / M_LN10;
+  score += search::ScoreRule(context, words, final, out.Between());
+  out.SetScore(score);
+
+  gen.AddEdge(out);
+}
+
+} // namespace
+
+// Factory: inspects `model_file` and returns a newly allocated Incremental
+// instantiated for the detected model type. Files not recognized as binary are
+// treated as PROBING; any type other than PROBING or REST_PROBING throws
+// util::Exception.
+IncrementalBase *IncrementalBase::Load(const char *model_file, const std::vector<weight_t> &weights) {
+  lm::ngram::ModelType detected;
+  const bool is_binary = lm::ngram::RecognizeBinary(model_file, detected);
+  if (!is_binary) {
+    detected = lm::ngram::PROBING;
+  }
+  if (detected == lm::ngram::PROBING) {
+    return new Incremental<lm::ngram::ProbingModel>(model_file, weights);
+  }
+  if (detected == lm::ngram::REST_PROBING) {
+    return new Incremental<lm::ngram::RestProbingModel>(model_file, weights);
+  }
+  UTIL_THROW(util::Exception, "Sorry this lm type isn't supported yet.");
+}
+
+IncrementalBase::~IncrementalBase() {} // Out-of-line definition of the virtual destructor declared in incremental.h.
+
+IncrementalBase::IncrementalBase(const std::vector<weight_t> &weights) : cdec_weights_(weights) {} // cdec_weights_ is a reference member, so this binds to the caller's vector rather than copying it; `weights` must outlive this object.
diff --git a/decoder/incremental.h b/decoder/incremental.h
new file mode 100644
index 00000000..f791a626
--- /dev/null
+++ b/decoder/incremental.h
@@ -0,0 +1,23 @@
+#ifndef _INCREMENTAL_H_
+#define _INCREMENTAL_H_
+
+#include "weights.h"
+#include <vector>
+
+class Hypergraph;
+
+class IncrementalBase { // Abstract interface for incremental search over a Hypergraph; the constructor is protected, instances come from Load().
+ public:
+ static IncrementalBase *Load(const char *model_file, const std::vector<weight_t> &weights); // Returns an object allocated with new for the model in model_file. NOTE(review): the caller presumably owns and deletes it - confirm.
+
+ virtual ~IncrementalBase();
+
+ virtual void Search(unsigned int pop_limit, const Hypergraph &hg) const = 0; // Searches hg; the implementation in incremental.cc prints its result to std::cout.
+
+ protected:
+ IncrementalBase(const std::vector<weight_t> &weights); // Keeps a reference to weights, not a copy.
+
+ const std::vector<weight_t> &cdec_weights_; // Refers to the vector passed to the constructor, which must outlive this object.
+};
+
+#endif // _INCREMENTAL_H_
diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h
index f73a1d3f..c0377fe8 100644
--- a/decoder/inside_outside.h
+++ b/decoder/inside_outside.h
@@ -42,7 +42,7 @@ WeightType Inside(const Hypergraph& hg,
Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_;
const unsigned num_in_edges = in.size();
for (unsigned j = 0; j < num_in_edges; ++j) {
- const Hypergraph::Edge& edge = hg.edges_[in[j]];
+ const HG::Edge& edge = hg.edges_[in[j]];
WeightType score = weight(edge);
for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) {
const int tail_node_index = edge.tail_nodes_[k];
@@ -74,7 +74,7 @@ void Outside(const Hypergraph& hg,
Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_;
const int num_in_edges = in.size();
for (int j = 0; j < num_in_edges; ++j) {
- const Hypergraph::Edge& edge = hg.edges_[in[j]];
+ const HG::Edge& edge = hg.edges_[in[j]];
WeightType head_and_edge_weight = weight(edge);
head_and_edge_weight *= head_node_outside_score;
const int num_tail_nodes = edge.tail_nodes_.size();
@@ -138,7 +138,7 @@ struct InsideOutsides {
Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_;
const int num_in_edges = in.size();
for (int j = 0; j < num_in_edges; ++j) {
- const Hypergraph::Edge& edge = hg.edges_[in[j]];
+ const HG::Edge& edge = hg.edges_[in[j]];
KType kbar_e = outside[i];
const int num_tail_nodes = edge.tail_nodes_.size();
for (int k = 0; k < num_tail_nodes; ++k)
@@ -156,7 +156,7 @@ struct InsideOutsides {
const int num_in_edges = in.size();
for (int j = 0; j < num_in_edges; ++j) {
int edgei=in[j];
- const Hypergraph::Edge& edge = hg.edges_[edgei];
+ const HG::Edge& edge = hg.edges_[edgei];
V x=weight(edge)*outside[i];
const int num_tail_nodes = edge.tail_nodes_.size();
for (int k = 0; k < num_tail_nodes; ++k)
diff --git a/decoder/kbest.h b/decoder/kbest.h
index 9af3a20e..9a55f653 100644
--- a/decoder/kbest.h
+++ b/decoder/kbest.h
@@ -48,7 +48,7 @@ namespace KBest {
}
struct Derivation {
- Derivation(const Hypergraph::Edge& e,
+ Derivation(const HG::Edge& e,
const SmallVectorInt& jv,
const WeightType& w,
const SparseVector<double>& f) :
@@ -58,11 +58,11 @@ namespace KBest {
feature_values(f) {}
// dummy constructor, just for query
- Derivation(const Hypergraph::Edge& e,
+ Derivation(const HG::Edge& e,
const SmallVectorInt& jv) : edge(&e), j(jv) {}
T yield;
- const Hypergraph::Edge* const edge;
+ const HG::Edge* const edge;
const SmallVectorInt j;
const WeightType score;
const SparseVector<double> feature_values;
@@ -82,8 +82,8 @@ namespace KBest {
Derivation const* d;
explicit EdgeHandle(Derivation const* d) : d(d) { }
// operator bool() const { return d->edge; }
- operator Hypergraph::Edge const* () const { return d->edge; }
-// Hypergraph::Edge const * operator ->() const { return d->edge; }
+ operator HG::Edge const* () const { return d->edge; }
+// HG::Edge const * operator ->() const { return d->edge; }
};
EdgeHandle operator()(unsigned t,unsigned taili,EdgeHandle const& parent) const {
@@ -158,7 +158,7 @@ namespace KBest {
// the yield is computed in LazyKthBest before the derivation is added to D
// returns NULL if j refers to derivation numbers larger than the
// antecedent structure define
- Derivation* CreateDerivation(const Hypergraph::Edge& e, const SmallVectorInt& j) {
+ Derivation* CreateDerivation(const HG::Edge& e, const SmallVectorInt& j) {
WeightType score = w(e);
SparseVector<double> feats = e.feature_values_;
for (int i = 0; i < e.Arity(); ++i) {
@@ -177,7 +177,7 @@ namespace KBest {
const Hypergraph::Node& node = g.nodes_[v];
for (unsigned i = 0; i < node.in_edges_.size(); ++i) {
- const Hypergraph::Edge& edge = g.edges_[node.in_edges_[i]];
+ const HG::Edge& edge = g.edges_[node.in_edges_[i]];
SmallVectorInt jv(edge.Arity(), 0);
Derivation* d = CreateDerivation(edge, jv);
assert(d);
diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h
index b603e27a..d2c4715c 100644
--- a/decoder/oracle_bleu.h
+++ b/decoder/oracle_bleu.h
@@ -12,6 +12,7 @@
#include "scorer.h"
#include "hg.h"
#include "ff_factory.h"
+#include "ffset.h"
#include "ff_bleu.h"
#include "sparse_vector.h"
#include "viterbi.h"
@@ -26,7 +27,7 @@
struct Translation {
typedef std::vector<WordID> Sentence;
Sentence sentence;
- FeatureVector features;
+ SparseVector<double> features;
Translation() { }
Translation(Hypergraph const& hg,WeightVector *feature_weights=0)
{
@@ -57,14 +58,14 @@ struct Oracle {
}
// feature 0 will be the error rate in fear and hope
// move toward hope
- FeatureVector ModelHopeGradient() const {
- FeatureVector r=hope.features-model.features;
+ SparseVector<double> ModelHopeGradient() const {
+ SparseVector<double> r=hope.features-model.features;
r.set_value(0,0);
return r;
}
// move toward hope from fear
- FeatureVector FearHopeGradient() const {
- FeatureVector r=hope.features-fear.features;
+ SparseVector<double> FearHopeGradient() const {
+ SparseVector<double> r=hope.features-fear.features;
r.set_value(0,0);
return r;
}
diff --git a/decoder/program_options.h b/decoder/program_options.h
index 87afb320..3cd7649a 100644
--- a/decoder/program_options.h
+++ b/decoder/program_options.h
@@ -94,7 +94,7 @@ struct any_printer : public boost::function<void (Ostream &,boost::any const&)>
{}
template <class T>
- explicit any_printer(T const* tag) : F(typed_print<T>()) {
+ explicit any_printer(T const*) : F(typed_print<T>()) {
}
template <class T>
diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc
index a978cfc2..3b43b586 100644
--- a/decoder/scfg_translator.cc
+++ b/decoder/scfg_translator.cc
@@ -15,13 +15,73 @@
#include "tdict.h"
#include "viterbi.h"
#include "verbose.h"
+#include <tr1/unordered_map>
#define foreach BOOST_FOREACH
#define reverse_foreach BOOST_REVERSE_FOREACH
using namespace std;
+using namespace std::tr1;
static bool printGrammarsUsed = false;
+struct GlueGrammar : public TextGrammar { // TextGrammar holding glue rules, either read from a file or generated from a goal and a default nonterminal.
+ // read glue grammar from file
+ explicit GlueGrammar(const std::string& file);
+ GlueGrammar(const std::string& goal_nt, const std::string& default_nt, const unsigned int ctf_level=0); // "S", "X"; adds two generated glue rules, each refined ctf_level times
+ virtual bool HasRuleForSpan(int i, int j, int distance) const; // true only when i == 0
+};
+
+struct PassThroughGrammar : public TextGrammar { // TextGrammar with one PassThrough=1 rule per distinct arc label of the input lattice, copying the source word to the target side.
+ PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0); // cat is the left-hand-side category used for every generated rule
+ virtual bool HasRuleForSpan(int i, int j, int distance) const; // true when distance < 2
+};
+
+GlueGrammar::GlueGrammar(const string& file) : TextGrammar(file) {} // Forwards file to the TextGrammar base constructor (glue grammar read from file).
+
+// Hangs a chain of ctf_level copies beneath `pt`: at each level the current
+// rule is copied, the copy becomes the only entry of the current rule's
+// fine_rules_, and refinement continues from the copy.
+static void RefineRule(TRulePtr pt, const unsigned int ctf_level){
+  unsigned int levels_left = ctf_level;
+  while (levels_left > 0) {
+    // Take the copy before fine_rules_ of the current rule is replaced.
+    TRulePtr finer(new TRule(*pt));
+    pt->fine_rules_.reset(new vector<TRulePtr>(1, finer));
+    pt = finer;
+    --levels_left;
+  }
+}
+
+// Generates the two glue rules for the given goal and default nonterminals,
+// adds each to the grammar and refines it ctf_level times:
+//   [goal] ||| [default,1] ||| [1]
+//   [goal] ||| [goal,1] [default,2] ||| [1] [2] ||| Glue=1
+GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt, const unsigned int ctf_level) {
+  const string goal_lhs = "[" + goal_nt + "] ||| [";
+  const string stop_text = goal_lhs + default_nt + ",1] ||| [1]";
+  const string glue_text = goal_lhs + goal_nt + ",1] [" + default_nt + ",2] ||| [1] [2] ||| Glue=1";
+
+  TRulePtr stop_glue(new TRule(stop_text));
+  AddRule(stop_glue);
+  RefineRule(stop_glue, ctf_level);
+
+  TRulePtr glue(new TRule(glue_text));
+  AddRule(glue);
+  RefineRule(glue, ctf_level);
+}
+
+bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const { // Reports rules only for spans starting at position 0; j and distance are ignored.
+ return (i == 0);
+}
+
+// Adds one rule "[cat] ||| w ||| w ||| PassThrough=1" for every distinct arc
+// label w found in `input`. Each rule gets the alignment point (0,0) and is
+// refined ctf_level times. Labels are de-duplicated across the whole lattice.
+PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) {
+  unordered_set<WordID> seen;  // labels that already have a rule
+  for (unsigned i = 0; i < input.size(); ++i) {
+    const vector<LatticeArc>& alts = input[i];
+    for (unsigned k = 0; k < alts.size(); ++k) {
+      const WordID label = alts[k].label;
+      // insert() reports whether the label is new, so a single lookup suffices.
+      if (!seen.insert(label).second) continue;
+      const string& src = TD::Convert(label);
+      TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1"));
+      pt->a_.push_back(AlignmentPoint(0,0));
+      AddRule(pt);
+      RefineRule(pt, ctf_level);
+    }
+  }
+}
+
+bool PassThroughGrammar::HasRuleForSpan(int, int, int distance) const { // Depends only on distance; the two span endpoints are ignored.
+ return (distance < 2);
+}
+
struct SCFGTranslatorImpl {
SCFGTranslatorImpl(const boost::program_options::variables_map& conf) :
max_span_limit(conf["scfg_max_span_limit"].as<int>()),
diff --git a/decoder/tromble_loss.h b/decoder/tromble_loss.h
index 599a2d54..fde33100 100644
--- a/decoder/tromble_loss.h
+++ b/decoder/tromble_loss.h
@@ -28,7 +28,7 @@ class TrombleLossComputer : private boost::base_from_member<boost::scoped_ptr<Tr
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
+ const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc
index 1b9c6665..9e381ac6 100644
--- a/decoder/viterbi.cc
+++ b/decoder/viterbi.cc
@@ -139,8 +139,8 @@ inline bool close_enough(double a,double b,double epsilon)
return diff<=epsilon*fabs(a) || diff<=epsilon*fabs(b);
}
-FeatureVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights,bool fatal_dotprod_disagreement) {
- FeatureVector r;
+SparseVector<double> ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights,bool fatal_dotprod_disagreement) {
+ SparseVector<double> r;
const prob_t p = Viterbi<FeatureVectorTraversal>(hg, &r);
if (weights) {
double logp=log(p);
diff --git a/decoder/viterbi.h b/decoder/viterbi.h
index 03e961a2..a8a0ea7f 100644
--- a/decoder/viterbi.h
+++ b/decoder/viterbi.h
@@ -14,10 +14,10 @@ std::string viterbi_stats(Hypergraph const& hg, std::string const& name="forest"
//TODO: make T a typename inside Traversal and WeightType a typename inside WeightFunction?
// Traversal must implement:
// typedef T Result;
-// void operator()(Hypergraph::Edge const& e,const vector<const Result*>& ants, Result* result) const;
+// void operator()(HG::Edge const& e,const vector<const Result*>& ants, Result* result) const;
// WeightFunction must implement:
// typedef prob_t Weight;
-// Weight operator()(Hypergraph::Edge const& e) const;
+// Weight operator()(HG::Edge const& e) const;
template<class Traversal,class WeightFunction>
typename WeightFunction::Weight Viterbi(const Hypergraph& hg,
typename Traversal::Result* result,
@@ -39,9 +39,9 @@ typename WeightFunction::Weight Viterbi(const Hypergraph& hg,
*cur_node_best_weight = WeightType(1);
continue;
}
- Hypergraph::Edge const* edge_best=0;
+ HG::Edge const* edge_best=0;
for (unsigned j = 0; j < num_in_edges; ++j) {
- const Hypergraph::Edge& edge = hg.edges_[cur_node.in_edges_[j]];
+ const HG::Edge& edge = hg.edges_[cur_node.in_edges_[j]];
WeightType score = weight(edge);
for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k)
score *= vit_weight[edge.tail_nodes_[k]];
@@ -51,7 +51,7 @@ typename WeightFunction::Weight Viterbi(const Hypergraph& hg,
}
}
assert(edge_best);
- Hypergraph::Edge const& edgeb=*edge_best;
+ HG::Edge const& edgeb=*edge_best;
std::vector<const T*> antsb(edgeb.tail_nodes_.size());
for (unsigned k = 0; k < edgeb.tail_nodes_.size(); ++k)
antsb[k] = &vit_result[edgeb.tail_nodes_[k]];
@@ -98,7 +98,7 @@ prob_t Viterbi(const Hypergraph& hg,
struct PathLengthTraversal {
typedef int Result;
- void operator()(const Hypergraph::Edge& edge,
+ void operator()(const HG::Edge& edge,
const std::vector<const int*>& ants,
int* result) const {
(void) edge;
@@ -109,7 +109,7 @@ struct PathLengthTraversal {
struct ESentenceTraversal {
typedef std::vector<WordID> Result;
- void operator()(const Hypergraph::Edge& edge,
+ void operator()(const HG::Edge& edge,
const std::vector<const Result*>& ants,
Result* result) const {
edge.rule_->ESubstitute(ants, result);
@@ -118,7 +118,7 @@ struct ESentenceTraversal {
struct ELengthTraversal {
typedef int Result;
- void operator()(const Hypergraph::Edge& edge,
+ void operator()(const HG::Edge& edge,
const std::vector<const int*>& ants,
int* result) const {
*result = edge.rule_->ELength() - edge.rule_->Arity();
@@ -128,7 +128,7 @@ struct ELengthTraversal {
struct FSentenceTraversal {
typedef std::vector<WordID> Result;
- void operator()(const Hypergraph::Edge& edge,
+ void operator()(const HG::Edge& edge,
const std::vector<const Result*>& ants,
Result* result) const {
edge.rule_->FSubstitute(ants, result);
@@ -142,7 +142,7 @@ struct ETreeTraversal {
const std::string space;
const std::string right;
typedef std::vector<WordID> Result;
- void operator()(const Hypergraph::Edge& edge,
+ void operator()(const HG::Edge& edge,
const std::vector<const Result*>& ants,
Result* result) const {
Result tmp;
@@ -162,7 +162,7 @@ struct FTreeTraversal {
const std::string space;
const std::string right;
typedef std::vector<WordID> Result;
- void operator()(const Hypergraph::Edge& edge,
+ void operator()(const HG::Edge& edge,
const std::vector<const Result*>& ants,
Result* result) const {
Result tmp;
@@ -177,8 +177,8 @@ struct FTreeTraversal {
};
struct ViterbiPathTraversal {
- typedef std::vector<Hypergraph::Edge const*> Result;
- void operator()(const Hypergraph::Edge& edge,
+ typedef std::vector<HG::Edge const*> Result;
+ void operator()(const HG::Edge& edge,
std::vector<Result const*> const& ants,
Result* result) const {
for (unsigned i = 0; i < ants.size(); ++i)
@@ -189,8 +189,8 @@ struct ViterbiPathTraversal {
};
struct FeatureVectorTraversal {
- typedef FeatureVector Result;
- void operator()(Hypergraph::Edge const& edge,
+ typedef SparseVector<double> Result;
+ void operator()(HG::Edge const& edge,
std::vector<Result const*> const& ants,
Result* result) const {
for (unsigned i = 0; i < ants.size(); ++i)
@@ -210,6 +210,6 @@ int ViterbiELength(const Hypergraph& hg);
int ViterbiPathLength(const Hypergraph& hg);
/// if weights supplied, assert viterbi prob = features.dot(*weights) (exception if fatal, cerr warn if not). return features (sum over all edges in viterbi derivation)
-FeatureVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights=0,bool fatal_dotprod_disagreement=false);
+SparseVector<double> ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights=0,bool fatal_dotprod_disagreement=false);
#endif