From 4d48a6d19521b24d9ac0987ce9a472d9ba574c4b Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 24 Jun 2012 16:42:56 +0200 Subject: RuleTargetBigramFeatures, parallelize.rb --- decoder/cdec_ff.cc | 3 ++- decoder/ff_rules.cc | 48 +++++++++++++++++++++++++++++++++++++++++++----- decoder/ff_rules.h | 19 +++++++++++++++++-- 3 files changed, 62 insertions(+), 8 deletions(-) (limited to 'decoder') diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index b516c386..d64bdada 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -47,8 +47,9 @@ void register_feature_functions() { ff_registry.Register("RuleIdentityFeatures", new FFFactory()); ff_registry.Register("SourceSyntaxFeatures", new FFFactory); ff_registry.Register("SourceSpanSizeFeatures", new FFFactory); - ff_registry.Register("RuleNgramFeatures", new FFFactory()); ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory()); + ff_registry.Register("RuleSourceBigramFeatures", new FFFactory()); + ff_registry.Register("RuleTargetBigramFeatures", new FFFactory()); ff_registry.Register("KLanguageModel", new KLanguageModelFactory()); ff_registry.Register("NonLatinCount", new FFFactory); ff_registry.Register("RuleShape", new FFFactory); diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc index bd4c4cc0..3d0e514a 100644 --- a/decoder/ff_rules.cc +++ b/decoder/ff_rules.cc @@ -66,15 +66,15 @@ void RuleIdentityFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, features->add_value(it->second, 1); } -RuleNgramFeatures::RuleNgramFeatures(const std::string& param) { +RuleSourceBigramFeatures::RuleSourceBigramFeatures(const std::string& param) { } -void RuleNgramFeatures::PrepareForInput(const SentenceMetadata& smeta) { +void RuleSourceBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) { // std::map > rule2_feats_.clear(); } -void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, +void RuleSourceBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, const vector& ant_contexts, SparseVector* features, @@ -92,14 +92,52 @@ void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, assert(w > 0); const string& cur = TD::Convert(w); ostringstream os; - os << "RB:" << prev << '_' << cur; + os << "RBS:" << prev << '_' << cur; const int fid = FD::Convert(Escape(os.str())); if (fid <= 0) return; f.add_value(fid, 1.0); prev = cur; } ostringstream os; - os << "RB:" << prev << '_' << ""; + os << "RBS:" << prev << '_' << ""; + f.set_value(FD::Convert(Escape(os.str())), 1.0); + } + (*features) += it->second; +} + +RuleTargetBigramFeatures::RuleTargetBigramFeatures(const std::string& param) { +} + +void RuleTargetBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) { + rule2_feats_.clear(); +} + +void RuleTargetBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const vector& ant_contexts, + SparseVector* features, + SparseVector* estimated_features, + void* context) const { + map >::iterator it = rule2_feats_.find(edge.rule_.get()); + if (it == rule2_feats_.end()) { + const TRule& rule = *edge.rule_; + it = rule2_feats_.insert(make_pair(&rule, SparseVector())).first; + SparseVector& f = it->second; + string prev = ""; + for (int i = 0; i < rule.e_.size(); ++i) { + WordID w = rule.e_[i]; + if (w < 0) w = -w; + if (w == 0) return; + const string& cur = TD::Convert(w); + ostringstream os; + os << "RBT:" << prev << '_' << cur; + const int fid = FD::Convert(Escape(os.str())); + if (fid <= 0) return; + f.add_value(fid, 1.0); + prev = cur; + } + ostringstream os; + os << "RBT:" << prev << '_' << ""; f.set_value(FD::Convert(Escape(os.str())), 1.0); } (*features) += it->second; diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h index 48d8bd05..08b168b0 100644 --- a/decoder/ff_rules.h +++ b/decoder/ff_rules.h @@ -22,9 +22,24 @@ class RuleIdentityFeatures : public FeatureFunction { mutable std::map rule2_fid_; }; -class RuleNgramFeatures : public FeatureFunction { +class RuleSourceBigramFeatures : public FeatureFunction { public: - RuleNgramFeatures(const std::string& param); + RuleSourceBigramFeatures(const std::string& param); + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector& ant_contexts, + SparseVector* features, + SparseVector* estimated_features, + void* context) const; + virtual void PrepareForInput(const SentenceMetadata& smeta); + private: + mutable std::map > rule2_feats_; +}; + +class RuleTargetBigramFeatures : public FeatureFunction { + public: + RuleTargetBigramFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, -- cgit v1.2.3 From 0c54220adfaada6ad1e2d54f31a9895da35127fd Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Mon, 5 Nov 2012 18:57:39 +0100 Subject: build fix, default learning rate --- decoder/ff_rules.h | 1 + dtrain/dtrain.cc | 4 +- dtrain/dtrain.h | 3 +- dtrain/test/example/dtrain.ini | 8 +-- dtrain/test/example/expected-output | 128 ++++++++++++++---------------------- 5 files changed, 59 insertions(+), 85 deletions(-) (limited to 'decoder') diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h index dc9a15d5..b100ec34 100644 --- a/decoder/ff_rules.h +++ b/decoder/ff_rules.h @@ -5,6 +5,7 @@ #include #include "trule.h" #include "ff.h" +#include "hg.h" #include "array2d.h" #include "wordid.h" diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc index b7a4bb6f..18286668 100644 --- a/dtrain/dtrain.cc +++ b/dtrain/dtrain.cc @@ -24,13 +24,13 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) ("pair_threshold", po::value()->default_value(0.), "bleu [0,1] threshold to filter pairs") ("N", po::value()->default_value(4), "N for Ngrams (BLEU)") ("scorer", po::value()->default_value("stupid_bleu"), "scoring: bleu, stupid_, smooth_, approx_, lc_") - ("learning_rate", po::value()->default_value(0.0001), "learning rate") + ("learning_rate", po::value()->default_value(1.0), "learning rate") ("gamma", po::value()->default_value(0.), "gamma for SVM (0 for perceptron)") ("select_weights", po::value()->default_value("last"), "output best, last, avg weights ('VOID' to throw away)") ("rescale", po::value()->zero_tokens(), "rescale weight vector after each input") ("l1_reg", po::value()->default_value("none"), "apply l1 regularization as in 'Tsuroka et al' (2010)") ("l1_reg_strength", po::value(), "l1 regularization strength") - ("fselect", po::value()->default_value(-1), "select top x percent (or by threshold) of features after each epoch NOT IMPL") // TODO + ("fselect", po::value()->default_value(-1), "select top x percent (or by threshold) of features after each epoch NOT IMPLEMENTED") // TODO ("approx_bleu_d", po::value()->default_value(0.9), "discount for approx. BLEU") ("scale_bleu_diff", po::value()->zero_tokens(), "learning rate <- bleu diff of a misranked pair") ("loss_margin", po::value()->default_value(0.), "update if no error in pref pair but model scores this near") diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h index 7e084a79..4b6f415c 100644 --- a/dtrain/dtrain.h +++ b/dtrain/dtrain.h @@ -3,7 +3,7 @@ #undef DTRAIN_FASTER_PERCEPTRON // only look at misranked pairs // DO NOT USE WITH SVM! -#define DTRAIN_LOCAL +//#define DTRAIN_LOCAL #define DTRAIN_DOTS 10 // after how many inputs to display a '.' #define DTRAIN_GRAMMAR_DELIM "########EOS########" #define DTRAIN_SCALE 100000 @@ -22,7 +22,6 @@ #include "filelib.h" - using namespace std; using namespace dtrain; namespace po = boost::program_options; diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini index 8338b2d3..72d50ca1 100644 --- a/dtrain/test/example/dtrain.ini +++ b/dtrain/test/example/dtrain.ini @@ -1,18 +1,18 @@ input=test/example/nc-wmt11.1k.gz # use '-' for STDIN output=- # a weights file (add .gz for gzip compression) or STDOUT '-' -select_weights=VOID # don't output weights +select_weights=VOID # don't output weights decoder_config=test/example/cdec.ini # config for cdec # weights for these features will be printed on each iteration print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough tmp=/tmp -stop_after=100 # stop epoch after 10 inputs +stop_after=10 # stop epoch after 10 inputs # interesting stuff -epochs=3 # run over input 3 times +epochs=2 # run over input 2 times k=100 # use 100best lists N=4 # optimize (approx) BLEU4 scorer=stupid_bleu # use 'stupid' BLEU+1 -learning_rate=0.0001 # learning rate +learning_rate=1.0 # learning rate, don't care if gamma=0 (perceptron) gamma=0 # use SVM reg sample_from=kbest # use kbest lists (as opposed to forest) filter=uniq # only unique entries in kbest (surface form) diff --git a/dtrain/test/example/expected-output b/dtrain/test/example/expected-output index 43798484..05326763 100644 --- a/dtrain/test/example/expected-output +++ b/dtrain/test/example/expected-output @@ -4,17 +4,17 @@ Reading test/example/nc-wmt11.en.srilm.gz ----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 **************************************************************************************************** Example feature: Shape_S00000_T00000 -Seeding random number sequence to 2108658507 +Seeding random number sequence to 2912000813 dtrain Parameters: k 100 N 4 - T 3 + T 2 scorer 'stupid_bleu' sample from 'kbest' filter 'uniq' - learning rate 0.0001 + learning rate 1 gamma 0 loss margin 0 pairs 'XYX' @@ -26,90 +26,64 @@ Parameters: cdec cfg 'test/example/cdec.ini' input 'test/example/nc-wmt11.1k.gz' output '-' - stop_after 100 + stop_after 10 (a dot represents 10 inputs) -Iteration #1 of 3. - .......... 100 -Stopping after 100 input sentences. +Iteration #1 of 2. + . 10 +Stopping after 10 input sentences. WEIGHTS - Glue = -0.236 - WordPenalty = +0.056111 - LanguageModel = +0.71011 - LanguageModel_OOV = -0.489 - PhraseModel_0 = -0.21332 - PhraseModel_1 = -0.13038 - PhraseModel_2 = +0.085148 - PhraseModel_3 = -0.16982 - PhraseModel_4 = -0.026332 - PhraseModel_5 = +0.2133 - PhraseModel_6 = +0.1002 - PassThrough = -0.5541 + Glue = -637 + WordPenalty = +1064 + LanguageModel = +1175.3 + LanguageModel_OOV = -1437 + PhraseModel_0 = +1935.6 + PhraseModel_1 = +2499.3 + PhraseModel_2 = +964.96 + PhraseModel_3 = +1410.8 + PhraseModel_4 = -5977.9 + PhraseModel_5 = +522 + PhraseModel_6 = +1089 + PassThrough = -1308 --- - 1best avg score: 0.16928 (+0.16928) - 1best avg model score: 2.4454 (+2.4454) - avg # pairs: 1616.2 - avg # rank err: 769.6 + 1best avg score: 0.16963 (+0.16963) + 1best avg model score: 64485 (+64485) + avg # pairs: 1494.4 + avg # rank err: 702.6 avg # margin viol: 0 - non0 feature count: 4068 - avg list sz: 96.65 - avg f count: 118.01 -(time 1.3 min, 0.79 s/S) + non0 feature count: 528 + avg list sz: 85.7 + avg f count: 102.75 +(time 0.083 min, 0.5 s/S) -Iteration #2 of 3. - .......... 100 +Iteration #2 of 2. + . 10 WEIGHTS - Glue = -0.1721 - WordPenalty = -0.14132 - LanguageModel = +0.56023 - LanguageModel_OOV = -0.6786 - PhraseModel_0 = +0.14155 - PhraseModel_1 = +0.34218 - PhraseModel_2 = +0.22954 - PhraseModel_3 = -0.24762 - PhraseModel_4 = -0.25848 - PhraseModel_5 = -0.0453 - PhraseModel_6 = -0.0264 - PassThrough = -0.7436 + Glue = -1196 + WordPenalty = +809.52 + LanguageModel = +3112.1 + LanguageModel_OOV = -1464 + PhraseModel_0 = +3895.5 + PhraseModel_1 = +4683.4 + PhraseModel_2 = +1092.8 + PhraseModel_3 = +1079.6 + PhraseModel_4 = -6827.7 + PhraseModel_5 = -888 + PhraseModel_6 = +142 + PassThrough = -1335 --- - 1best avg score: 0.19585 (+0.02657) - 1best avg model score: -16.311 (-18.757) - avg # pairs: 1475.8 - avg # rank err: 668.48 + 1best avg score: 0.277 (+0.10736) + 1best avg model score: -3110.5 (-67595) + avg # pairs: 1144.2 + avg # rank err: 529.1 avg # margin viol: 0 - non0 feature count: 6300 - avg list sz: 96.08 - avg f count: 114.92 -(time 1.3 min, 0.76 s/S) - -Iteration #3 of 3. - .......... 100 -WEIGHTS - Glue = -0.1577 - WordPenalty = -0.086902 - LanguageModel = +0.30136 - LanguageModel_OOV = -0.7848 - PhraseModel_0 = +0.11743 - PhraseModel_1 = +0.11142 - PhraseModel_2 = -0.0053865 - PhraseModel_3 = -0.18731 - PhraseModel_4 = -0.67144 - PhraseModel_5 = +0.1236 - PhraseModel_6 = -0.2665 - PassThrough = -0.8498 - --- - 1best avg score: 0.20034 (+0.0044978) - 1best avg model score: -7.2775 (+9.0336) - avg # pairs: 1578.6 - avg # rank err: 705.77 - avg # margin viol: 0 - non0 feature count: 7313 - avg list sz: 96.84 - avg f count: 124.48 -(time 1.5 min, 0.9 s/S) + non0 feature count: 859 + avg list sz: 74.9 + avg f count: 112.84 +(time 0.067 min, 0.4 s/S) Writing weights file to '-' ... done --- -Best iteration: 3 [SCORE 'stupid_bleu'=0.20034]. -This took 4.0833 min. +Best iteration: 2 [SCORE 'stupid_bleu'=0.277]. +This took 0.15 min. -- cgit v1.2.3