From 6b6a2d966a0d341fe5abee8b332a9d89f6c95bc4 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Mon, 13 Jan 2014 17:15:24 +0100 Subject: Felix' https://github.com/felleh lexical word alignment features --- decoder/Makefile.am | 1 + decoder/cdec_ff.cc | 2 + decoder/ff_lexical.h | 128 ++++++++++++++++++++++ training/dtrain/examples/standard/cdec.ini | 3 +- training/dtrain/examples/standard/expected-output | 115 +++++++++---------- 5 files changed, 191 insertions(+), 58 deletions(-) create mode 100644 decoder/ff_lexical.h diff --git a/decoder/Makefile.am b/decoder/Makefile.am index b735756d..c0371081 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -48,6 +48,7 @@ libcdec_a_SOURCES = \ ff_external.h \ ff_factory.h \ ff_klm.h \ + ff_lexical.h \ ff_lm.h \ ff_ngrams.h \ ff_parse_match.h \ diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index b2541722..80b42d22 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -24,6 +24,7 @@ #include "ff_charset.h" #include "ff_wordset.h" #include "ff_external.h" +#include "ff_lexical.h" void register_feature_functions() { @@ -39,6 +40,7 @@ void register_feature_functions() { RegisterFF(); RegisterFF(); RegisterFF(); + RegisterFF(); //TODO: use for all features the new Register which requires static FF::usage(false,false) give name ff_registry.Register("SpanFeatures", new FFFactory()); diff --git a/decoder/ff_lexical.h b/decoder/ff_lexical.h new file mode 100644 index 00000000..21c85b27 --- /dev/null +++ b/decoder/ff_lexical.h @@ -0,0 +1,128 @@ +#ifndef FF_LEXICAL_H_ +#define FF_LEXICAL_H_ + +#include +#include +#include "trule.h" +#include "ff.h" +#include "hg.h" +#include "array2d.h" +#include "wordid.h" +#include +#include +#include + +#include "filelib.h" +#include "stringlib.h" +#include "sentence_metadata.h" +#include "lattice.h" +#include "fdict.h" +#include "verbose.h" +#include "tdict.h" +#include "hg.h" + +using namespace std; + +namespace { + string Escape(const string& x) { + string y = x; + for (int i = 0; i < y.size(); ++i) { + if (y[i] == '=') y[i]='_'; + if (y[i] == ';') y[i]='_'; + } + return y; + } +} + +class LexicalFeatures : public FeatureFunction { +public: + LexicalFeatures(const std::string& param) { + if (param.empty()) { + cerr << "LexicalFeatures: using T,D,I\n"; + T_ = true; I_ = true; D_ = true; + } else { + const vector argv = SplitOnWhitespace(param); + assert(argv.size() == 3); + T_ = (bool) atoi(argv[0].c_str()); + I_ = (bool) atoi(argv[1].c_str()); + D_ = (bool) atoi(argv[2].c_str()); + cerr << "T=" << T_ << " I=" << I_ << " D=" << D_ << endl; + } + }; + static std::string usage(bool p,bool d) { + return usage_helper("LexicalFeatures","[0/1 0/1 0/1]","Sparse lexical word translation indicator features. If arguments are supplied, specify like this: translations insertions deletions",p,d); + } +protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const HG::Edge& edge, + const std::vector& ant_contexts, + SparseVector* features, + SparseVector* estimated_features, + void* context) const; + virtual void PrepareForInput(const SentenceMetadata& smeta); +private: + mutable std::map > rule2feats_; + bool T_; + bool I_; + bool D_; +}; + +void LexicalFeatures::PrepareForInput(const SentenceMetadata& smeta) { + rule2feats_.clear(); // std::map > +} + +void LexicalFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const HG::Edge& edge, + const std::vector& ant_contexts, + SparseVector* features, + SparseVector* estimated_features, + void* context) const { + + map >::iterator it = rule2feats_.find(edge.rule_.get()); + if (it == rule2feats_.end()) { + const TRule& rule = *edge.rule_; + it = rule2feats_.insert(make_pair(&rule, SparseVector())).first; + SparseVector& f = it->second; + std::vector sf(edge.rule_->FLength(),false); // stores if source tokens are visited by alignment points + std::vector se(edge.rule_->ELength(),false); // stores if target tokens are visited by alignment points + int fid = 0; + // translations + for (unsigned i=0;i 0) {// if not visited and is terminal + ostringstream os; + os << "LD:" << Escape(TD::Convert(rule.f_[i])); + fid = FD::Convert(os.str()); + if (fid <= 0) continue; + if (D_) + f.add_value(fid, 1.0); + } + } + // word insertions + for (unsigned i=0;i= 1) {// if not visited and is terminal + ostringstream os; + os << "LI:" << Escape(TD::Convert(rule.e_[i])); + fid = FD::Convert(os.str()); + if (fid <= 0) continue; + if (I_) + f.add_value(fid, 1.0); + } + } + } + (*features) += it->second; +} + + +#endif diff --git a/training/dtrain/examples/standard/cdec.ini b/training/dtrain/examples/standard/cdec.ini index 6cba9e1e..044ae2f5 100644 --- a/training/dtrain/examples/standard/cdec.ini +++ b/training/dtrain/examples/standard/cdec.ini @@ -21,7 +21,8 @@ feature_function=RuleIdentityFeatures feature_function=RuleSourceBigramFeatures feature_function=RuleTargetBigramFeatures feature_function=RuleShape -feature_function=RuleWordAlignmentFeatures +#feature_function=RuleWordAlignmentFeatures +feature_function=LexicalFeatures 1 1 1 #feature_function=SourceSpanSizeFeatures #feature_function=SourceWordPenalty #feature_function=SpanFeatures diff --git a/training/dtrain/examples/standard/expected-output b/training/dtrain/examples/standard/expected-output index fa831221..2460cfbb 100644 --- a/training/dtrain/examples/standard/expected-output +++ b/training/dtrain/examples/standard/expected-output @@ -4,7 +4,8 @@ Reading ./nc-wmt11.en.srilm.gz ----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 **************************************************************************************************** Example feature: Shape_S00000_T00000 -Seeding random number sequence to 4138446869 +T=1 I=1 D=1 +Seeding random number sequence to 2327685089 dtrain Parameters: @@ -36,87 +37,87 @@ Iteration #1 of 3. . 10 Stopping after 10 input sentences. WEIGHTS - Glue = -80.3 - WordPenalty = -51.247 - LanguageModel = +282.46 - LanguageModel_OOV = -85.8 - PhraseModel_0 = -100.06 - PhraseModel_1 = -98.692 - PhraseModel_2 = -9.4958 - PhraseModel_3 = +18.535 - PhraseModel_4 = +62.35 - PhraseModel_5 = +7 - PhraseModel_6 = +31.4 - PassThrough = -126.5 + Glue = +6.9 + WordPenalty = -46.426 + LanguageModel = +535.12 + LanguageModel_OOV = -123.5 + PhraseModel_0 = -160.73 + PhraseModel_1 = -350.13 + PhraseModel_2 = -187.81 + PhraseModel_3 = +172.04 + PhraseModel_4 = +0.90108 + PhraseModel_5 = +21.6 + PhraseModel_6 = +67.2 + PassThrough = -149.7 --- - 1best avg score: 0.25631 (+0.25631) - 1best avg model score: -4843.6 (-4843.6) - avg # pairs: 744.4 + 1best avg score: 0.23327 (+0.23327) + 1best avg model score: -9084.9 (-9084.9) + avg # pairs: 780.7 avg # rank err: 0 (meaningless) avg # margin viol: 0 k-best loss imp: 100% - non0 feature count: 1274 + non0 feature count: 1389 avg list sz: 91.3 - avg f count: 143.72 -(time 0.4 min, 2.4 s/S) + avg f count: 146.2 +(time 0.37 min, 2.2 s/S) Iteration #2 of 3. . 10 WEIGHTS - Glue = -117.4 - WordPenalty = -99.584 - LanguageModel = +395.05 - LanguageModel_OOV = -136.8 - PhraseModel_0 = +40.614 - PhraseModel_1 = -123.29 - PhraseModel_2 = -152 - PhraseModel_3 = -161.13 - PhraseModel_4 = -76.379 - PhraseModel_5 = +39.1 - PhraseModel_6 = +137.7 - PassThrough = -162.1 + Glue = -43 + WordPenalty = -22.019 + LanguageModel = +591.53 + LanguageModel_OOV = -252.1 + PhraseModel_0 = -120.21 + PhraseModel_1 = -43.589 + PhraseModel_2 = +73.53 + PhraseModel_3 = +113.7 + PhraseModel_4 = -223.81 + PhraseModel_5 = +64 + PhraseModel_6 = +54.8 + PassThrough = -331.1 --- - 1best avg score: 0.26751 (+0.011198) - 1best avg model score: -10061 (-5216.9) - avg # pairs: 639.1 + 1best avg score: 0.29568 (+0.062413) + 1best avg model score: -15879 (-6794.1) + avg # pairs: 566.1 avg # rank err: 0 (meaningless) avg # margin viol: 0 k-best loss imp: 100% - non0 feature count: 1845 + non0 feature count: 1931 avg list sz: 91.3 - avg f count: 139.88 -(time 0.35 min, 2.1 s/S) + avg f count: 139.89 +(time 0.33 min, 2 s/S) Iteration #3 of 3. . 10 WEIGHTS - Glue = -101.1 - WordPenalty = -139.97 - LanguageModel = +327.98 - LanguageModel_OOV = -234.7 - PhraseModel_0 = -144.49 - PhraseModel_1 = -263.88 - PhraseModel_2 = -149.25 - PhraseModel_3 = -38.805 - PhraseModel_4 = +50.575 - PhraseModel_5 = -52.4 - PhraseModel_6 = +41.6 - PassThrough = -230.2 + Glue = -44.3 + WordPenalty = -131.85 + LanguageModel = +230.91 + LanguageModel_OOV = -285.4 + PhraseModel_0 = -194.27 + PhraseModel_1 = -294.83 + PhraseModel_2 = -92.043 + PhraseModel_3 = -140.24 + PhraseModel_4 = +85.613 + PhraseModel_5 = +238.1 + PhraseModel_6 = +158.7 + PassThrough = -359.6 --- - 1best avg score: 0.36222 (+0.094717) - 1best avg model score: -17416 (-7355.5) - avg # pairs: 661.2 + 1best avg score: 0.37375 (+0.078067) + 1best avg model score: -14519 (+1359.7) + avg # pairs: 545.4 avg # rank err: 0 (meaningless) avg # margin viol: 0 k-best loss imp: 100% - non0 feature count: 2163 + non0 feature count: 2218 avg list sz: 91.3 - avg f count: 132.53 -(time 0.33 min, 2 s/S) + avg f count: 137.77 +(time 0.35 min, 2.1 s/S) Writing weights file to '-' ... done --- -Best iteration: 3 [SCORE 'fixed_stupid_bleu'=0.36222]. -This took 1.0833 min. +Best iteration: 3 [SCORE 'fixed_stupid_bleu'=0.37375]. +This took 1.05 min. -- cgit v1.2.3 From a1f3f7fb262b6efefb106a7bddaf81d2a3a6df93 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Mon, 13 Jan 2014 18:30:58 +0100 Subject: remove duplicate word alignment features --- decoder/cdec_ff.cc | 1 - decoder/ff_rules.cc | 22 ---------------------- decoder/ff_rules.h | 13 ------------- training/dtrain/examples/standard/cdec.ini | 1 - 4 files changed, 37 deletions(-) diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index 80b42d22..8689a615 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -47,7 +47,6 @@ void register_feature_functions() { ff_registry.Register("NgramFeatures", new FFFactory()); ff_registry.Register("RuleContextFeatures", new FFFactory()); ff_registry.Register("RuleIdentityFeatures", new FFFactory()); - ff_registry.Register("RuleWordAlignmentFeatures", new FFFactory()); ff_registry.Register("ParseMatchFeatures", new FFFactory); ff_registry.Register("SoftSyntaxFeatures", new FFFactory); ff_registry.Register("SoftSyntaxFeaturesMindist", new FFFactory); diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc index 7bccf084..9533caed 100644 --- a/decoder/ff_rules.cc +++ b/decoder/ff_rules.cc @@ -69,28 +69,6 @@ void RuleIdentityFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, features->add_value(it->second, 1); } -RuleWordAlignmentFeatures::RuleWordAlignmentFeatures(const std::string& param) { -} - -void RuleWordAlignmentFeatures::PrepareForInput(const SentenceMetadata& smeta) { -} - -void RuleWordAlignmentFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const vector& ant_contexts, - SparseVector* features, - SparseVector* estimated_features, - void* context) const { - const TRule& rule = *edge.rule_; - ostringstream os; - vector als = rule.als(); - std::vector::const_iterator xx = als.begin(); - for (; xx != als.end(); ++xx) { - os << "WA:" << TD::Convert(rule.f_[xx->s_]) << ":" << TD::Convert(rule.e_[xx->t_]); - } - features->add_value(FD::Convert(Escape(os.str())), 1); -} - RuleSourceBigramFeatures::RuleSourceBigramFeatures(const std::string& param) { } diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h index 324d7a39..f210dc65 100644 --- a/decoder/ff_rules.h +++ b/decoder/ff_rules.h @@ -24,19 +24,6 @@ class RuleIdentityFeatures : public FeatureFunction { mutable std::map rule2_fid_; }; -class RuleWordAlignmentFeatures : public FeatureFunction { - public: - RuleWordAlignmentFeatures(const std::string& param); - protected: - virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const HG::Edge& edge, - const std::vector& ant_contexts, - SparseVector* features, - SparseVector* estimated_features, - void* context) const; - virtual void PrepareForInput(const SentenceMetadata& smeta); -}; - class RuleSourceBigramFeatures : public FeatureFunction { public: RuleSourceBigramFeatures(const std::string& param); diff --git a/training/dtrain/examples/standard/cdec.ini b/training/dtrain/examples/standard/cdec.ini index 044ae2f5..3330dd71 100644 --- a/training/dtrain/examples/standard/cdec.ini +++ b/training/dtrain/examples/standard/cdec.ini @@ -21,7 +21,6 @@ feature_function=RuleIdentityFeatures feature_function=RuleSourceBigramFeatures feature_function=RuleTargetBigramFeatures feature_function=RuleShape -#feature_function=RuleWordAlignmentFeatures feature_function=LexicalFeatures 1 1 1 #feature_function=SourceSpanSizeFeatures #feature_function=SourceWordPenalty -- cgit v1.2.3 From 1b0d40959f529b67db3b9d10dbf93101e0c65c7c Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Mon, 27 Jan 2014 10:40:14 +0100 Subject: verbose parameter for mira (thanks Felix!) --- training/mira/kbest_cut_mira.cc | 9 ++++++--- training/mira/mira.py | 4 ++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/training/mira/kbest_cut_mira.cc b/training/mira/kbest_cut_mira.cc index 990609d7..9415909e 100644 --- a/training/mira/kbest_cut_mira.cc +++ b/training/mira/kbest_cut_mira.cc @@ -95,7 +95,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { ("stream,t", "Stream mode (used for realtime)") ("weights_output,O",po::value(),"Directory to write weights to") ("output_dir,D",po::value(),"Directory to place output in") - ("decoder_config,c",po::value(),"Decoder configuration file"); + ("decoder_config,c",po::value(),"Decoder configuration file") + ("verbose,v",po::value()->zero_tokens(),"verbose stderr output"); po::options_description clo("Command line options"); clo.add_options() ("config", po::value(), "Configuration file") @@ -627,6 +628,7 @@ int main(int argc, char** argv) { vector corpus; + const bool VERBOSE = conf.count("verbose"); const string metric_name = conf["mt_metric"].as(); optimizer = conf["optimizer"].as(); fear_select = conf["fear"].as(); @@ -790,7 +792,8 @@ int main(int argc, char** argv) { double margin = cur_bad.features.dot(dense_weights) - cur_good.features.dot(dense_weights); double mt_loss = (cur_good.mt_metric - cur_bad.mt_metric); const double loss = margin + mt_loss; - cerr << "LOSS: " << loss << " Margin:" << margin << " BLEUL:" << mt_loss << " " << cur_bad.features.dot(dense_weights) << " " << cur_good.features.dot(dense_weights) < 0.0 || !checkloss) { SparseVector diff = cur_good.features; diff -= cur_bad.features; @@ -928,7 +931,7 @@ int main(int argc, char** argv) { lambdas += (cur_pair[1]->features) * step_size; lambdas -= (cur_pair[0]->features) * step_size; - cerr << " Lambdas " << lambdas << endl; + if (VERBOSE) cerr << " Lambdas " << lambdas << endl; //reload weights based on update dense_weights.clear(); diff --git a/training/mira/mira.py b/training/mira/mira.py index d5a1d9f8..1555cbb4 100755 --- a/training/mira/mira.py +++ b/training/mira/mira.py @@ -143,6 +143,8 @@ def main(): parser.add_argument('--pass-suffix', help='multipass decoding iteration. see documentation ' 'at www.cdec-decoder.org for more information') + parser.add_argument('-v', '--verbose', + help='more verbose mira optimizers') args = parser.parse_args() args.metric = args.metric.upper() @@ -352,6 +354,8 @@ def optimize(args, script_dir, dev_size): decoder_cmd += ' -a' if not args.no_pseudo: decoder_cmd += ' -e' + if args.verbose: + decoder_cmd += ' -v' #always use fork parallel_cmd = '{0} --use-fork -e {1} -j {2} --'.format( -- cgit v1.2.3 From d7df7aa6ce149f27267082e367ee790524550540 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 18 Jul 2014 10:57:56 +0200 Subject: fix --- training/dtrain/dtrain.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc index b01cf421..ccb50af2 100644 --- a/training/dtrain/dtrain.cc +++ b/training/dtrain/dtrain.cc @@ -438,7 +438,7 @@ main(int argc, char** argv) score_t model_diff = it->first.model - it->second.model; score_t loss = max(0.0, -1.0 * model_diff); - if (check && ki == 1) cout << losses[pair_idx] - loss << endl; + if (check && ki==repeat-1) cout << losses[pair_idx] - loss << endl; pair_idx++; if (repeat > 1) { @@ -455,7 +455,7 @@ main(int argc, char** argv) margin = fabs(model_diff); if (!rank_error && margin < loss_margin) margin_violations++; } - if (rank_error && ki==1) rank_errors++; + if (rank_error && ki==0) rank_errors++; if (scale_bleu_diff) eta = it->first.score - it->second.score; if (rank_error || margin < loss_margin) { SparseVector diff_vec = it->first.f - it->second.f; -- cgit v1.2.3 From f5bf8c9da47c72c0059cf8bfd6cbe07360893b92 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 22 Jul 2014 11:24:34 +0200 Subject: fix parallelize.rb --- training/dtrain/parallelize.rb | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index 60ca9422..82600009 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -26,7 +26,6 @@ opts = Trollop::options do end usage if not opts[:config]&&opts[:shards]&&opts[:input]&&opts[:references] - dtrain_dir = File.expand_path File.dirname(__FILE__) if not opts[:dtrain_binary] dtrain_bin = "#{dtrain_dir}/dtrain" @@ -56,6 +55,7 @@ refs = opts[:references] use_qsub = opts[:qsub] shards_at_once = opts[:processes_at_once] first_input_weights = opts[:first_input_weights] +opts[:extra_qsub] = "-l #{opts[:extra_qsub]}" if opts[:extra_qsub]!="" `mkdir work` @@ -64,8 +64,9 @@ def make_shards(input, refs, num_shards, epoch, rand) index = (0..lc-1).to_a index.reverse! index.shuffle! if rand - shard_sz = lc / num_shards - leftover = lc % num_shards + shard_sz = (lc / num_shards.to_f).round 0 + leftover = lc - (num_shards*shard_sz) + leftover = 0 if leftover < 0 in_f = File.new input, 'r' in_lines = in_f.readlines refs_f = File.new refs, 'r' @@ -74,7 +75,10 @@ def make_shards(input, refs, num_shards, epoch, rand) shard_refs_files = [] in_fns = [] refs_fns = [] + new_num_shards = 0 0.upto(num_shards-1) { |shard| + break if index.size==0 + new_num_shards += 1 in_fn = "work/shard.#{shard}.#{epoch}.in" shard_in = File.new in_fn, 'w+' in_fns << in_fn @@ -98,7 +102,7 @@ def make_shards(input, refs, num_shards, epoch, rand) (shard_in_files + shard_refs_files).each do |f| f.close end in_f.close refs_f.close - return [in_fns, refs_fns] + return in_fns, refs_fns, new_num_shards end input_files = [] @@ -111,7 +115,7 @@ if predefined_shards end num_shards = input_files.size else - input_files, refs_files = make_shards input, refs, num_shards, 0, rand + input_files, refs_files, num_shards = make_shards input, refs, num_shards, 0, rand end 0.upto(epochs-1) { |epoch| @@ -158,7 +162,7 @@ end `#{cat} work/weights.*.#{epoch} > work/weights_cat` `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat > work/weights.#{epoch}` if rand and reshard and epoch+1!=epochs - input_files, refs_files = make_shards input, refs, num_shards, epoch+1, rand + input_files, refs_files, num_shards = make_shards input, refs, num_shards, epoch+1, rand end } -- cgit v1.2.3 From ba5ebf19c936ba64032120bf3c4c0df1f3e1481e Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 22 Jul 2014 11:25:42 +0200 Subject: Heidelberg LocalConfig.pm --- environment/LocalConfig.pm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm index 4fa0ab74..29a1cbea 100644 --- a/environment/LocalConfig.pm +++ b/environment/LocalConfig.pm @@ -69,10 +69,10 @@ my $CCONFIG = { # 'DefaultJobs' => 12, # }, 'cluster.cl.uni-heidelberg.de' => { - 'HOST_REGEXP' => qr/node25/, + 'HOST_REGEXP' => qr/(node\d\d\.cluster\.lan|cluster\.cl\.uni-heidelberg\.de)/i, 'JobControl' => 'qsub', - 'QSubMemFlag' => '-l h_vmem=', - 'DefaultJobs' => 13, + 'QSubMemFlag' => '-l mem_free=', + 'DefaultJobs' => 14, }, 'LOCAL' => { # LOCAL must be last in the list!!! 'HOST_REGEXP' => qr//, -- cgit v1.2.3 From 7201ef7d46e39f42923d89688a1cb82c757abe61 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 22 Jul 2014 11:40:59 +0200 Subject: mira qsub --- training/mira/mira.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/training/mira/mira.py b/training/mira/mira.py index 691a62a6..ec9c2d64 100755 --- a/training/mira/mira.py +++ b/training/mira/mira.py @@ -143,6 +143,10 @@ def main(): parser.add_argument('--pass-suffix', help='multipass decoding iteration. see documentation ' 'at www.cdec-decoder.org for more information') + parser.add_argument('--qsub', + help='use qsub', action='store_true') + parser.add_argument('--pmem', + help='memory for qsub', type=str, default='5G') parser.add_argument('-v', '--verbose', help='more verbose mira optimizers') args = parser.parse_args() @@ -317,6 +321,8 @@ def split_devset(dev, outdir): def optimize(args, script_dir, dev_size): parallelize = script_dir+'/../utils/parallelize.pl' + if args.qsub: + parallelize += " -p %s"%args.pmem decoder = script_dir+'/kbest_cut_mira' (source, refs) = split_devset(args.devset, args.output_dir) port = random.randint(15000,50000) @@ -358,9 +364,12 @@ def optimize(args, script_dir, dev_size): if args.verbose: decoder_cmd += ' -v' - #always use fork - parallel_cmd = '{0} --use-fork -e {1} -j {2} --'.format( - parallelize, logdir, args.jobs) + if args.qsub: + parallel_cmd = '{0} -e {1} -j {2} --'.format( + parallelize, logdir, args.jobs) + else: + parallel_cmd = '{0} --use-fork -e {1} -j {2} --'.format( + parallelize, logdir, args.jobs) cmd = parallel_cmd + ' ' + decoder_cmd logging.info('OPTIMIZATION COMMAND: {}'.format(cmd)) -- cgit v1.2.3 From a4b0e95574df5e7895ab19975412a5d883008b32 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 22 Jul 2014 11:41:09 +0200 Subject: pro fix --- training/pro/pro.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/training/pro/pro.pl b/training/pro/pro.pl index 3b30c379..a059477d 100755 --- a/training/pro/pro.pl +++ b/training/pro/pro.pl @@ -79,6 +79,7 @@ if (GetOptions( "help" => \$help, "reg=f" => \$reg, "reg-previous=f" => \$reg_previous, + "pmem=s" => \$pmem, "output-dir=s" => \$dir, ) == 0 || @ARGV!=0 || $help) { print_help(); -- cgit v1.2.3 From b26d2289e81266ed4b8ba069bfbeab77ede2923e Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 22 Jul 2014 11:46:41 +0200 Subject: ignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index a3f979db..72f4997c 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ decoder/logval_test decoder/parser_test decoder/rule_lexer.cc decoder/small_vector_test +decoder/t2s_test decoder/trule_test decoder/weights_test depcomp @@ -74,6 +75,7 @@ extractor/*_test extractor/compile extractor/extract extractor/run_extractor +extractor/sacompile gi/clda/src/clda gi/markov_al/ml gi/pf/align-lexonly @@ -209,6 +211,7 @@ training/dpmert/sentserver training/dtrain/dtrain training/latent_svm/latent_svm training/minrisk/minrisk_optimize +training/mira/ada_opt_sm training/mira/kbest_mira training/mira/kbest_cut_mira training/pro/mr_pro_map @@ -218,4 +221,6 @@ training/utils/lbfgs_test training/utils/optimize_test training/utils/sentclient training/utils/sentserver +utils/stringlib_test +word-aligner/binderiv word-aligner/fast_align -- cgit v1.2.3 From 7cdaf68c0189b9ad0e4ba4482e4c75d7643ba982 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 29 Jul 2014 15:02:22 +0200 Subject: make PassThrough1..N optional --- decoder/decoder.cc | 1 + decoder/scfg_translator.cc | 25 ++++++++++++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 6783cad0..2c044192 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -366,6 +366,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream ("beam_prune3", po::value(), "Optional pass 3") ("add_pass_through_rules,P","Add rules to translate OOV words as themselves") + ("add_extra_pass_through_features,Q",po::value()->default_value(6), "Add PassThrough{1..N} features, capped at N.") ("k_best,k",po::value(),"Extract the k best derivations") ("unique_k_best,r", "Unique k-best translation list") ("aligner,a", "Run as a word/phrase aligner (src & ref required)") diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index 88f62769..57ad85e2 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -28,7 +28,7 @@ struct GlueGrammar : public TextGrammar { }; struct PassThroughGrammar : public TextGrammar { - PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0); + PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0, const unsigned int num_pt_features=0); virtual bool HasRuleForSpan(int i, int j, int distance) const; }; @@ -56,7 +56,7 @@ bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const { return (i == 0); } -PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) { +PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level, const unsigned num_pt_features) { unordered_set ss; for (int i = 0; i < input.size(); ++i) { const vector& alts = input[i]; @@ -64,12 +64,17 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const int j = alts[k].dist2next + i; const string& src = TD::Convert(alts[k].label); if (ss.count(alts[k].label) == 0) { - int length = static_cast(log(UTF8StringLen(src)) / log(1.6)) + 1; - if (length > 6) length = 6; - string len_feat = "PassThrough_0=1"; - len_feat[12] += length; - TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat)); - pt->a_.push_back(AlignmentPoint(0,0)); + TRulePtr pt; + if (num_pt_features > 0) { + int length = static_cast(log(UTF8StringLen(src)) / log(1.6)) + 1; + if (length > num_pt_features) length = num_pt_features; + string len_feat = "PassThrough_0=1"; + len_feat[12] += length; + TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat)); + } else { + TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 ")); + pt->a_.push_back(AlignmentPoint(0,0)); + } AddRule(pt); RefineRule(pt, ctf_level); ss.insert(alts[k].label); @@ -86,6 +91,7 @@ struct SCFGTranslatorImpl { SCFGTranslatorImpl(const boost::program_options::variables_map& conf) : max_span_limit(conf["scfg_max_span_limit"].as()), add_pass_through_rules(conf.count("add_pass_through_rules")), + num_pt_features(conf["add_pass_through_features"].as()), goal(conf["goal"].as()), default_nt(conf["scfg_default_nt"].as()), use_ctf_(conf.count("coarse_to_fine_beam_prune")) @@ -140,6 +146,7 @@ struct SCFGTranslatorImpl { const int max_span_limit; const bool add_pass_through_rules; + const unsigned int num_pt_features; const string goal; const string default_nt; const bool use_ctf_; @@ -187,7 +194,7 @@ struct SCFGTranslatorImpl { smeta->SetSourceLength(lattice.size()); if (add_pass_through_rules){ if (!SILENT) cerr << "Adding pass through grammar" << endl; - PassThroughGrammar* g = new PassThroughGrammar(lattice, default_nt, ctf_iterations_); + PassThroughGrammar* g = new PassThroughGrammar(lattice, default_nt, ctf_iterations_, num_pt_features); g->SetGrammarName("PassThrough"); glist.push_back(GrammarPtr(g)); } -- cgit v1.2.3 From ddb5935946b04fa85c205abc6140861f6fcf6c41 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 29 Jul 2014 15:02:43 +0200 Subject: fix --- training/dtrain/dtrain.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h index eb23b813..07bd9b65 100644 --- a/training/dtrain/dtrain.h +++ b/training/dtrain/dtrain.h @@ -116,11 +116,11 @@ inline ostream& _p(ostream& out) { return out << setiosflags(ios::showpos); } inline ostream& _p2(ostream& out) { return out << setprecision(2); } inline ostream& _p5(ostream& out) { return out << setprecision(5); } -inline void printWordIDVec(vector& v) +inline void printWordIDVec(vector& v, ostream& os=cerr) { for (unsigned i = 0; i < v.size(); i++) { - cerr << TD::Convert(v[i]); - if (i < v.size()-1) cerr << " "; + os << TD::Convert(v[i]); + if (i < v.size()-1) os << " "; } } -- cgit v1.2.3 From 014714dfd7d19a9d84bfccf48f71502ba0e7024d Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sat, 2 Aug 2014 14:11:14 +0200 Subject: fix --- decoder/decoder.cc | 2 +- decoder/scfg_translator.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 2c044192..081da8d6 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -366,7 +366,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream ("beam_prune3", po::value(), "Optional pass 3") ("add_pass_through_rules,P","Add rules to translate OOV words as themselves") - ("add_extra_pass_through_features,Q",po::value()->default_value(6), "Add PassThrough{1..N} features, capped at N.") + ("add_extra_pass_through_features,Q", po::value()->default_value(6), "Add PassThrough{1..N} features, capped at N.") ("k_best,k",po::value(),"Extract the k best derivations") ("unique_k_best,r", "Unique k-best translation list") ("aligner,a", "Run as a word/phrase aligner (src & ref required)") diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index 57ad85e2..0c3e4cf7 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -91,7 +91,7 @@ struct SCFGTranslatorImpl { SCFGTranslatorImpl(const boost::program_options::variables_map& conf) : max_span_limit(conf["scfg_max_span_limit"].as()), add_pass_through_rules(conf.count("add_pass_through_rules")), - num_pt_features(conf["add_pass_through_features"].as()), + num_pt_features(conf["add_extra_pass_through_features"].as()), goal(conf["goal"].as()), default_nt(conf["scfg_default_nt"].as()), use_ctf_(conf.count("coarse_to_fine_beam_prune")) -- cgit v1.2.3 From f33ab2ff3f1cf135ffb80721e1f4d71d124bc8f9 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sat, 2 Aug 2014 14:34:57 +0200 Subject: finally --- decoder/decoder.cc | 2 +- decoder/scfg_translator.cc | 8 +++++--- training/dtrain/examples/toy/cdec.ini | 1 + training/dtrain/examples/toy/dtrain.ini | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 081da8d6..c384c33f 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -366,7 +366,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream ("beam_prune3", po::value(), "Optional pass 3") ("add_pass_through_rules,P","Add rules to translate OOV words as themselves") - ("add_extra_pass_through_features,Q", po::value()->default_value(6), "Add PassThrough{1..N} features, capped at N.") + ("add_extra_pass_through_features,Q", po::value()->default_value(0), "Add PassThrough{1..N} features, capped at N.") ("k_best,k",po::value(),"Extract the k best derivations") ("unique_k_best,r", "Unique k-best translation list") ("aligner,a", "Run as a word/phrase aligner (src & ref required)") diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index 0c3e4cf7..c3cfcaad 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -64,19 +64,21 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const int j = alts[k].dist2next + i; const string& src = TD::Convert(alts[k].label); if (ss.count(alts[k].label) == 0) { - TRulePtr pt; if (num_pt_features > 0) { int length = static_cast(log(UTF8StringLen(src)) / log(1.6)) + 1; if (length > num_pt_features) length = num_pt_features; string len_feat = "PassThrough_0=1"; len_feat[12] += length; TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat)); + pt->a_.push_back(AlignmentPoint(0,0)); + AddRule(pt); + RefineRule(pt, ctf_level); } else { TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 ")); pt->a_.push_back(AlignmentPoint(0,0)); + AddRule(pt); + RefineRule(pt, ctf_level); } - AddRule(pt); - RefineRule(pt, ctf_level); ss.insert(alts[k].label); } } diff --git a/training/dtrain/examples/toy/cdec.ini b/training/dtrain/examples/toy/cdec.ini index b14f4819..e6c19abe 100644 --- a/training/dtrain/examples/toy/cdec.ini +++ b/training/dtrain/examples/toy/cdec.ini @@ -1,3 +1,4 @@ formalism=scfg add_pass_through_rules=true grammar=grammar.gz +#add_extra_pass_through_features=6 diff --git a/training/dtrain/examples/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini index cd715f26..ef956df7 100644 --- a/training/dtrain/examples/toy/dtrain.ini +++ b/training/dtrain/examples/toy/dtrain.ini @@ -2,7 +2,7 @@ decoder_config=cdec.ini input=src refs=tgt output=- -print_weights=logp shell_rule house_rule small_rule little_rule PassThrough +print_weights=logp shell_rule house_rule small_rule little_rule PassThrough PassThrough_1 PassThrough_2 PassThrough_3 PassThrough_4 PassThrough_5 PassThrough_6 k=4 N=4 epochs=2 -- cgit v1.2.3