summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Dyer <redpony@gmail.com>2014-09-07 13:57:52 -0400
committerChris Dyer <redpony@gmail.com>2014-09-07 13:57:52 -0400
commit2bc24dd0f10e2acbad118d5fce5aecdff6a90764 (patch)
tree99a79b38f1c293f299522c0ff080c045b346b179
parentb40c064987b1fb188daf040a068a459711385eac (diff)
parentf33ab2ff3f1cf135ffb80721e1f4d71d124bc8f9 (diff)
Merge pull request #50 from pks/master
alignment features, PassThroughN features, dtrain update, mira qsub, and pro fix
-rw-r--r--.gitignore5
-rw-r--r--decoder/Makefile.am1
-rw-r--r--decoder/cdec_ff.cc3
-rw-r--r--decoder/decoder.cc1
-rw-r--r--decoder/ff_lexical.h128
-rw-r--r--decoder/ff_rules.cc22
-rw-r--r--decoder/ff_rules.h13
-rw-r--r--decoder/scfg_translator.cc31
-rw-r--r--environment/LocalConfig.pm6
-rw-r--r--training/dtrain/dtrain.cc4
-rw-r--r--training/dtrain/dtrain.h6
-rw-r--r--training/dtrain/examples/standard/cdec.ini2
-rw-r--r--training/dtrain/examples/standard/expected-output115
-rw-r--r--training/dtrain/examples/toy/cdec.ini1
-rw-r--r--training/dtrain/examples/toy/dtrain.ini2
-rwxr-xr-xtraining/dtrain/parallelize.rb16
-rw-r--r--training/mira/kbest_cut_mira.cc8
-rwxr-xr-xtraining/mira/mira.py19
-rwxr-xr-xtraining/pro/pro.pl1
19 files changed, 259 insertions, 125 deletions
diff --git a/.gitignore b/.gitignore
index a3f979db..72f4997c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,6 +45,7 @@ decoder/logval_test
decoder/parser_test
decoder/rule_lexer.cc
decoder/small_vector_test
+decoder/t2s_test
decoder/trule_test
decoder/weights_test
depcomp
@@ -74,6 +75,7 @@ extractor/*_test
extractor/compile
extractor/extract
extractor/run_extractor
+extractor/sacompile
gi/clda/src/clda
gi/markov_al/ml
gi/pf/align-lexonly
@@ -209,6 +211,7 @@ training/dpmert/sentserver
training/dtrain/dtrain
training/latent_svm/latent_svm
training/minrisk/minrisk_optimize
+training/mira/ada_opt_sm
training/mira/kbest_mira
training/mira/kbest_cut_mira
training/pro/mr_pro_map
@@ -218,4 +221,6 @@ training/utils/lbfgs_test
training/utils/optimize_test
training/utils/sentclient
training/utils/sentserver
+utils/stringlib_test
+word-aligner/binderiv
word-aligner/fast_align
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 02e58479..e46a7120 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -50,6 +50,7 @@ libcdec_a_SOURCES = \
ff_external.h \
ff_factory.h \
ff_klm.h \
+ ff_lexical.h \
ff_lm.h \
ff_ngrams.h \
ff_parse_match.h \
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 0411908f..7f7e075b 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -24,6 +24,7 @@
#include "ff_charset.h"
#include "ff_wordset.h"
#include "ff_external.h"
+#include "ff_lexical.h"
void register_feature_functions() {
@@ -39,13 +40,13 @@ void register_feature_functions() {
RegisterFF<SourceWordPenalty>();
RegisterFF<ArityPenalty>();
RegisterFF<BLEUModel>();
+ RegisterFF<LexicalFeatures>();
//TODO: use for all features the new Register which requires static FF::usage(false,false) give name
ff_registry.Register("SpanFeatures", new FFFactory<SpanFeatures>());
ff_registry.Register("NgramFeatures", new FFFactory<NgramDetector>());
ff_registry.Register("RuleContextFeatures", new FFFactory<RuleContextFeatures>());
ff_registry.Register("RuleIdentityFeatures", new FFFactory<RuleIdentityFeatures>());
- ff_registry.Register("RuleWordAlignmentFeatures", new FFFactory<RuleWordAlignmentFeatures>());
ff_registry.Register("ParseMatchFeatures", new FFFactory<ParseMatchFeatures>);
ff_registry.Register("SoftSyntaxFeatures", new FFFactory<SoftSyntaxFeatures>);
ff_registry.Register("SoftSyntaxFeaturesMindist", new FFFactory<SoftSyntaxFeaturesMindist>);
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 6783cad0..c384c33f 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -366,6 +366,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
("beam_prune3", po::value<double>(), "Optional pass 3")
("add_pass_through_rules,P","Add rules to translate OOV words as themselves")
+ ("add_extra_pass_through_features,Q", po::value<unsigned int>()->default_value(0), "Add PassThrough{1..N} features, capped at N.")
("k_best,k",po::value<int>(),"Extract the k best derivations")
("unique_k_best,r", "Unique k-best translation list")
("aligner,a", "Run as a word/phrase aligner (src & ref required)")
diff --git a/decoder/ff_lexical.h b/decoder/ff_lexical.h
new file mode 100644
index 00000000..21c85b27
--- /dev/null
+++ b/decoder/ff_lexical.h
@@ -0,0 +1,128 @@
+#ifndef FF_LEXICAL_H_
+#define FF_LEXICAL_H_
+
+#include <vector>
+#include <map>
+#include "trule.h"
+#include "ff.h"
+#include "hg.h"
+#include "array2d.h"
+#include "wordid.h"
+#include <sstream>
+#include <cassert>
+#include <cmath>
+
+#include "filelib.h"
+#include "stringlib.h"
+#include "sentence_metadata.h"
+#include "lattice.h"
+#include "fdict.h"
+#include "verbose.h"
+#include "tdict.h"
+#include "hg.h"
+
+using namespace std;
+
+namespace {
+ string Escape(const string& x) {
+ string y = x;
+ for (int i = 0; i < y.size(); ++i) {
+ if (y[i] == '=') y[i]='_';
+ if (y[i] == ';') y[i]='_';
+ }
+ return y;
+ }
+}
+
+class LexicalFeatures : public FeatureFunction {
+public:
+ LexicalFeatures(const std::string& param) {
+ if (param.empty()) {
+ cerr << "LexicalFeatures: using T,D,I\n";
+ T_ = true; I_ = true; D_ = true;
+ } else {
+ const vector<string> argv = SplitOnWhitespace(param);
+ assert(argv.size() == 3);
+ T_ = (bool) atoi(argv[0].c_str());
+ I_ = (bool) atoi(argv[1].c_str());
+ D_ = (bool) atoi(argv[2].c_str());
+ cerr << "T=" << T_ << " I=" << I_ << " D=" << D_ << endl;
+ }
+ };
+ static std::string usage(bool p,bool d) {
+ return usage_helper("LexicalFeatures","[0/1 0/1 0/1]","Sparse lexical word translation indicator features. If arguments are supplied, specify like this: translations insertions deletions",p,d);
+ }
+protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const HG::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ virtual void PrepareForInput(const SentenceMetadata& smeta);
+private:
+ mutable std::map<const TRule*, SparseVector<double> > rule2feats_;
+ bool T_;
+ bool I_;
+ bool D_;
+};
+
+void LexicalFeatures::PrepareForInput(const SentenceMetadata& smeta) {
+ rule2feats_.clear(); // std::map<const TRule*, SparseVector<double> >
+}
+
+void LexicalFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const HG::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const {
+
+ map<const TRule*, SparseVector<double> >::iterator it = rule2feats_.find(edge.rule_.get());
+ if (it == rule2feats_.end()) {
+ const TRule& rule = *edge.rule_;
+ it = rule2feats_.insert(make_pair(&rule, SparseVector<double>())).first;
+ SparseVector<double>& f = it->second;
+ std::vector<bool> sf(edge.rule_->FLength(),false); // stores if source tokens are visited by alignment points
+ std::vector<bool> se(edge.rule_->ELength(),false); // stores if target tokens are visited by alignment points
+ int fid = 0;
+ // translations
+ for (unsigned i=0;i<rule.a_.size();++i) {
+ const AlignmentPoint& ap = rule.a_[i];
+ sf[ap.s_] = true; // mark index as seen
+ se[ap.t_] = true; // mark index as seen
+ ostringstream os;
+ os << "LT:" << Escape(TD::Convert(rule.f_[ap.s_])) << ":" << Escape(TD::Convert(rule.e_[ap.t_]));
+ fid = FD::Convert(os.str());
+ if (fid <= 0) continue;
+ if (T_)
+ f.add_value(fid, 1.0);
+ }
+ // word deletions
+ for (unsigned i=0;i<sf.size();++i) {
+ if (!sf[i] && rule.f_[i] > 0) {// if not visited and is terminal
+ ostringstream os;
+ os << "LD:" << Escape(TD::Convert(rule.f_[i]));
+ fid = FD::Convert(os.str());
+ if (fid <= 0) continue;
+ if (D_)
+ f.add_value(fid, 1.0);
+ }
+ }
+ // word insertions
+ for (unsigned i=0;i<se.size();++i) {
+ if (!se[i] && rule.e_[i] >= 1) {// if not visited and is terminal
+ ostringstream os;
+ os << "LI:" << Escape(TD::Convert(rule.e_[i]));
+ fid = FD::Convert(os.str());
+ if (fid <= 0) continue;
+ if (I_)
+ f.add_value(fid, 1.0);
+ }
+ }
+ }
+ (*features) += it->second;
+}
+
+
+#endif
diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc
index 7bccf084..9533caed 100644
--- a/decoder/ff_rules.cc
+++ b/decoder/ff_rules.cc
@@ -69,28 +69,6 @@ void RuleIdentityFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
features->add_value(it->second, 1);
}
-RuleWordAlignmentFeatures::RuleWordAlignmentFeatures(const std::string& param) {
-}
-
-void RuleWordAlignmentFeatures::PrepareForInput(const SentenceMetadata& smeta) {
-}
-
-void RuleWordAlignmentFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const Hypergraph::Edge& edge,
- const vector<const void*>& ant_contexts,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* context) const {
- const TRule& rule = *edge.rule_;
- ostringstream os;
- vector<AlignmentPoint> als = rule.als();
- std::vector<AlignmentPoint>::const_iterator xx = als.begin();
- for (; xx != als.end(); ++xx) {
- os << "WA:" << TD::Convert(rule.f_[xx->s_]) << ":" << TD::Convert(rule.e_[xx->t_]);
- }
- features->add_value(FD::Convert(Escape(os.str())), 1);
-}
-
RuleSourceBigramFeatures::RuleSourceBigramFeatures(const std::string& param) {
}
diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h
index 324d7a39..f210dc65 100644
--- a/decoder/ff_rules.h
+++ b/decoder/ff_rules.h
@@ -24,19 +24,6 @@ class RuleIdentityFeatures : public FeatureFunction {
mutable std::map<const TRule*, int> rule2_fid_;
};
-class RuleWordAlignmentFeatures : public FeatureFunction {
- public:
- RuleWordAlignmentFeatures(const std::string& param);
- protected:
- virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const HG::Edge& edge,
- const std::vector<const void*>& ant_contexts,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* context) const;
- virtual void PrepareForInput(const SentenceMetadata& smeta);
-};
-
class RuleSourceBigramFeatures : public FeatureFunction {
public:
RuleSourceBigramFeatures(const std::string& param);
diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc
index 88f62769..c3cfcaad 100644
--- a/decoder/scfg_translator.cc
+++ b/decoder/scfg_translator.cc
@@ -28,7 +28,7 @@ struct GlueGrammar : public TextGrammar {
};
struct PassThroughGrammar : public TextGrammar {
- PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0);
+ PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0, const unsigned int num_pt_features=0);
virtual bool HasRuleForSpan(int i, int j, int distance) const;
};
@@ -56,7 +56,7 @@ bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const {
return (i == 0);
}
-PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) {
+PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level, const unsigned num_pt_features) {
unordered_set<WordID> ss;
for (int i = 0; i < input.size(); ++i) {
const vector<LatticeArc>& alts = input[i];
@@ -64,14 +64,21 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat,
const int j = alts[k].dist2next + i;
const string& src = TD::Convert(alts[k].label);
if (ss.count(alts[k].label) == 0) {
- int length = static_cast<int>(log(UTF8StringLen(src)) / log(1.6)) + 1;
- if (length > 6) length = 6;
- string len_feat = "PassThrough_0=1";
- len_feat[12] += length;
- TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat));
- pt->a_.push_back(AlignmentPoint(0,0));
- AddRule(pt);
- RefineRule(pt, ctf_level);
+ if (num_pt_features > 0) {
+ int length = static_cast<int>(log(UTF8StringLen(src)) / log(1.6)) + 1;
+ if (length > num_pt_features) length = num_pt_features;
+ string len_feat = "PassThrough_0=1";
+ len_feat[12] += length;
+ TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat));
+ pt->a_.push_back(AlignmentPoint(0,0));
+ AddRule(pt);
+ RefineRule(pt, ctf_level);
+ } else {
+ TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 "));
+ pt->a_.push_back(AlignmentPoint(0,0));
+ AddRule(pt);
+ RefineRule(pt, ctf_level);
+ }
ss.insert(alts[k].label);
}
}
@@ -86,6 +93,7 @@ struct SCFGTranslatorImpl {
SCFGTranslatorImpl(const boost::program_options::variables_map& conf) :
max_span_limit(conf["scfg_max_span_limit"].as<int>()),
add_pass_through_rules(conf.count("add_pass_through_rules")),
+ num_pt_features(conf["add_extra_pass_through_features"].as<unsigned int>()),
goal(conf["goal"].as<string>()),
default_nt(conf["scfg_default_nt"].as<string>()),
use_ctf_(conf.count("coarse_to_fine_beam_prune"))
@@ -140,6 +148,7 @@ struct SCFGTranslatorImpl {
const int max_span_limit;
const bool add_pass_through_rules;
+ const unsigned int num_pt_features;
const string goal;
const string default_nt;
const bool use_ctf_;
@@ -187,7 +196,7 @@ struct SCFGTranslatorImpl {
smeta->SetSourceLength(lattice.size());
if (add_pass_through_rules){
if (!SILENT) cerr << "Adding pass through grammar" << endl;
- PassThroughGrammar* g = new PassThroughGrammar(lattice, default_nt, ctf_iterations_);
+ PassThroughGrammar* g = new PassThroughGrammar(lattice, default_nt, ctf_iterations_, num_pt_features);
g->SetGrammarName("PassThrough");
glist.push_back(GrammarPtr(g));
}
diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm
index 4fa0ab74..29a1cbea 100644
--- a/environment/LocalConfig.pm
+++ b/environment/LocalConfig.pm
@@ -69,10 +69,10 @@ my $CCONFIG = {
# 'DefaultJobs' => 12,
# },
'cluster.cl.uni-heidelberg.de' => {
- 'HOST_REGEXP' => qr/node25/,
+ 'HOST_REGEXP' => qr/(node\d\d\.cluster\.lan|cluster\.cl\.uni-heidelberg\.de)/i,
'JobControl' => 'qsub',
- 'QSubMemFlag' => '-l h_vmem=',
- 'DefaultJobs' => 13,
+ 'QSubMemFlag' => '-l mem_free=',
+ 'DefaultJobs' => 14,
},
'LOCAL' => { # LOCAL must be last in the list!!!
'HOST_REGEXP' => qr//,
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index b01cf421..ccb50af2 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -438,7 +438,7 @@ main(int argc, char** argv)
score_t model_diff = it->first.model - it->second.model;
score_t loss = max(0.0, -1.0 * model_diff);
- if (check && ki == 1) cout << losses[pair_idx] - loss << endl;
+ if (check && ki==repeat-1) cout << losses[pair_idx] - loss << endl;
pair_idx++;
if (repeat > 1) {
@@ -455,7 +455,7 @@ main(int argc, char** argv)
margin = fabs(model_diff);
if (!rank_error && margin < loss_margin) margin_violations++;
}
- if (rank_error && ki==1) rank_errors++;
+ if (rank_error && ki==0) rank_errors++;
if (scale_bleu_diff) eta = it->first.score - it->second.score;
if (rank_error || margin < loss_margin) {
SparseVector<weight_t> diff_vec = it->first.f - it->second.f;
diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h
index eb23b813..07bd9b65 100644
--- a/training/dtrain/dtrain.h
+++ b/training/dtrain/dtrain.h
@@ -116,11 +116,11 @@ inline ostream& _p(ostream& out) { return out << setiosflags(ios::showpos); }
inline ostream& _p2(ostream& out) { return out << setprecision(2); }
inline ostream& _p5(ostream& out) { return out << setprecision(5); }
-inline void printWordIDVec(vector<WordID>& v)
+inline void printWordIDVec(vector<WordID>& v, ostream& os=cerr)
{
for (unsigned i = 0; i < v.size(); i++) {
- cerr << TD::Convert(v[i]);
- if (i < v.size()-1) cerr << " ";
+ os << TD::Convert(v[i]);
+ if (i < v.size()-1) os << " ";
}
}
diff --git a/training/dtrain/examples/standard/cdec.ini b/training/dtrain/examples/standard/cdec.ini
index 6cba9e1e..3330dd71 100644
--- a/training/dtrain/examples/standard/cdec.ini
+++ b/training/dtrain/examples/standard/cdec.ini
@@ -21,7 +21,7 @@ feature_function=RuleIdentityFeatures
feature_function=RuleSourceBigramFeatures
feature_function=RuleTargetBigramFeatures
feature_function=RuleShape
-feature_function=RuleWordAlignmentFeatures
+feature_function=LexicalFeatures 1 1 1
#feature_function=SourceSpanSizeFeatures
#feature_function=SourceWordPenalty
#feature_function=SpanFeatures
diff --git a/training/dtrain/examples/standard/expected-output b/training/dtrain/examples/standard/expected-output
index fa831221..2460cfbb 100644
--- a/training/dtrain/examples/standard/expected-output
+++ b/training/dtrain/examples/standard/expected-output
@@ -4,7 +4,8 @@ Reading ./nc-wmt11.en.srilm.gz
----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
****************************************************************************************************
Example feature: Shape_S00000_T00000
-Seeding random number sequence to 4138446869
+T=1 I=1 D=1
+Seeding random number sequence to 2327685089
dtrain
Parameters:
@@ -36,87 +37,87 @@ Iteration #1 of 3.
. 10
Stopping after 10 input sentences.
WEIGHTS
- Glue = -80.3
- WordPenalty = -51.247
- LanguageModel = +282.46
- LanguageModel_OOV = -85.8
- PhraseModel_0 = -100.06
- PhraseModel_1 = -98.692
- PhraseModel_2 = -9.4958
- PhraseModel_3 = +18.535
- PhraseModel_4 = +62.35
- PhraseModel_5 = +7
- PhraseModel_6 = +31.4
- PassThrough = -126.5
+ Glue = +6.9
+ WordPenalty = -46.426
+ LanguageModel = +535.12
+ LanguageModel_OOV = -123.5
+ PhraseModel_0 = -160.73
+ PhraseModel_1 = -350.13
+ PhraseModel_2 = -187.81
+ PhraseModel_3 = +172.04
+ PhraseModel_4 = +0.90108
+ PhraseModel_5 = +21.6
+ PhraseModel_6 = +67.2
+ PassThrough = -149.7
---
- 1best avg score: 0.25631 (+0.25631)
- 1best avg model score: -4843.6 (-4843.6)
- avg # pairs: 744.4
+ 1best avg score: 0.23327 (+0.23327)
+ 1best avg model score: -9084.9 (-9084.9)
+ avg # pairs: 780.7
avg # rank err: 0 (meaningless)
avg # margin viol: 0
k-best loss imp: 100%
- non0 feature count: 1274
+ non0 feature count: 1389
avg list sz: 91.3
- avg f count: 143.72
-(time 0.4 min, 2.4 s/S)
+ avg f count: 146.2
+(time 0.37 min, 2.2 s/S)
Iteration #2 of 3.
. 10
WEIGHTS
- Glue = -117.4
- WordPenalty = -99.584
- LanguageModel = +395.05
- LanguageModel_OOV = -136.8
- PhraseModel_0 = +40.614
- PhraseModel_1 = -123.29
- PhraseModel_2 = -152
- PhraseModel_3 = -161.13
- PhraseModel_4 = -76.379
- PhraseModel_5 = +39.1
- PhraseModel_6 = +137.7
- PassThrough = -162.1
+ Glue = -43
+ WordPenalty = -22.019
+ LanguageModel = +591.53
+ LanguageModel_OOV = -252.1
+ PhraseModel_0 = -120.21
+ PhraseModel_1 = -43.589
+ PhraseModel_2 = +73.53
+ PhraseModel_3 = +113.7
+ PhraseModel_4 = -223.81
+ PhraseModel_5 = +64
+ PhraseModel_6 = +54.8
+ PassThrough = -331.1
---
- 1best avg score: 0.26751 (+0.011198)
- 1best avg model score: -10061 (-5216.9)
- avg # pairs: 639.1
+ 1best avg score: 0.29568 (+0.062413)
+ 1best avg model score: -15879 (-6794.1)
+ avg # pairs: 566.1
avg # rank err: 0 (meaningless)
avg # margin viol: 0
k-best loss imp: 100%
- non0 feature count: 1845
+ non0 feature count: 1931
avg list sz: 91.3
- avg f count: 139.88
-(time 0.35 min, 2.1 s/S)
+ avg f count: 139.89
+(time 0.33 min, 2 s/S)
Iteration #3 of 3.
. 10
WEIGHTS
- Glue = -101.1
- WordPenalty = -139.97
- LanguageModel = +327.98
- LanguageModel_OOV = -234.7
- PhraseModel_0 = -144.49
- PhraseModel_1 = -263.88
- PhraseModel_2 = -149.25
- PhraseModel_3 = -38.805
- PhraseModel_4 = +50.575
- PhraseModel_5 = -52.4
- PhraseModel_6 = +41.6
- PassThrough = -230.2
+ Glue = -44.3
+ WordPenalty = -131.85
+ LanguageModel = +230.91
+ LanguageModel_OOV = -285.4
+ PhraseModel_0 = -194.27
+ PhraseModel_1 = -294.83
+ PhraseModel_2 = -92.043
+ PhraseModel_3 = -140.24
+ PhraseModel_4 = +85.613
+ PhraseModel_5 = +238.1
+ PhraseModel_6 = +158.7
+ PassThrough = -359.6
---
- 1best avg score: 0.36222 (+0.094717)
- 1best avg model score: -17416 (-7355.5)
- avg # pairs: 661.2
+ 1best avg score: 0.37375 (+0.078067)
+ 1best avg model score: -14519 (+1359.7)
+ avg # pairs: 545.4
avg # rank err: 0 (meaningless)
avg # margin viol: 0
k-best loss imp: 100%
- non0 feature count: 2163
+ non0 feature count: 2218
avg list sz: 91.3
- avg f count: 132.53
-(time 0.33 min, 2 s/S)
+ avg f count: 137.77
+(time 0.35 min, 2.1 s/S)
Writing weights file to '-' ...
done
---
-Best iteration: 3 [SCORE 'fixed_stupid_bleu'=0.36222].
-This took 1.0833 min.
+Best iteration: 3 [SCORE 'fixed_stupid_bleu'=0.37375].
+This took 1.05 min.
diff --git a/training/dtrain/examples/toy/cdec.ini b/training/dtrain/examples/toy/cdec.ini
index b14f4819..e6c19abe 100644
--- a/training/dtrain/examples/toy/cdec.ini
+++ b/training/dtrain/examples/toy/cdec.ini
@@ -1,3 +1,4 @@
formalism=scfg
add_pass_through_rules=true
grammar=grammar.gz
+#add_extra_pass_through_features=6
diff --git a/training/dtrain/examples/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini
index cd715f26..ef956df7 100644
--- a/training/dtrain/examples/toy/dtrain.ini
+++ b/training/dtrain/examples/toy/dtrain.ini
@@ -2,7 +2,7 @@ decoder_config=cdec.ini
input=src
refs=tgt
output=-
-print_weights=logp shell_rule house_rule small_rule little_rule PassThrough
+print_weights=logp shell_rule house_rule small_rule little_rule PassThrough PassThrough_1 PassThrough_2 PassThrough_3 PassThrough_4 PassThrough_5 PassThrough_6
k=4
N=4
epochs=2
diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 60ca9422..82600009 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -26,7 +26,6 @@ opts = Trollop::options do
end
usage if not opts[:config]&&opts[:shards]&&opts[:input]&&opts[:references]
-
dtrain_dir = File.expand_path File.dirname(__FILE__)
if not opts[:dtrain_binary]
dtrain_bin = "#{dtrain_dir}/dtrain"
@@ -56,6 +55,7 @@ refs = opts[:references]
use_qsub = opts[:qsub]
shards_at_once = opts[:processes_at_once]
first_input_weights = opts[:first_input_weights]
+opts[:extra_qsub] = "-l #{opts[:extra_qsub]}" if opts[:extra_qsub]!=""
`mkdir work`
@@ -64,8 +64,9 @@ def make_shards(input, refs, num_shards, epoch, rand)
index = (0..lc-1).to_a
index.reverse!
index.shuffle! if rand
- shard_sz = lc / num_shards
- leftover = lc % num_shards
+ shard_sz = (lc / num_shards.to_f).round 0
+ leftover = lc - (num_shards*shard_sz)
+ leftover = 0 if leftover < 0
in_f = File.new input, 'r'
in_lines = in_f.readlines
refs_f = File.new refs, 'r'
@@ -74,7 +75,10 @@ def make_shards(input, refs, num_shards, epoch, rand)
shard_refs_files = []
in_fns = []
refs_fns = []
+ new_num_shards = 0
0.upto(num_shards-1) { |shard|
+ break if index.size==0
+ new_num_shards += 1
in_fn = "work/shard.#{shard}.#{epoch}.in"
shard_in = File.new in_fn, 'w+'
in_fns << in_fn
@@ -98,7 +102,7 @@ def make_shards(input, refs, num_shards, epoch, rand)
(shard_in_files + shard_refs_files).each do |f| f.close end
in_f.close
refs_f.close
- return [in_fns, refs_fns]
+ return in_fns, refs_fns, new_num_shards
end
input_files = []
@@ -111,7 +115,7 @@ if predefined_shards
end
num_shards = input_files.size
else
- input_files, refs_files = make_shards input, refs, num_shards, 0, rand
+ input_files, refs_files, num_shards = make_shards input, refs, num_shards, 0, rand
end
0.upto(epochs-1) { |epoch|
@@ -158,7 +162,7 @@ end
`#{cat} work/weights.*.#{epoch} > work/weights_cat`
`#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat > work/weights.#{epoch}`
if rand and reshard and epoch+1!=epochs
- input_files, refs_files = make_shards input, refs, num_shards, epoch+1, rand
+ input_files, refs_files, num_shards = make_shards input, refs, num_shards, epoch+1, rand
end
}
diff --git a/training/mira/kbest_cut_mira.cc b/training/mira/kbest_cut_mira.cc
index 56206593..724b1853 100644
--- a/training/mira/kbest_cut_mira.cc
+++ b/training/mira/kbest_cut_mira.cc
@@ -95,7 +95,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
("stream,t", "Stream mode (used for realtime)")
("weights_output,O",po::value<string>(),"Directory to write weights to")
("output_dir,D",po::value<string>(),"Directory to place output in")
- ("decoder_config,c",po::value<string>(),"Decoder configuration file");
+ ("decoder_config,c",po::value<string>(),"Decoder configuration file")
+ ("verbose,v",po::value<bool>()->zero_tokens(),"verbose stderr output");
po::options_description clo("Command line options");
clo.add_options()
("config", po::value<string>(), "Configuration file")
@@ -621,6 +622,7 @@ int main(int argc, char** argv) {
vector<string> corpus;
+ const bool VERBOSE = conf.count("verbose");
const string metric_name = conf["mt_metric"].as<string>();
optimizer = conf["optimizer"].as<int>();
fear_select = conf["fear"].as<int>();
@@ -783,7 +785,8 @@ int main(int argc, char** argv) {
double margin = cur_bad.features.dot(dense_weights) - cur_good.features.dot(dense_weights);
double mt_loss = (cur_good.mt_metric - cur_bad.mt_metric);
const double loss = margin + mt_loss;
- cerr << "LOSS: " << loss << " Margin:" << margin << " BLEUL:" << mt_loss << " " << cur_bad.features.dot(dense_weights) << " " << cur_good.features.dot(dense_weights) <<endl;
+ cerr << "LOSS: " << loss << " Margin:" << margin << " BLEUL:" << mt_loss << endl;
+ if (VERBOSE) cerr << cur_bad.features.dot(dense_weights) << " " << cur_good.features.dot(dense_weights) << endl;
if (loss > 0.0 || !checkloss) {
SparseVector<double> diff = cur_good.features;
diff -= cur_bad.features;
@@ -920,6 +923,7 @@ int main(int argc, char** argv) {
lambdas += (cur_pair[1]->features) * step_size;
lambdas -= (cur_pair[0]->features) * step_size;
+ if (VERBOSE) cerr << " Lambdas " << lambdas << endl;
//reload weights based on update
dense_weights.clear();
diff --git a/training/mira/mira.py b/training/mira/mira.py
index 3e6aa2db..ec9c2d64 100755
--- a/training/mira/mira.py
+++ b/training/mira/mira.py
@@ -143,6 +143,12 @@ def main():
parser.add_argument('--pass-suffix',
help='multipass decoding iteration. see documentation '
'at www.cdec-decoder.org for more information')
+ parser.add_argument('--qsub',
+ help='use qsub', action='store_true')
+ parser.add_argument('--pmem',
+ help='memory for qsub', type=str, default='5G')
+ parser.add_argument('-v', '--verbose',
+ help='more verbose mira optimizers')
args = parser.parse_args()
args.metric = args.metric.upper()
@@ -315,6 +321,8 @@ def split_devset(dev, outdir):
def optimize(args, script_dir, dev_size):
parallelize = script_dir+'/../utils/parallelize.pl'
+ if args.qsub:
+ parallelize += " -p %s"%args.pmem
decoder = script_dir+'/kbest_cut_mira'
(source, refs) = split_devset(args.devset, args.output_dir)
port = random.randint(15000,50000)
@@ -353,10 +361,15 @@ def optimize(args, script_dir, dev_size):
decoder_cmd += ' -a'
if not args.no_pseudo:
decoder_cmd += ' -e'
+ if args.verbose:
+ decoder_cmd += ' -v'
- #always use fork
- parallel_cmd = '{0} --use-fork -e {1} -j {2} --'.format(
- parallelize, logdir, args.jobs)
+ if args.qsub:
+ parallel_cmd = '{0} -e {1} -j {2} --'.format(
+ parallelize, logdir, args.jobs)
+ else:
+ parallel_cmd = '{0} --use-fork -e {1} -j {2} --'.format(
+ parallelize, logdir, args.jobs)
cmd = parallel_cmd + ' ' + decoder_cmd
logging.info('OPTIMIZATION COMMAND: {}'.format(cmd))
diff --git a/training/pro/pro.pl b/training/pro/pro.pl
index 3b30c379..a059477d 100755
--- a/training/pro/pro.pl
+++ b/training/pro/pro.pl
@@ -79,6 +79,7 @@ if (GetOptions(
"help" => \$help,
"reg=f" => \$reg,
"reg-previous=f" => \$reg_previous,
+ "pmem=s" => \$pmem,
"output-dir=s" => \$dir,
) == 0 || @ARGV!=0 || $help) {
print_help();