Merge remote-tracking branch 'upstream/master'

author: Patrick Simianer <p@simianer.de> 2014-06-12 13:56:42 +0200
committer: Patrick Simianer <p@simianer.de> 2014-06-12 13:56:42 +0200
commit: 244971287003d079e46193b8a209c28955f90134 (patch)
tree: 8beaae6b12b913acb213fc7f2415fd63886192f9 /training/mira
parent: 5250fd67a4b8f242068cff87f0a6a4211f8b0fcf (diff)
parent: b66e838ed52decc0be1eb5817b2a77c3840db2c5 (diff)
2 files changed, 32 insertions, 40 deletions
diff --git a/training/mira/kbest_cut_mira.cc b/training/mira/kbest_cut_mira.cc
index cde65332..724b1853 100644
--- a/training/mira/kbest_cut_mira.cc
+++ b/training/mira/kbest_cut_mira.cc
@@ -341,23 +341,22 @@ struct BasicObserver: public DecoderObserver {
 };
 
 struct TrainingObserver : public DecoderObserver {
-  TrainingObserver(const int k, const DocScorer& d, vector<GoodBadOracle>* o, vector<ScoreP>* cbs) : ds(d), oracles(*o), corpus_bleu_sent_stats(*cbs), kbest_size(k) {
-    
-
-    if(!pseudo_doc && !sent_approx)
-    if(cur_pass > 0)     //calculate corpus bleu score from previous iterations 1-best for BLEU gain
-      {
-	ScoreP acc;
-	for (int ii = 0; ii < corpus_bleu_sent_stats.size(); ii++) {
-	  if (!acc) { acc = corpus_bleu_sent_stats[ii]->GetZero(); }
-	  acc->PlusEquals(*corpus_bleu_sent_stats[ii]);
-	  
-	}
-	corpus_bleu_stats = acc;
-	corpus_bleu_score = acc->ComputeScore();
+  TrainingObserver(const int k,
+                   const DocScorer& d,
+                   vector<GoodBadOracle>* o,
+                   vector<ScoreP>* cbs) : ds(d), oracles(*o), corpus_bleu_sent_stats(*cbs), kbest_size(k) {
+    if(!pseudo_doc && !sent_approx) {
+      if(cur_pass > 0) {    //calculate corpus bleu score from previous iterations 1-best for BLEU gain
+        ScoreP acc;
+        for (int ii = 0; ii < corpus_bleu_sent_stats.size(); ii++) {
+          if (!acc) { acc = corpus_bleu_sent_stats[ii]->GetZero(); }
+          acc->PlusEquals(*corpus_bleu_sent_stats[ii]);
+        }
+        corpus_bleu_stats = acc;
+        corpus_bleu_score = acc->ComputeScore();
       }
-
-}
+    }
+  }
   const DocScorer& ds;
   vector<ScoreP>& corpus_bleu_sent_stats;
   vector<GoodBadOracle>& oracles;
@@ -461,7 +460,6 @@ struct TrainingObserver : public DecoderObserver {
 	    }
 	  else //use sentence-level smoothing ( used when cur_pass=0 if not pseudo_doc)
 	    {
-	     
 	      sentscore = mt_metric_scale * (ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore());
 	    }
 	
@@ -575,19 +573,15 @@ void ReadTrainingCorpus(const string& fname, vector<string>* c) {
   }
 }
 
-void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScorer& ds, const string& od)
-{
-  cerr << "Reading BLEU gain file ";
+void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScorer& ds, const string& od) {
+  cerr << "Reading previous score file ";
   string fname;
-  if(cur_pass == 0)
-    {
-      fname = od + "/run.raw.init";
-    }
-  else
-    {
-      int last_pass = cur_pass - 1; 
-      fname = od + "/run.raw."  +  boost::lexical_cast<std::string>(last_pass) + ".B";
-    }
+  if (cur_pass == 0) {
+    fname = od + "/run.raw.init";
+  } else {
+    int last_pass = cur_pass - 1; 
+    fname = od + "/run.raw."  +  boost::lexical_cast<std::string>(last_pass) + ".B";
+  }
   cerr << fname << "\n";
   ReadFile rf(fname);
   istream& in = *rf.stream();
@@ -604,7 +598,6 @@ void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScore
     if (!acc) { acc = sentscore->GetZero(); }
     acc->PlusEquals(*sentscore);
     ++lc;
- 
   }
   
   assert(lc > 0);
@@ -612,7 +605,6 @@ void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScore
   string details;
   acc->ScoreDetails(&details);
   cerr << "Previous run: " << details << score << endl;
-
 }
 
 
@@ -672,10 +664,9 @@ int main(int argc, char** argv) {
   
   //check training pass,if >0, then use previous iterations corpus bleu stats
   cur_pass = stream ? 0 : conf["pass"].as<int>();
-  if(cur_pass > 0)
-    {
-      ReadPastTranslationForScore(cur_pass, &corpus_bleu_sent_stats, *ds, output_dir);
-    }
+  if(cur_pass > 0) {
+    ReadPastTranslationForScore(cur_pass, &corpus_bleu_sent_stats, *ds, output_dir);
+  }
   
   cerr << "Using optimizer:" << optimizer << endl;
     
diff --git a/training/mira/mira.py b/training/mira/mira.py
index 539a0b0e..691a62a6 100755
--- a/training/mira/mira.py
+++ b/training/mira/mira.py
@@ -203,14 +203,15 @@ def main():
   if have_mpl: graph_file = graph(args.output_dir, hope_best_fear, args.metric)
 
   dev_results, dev_bleu = evaluate(args.devset, args.weights, args.config, 
-                         script_dir, args.output_dir)
+                         script_dir, args.output_dir, args.jobs)
   if args.test:
     if args.test_config:
       test_results, test_bleu = evaluate(args.test, args.weights, 
-                              args.test_config, script_dir, args.output_dir)
+                              args.test_config, script_dir, args.output_dir,
+                              args.jobs)
     else:
       test_results, test_bleu = evaluate(args.test, args.weights, args.config,
-                              script_dir, args.output_dir)
+                              script_dir, args.output_dir, args.jobs)
   else: 
     test_results = ''
     test_bleu = ''
@@ -240,11 +241,11 @@ def graph(output_dir, hope_best_fear, metric):
   return graph_file
 
 #evaluate a given test set using decode-and-evaluate.pl
-def evaluate(testset, weights, ini, script_dir, out_dir):
+def evaluate(testset, weights, ini, script_dir, out_dir, jobs):
   evaluator = '{}/../utils/decode-and-evaluate.pl'.format(script_dir)
   try:
     p = subprocess.Popen([evaluator, '-c', ini, '-w', weights, '-i', testset, 
-                         '-d', out_dir, '--jobs', args.jobs], stdout=subprocess.PIPE)
+                         '-d', out_dir, '--jobs', str(jobs)], stdout=subprocess.PIPE)
     results, err = p.communicate()
     bleu, results = results.split('\n',1)
   except subprocess.CalledProcessError:
author	Patrick Simianer <p@simianer.de>	2014-06-12 13:56:42 +0200
committer	Patrick Simianer <p@simianer.de>	2014-06-12 13:56:42 +0200
commit	244971287003d079e46193b8a209c28955f90134 (patch)
tree	8beaae6b12b913acb213fc7f2415fd63886192f9 /training/mira
parent	5250fd67a4b8f242068cff87f0a6a4211f8b0fcf (diff)
parent	b66e838ed52decc0be1eb5817b2a77c3840db2c5 (diff)