migrate fast_score to the new API

author: Chris Dyer <cdyer@cs.cmu.edu> 2011-12-20 18:34:14 -0500
committer: Chris Dyer <cdyer@cs.cmu.edu> 2011-12-20 18:34:14 -0500
commit: ba939df399a160f9a8370911c840635d6cee4f58 (patch)
tree: a9142484bc6b5809d5633bbb30a029e14e23c52b
parent: a0b8bb731c73b2d2ed3e16c0fa79aea93b3be813 (diff)
7 files changed, 42 insertions, 22 deletions
diff --git a/mteval/Makefile.am b/mteval/Makefile.am
index 6679d949..e7126675 100644
--- a/mteval/Makefile.am
+++ b/mteval/Makefile.am
@@ -10,7 +10,7 @@ endif
 
 noinst_LIBRARIES = libmteval.a
 
-libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc
+libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc
 
 fast_score_SOURCES = fast_score.cc
 fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz
diff --git a/mteval/fast_score.cc b/mteval/fast_score.cc
index 5ee264a6..a271ccc5 100644
--- a/mteval/fast_score.cc
+++ b/mteval/fast_score.cc
@@ -4,9 +4,11 @@
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#include "stringlib.h"
 #include "filelib.h"
 #include "tdict.h"
-#include "scorer.h"
+#include "ns.h"
+#include "ns_docscorer.h"
 
 using namespace std;
 namespace po = boost::program_options;
@@ -14,8 +16,8 @@ namespace po = boost::program_options;
 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
-        ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
-        ("loss_function,l",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
+        ("reference,r",po::value<vector<string> >(), "[1 or more required] Reference translation(s) in tokenized text files")
+        ("evaluation_metric,m",po::value<string>()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)")
         ("in_file,i", po::value<string>()->default_value("-"), "Input file")
         ("help,h", "Help");
   po::options_description dcmdline_options;
@@ -35,24 +37,29 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
 int main(int argc, char** argv) {
   po::variables_map conf;
   InitCommandLine(argc, argv, &conf);
-  const string loss_function = conf["loss_function"].as<string>();
-  ScoreType type = ScoreTypeFromString(loss_function);
-  DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
+  string loss_function = UppercaseString(conf["evaluation_metric"].as<string>());
+  if (loss_function == "COMBI") {
+    cerr << "WARNING: 'combi' metric is no longer supported, switching to 'COMB:TER=-0.5;IBM_BLEU=0.5'\n";
+    loss_function = "COMB:TER=-0.5;IBM_BLEU=0.5";
+  } else if (loss_function == "BLEU") {
+    cerr << "WARNING: 'BLEU' is ambiguous, assuming 'IBM_BLEU'\n";
+    loss_function = "IBM_BLEU";
+  }
+  EvaluationMetric* metric = EvaluationMetric::Instance(loss_function);
+  DocumentScorer ds(metric, conf["reference"].as<vector<string> >());
   cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl;
 
   ReadFile rf(conf["in_file"].as<string>());
-  ScoreP acc;
+  SufficientStats acc;
   istream& in = *rf.stream();
   int lc = 0;
-  while(in) {
-    string line;
-    getline(in, line);
-    if (line.empty() && !in) break;
+  string line;
+  while(getline(in, line)) {
     vector<WordID> sent;
     TD::ConvertSentence(line, &sent);
-    ScoreP sentscore = ds[lc]->ScoreCandidate(sent);
-    if (!acc) { acc = sentscore->GetZero(); }
-    acc->PlusEquals(*sentscore);
+    SufficientStats t;
+    ds[lc]->Evaluate(sent, &t);
+    acc += t;
     ++lc;
   }
   assert(lc > 0);
@@ -63,9 +70,8 @@ int main(int argc, char** argv) {
   if (lc != ds.size())
     cerr << "Fewer sentences in hyp (" << lc << ") than refs ("
          << ds.size() << "): scoring partial set!\n";
-  float score = acc->ComputeScore();
-  string details;
-  acc->ScoreDetails(&details);
+  float score = metric->ComputeScore(acc);
+  const string details = metric->DetailedScore(acc);
   cerr << details << endl;
   cout << score << endl;
   return 0;
diff --git a/mteval/ns.cc b/mteval/ns.cc
index 6139757d..1018319d 100644
--- a/mteval/ns.cc
+++ b/mteval/ns.cc
@@ -173,7 +173,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator {
 
 template <unsigned int N = 4u, BleuType BrevityType = IBM>
 struct BleuMetric : public EvaluationMetric {
-  BleuMetric() : EvaluationMetric("IBM_BLEU") {}
+  BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == Koehn ? "KOEHN_BLEU" : "NIST_BLEU")) {}
   unsigned SufficientStatisticsVectorSize() const { return N*2 + 2; }
   shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const {
     return shared_ptr<SegmentEvaluator>(new BleuSegmentEvaluator<N,BrevityType>(refs, this));
@@ -208,7 +208,8 @@ struct BleuMetric : public EvaluationMetric {
     vector<float> precs(N);
     float bp;
     float bleu = ComputeBreakdown(stats, &bp, &precs);
-    sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)",
+    sprintf(buf, "%s = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)",
+       MetricId().c_str(),
        bleu*100.0,
        precs[0]*100.0,
        precs[1]*100.0,
diff --git a/mteval/ns_ter.cc b/mteval/ns_ter.cc
index 8c969e58..f75acf1d 100644
--- a/mteval/ns_ter.cc
+++ b/mteval/ns_ter.cc
@@ -473,3 +473,15 @@ float TERMetric::ComputeScore(const SufficientStats& stats) const {
   return edits / static_cast<float>(stats[kREF_WORDCOUNT]);
 }
 
+string TERMetric::DetailedScore(const SufficientStats& stats) const {  // returns a one-line, human-readable TER summary of stats
+  char buf[200];  // fixed-size scratch buffer; the bounded snprintf below truncates rather than overrunning it
+  snprintf(buf, sizeof(buf), "TER = %.2f, %3.f|%3.f|%3.f|%3.f (len=%3.f)",
+     ComputeScore(stats) * 100.0f,  // score is reported scaled by 100
+     stats[kINSERTIONS],
+     stats[kDELETIONS],
+     stats[kSUBSTITUTIONS],
+     stats[kSHIFTS],
+     stats[kREF_WORDCOUNT]);
+  return buf;  // converted to the string return type (copies up to the terminating NUL)
+}
+
diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h
index 6c020cfa..3190fc1b 100644
--- a/mteval/ns_ter.h
+++ b/mteval/ns_ter.h
@@ -10,6 +10,7 @@ class TERMetric : public EvaluationMetric {
 
  public:
   virtual unsigned SufficientStatisticsVectorSize() const;
+  virtual std::string DetailedScore(const SufficientStats& stats) const;
   virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
                                            const std::vector<std::vector<WordID> >& refs,
                                            SufficientStats* out) const;
diff --git a/pro-train/dist-pro.pl b/pro-train/dist-pro.pl
index 5db053de..ba9cdc06 100755
--- a/pro-train/dist-pro.pl
+++ b/pro-train/dist-pro.pl
@@ -288,7 +288,7 @@ while (1){
 	    $retries++;
 	}
 	die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest);
-	my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -l $metric");
+	my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric");
 	chomp $dec_score;
 	print STDERR "DECODER SCORE: $dec_score\n";
 
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index 11e791c1..c382a972 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -308,7 +308,7 @@ while (1){
 	    $retries++;
 	}
 	die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest);
-	my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -l $metric");
+	my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric");
 	chomp $dec_score;
 	print STDERR "DECODER SCORE: $dec_score\n";
author	Chris Dyer <cdyer@cs.cmu.edu>	2011-12-20 18:34:14 -0500
committer	Chris Dyer <cdyer@cs.cmu.edu>	2011-12-20 18:34:14 -0500
commit	ba939df399a160f9a8370911c840635d6cee4f58 (patch)
tree	a9142484bc6b5809d5633bbb30a029e14e23c52b
parent	a0b8bb731c73b2d2ed3e16c0fa79aea93b3be813 (diff)