summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2011-12-20 18:34:14 -0500
committer Chris Dyer <cdyer@cs.cmu.edu> 2011-12-20 18:34:14 -0500
commit e4c5e87db2139aa0f8655b063da7d8b5199cb46d (patch)
tree bfbb13d1c9131ed865ed3c0b42744d5d9e474b22
parent 2eb3bb96c6f780c477585b33273fc0c0d56c80e4 (diff)
migrate fast_score to the new API
-rw-r--r-- mteval/Makefile.am 2
-rw-r--r-- mteval/fast_score.cc 40
-rw-r--r-- mteval/ns.cc 5
-rw-r--r-- mteval/ns_ter.cc 12
-rw-r--r-- mteval/ns_ter.h 1
-rwxr-xr-x pro-train/dist-pro.pl 2
-rwxr-xr-x vest/dist-vest.pl 2
7 files changed, 42 insertions, 22 deletions
diff --git a/mteval/Makefile.am b/mteval/Makefile.am
index 6679d949..e7126675 100644
--- a/mteval/Makefile.am
+++ b/mteval/Makefile.am
@@ -10,7 +10,7 @@ endif
noinst_LIBRARIES = libmteval.a
-libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc
+libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc
fast_score_SOURCES = fast_score.cc
fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz
diff --git a/mteval/fast_score.cc b/mteval/fast_score.cc
index 5ee264a6..a271ccc5 100644
--- a/mteval/fast_score.cc
+++ b/mteval/fast_score.cc
@@ -4,9 +4,11 @@
#include <boost/program_options.hpp>
#include <boost/program_options/variables_map.hpp>
+#include "stringlib.h"
#include "filelib.h"
#include "tdict.h"
-#include "scorer.h"
+#include "ns.h"
+#include "ns_docscorer.h"
using namespace std;
namespace po = boost::program_options;
@@ -14,8 +16,8 @@ namespace po = boost::program_options;
void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
po::options_description opts("Configuration options");
opts.add_options()
- ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
- ("loss_function,l",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
+ ("reference,r",po::value<vector<string> >(), "[1 or more required] Reference translation(s) in tokenized text files")
+ ("evaluation_metric,m",po::value<string>()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)")
("in_file,i", po::value<string>()->default_value("-"), "Input file")
("help,h", "Help");
po::options_description dcmdline_options;
@@ -35,24 +37,29 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
int main(int argc, char** argv) {
po::variables_map conf;
InitCommandLine(argc, argv, &conf);
- const string loss_function = conf["loss_function"].as<string>();
- ScoreType type = ScoreTypeFromString(loss_function);
- DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
+ string loss_function = UppercaseString(conf["evaluation_metric"].as<string>());
+ if (loss_function == "COMBI") {
+ cerr << "WARNING: 'combi' metric is no longer supported, switching to 'COMB:TER=-0.5;IBM_BLEU=0.5'\n";
+ loss_function = "COMB:TER=-0.5;IBM_BLEU=0.5";
+ } else if (loss_function == "BLEU") {
+ cerr << "WARNING: 'BLEU' is ambiguous, assuming 'IBM_BLEU'\n";
+ loss_function = "IBM_BLEU";
+ }
+ EvaluationMetric* metric = EvaluationMetric::Instance(loss_function);
+ DocumentScorer ds(metric, conf["reference"].as<vector<string> >());
cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl;
ReadFile rf(conf["in_file"].as<string>());
- ScoreP acc;
+ SufficientStats acc;
istream& in = *rf.stream();
int lc = 0;
- while(in) {
- string line;
- getline(in, line);
- if (line.empty() && !in) break;
+ string line;
+ while(getline(in, line)) {
vector<WordID> sent;
TD::ConvertSentence(line, &sent);
- ScoreP sentscore = ds[lc]->ScoreCandidate(sent);
- if (!acc) { acc = sentscore->GetZero(); }
- acc->PlusEquals(*sentscore);
+ SufficientStats t;
+ ds[lc]->Evaluate(sent, &t);
+ acc += t;
++lc;
}
assert(lc > 0);
@@ -63,9 +70,8 @@ int main(int argc, char** argv) {
if (lc != ds.size())
cerr << "Fewer sentences in hyp (" << lc << ") than refs ("
<< ds.size() << "): scoring partial set!\n";
- float score = acc->ComputeScore();
- string details;
- acc->ScoreDetails(&details);
+ float score = metric->ComputeScore(acc);
+ const string details = metric->DetailedScore(acc);
cerr << details << endl;
cout << score << endl;
return 0;
diff --git a/mteval/ns.cc b/mteval/ns.cc
index 6139757d..1018319d 100644
--- a/mteval/ns.cc
+++ b/mteval/ns.cc
@@ -173,7 +173,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator {
template <unsigned int N = 4u, BleuType BrevityType = IBM>
struct BleuMetric : public EvaluationMetric {
- BleuMetric() : EvaluationMetric("IBM_BLEU") {}
+ BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == Koehn ? "KOEHN_BLEU" : "NIST_BLEU")) {}
unsigned SufficientStatisticsVectorSize() const { return N*2 + 2; }
shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const vector<vector<WordID> >& refs) const {
return shared_ptr<SegmentEvaluator>(new BleuSegmentEvaluator<N,BrevityType>(refs, this));
@@ -208,7 +208,8 @@ struct BleuMetric : public EvaluationMetric {
vector<float> precs(N);
float bp;
float bleu = ComputeBreakdown(stats, &bp, &precs);
- sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)",
+ sprintf(buf, "%s = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)",
+ MetricId().c_str(),
bleu*100.0,
precs[0]*100.0,
precs[1]*100.0,
diff --git a/mteval/ns_ter.cc b/mteval/ns_ter.cc
index 8c969e58..f75acf1d 100644
--- a/mteval/ns_ter.cc
+++ b/mteval/ns_ter.cc
@@ -473,3 +473,15 @@ float TERMetric::ComputeScore(const SufficientStats& stats) const {
return edits / static_cast<float>(stats[kREF_WORDCOUNT]);
}
+string TERMetric::DetailedScore(const SufficientStats& stats) const {
+ char buf[200];
+ sprintf(buf, "TER = %.2f, %3.f|%3.f|%3.f|%3.f (len=%3.f)",
+ ComputeScore(stats) * 100.0f,
+ stats[kINSERTIONS],
+ stats[kDELETIONS],
+ stats[kSUBSTITUTIONS],
+ stats[kSHIFTS],
+ stats[kREF_WORDCOUNT]);
+ return buf;
+}
+
diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h
index 6c020cfa..3190fc1b 100644
--- a/mteval/ns_ter.h
+++ b/mteval/ns_ter.h
@@ -10,6 +10,7 @@ class TERMetric : public EvaluationMetric {
public:
virtual unsigned SufficientStatisticsVectorSize() const;
+ virtual std::string DetailedScore(const SufficientStats& stats) const;
virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
const std::vector<std::vector<WordID> >& refs,
SufficientStats* out) const;
diff --git a/pro-train/dist-pro.pl b/pro-train/dist-pro.pl
index 5db053de..ba9cdc06 100755
--- a/pro-train/dist-pro.pl
+++ b/pro-train/dist-pro.pl
@@ -288,7 +288,7 @@ while (1){
$retries++;
}
die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest);
- my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -l $metric");
+ my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric");
chomp $dec_score;
print STDERR "DECODER SCORE: $dec_score\n";
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index 11e791c1..c382a972 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -308,7 +308,7 @@ while (1){
$retries++;
}
die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest);
- my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -l $metric");
+ my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric");
chomp $dec_score;
print STDERR "DECODER SCORE: $dec_score\n";