summaryrefslogtreecommitdiff
path: root/mteval/fast_score.cc
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2012-03-13 09:24:47 +0100
committerPatrick Simianer <p@simianer.de>2012-03-13 09:24:47 +0100
commitc3a9ea64251605532c7954959662643a6a927bb7 (patch)
treefed6048a5acdaf3834740107771c2bc48f26fd4d /mteval/fast_score.cc
parent867bca3e5fa0cdd63bf032e5859fb5092d9a4ca1 (diff)
parenta45af4a3704531a8382cd231f6445b3a33b598a3 (diff)
merge with upstream
Diffstat (limited to 'mteval/fast_score.cc')
-rw-r--r--mteval/fast_score.cc40
1 files changed, 23 insertions, 17 deletions
diff --git a/mteval/fast_score.cc b/mteval/fast_score.cc
index 5ee264a6..a271ccc5 100644
--- a/mteval/fast_score.cc
+++ b/mteval/fast_score.cc
@@ -4,9 +4,11 @@
#include <boost/program_options.hpp>
#include <boost/program_options/variables_map.hpp>
+#include "stringlib.h"
#include "filelib.h"
#include "tdict.h"
-#include "scorer.h"
+#include "ns.h"
+#include "ns_docscorer.h"
using namespace std;
namespace po = boost::program_options;
@@ -14,8 +16,8 @@ namespace po = boost::program_options;
void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
po::options_description opts("Configuration options");
opts.add_options()
- ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
- ("loss_function,l",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
+ ("reference,r",po::value<vector<string> >(), "[1 or more required] Reference translation(s) in tokenized text files")
+ ("evaluation_metric,m",po::value<string>()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)")
("in_file,i", po::value<string>()->default_value("-"), "Input file")
("help,h", "Help");
po::options_description dcmdline_options;
@@ -35,24 +37,29 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
int main(int argc, char** argv) {
po::variables_map conf;
InitCommandLine(argc, argv, &conf);
- const string loss_function = conf["loss_function"].as<string>();
- ScoreType type = ScoreTypeFromString(loss_function);
- DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
+ string loss_function = UppercaseString(conf["evaluation_metric"].as<string>());
+ if (loss_function == "COMBI") {
+ cerr << "WARNING: 'combi' metric is no longer supported, switching to 'COMB:TER=-0.5;IBM_BLEU=0.5'\n";
+ loss_function = "COMB:TER=-0.5;IBM_BLEU=0.5";
+ } else if (loss_function == "BLEU") {
+ cerr << "WARNING: 'BLEU' is ambiguous, assuming 'IBM_BLEU'\n";
+ loss_function = "IBM_BLEU";
+ }
+ EvaluationMetric* metric = EvaluationMetric::Instance(loss_function);
+ DocumentScorer ds(metric, conf["reference"].as<vector<string> >());
cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl;
ReadFile rf(conf["in_file"].as<string>());
- ScoreP acc;
+ SufficientStats acc;
istream& in = *rf.stream();
int lc = 0;
- while(in) {
- string line;
- getline(in, line);
- if (line.empty() && !in) break;
+ string line;
+ while(getline(in, line)) {
vector<WordID> sent;
TD::ConvertSentence(line, &sent);
- ScoreP sentscore = ds[lc]->ScoreCandidate(sent);
- if (!acc) { acc = sentscore->GetZero(); }
- acc->PlusEquals(*sentscore);
+ SufficientStats t;
+ ds[lc]->Evaluate(sent, &t);
+ acc += t;
++lc;
}
assert(lc > 0);
@@ -63,9 +70,8 @@ int main(int argc, char** argv) {
if (lc != ds.size())
cerr << "Fewer sentences in hyp (" << lc << ") than refs ("
<< ds.size() << "): scoring partial set!\n";
- float score = acc->ComputeScore();
- string details;
- acc->ScoreDetails(&details);
+ float score = metric->ComputeScore(acc);
+ const string details = metric->DetailedScore(acc);
cerr << details << endl;
cout << score << endl;
return 0;