From fe29dcbab1ae427a1644b128b0ef30aa27647931 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 10 Nov 2012 23:02:34 -0500 Subject: set meteor path with environment variable --- mteval/ns.cc | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'mteval/ns.cc') diff --git a/mteval/ns.cc b/mteval/ns.cc index 3af7cc63..f3a82ce0 100644 --- a/mteval/ns.cc +++ b/mteval/ns.cc @@ -12,6 +12,7 @@ #include #include "tdict.h" +#include "filelib.h" #include "stringlib.h" using namespace std; @@ -234,7 +235,13 @@ struct BleuMetric : public EvaluationMetric { EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) { static bool is_first = true; + static string meteor_jar_path = "/cab0/tools/meteor-1.3/meteor-1.3.jar"; if (is_first) { + const char* ppath = getenv("METEOR_JAR"); + if (ppath) { + cerr << "METEOR_JAR environment variable set to " << ppath << endl; + meteor_jar_path = ppath; + } instances_["NULL"] = NULL; is_first = false; } @@ -252,7 +259,11 @@ EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) { } else if (metric_id == "TER") { m = new TERMetric; } else if (metric_id == "METEOR") { - m = new ExternalMetric("METEOR", "java -Xmx1536m -jar /cab0/tools/meteor-1.3/meteor-1.3.jar - - -mira -lower -t tune -l en"); + if (!FileExists(meteor_jar_path)) { + cerr << meteor_jar_path << " not found. Set METEOR_JAR environment variable.\n"; + abort(); + } + m = new ExternalMetric("METEOR", "java -Xmx1536m -jar " + meteor_jar_path + " - - -mira -lower -t tune -l en"); } else if (metric_id.find("COMB:") == 0) { m = new CombinationMetric(metric_id); } else if (metric_id == "CER") { -- cgit v1.2.3 From 1a43362c3aa079688415ec89d67ee0f41210f9dd Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 17 Nov 2012 17:21:30 -0500 Subject: make meteor jar configurable at build time --- configure.ac | 23 ++++++++++++++++++++--- mteval/Makefile.am | 14 +++++++++++++- mteval/ns.cc | 17 +++++++++-------- 3 files changed, 42 insertions(+), 12 deletions(-) (limited to 'mteval/ns.cc') diff --git a/configure.ac b/configure.ac index cb132d66..233009ca 100644 --- a/configure.ac +++ b/configure.ac @@ -13,6 +13,7 @@ AC_LANG_CPLUSPLUS BOOST_REQUIRE([1.44]) BOOST_PROGRAM_OPTIONS BOOST_SYSTEM +BOOST_SERIALIZATION BOOST_TEST AM_PATH_PYTHON AC_CHECK_HEADER(dlfcn.h,AC_DEFINE(HAVE_DLFCN_H)) @@ -26,10 +27,24 @@ AM_CONDITIONAL([MPI], [test "x$mpi" = xyes]) if test "x$mpi" = xyes then - BOOST_SERIALIZATION AC_DEFINE([HAVE_MPI], [1], [flag for MPI]) - # TODO BOOST_MPI needs to be implemented - LIBS="$LIBS -lboost_mpi $BOOST_SERIALIZATION_LIBS" + LIBS="$LIBS -lboost_mpi" +fi + +AM_CONDITIONAL([HAVE_METEOR], false) +AC_ARG_WITH(meteor, + [AC_HELP_STRING([--with-meteor=PATH], [(optional) path to METEOR jar])], + [with_meteor=$withval], + [with_meteor=no] + ) + +if test "x$with_meteor" != 'xno' +then + AC_CHECK_FILE([$with_meteor], + [AC_DEFINE([HAVE_METEOR], [1], [flag for METEOR jar library])], + [AC_MSG_ERROR([Cannot find METEOR jar!])]) + AC_SUBST(METEOR_JAR,"${with_meteor}") + AM_CONDITIONAL([HAVE_METEOR], true) fi AM_CONDITIONAL([HAVE_CMPH], false) @@ -129,6 +144,8 @@ AC_CONFIG_FILES([mira/Makefile]) AC_CONFIG_FILES([dtrain/Makefile]) AC_CONFIG_FILES([example_extff/Makefile]) +AC_CONFIG_FILES([mteval/meteor_jar.cc]) + AC_CONFIG_FILES([python/setup.py]) AC_OUTPUT diff --git a/mteval/Makefile.am b/mteval/Makefile.am index 22550c99..5e9bba91 100644 --- a/mteval/Makefile.am +++ b/mteval/Makefile.am @@ -8,7 +8,19 @@ TESTS = scorer_test noinst_LIBRARIES = libmteval.a -libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc ns_cer.cc +libmteval_a_SOURCES = \ + aer_scorer.cc \ + comb_scorer.cc \ + external_scorer.cc \ + meteor_jar.cc \ + ns.cc \ + ns_cer.cc \ + ns_comb.cc \ + ns_docscorer.cc \ + ns_ext.cc \ + ns_ter.cc \ + scorer.cc \ + ter.cc fast_score_SOURCES = fast_score.cc fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz diff --git a/mteval/ns.cc b/mteval/ns.cc index f3a82ce0..7d73061c 100644 --- a/mteval/ns.cc +++ b/mteval/ns.cc @@ -19,6 +19,8 @@ using namespace std; map EvaluationMetric::instances_; +extern const char* meteor_jar_path; + SegmentEvaluator::~SegmentEvaluator() {} EvaluationMetric::~EvaluationMetric() {} @@ -235,13 +237,7 @@ struct BleuMetric : public EvaluationMetric { EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) { static bool is_first = true; - static string meteor_jar_path = "/cab0/tools/meteor-1.3/meteor-1.3.jar"; if (is_first) { - const char* ppath = getenv("METEOR_JAR"); - if (ppath) { - cerr << "METEOR_JAR environment variable set to " << ppath << endl; - meteor_jar_path = ppath; - } instances_["NULL"] = NULL; is_first = false; } @@ -259,11 +255,16 @@ EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) { } else if (metric_id == "TER") { m = new TERMetric; } else if (metric_id == "METEOR") { +#if HAVE_METEOR if (!FileExists(meteor_jar_path)) { - cerr << meteor_jar_path << " not found. Set METEOR_JAR environment variable.\n"; + cerr << meteor_jar_path << " not found!\n"; abort(); } - m = new ExternalMetric("METEOR", "java -Xmx1536m -jar " + meteor_jar_path + " - - -mira -lower -t tune -l en"); + m = new ExternalMetric("METEOR", string("java -Xmx1536m -jar ") + meteor_jar_path + " - - -mira -lower -t tune -l en"); +#else + cerr << "cdec was not built with the --with-meteor option." << endl; + abort(); +#endif } else if (metric_id.find("COMB:") == 0) { m = new CombinationMetric(metric_id); } else if (metric_id == "CER") { -- cgit v1.2.3 From 95c364f2cb002241c4a62bedb1c5ef6f1e9a7f22 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 16 Feb 2013 03:40:30 -0500 Subject: add thing --- mteval/ns.cc | 3 +++ 1 file changed, 3 insertions(+) (limited to 'mteval/ns.cc') diff --git a/mteval/ns.cc b/mteval/ns.cc index 7d73061c..d8214558 100644 --- a/mteval/ns.cc +++ b/mteval/ns.cc @@ -3,6 +3,7 @@ #include "ns_ext.h" #include "ns_comb.h" #include "ns_cer.h" +#include "ns_ssk.h" #include #include @@ -252,6 +253,8 @@ EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) { m = new BleuMetric<4, NIST>; } else if (metric_id == "KOEHN_BLEU") { m = new BleuMetric<4, Koehn>; + } else if (metric_id == "SSK") { + m = new SSKMetric; } else if (metric_id == "TER") { m = new TERMetric; } else if (metric_id == "METEOR") { -- cgit v1.2.3 From 8628a6ed6e2e1179d0927c35a2b2d059b254d034 Mon Sep 17 00:00:00 2001 From: Jeff Flanigan Date: Thu, 21 Feb 2013 19:51:06 -0500 Subject: Add QCRI_BLEU --- mteval/ns.cc | 25 +++++++++++++++++++------ python/pkg/cdec/score.py | 2 +- python/src/mteval.pxi | 2 ++ 3 files changed, 22 insertions(+), 7 deletions(-) (limited to 'mteval/ns.cc') diff --git a/mteval/ns.cc b/mteval/ns.cc index d8214558..b64d4798 100644 --- a/mteval/ns.cc +++ b/mteval/ns.cc @@ -61,7 +61,7 @@ string EvaluationMetric::DetailedScore(const SufficientStats& stats) const { return os.str(); } -enum BleuType { IBM, Koehn, NIST }; +enum BleuType { IBM, Koehn, NIST, QCRI }; template struct BleuSegmentEvaluator : public SegmentEvaluator { BleuSegmentEvaluator(const vector >& refs, const EvaluationMetric* em) : evaluation_metric(em) { @@ -91,7 +91,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator { float& ref_len = out->fields[2*N + 1]; hyp_len = hyp.size(); ref_len = lengths_[0]; - if (lengths_.size() > 1 && BrevityType == IBM) { + if (lengths_.size() > 1 && (BrevityType == IBM || BrevityType == QCRI)) { float bestd = 2000000; float hl = hyp.size(); float bl = -1; @@ -186,7 +186,7 @@ struct BleuSegmentEvaluator : public SegmentEvaluator { template struct BleuMetric : public EvaluationMetric { - BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == Koehn ? "KOEHN_BLEU" : "NIST_BLEU")) {} + BleuMetric() : EvaluationMetric(BrevityType == IBM ? "IBM_BLEU" : (BrevityType == Koehn ? "KOEHN_BLEU" : (BrevityType == NIST ? "NIST_BLEU" : "QCRI_BLEU"))) {} unsigned SufficientStatisticsVectorSize() const { return N*2 + 2; } boost::shared_ptr CreateSegmentEvaluator(const vector >& refs) const { return boost::shared_ptr(new BleuSegmentEvaluator(refs, this)); @@ -194,26 +194,37 @@ struct BleuMetric : public EvaluationMetric { float ComputeBreakdown(const SufficientStats& stats, float* bp, vector* out) const { if (out) { out->clear(); } float log_bleu = 0; + float log_bleu_adj = 0; // for QCRI int count = 0; + float alpha = BrevityType == QCRI ? 1 : 0.01; for (int i = 0; i < N; ++i) { if (stats.fields[i+N] > 0) { float cor_count = stats.fields[i]; // correct_ngram_hit_counts[i]; // smooth bleu - if (!cor_count) { cor_count = 0.01; } + if (!cor_count) { cor_count = alpha; } float lprec = log(cor_count) - log(stats.fields[i+N]); // log(hyp_ngram_counts[i]); if (out) out->push_back(exp(lprec)); log_bleu += lprec; + if (BrevityType == QCRI) + log_bleu_adj += log(alpha) - log(stats.fields[i+N] + alpha); ++count; } } log_bleu /= count; + log_bleu_adj /= count; float lbp = 0.0; const float& hyp_len = stats.fields[2*N]; const float& ref_len = stats.fields[2*N + 1]; - if (hyp_len < ref_len) - lbp = (hyp_len - ref_len) / hyp_len; + if (hyp_len < ref_len) { + if (BrevityType == QCRI) + lbp = (hyp_len - ref_len - alpha) / hyp_len; + else + lbp = (hyp_len - ref_len) / hyp_len; + } log_bleu += lbp; if (bp) *bp = exp(lbp); + if (BrevityType == QCRI) + return exp(log_bleu) - exp(lbp + log_bleu_adj); return exp(log_bleu); } string DetailedScore(const SufficientStats& stats) const { @@ -253,6 +264,8 @@ EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) { m = new BleuMetric<4, NIST>; } else if (metric_id == "KOEHN_BLEU") { m = new BleuMetric<4, Koehn>; + } else if (metric_id == "QCRI_BLEU") { + m = new BleuMetric<4, QCRI>; } else if (metric_id == "SSK") { m = new SSKMetric; } else if (metric_id == "TER") { diff --git a/python/pkg/cdec/score.py b/python/pkg/cdec/score.py index 22257774..829dfdfd 100644 --- a/python/pkg/cdec/score.py +++ b/python/pkg/cdec/score.py @@ -1 +1 @@ -from _cdec import BLEU, TER, CER, Metric +from _cdec import BLEU, TER, CER, SSK, QCRI, Metric diff --git a/python/src/mteval.pxi b/python/src/mteval.pxi index f3bec393..436a1e01 100644 --- a/python/src/mteval.pxi +++ b/python/src/mteval.pxi @@ -192,5 +192,7 @@ cdef class Metric: return [] BLEU = Scorer('IBM_BLEU') +QCRI = Scorer('QCRI_BLEU') TER = Scorer('TER') CER = Scorer('CER') +SSK = Scorer('SSK') -- cgit v1.2.3