summaryrefslogtreecommitdiff
path: root/mteval
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-10-13 19:03:48 +0100
committerPatrick Simianer <p@simianer.de>2014-10-13 19:03:48 +0100
commitcb9fb7088dde35881516c088db402abe747d49fa (patch)
treea91e4935a7941f1b261f76d88ab41fa3078a1891 /mteval
parent0a00e57e921c8eca8e02364db7d2e6607bfdcebc (diff)
parentb1ed81ef3216b212295afa76c5d20a56fb647204 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'mteval')
-rw-r--r--mteval/Makefile.am3
-rw-r--r--mteval/aer_scorer.h4
-rw-r--r--mteval/comb_scorer.h4
-rw-r--r--mteval/external_scorer.h4
-rw-r--r--mteval/levenshtein.h29
-rw-r--r--mteval/ns.cc3
-rw-r--r--mteval/ns.h4
-rw-r--r--mteval/ns_cer.cc26
-rw-r--r--mteval/ns_cer.h7
-rw-r--r--mteval/ns_comb.h4
-rw-r--r--mteval/ns_docscorer.h4
-rw-r--r--mteval/ns_ext.h4
-rw-r--r--mteval/ns_ssk.h4
-rw-r--r--mteval/ns_ter.h4
-rw-r--r--mteval/ns_wer.cc35
-rw-r--r--mteval/ns_wer.h20
-rw-r--r--mteval/ter.h4
17 files changed, 116 insertions, 47 deletions
diff --git a/mteval/Makefile.am b/mteval/Makefile.am
index c833eb01..aac3e6b5 100644
--- a/mteval/Makefile.am
+++ b/mteval/Makefile.am
@@ -14,6 +14,7 @@ libmteval_a_SOURCES = \
aer_scorer.h \
comb_scorer.h \
external_scorer.h \
+ levenshtein.h \
ns.h \
ns_cer.h \
ns_comb.h \
@@ -21,6 +22,7 @@ libmteval_a_SOURCES = \
ns_ext.h \
ns_ssk.h \
ns_ter.h \
+ ns_wer.h \
scorer.h \
ter.h \
aer_scorer.cc \
@@ -34,6 +36,7 @@ libmteval_a_SOURCES = \
ns_ext.cc \
ns_ssk.cc \
ns_ter.cc \
+ ns_wer.cc \
scorer.cc \
ter.cc
diff --git a/mteval/aer_scorer.h b/mteval/aer_scorer.h
index 6d53d359..cd1238f3 100644
--- a/mteval/aer_scorer.h
+++ b/mteval/aer_scorer.h
@@ -1,5 +1,5 @@
-#ifndef _AER_SCORER_
-#define _AER_SCORER_
+#ifndef AER_SCORER_
+#define AER_SCORER_
#include <boost/shared_ptr.hpp>
diff --git a/mteval/comb_scorer.h b/mteval/comb_scorer.h
index 346be576..d17d089d 100644
--- a/mteval/comb_scorer.h
+++ b/mteval/comb_scorer.h
@@ -1,5 +1,5 @@
-#ifndef _COMB_SCORER_
-#define _COMB_SCORER_
+#ifndef COMB_SCORER_H_
+#define COMB_SCORER_H_
#include "scorer.h"
diff --git a/mteval/external_scorer.h b/mteval/external_scorer.h
index 85535655..9565d5af 100644
--- a/mteval/external_scorer.h
+++ b/mteval/external_scorer.h
@@ -1,5 +1,5 @@
-#ifndef _EXTERNAL_SCORER_H_
-#define _EXTERNAL_SCORER_H_
+#ifndef EXTERNAL_SCORER_H_
+#define EXTERNAL_SCORER_H_
#include <vector>
#include <string>
diff --git a/mteval/levenshtein.h b/mteval/levenshtein.h
new file mode 100644
index 00000000..3ae56cf5
--- /dev/null
+++ b/mteval/levenshtein.h
@@ -0,0 +1,29 @@
+#ifndef LEVENSHTEIN_H_
+#define LEVENSHTEIN_H_
+
+namespace cdec {
+
+template <typename V>
+inline unsigned LevenshteinDistance(const V& a, const V& b) {
+ const unsigned m = a.size(), n = b.size();
+ std::vector<unsigned> edit((m + 1) * 2);
+ for (unsigned i = 0; i <= n; i++) {
+ for (unsigned j = 0; j <= m; j++) {
+ if (i == 0)
+ edit[j] = j;
+ else if (j == 0)
+ edit[(i % 2) * (m + 1)] = i;
+ else
+ edit[(i % 2) * (m + 1) + j] = std::min(std::min(
+ edit[(i % 2) * (m + 1) + j - 1] + 1,
+ edit[((i - 1) % 2) * (m + 1) + j] + 1),
+ edit[((i - 1) % 2) * (m + 1) + (j - 1)]
+ + (a[j - 1] == b[i - 1] ? 0 : 1));
+ }
+ }
+ return edit[(n % 2) * (m + 1) + m];
+}
+
+}
+
+#endif
diff --git a/mteval/ns.cc b/mteval/ns.cc
index c1ea238b..075e0121 100644
--- a/mteval/ns.cc
+++ b/mteval/ns.cc
@@ -3,6 +3,7 @@
#include "ns_ext.h"
#include "ns_comb.h"
#include "ns_cer.h"
+#include "ns_wer.h"
#include "ns_ssk.h"
#include <cstdio>
@@ -285,6 +286,8 @@ EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) {
m = new CombinationMetric(metric_id);
} else if (metric_id == "CER") {
m = new CERMetric;
+ } else if (metric_id == "WER") {
+ m = new WERMetric;
} else {
cerr << "Implement please: " << metric_id << endl;
abort();
diff --git a/mteval/ns.h b/mteval/ns.h
index 153bf0b8..f6329b65 100644
--- a/mteval/ns.h
+++ b/mteval/ns.h
@@ -1,5 +1,5 @@
-#ifndef _NS_H_
-#define _NS_H_
+#ifndef NS_H_
+#define NS_H_
#include <string>
#include <vector>
diff --git a/mteval/ns_cer.cc b/mteval/ns_cer.cc
index a843d471..da6683b1 100644
--- a/mteval/ns_cer.cc
+++ b/mteval/ns_cer.cc
@@ -1,5 +1,6 @@
#include "ns_cer.h"
#include "tdict.h"
+#include "levenshtein.h"
static const unsigned kNUMFIELDS = 2;
static const unsigned kEDITDISTANCE = 0;
@@ -13,27 +14,6 @@ unsigned CERMetric::SufficientStatisticsVectorSize() const {
return 2;
}
-unsigned CERMetric::EditDistance(const std::string& hyp,
- const std::string& ref) const {
- const unsigned m = hyp.size(), n = ref.size();
- std::vector<unsigned> edit((m + 1) * 2);
- for(unsigned i = 0; i < n + 1; i++) {
- for(unsigned j = 0; j < m + 1; j++) {
- if(i == 0)
- edit[j] = j;
- else if(j == 0)
- edit[(i%2)*(m+1)] = i;
- else
- edit[(i%2)*(m+1) + j] = std::min(std::min(edit[(i%2)*(m+1) + j-1] + 1,
- edit[((i-1)%2)*(m+1) + j] + 1),
- edit[((i-1)%2)*(m+1) + (j-1)]
- + (hyp[j-1] == ref[i-1] ? 0 : 1));
-
- }
- }
- return edit[(n%2)*(m+1) + m];
-}
-
void CERMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp,
const std::vector<std::vector<WordID> >& refs,
SufficientStats* out) const {
@@ -42,7 +22,7 @@ void CERMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp,
float best_score = hyp_str.size();
for (size_t i = 0; i < refs.size(); ++i) {
std::string ref_str(TD::GetString(refs[i]));
- float score = EditDistance(hyp_str, ref_str);
+ float score = cdec::LevenshteinDistance(hyp_str, ref_str);
if (score < best_score) {
out->fields[kEDITDISTANCE] = score;
out->fields[kCHARCOUNT] = ref_str.size();
@@ -50,6 +30,8 @@ void CERMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp,
}
}
}
+
float CERMetric::ComputeScore(const SufficientStats& stats) const {
return stats.fields[kEDITDISTANCE] / stats.fields[kCHARCOUNT];
}
+
diff --git a/mteval/ns_cer.h b/mteval/ns_cer.h
index 9d211181..d9927f78 100644
--- a/mteval/ns_cer.h
+++ b/mteval/ns_cer.h
@@ -1,13 +1,10 @@
-#ifndef _NS_CER_H_
-#define _NS_CER_H_
+#ifndef NS_CER_H_
+#define NS_CER_H_
#include "ns.h"
class CERMetric : public EvaluationMetric {
friend class EvaluationMetric;
- private:
- unsigned EditDistance(const std::string& hyp,
- const std::string& ref) const;
protected:
CERMetric() : EvaluationMetric("CER") {}
diff --git a/mteval/ns_comb.h b/mteval/ns_comb.h
index 140e7e6a..22cba169 100644
--- a/mteval/ns_comb.h
+++ b/mteval/ns_comb.h
@@ -1,5 +1,5 @@
-#ifndef _NS_COMB_H_
-#define _NS_COMB_H_
+#ifndef NS_COMB_H_
+#define NS_COMB_H_
#include "ns.h"
diff --git a/mteval/ns_docscorer.h b/mteval/ns_docscorer.h
index b3c28fc9..5feae2df 100644
--- a/mteval/ns_docscorer.h
+++ b/mteval/ns_docscorer.h
@@ -1,5 +1,5 @@
-#ifndef _NS_DOC_SCORER_H_
-#define _NS_DOC_SCORER_H_
+#ifndef NS_DOC_SCORER_H_
+#define NS_DOC_SCORER_H_
#include <vector>
#include <string>
diff --git a/mteval/ns_ext.h b/mteval/ns_ext.h
index 78badb2e..77be14b9 100644
--- a/mteval/ns_ext.h
+++ b/mteval/ns_ext.h
@@ -1,5 +1,5 @@
-#ifndef _NS_EXTERNAL_SCORER_H_
-#define _NS_EXTERNAL_SCORER_H_
+#ifndef NS_EXTERNAL_SCORER_H_
+#define NS_EXTERNAL_SCORER_H_
#include "ns.h"
diff --git a/mteval/ns_ssk.h b/mteval/ns_ssk.h
index 0d418770..fdace6eb 100644
--- a/mteval/ns_ssk.h
+++ b/mteval/ns_ssk.h
@@ -1,5 +1,5 @@
-#ifndef _NS_SSK_H_
-#define _NS_SSK_H_
+#ifndef NS_SSK_H_
+#define NS_SSK_H_
#include "ns.h"
diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h
index c5c25413..cffd1bd7 100644
--- a/mteval/ns_ter.h
+++ b/mteval/ns_ter.h
@@ -1,5 +1,5 @@
-#ifndef _NS_TER_H_
-#define _NS_TER_H_
+#ifndef NS_TER_H_
+#define NS_TER_H_
#include "ns.h"
diff --git a/mteval/ns_wer.cc b/mteval/ns_wer.cc
new file mode 100644
index 00000000..f9b2bbbb
--- /dev/null
+++ b/mteval/ns_wer.cc
@@ -0,0 +1,35 @@
+#include "ns_wer.h"
+#include "tdict.h"
+#include "levenshtein.h"
+
+static const unsigned kNUMFIELDS = 2;
+static const unsigned kEDITDISTANCE = 0;
+static const unsigned kCHARCOUNT = 1;
+
+bool WERMetric::IsErrorMetric() const {
+ return true;
+}
+
+unsigned WERMetric::SufficientStatisticsVectorSize() const {
+ return 2;
+}
+
+void WERMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp,
+ const std::vector<std::vector<WordID> >& refs,
+ SufficientStats* out) const {
+ out->fields.resize(kNUMFIELDS);
+ float best_score = hyp.size();
+ for (size_t i = 0; i < refs.size(); ++i) {
+ float score = cdec::LevenshteinDistance(hyp, refs[i]);
+ if (score < best_score) {
+ out->fields[kEDITDISTANCE] = score;
+ out->fields[kCHARCOUNT] = refs[i].size();
+ best_score = score;
+ }
+ }
+}
+
+float WERMetric::ComputeScore(const SufficientStats& stats) const {
+ return stats.fields[kEDITDISTANCE] / stats.fields[kCHARCOUNT];
+}
+
diff --git a/mteval/ns_wer.h b/mteval/ns_wer.h
new file mode 100644
index 00000000..45da70c5
--- /dev/null
+++ b/mteval/ns_wer.h
@@ -0,0 +1,20 @@
+#ifndef NS_WER_H_
+#define NS_WER_H_
+
+#include "ns.h"
+
+class WERMetric : public EvaluationMetric {
+ friend class EvaluationMetric;
+ protected:
+ WERMetric() : EvaluationMetric("WER") {}
+
+ public:
+ virtual bool IsErrorMetric() const;
+ virtual unsigned SufficientStatisticsVectorSize() const;
+ virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
+ const std::vector<std::vector<WordID> >& refs,
+ SufficientStats* out) const;
+ virtual float ComputeScore(const SufficientStats& stats) const;
+};
+
+#endif
diff --git a/mteval/ter.h b/mteval/ter.h
index 43314791..0758c6b6 100644
--- a/mteval/ter.h
+++ b/mteval/ter.h
@@ -1,5 +1,5 @@
-#ifndef _TER_H_
-#define _TER_H_
+#ifndef TER_H_
+#define TER_H_
#include "scorer.h"