diff options
author | Patrick Simianer <p@simianer.de> | 2014-10-13 19:03:48 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-10-13 19:03:48 +0100 |
commit | cb9fb7088dde35881516c088db402abe747d49fa (patch) | |
tree | a91e4935a7941f1b261f76d88ab41fa3078a1891 /mteval | |
parent | 0a00e57e921c8eca8e02364db7d2e6607bfdcebc (diff) | |
parent | b1ed81ef3216b212295afa76c5d20a56fb647204 (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'mteval')
-rw-r--r-- | mteval/Makefile.am | 3 | ||||
-rw-r--r-- | mteval/aer_scorer.h | 4 | ||||
-rw-r--r-- | mteval/comb_scorer.h | 4 | ||||
-rw-r--r-- | mteval/external_scorer.h | 4 | ||||
-rw-r--r-- | mteval/levenshtein.h | 29 | ||||
-rw-r--r-- | mteval/ns.cc | 3 | ||||
-rw-r--r-- | mteval/ns.h | 4 | ||||
-rw-r--r-- | mteval/ns_cer.cc | 26 | ||||
-rw-r--r-- | mteval/ns_cer.h | 7 | ||||
-rw-r--r-- | mteval/ns_comb.h | 4 | ||||
-rw-r--r-- | mteval/ns_docscorer.h | 4 | ||||
-rw-r--r-- | mteval/ns_ext.h | 4 | ||||
-rw-r--r-- | mteval/ns_ssk.h | 4 | ||||
-rw-r--r-- | mteval/ns_ter.h | 4 | ||||
-rw-r--r-- | mteval/ns_wer.cc | 35 | ||||
-rw-r--r-- | mteval/ns_wer.h | 20 | ||||
-rw-r--r-- | mteval/ter.h | 4 |
17 files changed, 116 insertions, 47 deletions
diff --git a/mteval/Makefile.am b/mteval/Makefile.am index c833eb01..aac3e6b5 100644 --- a/mteval/Makefile.am +++ b/mteval/Makefile.am @@ -14,6 +14,7 @@ libmteval_a_SOURCES = \ aer_scorer.h \ comb_scorer.h \ external_scorer.h \ + levenshtein.h \ ns.h \ ns_cer.h \ ns_comb.h \ @@ -21,6 +22,7 @@ libmteval_a_SOURCES = \ ns_ext.h \ ns_ssk.h \ ns_ter.h \ + ns_wer.h \ scorer.h \ ter.h \ aer_scorer.cc \ @@ -34,6 +36,7 @@ libmteval_a_SOURCES = \ ns_ext.cc \ ns_ssk.cc \ ns_ter.cc \ + ns_wer.cc \ scorer.cc \ ter.cc diff --git a/mteval/aer_scorer.h b/mteval/aer_scorer.h index 6d53d359..cd1238f3 100644 --- a/mteval/aer_scorer.h +++ b/mteval/aer_scorer.h @@ -1,5 +1,5 @@ -#ifndef _AER_SCORER_ -#define _AER_SCORER_ +#ifndef AER_SCORER_ +#define AER_SCORER_ #include <boost/shared_ptr.hpp> diff --git a/mteval/comb_scorer.h b/mteval/comb_scorer.h index 346be576..d17d089d 100644 --- a/mteval/comb_scorer.h +++ b/mteval/comb_scorer.h @@ -1,5 +1,5 @@ -#ifndef _COMB_SCORER_ -#define _COMB_SCORER_ +#ifndef COMB_SCORER_H_ +#define COMB_SCORER_H_ #include "scorer.h" diff --git a/mteval/external_scorer.h b/mteval/external_scorer.h index 85535655..9565d5af 100644 --- a/mteval/external_scorer.h +++ b/mteval/external_scorer.h @@ -1,5 +1,5 @@ -#ifndef _EXTERNAL_SCORER_H_ -#define _EXTERNAL_SCORER_H_ +#ifndef EXTERNAL_SCORER_H_ +#define EXTERNAL_SCORER_H_ #include <vector> #include <string> diff --git a/mteval/levenshtein.h b/mteval/levenshtein.h new file mode 100644 index 00000000..3ae56cf5 --- /dev/null +++ b/mteval/levenshtein.h @@ -0,0 +1,29 @@ +#ifndef LEVENSHTEIN_H_ +#define LEVENSHTEIN_H_ + +namespace cdec { + +template <typename V> +inline unsigned LevenshteinDistance(const V& a, const V& b) { + const unsigned m = a.size(), n = b.size(); + std::vector<unsigned> edit((m + 1) * 2); + for (unsigned i = 0; i <= n; i++) { + for (unsigned j = 0; j <= m; j++) { + if (i == 0) + edit[j] = j; + else if (j == 0) + edit[(i % 2) * (m + 1)] = i; + else + edit[(i % 2) * (m + 1) + j] = std::min(std::min( + edit[(i % 2) * (m + 1) + j - 1] + 1, + edit[((i - 1) % 2) * (m + 1) + j] + 1), + edit[((i - 1) % 2) * (m + 1) + (j - 1)] + + (a[j - 1] == b[i - 1] ? 0 : 1)); + } + } + return edit[(n % 2) * (m + 1) + m]; +} + +} + +#endif diff --git a/mteval/ns.cc b/mteval/ns.cc index c1ea238b..075e0121 100644 --- a/mteval/ns.cc +++ b/mteval/ns.cc @@ -3,6 +3,7 @@ #include "ns_ext.h" #include "ns_comb.h" #include "ns_cer.h" +#include "ns_wer.h" #include "ns_ssk.h" #include <cstdio> @@ -285,6 +286,8 @@ EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) { m = new CombinationMetric(metric_id); } else if (metric_id == "CER") { m = new CERMetric; + } else if (metric_id == "WER") { + m = new WERMetric; } else { cerr << "Implement please: " << metric_id << endl; abort(); diff --git a/mteval/ns.h b/mteval/ns.h index 153bf0b8..f6329b65 100644 --- a/mteval/ns.h +++ b/mteval/ns.h @@ -1,5 +1,5 @@ -#ifndef _NS_H_ -#define _NS_H_ +#ifndef NS_H_ +#define NS_H_ #include <string> #include <vector> diff --git a/mteval/ns_cer.cc b/mteval/ns_cer.cc index a843d471..da6683b1 100644 --- a/mteval/ns_cer.cc +++ b/mteval/ns_cer.cc @@ -1,5 +1,6 @@ #include "ns_cer.h" #include "tdict.h" +#include "levenshtein.h" static const unsigned kNUMFIELDS = 2; static const unsigned kEDITDISTANCE = 0; @@ -13,27 +14,6 @@ unsigned CERMetric::SufficientStatisticsVectorSize() const { return 2; } -unsigned CERMetric::EditDistance(const std::string& hyp, - const std::string& ref) const { - const unsigned m = hyp.size(), n = ref.size(); - std::vector<unsigned> edit((m + 1) * 2); - for(unsigned i = 0; i < n + 1; i++) { - for(unsigned j = 0; j < m + 1; j++) { - if(i == 0) - edit[j] = j; - else if(j == 0) - edit[(i%2)*(m+1)] = i; - else - edit[(i%2)*(m+1) + j] = std::min(std::min(edit[(i%2)*(m+1) + j-1] + 1, - edit[((i-1)%2)*(m+1) + j] + 1), - edit[((i-1)%2)*(m+1) + (j-1)] - + (hyp[j-1] == ref[i-1] ? 0 : 1)); - - } - } - return edit[(n%2)*(m+1) + m]; -} - void CERMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp, const std::vector<std::vector<WordID> >& refs, SufficientStats* out) const { @@ -42,7 +22,7 @@ void CERMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp, float best_score = hyp_str.size(); for (size_t i = 0; i < refs.size(); ++i) { std::string ref_str(TD::GetString(refs[i])); - float score = EditDistance(hyp_str, ref_str); + float score = cdec::LevenshteinDistance(hyp_str, ref_str); if (score < best_score) { out->fields[kEDITDISTANCE] = score; out->fields[kCHARCOUNT] = ref_str.size(); @@ -50,6 +30,8 @@ void CERMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp, } } } + float CERMetric::ComputeScore(const SufficientStats& stats) const { return stats.fields[kEDITDISTANCE] / stats.fields[kCHARCOUNT]; } + diff --git a/mteval/ns_cer.h b/mteval/ns_cer.h index 9d211181..d9927f78 100644 --- a/mteval/ns_cer.h +++ b/mteval/ns_cer.h @@ -1,13 +1,10 @@ -#ifndef _NS_CER_H_ -#define _NS_CER_H_ +#ifndef NS_CER_H_ +#define NS_CER_H_ #include "ns.h" class CERMetric : public EvaluationMetric { friend class EvaluationMetric; - private: - unsigned EditDistance(const std::string& hyp, - const std::string& ref) const; protected: CERMetric() : EvaluationMetric("CER") {} diff --git a/mteval/ns_comb.h b/mteval/ns_comb.h index 140e7e6a..22cba169 100644 --- a/mteval/ns_comb.h +++ b/mteval/ns_comb.h @@ -1,5 +1,5 @@ -#ifndef _NS_COMB_H_ -#define _NS_COMB_H_ +#ifndef NS_COMB_H_ +#define NS_COMB_H_ #include "ns.h" diff --git a/mteval/ns_docscorer.h b/mteval/ns_docscorer.h index b3c28fc9..5feae2df 100644 --- a/mteval/ns_docscorer.h +++ b/mteval/ns_docscorer.h @@ -1,5 +1,5 @@ -#ifndef _NS_DOC_SCORER_H_ -#define _NS_DOC_SCORER_H_ +#ifndef NS_DOC_SCORER_H_ +#define NS_DOC_SCORER_H_ #include <vector> #include <string> diff --git a/mteval/ns_ext.h b/mteval/ns_ext.h index 78badb2e..77be14b9 100644 --- a/mteval/ns_ext.h +++ b/mteval/ns_ext.h @@ -1,5 +1,5 @@ -#ifndef _NS_EXTERNAL_SCORER_H_ -#define _NS_EXTERNAL_SCORER_H_ +#ifndef NS_EXTERNAL_SCORER_H_ +#define NS_EXTERNAL_SCORER_H_ #include "ns.h" diff --git a/mteval/ns_ssk.h b/mteval/ns_ssk.h index 0d418770..fdace6eb 100644 --- a/mteval/ns_ssk.h +++ b/mteval/ns_ssk.h @@ -1,5 +1,5 @@ -#ifndef _NS_SSK_H_ -#define _NS_SSK_H_ +#ifndef NS_SSK_H_ +#define NS_SSK_H_ #include "ns.h" diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h index c5c25413..cffd1bd7 100644 --- a/mteval/ns_ter.h +++ b/mteval/ns_ter.h @@ -1,5 +1,5 @@ -#ifndef _NS_TER_H_ -#define _NS_TER_H_ +#ifndef NS_TER_H_ +#define NS_TER_H_ #include "ns.h" diff --git a/mteval/ns_wer.cc b/mteval/ns_wer.cc new file mode 100644 index 00000000..f9b2bbbb --- /dev/null +++ b/mteval/ns_wer.cc @@ -0,0 +1,35 @@ +#include "ns_wer.h" +#include "tdict.h" +#include "levenshtein.h" + +static const unsigned kNUMFIELDS = 2; +static const unsigned kEDITDISTANCE = 0; +static const unsigned kCHARCOUNT = 1; + +bool WERMetric::IsErrorMetric() const { + return true; +} + +unsigned WERMetric::SufficientStatisticsVectorSize() const { + return 2; +} + +void WERMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp, + const std::vector<std::vector<WordID> >& refs, + SufficientStats* out) const { + out->fields.resize(kNUMFIELDS); + float best_score = hyp.size(); + for (size_t i = 0; i < refs.size(); ++i) { + float score = cdec::LevenshteinDistance(hyp, refs[i]); + if (score < best_score) { + out->fields[kEDITDISTANCE] = score; + out->fields[kCHARCOUNT] = refs[i].size(); + best_score = score; + } + } +} + +float WERMetric::ComputeScore(const SufficientStats& stats) const { + return stats.fields[kEDITDISTANCE] / stats.fields[kCHARCOUNT]; +} + diff --git a/mteval/ns_wer.h b/mteval/ns_wer.h new file mode 100644 index 00000000..45da70c5 --- /dev/null +++ b/mteval/ns_wer.h @@ -0,0 +1,20 @@ +#ifndef NS_WER_H_ +#define NS_WER_H_ + +#include "ns.h" + +class WERMetric : public EvaluationMetric { + friend class EvaluationMetric; + protected: + WERMetric() : EvaluationMetric("WER") {} + + public: + virtual bool IsErrorMetric() const; + virtual unsigned SufficientStatisticsVectorSize() const; + virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp, + const std::vector<std::vector<WordID> >& refs, + SufficientStats* out) const; + virtual float ComputeScore(const SufficientStats& stats) const; +}; + +#endif diff --git a/mteval/ter.h b/mteval/ter.h index 43314791..0758c6b6 100644 --- a/mteval/ter.h +++ b/mteval/ter.h @@ -1,5 +1,5 @@ -#ifndef _TER_H_ -#define _TER_H_ +#ifndef TER_H_ +#define TER_H_ #include "scorer.h" |