diff options
Diffstat (limited to 'mteval')
| -rw-r--r-- | mteval/Makefile.am | 3 | ||||
| -rw-r--r-- | mteval/aer_scorer.h | 4 | ||||
| -rw-r--r-- | mteval/comb_scorer.h | 4 | ||||
| -rw-r--r-- | mteval/external_scorer.h | 4 | ||||
| -rw-r--r-- | mteval/levenshtein.h | 29 | ||||
| -rw-r--r-- | mteval/ns.cc | 3 | ||||
| -rw-r--r-- | mteval/ns.h | 4 | ||||
| -rw-r--r-- | mteval/ns_cer.cc | 26 | ||||
| -rw-r--r-- | mteval/ns_cer.h | 7 | ||||
| -rw-r--r-- | mteval/ns_comb.h | 4 | ||||
| -rw-r--r-- | mteval/ns_docscorer.h | 4 | ||||
| -rw-r--r-- | mteval/ns_ext.h | 4 | ||||
| -rw-r--r-- | mteval/ns_ssk.h | 4 | ||||
| -rw-r--r-- | mteval/ns_ter.h | 4 | ||||
| -rw-r--r-- | mteval/ns_wer.cc | 35 | ||||
| -rw-r--r-- | mteval/ns_wer.h | 20 | ||||
| -rw-r--r-- | mteval/ter.h | 4 | 
17 files changed, 116 insertions, 47 deletions
| diff --git a/mteval/Makefile.am b/mteval/Makefile.am index c833eb01..aac3e6b5 100644 --- a/mteval/Makefile.am +++ b/mteval/Makefile.am @@ -14,6 +14,7 @@ libmteval_a_SOURCES = \    aer_scorer.h \    comb_scorer.h \    external_scorer.h \ +  levenshtein.h \    ns.h \    ns_cer.h \    ns_comb.h \ @@ -21,6 +22,7 @@ libmteval_a_SOURCES = \    ns_ext.h \    ns_ssk.h \    ns_ter.h \ +  ns_wer.h \    scorer.h \    ter.h \    aer_scorer.cc \ @@ -34,6 +36,7 @@ libmteval_a_SOURCES = \    ns_ext.cc \    ns_ssk.cc \    ns_ter.cc \ +  ns_wer.cc \    scorer.cc \    ter.cc diff --git a/mteval/aer_scorer.h b/mteval/aer_scorer.h index 6d53d359..cd1238f3 100644 --- a/mteval/aer_scorer.h +++ b/mteval/aer_scorer.h @@ -1,5 +1,5 @@ -#ifndef _AER_SCORER_ -#define _AER_SCORER_ +#ifndef AER_SCORER_ +#define AER_SCORER_  #include <boost/shared_ptr.hpp> diff --git a/mteval/comb_scorer.h b/mteval/comb_scorer.h index 346be576..d17d089d 100644 --- a/mteval/comb_scorer.h +++ b/mteval/comb_scorer.h @@ -1,5 +1,5 @@ -#ifndef _COMB_SCORER_ -#define _COMB_SCORER_ +#ifndef COMB_SCORER_H_ +#define COMB_SCORER_H_  #include "scorer.h" diff --git a/mteval/external_scorer.h b/mteval/external_scorer.h index 85535655..9565d5af 100644 --- a/mteval/external_scorer.h +++ b/mteval/external_scorer.h @@ -1,5 +1,5 @@ -#ifndef _EXTERNAL_SCORER_H_ -#define _EXTERNAL_SCORER_H_ +#ifndef EXTERNAL_SCORER_H_ +#define EXTERNAL_SCORER_H_  #include <vector>  #include <string> diff --git a/mteval/levenshtein.h b/mteval/levenshtein.h new file mode 100644 index 00000000..3ae56cf5 --- /dev/null +++ b/mteval/levenshtein.h @@ -0,0 +1,29 @@ +#ifndef LEVENSHTEIN_H_ +#define LEVENSHTEIN_H_ + +namespace cdec { + +template <typename V> +inline unsigned LevenshteinDistance(const V& a, const V& b) { +  const unsigned m = a.size(), n = b.size(); +  std::vector<unsigned> edit((m + 1) * 2); +  for (unsigned i = 0; i <= n; i++) { +    for (unsigned j = 0; j <= m; j++) { +      if (i == 0) +        edit[j] = j; +      else if (j == 0) +        edit[(i % 2) * (m + 1)] = i; +      else +        edit[(i % 2) * (m + 1) + j] = std::min(std::min( +                                edit[(i % 2) * (m + 1) + j - 1] + 1, +                                edit[((i - 1) % 2) * (m + 1) + j] + 1), +                                edit[((i - 1) % 2) * (m + 1) + (j - 1)]  +                                    + (a[j - 1] == b[i - 1] ? 0 : 1)); +    } +  } +  return edit[(n % 2) * (m + 1) + m]; +} + +} + +#endif diff --git a/mteval/ns.cc b/mteval/ns.cc index c1ea238b..075e0121 100644 --- a/mteval/ns.cc +++ b/mteval/ns.cc @@ -3,6 +3,7 @@  #include "ns_ext.h"  #include "ns_comb.h"  #include "ns_cer.h" +#include "ns_wer.h"  #include "ns_ssk.h"  #include <cstdio> @@ -285,6 +286,8 @@ EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) {        m = new CombinationMetric(metric_id);      } else if (metric_id == "CER") {        m = new CERMetric; +    } else if (metric_id == "WER") { +      m = new WERMetric;      } else {        cerr << "Implement please: " << metric_id << endl;        abort(); diff --git a/mteval/ns.h b/mteval/ns.h index 153bf0b8..f6329b65 100644 --- a/mteval/ns.h +++ b/mteval/ns.h @@ -1,5 +1,5 @@ -#ifndef _NS_H_ -#define _NS_H_ +#ifndef NS_H_ +#define NS_H_  #include <string>  #include <vector> diff --git a/mteval/ns_cer.cc b/mteval/ns_cer.cc index a843d471..da6683b1 100644 --- a/mteval/ns_cer.cc +++ b/mteval/ns_cer.cc @@ -1,5 +1,6 @@  #include "ns_cer.h"  #include "tdict.h" +#include "levenshtein.h"  static const unsigned kNUMFIELDS = 2;  static const unsigned kEDITDISTANCE = 0; @@ -13,27 +14,6 @@ unsigned CERMetric::SufficientStatisticsVectorSize() const {    return 2;  } -unsigned CERMetric::EditDistance(const std::string& hyp, -                                 const std::string& ref) const { -  const unsigned m = hyp.size(), n = ref.size(); -  std::vector<unsigned> edit((m + 1) * 2); -  for(unsigned i = 0; i < n + 1; i++) { -    for(unsigned j = 0; j < m + 1; j++) { -      if(i == 0) -        edit[j] = j; -      else if(j == 0) -        edit[(i%2)*(m+1)] = i; -      else -        edit[(i%2)*(m+1) + j] = std::min(std::min(edit[(i%2)*(m+1) + j-1] + 1, -                                                   edit[((i-1)%2)*(m+1) + j] + 1), -                                                   edit[((i-1)%2)*(m+1) + (j-1)]  -                                                   + (hyp[j-1] == ref[i-1] ? 0 : 1)); -       -    } -  } -  return edit[(n%2)*(m+1) + m]; -} -  void CERMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp,                                              const std::vector<std::vector<WordID> >& refs,                                              SufficientStats* out) const { @@ -42,7 +22,7 @@ void CERMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp,    float best_score = hyp_str.size();    for (size_t i = 0; i < refs.size(); ++i) {      std::string ref_str(TD::GetString(refs[i])); -    float score = EditDistance(hyp_str, ref_str); +    float score = cdec::LevenshteinDistance(hyp_str, ref_str);      if (score < best_score) {        out->fields[kEDITDISTANCE] = score;        out->fields[kCHARCOUNT] = ref_str.size(); @@ -50,6 +30,8 @@ void CERMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp,      }    }  } +  float CERMetric::ComputeScore(const SufficientStats& stats) const {    return stats.fields[kEDITDISTANCE] / stats.fields[kCHARCOUNT];  } + diff --git a/mteval/ns_cer.h b/mteval/ns_cer.h index 9d211181..d9927f78 100644 --- a/mteval/ns_cer.h +++ b/mteval/ns_cer.h @@ -1,13 +1,10 @@ -#ifndef _NS_CER_H_ -#define _NS_CER_H_ +#ifndef NS_CER_H_ +#define NS_CER_H_  #include "ns.h"  class CERMetric : public EvaluationMetric {    friend class EvaluationMetric; - private: -  unsigned EditDistance(const std::string& hyp, -                        const std::string& ref) const;   protected:    CERMetric() : EvaluationMetric("CER") {} diff --git a/mteval/ns_comb.h b/mteval/ns_comb.h index 140e7e6a..22cba169 100644 --- a/mteval/ns_comb.h +++ b/mteval/ns_comb.h @@ -1,5 +1,5 @@ -#ifndef _NS_COMB_H_ -#define _NS_COMB_H_ +#ifndef NS_COMB_H_ +#define NS_COMB_H_  #include "ns.h" diff --git a/mteval/ns_docscorer.h b/mteval/ns_docscorer.h index b3c28fc9..5feae2df 100644 --- a/mteval/ns_docscorer.h +++ b/mteval/ns_docscorer.h @@ -1,5 +1,5 @@ -#ifndef _NS_DOC_SCORER_H_ -#define _NS_DOC_SCORER_H_ +#ifndef NS_DOC_SCORER_H_ +#define NS_DOC_SCORER_H_  #include <vector>  #include <string> diff --git a/mteval/ns_ext.h b/mteval/ns_ext.h index 78badb2e..77be14b9 100644 --- a/mteval/ns_ext.h +++ b/mteval/ns_ext.h @@ -1,5 +1,5 @@ -#ifndef _NS_EXTERNAL_SCORER_H_ -#define _NS_EXTERNAL_SCORER_H_ +#ifndef NS_EXTERNAL_SCORER_H_ +#define NS_EXTERNAL_SCORER_H_  #include "ns.h" diff --git a/mteval/ns_ssk.h b/mteval/ns_ssk.h index 0d418770..fdace6eb 100644 --- a/mteval/ns_ssk.h +++ b/mteval/ns_ssk.h @@ -1,5 +1,5 @@ -#ifndef _NS_SSK_H_ -#define _NS_SSK_H_ +#ifndef NS_SSK_H_ +#define NS_SSK_H_  #include "ns.h" diff --git a/mteval/ns_ter.h b/mteval/ns_ter.h index c5c25413..cffd1bd7 100644 --- a/mteval/ns_ter.h +++ b/mteval/ns_ter.h @@ -1,5 +1,5 @@ -#ifndef _NS_TER_H_ -#define _NS_TER_H_ +#ifndef NS_TER_H_ +#define NS_TER_H_  #include "ns.h" diff --git a/mteval/ns_wer.cc b/mteval/ns_wer.cc new file mode 100644 index 00000000..f9b2bbbb --- /dev/null +++ b/mteval/ns_wer.cc @@ -0,0 +1,35 @@ +#include "ns_wer.h" +#include "tdict.h" +#include "levenshtein.h" + +static const unsigned kNUMFIELDS = 2; +static const unsigned kEDITDISTANCE = 0; +static const unsigned kCHARCOUNT = 1; + +bool WERMetric::IsErrorMetric() const { +  return true; +} + +unsigned WERMetric::SufficientStatisticsVectorSize() const { +  return 2; +} + +void WERMetric::ComputeSufficientStatistics(const std::vector<WordID>& hyp, +                                            const std::vector<std::vector<WordID> >& refs, +                                            SufficientStats* out) const { +  out->fields.resize(kNUMFIELDS); +  float best_score = hyp.size(); +  for (size_t i = 0; i < refs.size(); ++i) { +    float score = cdec::LevenshteinDistance(hyp, refs[i]); +    if (score < best_score) { +      out->fields[kEDITDISTANCE] = score; +      out->fields[kCHARCOUNT] = refs[i].size(); +      best_score = score; +    } +  } +} + +float WERMetric::ComputeScore(const SufficientStats& stats) const { +  return stats.fields[kEDITDISTANCE] / stats.fields[kCHARCOUNT]; +} + diff --git a/mteval/ns_wer.h b/mteval/ns_wer.h new file mode 100644 index 00000000..45da70c5 --- /dev/null +++ b/mteval/ns_wer.h @@ -0,0 +1,20 @@ +#ifndef NS_WER_H_ +#define NS_WER_H_ + +#include "ns.h" + +class WERMetric : public EvaluationMetric { +  friend class EvaluationMetric; + protected: +  WERMetric() : EvaluationMetric("WER") {} + + public: +  virtual bool IsErrorMetric() const; +  virtual unsigned SufficientStatisticsVectorSize() const; +  virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp, +                                           const std::vector<std::vector<WordID> >& refs, +                                           SufficientStats* out) const; +  virtual float ComputeScore(const SufficientStats& stats) const; +}; + +#endif diff --git a/mteval/ter.h b/mteval/ter.h index 43314791..0758c6b6 100644 --- a/mteval/ter.h +++ b/mteval/ter.h @@ -1,5 +1,5 @@ -#ifndef _TER_H_ -#define _TER_H_ +#ifndef TER_H_ +#define TER_H_  #include "scorer.h" | 
