1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
#ifndef NS_H_
#define NS_H_
#include <string>
#include <vector>
#include <map>
#include <boost/shared_ptr.hpp>
#include "wordid.h"
#include <iostream>
// Overloads of Characterize(); only declared in this header, defined elsewhere.
// Single-sequence form: takes one WordID sequence by const reference and
// returns a WordID sequence by value.
// NOTE(review): the name suggests a word-to-character-level conversion -- confirm
// against the definition.
std::vector<WordID> Characterize(const std::vector<WordID>& reference);
// Multi-sequence form: takes a list of WordID sequences by const reference and
// returns a list of WordID sequences by value.
// NOTE(review): presumably applies the single-sequence form to each element -- confirm.
std::vector<std::vector<WordID> > Characterize(const std::vector<std::vector<WordID> >& references);
// A vector of float statistics (fields) tagged with an identifier string (id_).
// Supports element-wise accumulation, scaling, comparison and bounds-tolerant
// read access.  Both data members are public.
class SufficientStats {
 public:
  // Empty statistics with an empty id.
  SufficientStats() : id_() {}

  // Constructs from an encoded string; defined out of line.
  explicit SufficientStats(const std::string& encoded);

  // Constructs from an id and an initial list of field values.
  SufficientStats(const std::string& mid, const std::vector<float>& f) :
      id_(mid), fields(f) {}

  // Element-wise addition.  An empty id_ is replaced by delta's id_, and
  // fields is zero-extended when delta has more entries.
  SufficientStats& operator+=(const SufficientStats& delta) {
    if (id_.empty() && !delta.id_.empty())
      id_ = delta.id_;
    const size_t incoming = delta.fields.size();
    // Only growth matters: when fields is already the longer vector there is
    // nothing to resize.
    if (fields.size() < incoming)
      fields.resize(incoming);
    for (size_t k = 0; k != incoming; ++k)
      fields[k] += delta.fields[k];
    return *this;
  }

  // Element-wise subtraction with the same id_ and growth rules as operator+=.
  SufficientStats& operator-=(const SufficientStats& delta) {
    if (id_.empty() && !delta.id_.empty())
      id_ = delta.id_;
    const size_t incoming = delta.fields.size();
    if (fields.size() < incoming)
      fields.resize(incoming);
    for (size_t k = 0; k != incoming; ++k)
      fields[k] -= delta.fields[k];
    return *this;
  }

  // Multiplies every field by scalar.
  SufficientStats& operator*=(const double& scalar) {
    for (std::vector<float>::iterator it = fields.begin(); it != fields.end(); ++it)
      *it *= scalar;
    return *this;
  }

  // Divides every field by scalar.
  SufficientStats& operator/=(const double& scalar) {
    for (std::vector<float>::iterator it = fields.begin(); it != fields.end(); ++it)
      *it /= scalar;
    return *this;
  }

  // Equality compares the field vectors only; id_ is not taken into account.
  bool operator==(const SufficientStats& other) const {
    return fields == other.fields;
  }

  // True when every field is zero (trivially true when there are no fields).
  bool IsAdditiveIdentity() const {
    for (std::vector<float>::const_iterator it = fields.begin(); it != fields.end(); ++it) {
      if (*it != 0.0f)
        return false;
    }
    return true;
  }

  // Number of fields currently stored.
  size_t size() const { return fields.size(); }

  // Read access by index; indices at or past size() yield 0 instead of failing.
  float operator[](size_t i) const {
    return (i < fields.size()) ? fields[i] : 0.0f;
  }

  // Writes an encoded form of this object to *out; defined out of line.
  void Encode(std::string* out) const;

  // Exchanges both the id and the fields with other.
  void swap(SufficientStats& other) {
    fields.swap(other.fields);
    id_.swap(other.id_);
  }

  std::string id_;            // identifier tag; adopted from the operand by += / -= when empty
  std::vector<float> fields;  // the statistics themselves
};
// Returns the element-wise sum of a and b as a new object; neither operand is
// modified.  The id and size handling is that of SufficientStats::operator+=.
inline const SufficientStats operator+(const SufficientStats& a, const SufficientStats& b) {
  SufficientStats sum(a);
  sum += b;
  return sum;
}
// Returns the element-wise difference a - b as a new object; neither operand
// is modified.  The id and size handling is that of SufficientStats::operator-=.
inline const SufficientStats operator-(const SufficientStats& a, const SufficientStats& b) {
  SufficientStats difference(a);
  difference -= b;
  return difference;
}
// Abstract interface for evaluating a single hypothesis.  Implementations
// override Evaluate(), which receives the hypothesis as a WordID sequence and
// a SufficientStats object to write into.
// NOTE(review): presumably instances are created per segment from its
// references via EvaluationMetric::CreateSegmentEvaluator -- confirm.
struct SegmentEvaluator {
// Virtual destructor (declared here, defined elsewhere) so implementations can
// be destroyed through a base pointer.
virtual ~SegmentEvaluator();
// Evaluates hyp and stores the result in *out.  Pure virtual: every concrete
// evaluator must implement it.
virtual void Evaluate(const std::vector<WordID>& hyp, SufficientStats* out) const = 0;
std::string src; // this may not always be available
};
// Instructions for implementing a new metric
// To Instance(), add something that creates the metric
// Implement ComputeScore(const SufficientStats& stats) const;
// Implement ONE of the following:
// 1) void ComputeSufficientStatistics(const std::vector<WordID>& hyp, const std::vector<std::vector<WordID> >& refs, SufficientStats* out) const;
// 2) a new SegmentEvaluator class AND CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const;
// [The latter (#2) is only used when it is necessary to precompute per-segment data from a set of refs]
// OPTIONAL: Override SufficientStatisticsVectorSize() if it is easy to do so
// Abstract base class for evaluation metrics.  A metric turns SufficientStats
// into a score (ComputeScore) and provides ways to obtain those statistics for
// a hypothesis given its references.  All non-inline members are defined
// elsewhere.
class EvaluationMetric {
public:
// Returns a pointer to the metric identified by metric_id ("IBM_BLEU" when
// omitted).
// NOTE(review): presumably the returned object is cached in instances_ and
// owned by this class -- confirm against the definition.
static EvaluationMetric* Instance(const std::string& metric_id = "IBM_BLEU");
protected:
// Construction and destruction are protected, so objects can only be created
// and destroyed by derived classes or from within this class.
EvaluationMetric(const std::string& id) : name_(id) {}
virtual ~EvaluationMetric();
public:
// Returns the identifier string this metric was constructed with.
const std::string& MetricId() const { return name_; }
// returns true for metrics like WER and TER where lower scores are better
// false for metrics like BLEU and METEOR where higher scores are better
virtual bool IsErrorMetric() const;
// Size of the sufficient-statistics vector for this metric.
// NOTE(review): the value returned by the base implementation is not visible
// here -- check the definition before relying on it.
virtual unsigned SufficientStatisticsVectorSize() const;
// Computes the metric's score from accumulated statistics.  Pure virtual:
// every concrete metric must implement it.
virtual float ComputeScore(const SufficientStats& stats) const = 0;
// Returns a string describing the score for the given statistics.
// NOTE(review): format of the base implementation's output is not visible here.
virtual std::string DetailedScore(const SufficientStats& stats) const;
// Creates a SegmentEvaluator for the given set of reference translations and
// returns it in a shared pointer.
virtual boost::shared_ptr<SegmentEvaluator> CreateSegmentEvaluator(const std::vector<std::vector<WordID> >& refs) const;
// Computes the statistics for hypothesis hyp against references refs and
// stores them in *out.
virtual void ComputeSufficientStatistics(const std::vector<WordID>& hyp,
const std::vector<std::vector<WordID> >& refs,
SufficientStats* out) const;
private:
// Static map from a string key to a metric pointer.
// NOTE(review): presumably keyed by metric id and filled by Instance() -- confirm.
static std::map<std::string, EvaluationMetric*> instances_;
// Identifier supplied at construction; immutable afterwards.
const std::string name_;
};
#endif
|