summaryrefslogtreecommitdiff
path: root/python/src/mteval.pxi
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2012-08-03 07:46:54 -0400
committer Kenneth Heafield <github@kheafield.com> 2012-08-03 07:46:54 -0400
commit 122f46c31102b683eaab3ad81a3a98accbc694bb (patch)
tree 8d499d789b159ebed25bb23b6983813d064a6296 /python/src/mteval.pxi
parent ac664bdb0e481539cf77098a7dd0e1ec8d937ba0 (diff)
parent 193d137056c3c4f73d66f8db84691d63307de894 (diff)
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'python/src/mteval.pxi')
-rw-r--r-- python/src/mteval.pxi 192
1 files changed, 192 insertions, 0 deletions
diff --git a/python/src/mteval.pxi b/python/src/mteval.pxi
new file mode 100644
index 00000000..cd1c3c81
--- /dev/null
+++ b/python/src/mteval.pxi
@@ -0,0 +1,192 @@
+cimport mteval
+
+# Coerce one operand of an addition into a SufficientStats.
+#
+# x is the operand to convert; y is the other operand of the same addition.
+#   * If x already is a SufficientStats it is returned unchanged.
+#   * If x compares equal to 0 and y is a SufficientStats, a new
+#     default-constructed statistics vector bound to y's metric is returned
+#     (presumably so that ``0 + stats`` and built-in sum() work -- confirm).
+#   * Any other combination raises TypeError.  Previously the function fell
+#     off the end and returned None, which callers then dereferenced.
+cdef SufficientStats as_stats(x, y):
+    cdef SufficientStats stats
+    if isinstance(x, SufficientStats):
+        return x
+    if x == 0 and isinstance(y, SufficientStats):
+        stats = SufficientStats()
+        stats.stats = new mteval.SufficientStats()
+        stats.metric = (<SufficientStats> y).metric
+        return stats
+    raise TypeError('cannot combine %s and %s as sufficient statistics'
+                    % (type(x).__name__, type(y).__name__))
+
+cdef class Candidate:
+    """One entry of a CandidateSet, exposed to Python."""
+    # Borrowed pointer to the C++ candidate.  CandidateSet.__getitem__ points
+    # it at an element of the set's own storage and this class never frees it.
+    # NOTE(review): nothing here keeps the owning CandidateSet alive -- confirm
+    # callers do not use a Candidate after its set is gone or has been modified.
+    cdef mteval.const_Candidate* candidate
+    # Score assigned by whoever creates the object; readable/writable from Python.
+    cdef public float score
+
+    property words:
+        # The candidate's ``ewords`` rendered by GetString and decoded as UTF-8.
+        def __get__(self):
+            cdef string surface = GetString(self.candidate.ewords)
+            return unicode(surface.c_str(), encoding='utf8')
+
+    property fmap:
+        # A new SparseVector holding a copy of the candidate's feature map.
+        def __get__(self):
+            cdef SparseVector features = SparseVector.__new__(SparseVector)
+            features.vector = new FastSparseVector[weight_t](self.candidate.fmap)
+            return features
+
+cdef class SufficientStats:
+    """A vector of sufficient statistics plus the metric that scores it.
+
+    A freshly constructed instance has no storage attached: the code that
+    creates one (as_stats, __add__, SegmentEvaluator.evaluate) assigns both
+    ``stats`` and ``metric`` afterwards.  Accessors therefore check for the
+    unassigned state instead of dereferencing a NULL pointer.
+    """
+    # Owned C++ statistics vector; released in __dealloc__.
+    cdef mteval.SufficientStats* stats
+    # Metric used to score the statistics; never freed by this class.
+    cdef mteval.EvaluationMetric* metric
+
+    def __dealloc__(self):
+        # Deleting a NULL pointer is a no-op, so no guard is required.
+        del self.stats
+
+    property score:
+        # Score of these statistics as computed by the attached metric.
+        def __get__(self):
+            if self.stats == NULL or self.metric == NULL:
+                raise ValueError('sufficient stats are not initialised')
+            return self.metric.ComputeScore(self.stats[0])
+
+    property detail:
+        # The metric's detailed score string for these statistics.
+        def __get__(self):
+            if self.stats == NULL or self.metric == NULL:
+                raise ValueError('sufficient stats are not initialised')
+            return self.metric.DetailedScore(self.stats[0]).c_str()
+
+    def __len__(self):
+        """Return the number of fields; 0 when no storage is attached."""
+        if self.stats == NULL:
+            return 0
+        return self.stats.size()
+
+    def __iter__(self):
+        """Yield the fields in index order."""
+        for i in range(len(self)):
+            yield self[i]
+
+    def __getitem__(self, int index):
+        """Return field ``index``; raise IndexError when out of range."""
+        if not 0 <= index < len(self):
+            raise IndexError('sufficient stats vector index out of range')
+        return self.stats[0][index]
+
+    def __iadd__(SufficientStats self, SufficientStats other not None):
+        """Add ``other`` into this object in place and return self.
+
+        ``not None`` makes Cython reject None with a TypeError; a typed
+        argument otherwise accepts None, which was then dereferenced.
+        """
+        if self.stats == NULL or other.stats == NULL:
+            raise ValueError('sufficient stats are not initialised')
+        self.stats[0] += other.stats[0]
+        return self
+
+    def __add__(x, y):
+        """Return a new SufficientStats holding the sum of both operands.
+
+        Either operand may be the non-SufficientStats one (the method takes
+        ``x, y`` rather than ``self``), so both go through as_stats().  The
+        result carries the metric of the left operand after conversion.
+        """
+        cdef SufficientStats sx = as_stats(x, y)
+        cdef SufficientStats sy = as_stats(y, x)
+        cdef SufficientStats result
+        # as_stats may hand back None for operands it cannot convert; let
+        # Python report the unsupported operation instead of crashing.
+        if sx is None or sy is None:
+            return NotImplemented
+        if sx.stats == NULL or sy.stats == NULL:
+            raise ValueError('sufficient stats are not initialised')
+        result = SufficientStats()
+        result.stats = new mteval.SufficientStats(mteval.add(sx.stats[0], sy.stats[0]))
+        result.metric = sx.metric
+        return result
+
+cdef class CandidateSet:
+    """A collection of scored candidates built from k-best lists.
+
+    The set shares the C++ segment evaluator of the SegmentEvaluator it is
+    created from and uses that evaluator's metric to score its entries.
+    """
+    # Owned copy of the shared_ptr, so the C++ evaluator stays alive with us.
+    cdef shared_ptr[mteval.SegmentEvaluator]* scorer
+    # Metric copied from the evaluator; not freed here.
+    cdef mteval.EvaluationMetric* metric
+    # Owned C++ candidate set.
+    cdef mteval.CandidateSet* cs
+
+    def __cinit__(self, SegmentEvaluator evaluator not None):
+        # Reject None and evaluators that were never given a scorer; both
+        # would otherwise be dereferenced below.
+        if evaluator.scorer == NULL:
+            raise ValueError('segment evaluator is not initialised')
+        self.scorer = new shared_ptr[mteval.SegmentEvaluator](evaluator.scorer[0])
+        self.metric = evaluator.metric
+        self.cs = new mteval.CandidateSet()
+
+    def __dealloc__(self):
+        # Safe even if __cinit__ raised: deleting NULL is a no-op.
+        del self.scorer
+        del self.cs
+
+    def __len__(self):
+        """Return the number of candidates currently in the set."""
+        return self.cs.size()
+
+    def __getitem__(self, int k):
+        """Return candidate ``k`` with its metric score filled in.
+
+        Raises IndexError when ``k`` is outside ``0 <= k < len(self)``.
+        The returned Candidate points into this set's storage.
+        """
+        cdef Candidate candidate
+        if not 0 <= k < self.cs.size():
+            raise IndexError('candidate set index out of range')
+        candidate = Candidate()
+        candidate.candidate = &self.cs[0][k]
+        candidate.score = self.metric.ComputeScore(self.cs[0][k].eval_feats)
+        return candidate
+
+    def __iter__(self):
+        """Yield every candidate in index order."""
+        cdef unsigned i
+        for i in range(len(self)):
+            yield self[i]
+
+    def add_kbest(self, Hypergraph hypergraph not None, unsigned k):
+        """Add the ``k`` best candidates of ``hypergraph`` to the set."""
+        self.cs.AddKBestCandidates(hypergraph.hg[0], k, self.scorer.get())
+
+cdef class SegmentEvaluator:
+    """Evaluates hypotheses for one segment against its references.
+
+    Instances are created by Scorer.__call__, which assigns ``scorer`` and
+    ``metric`` after construction; an instance created directly from Python
+    has neither and cannot evaluate anything.
+    """
+    # Owned shared_ptr to the C++ segment evaluator; released in __dealloc__.
+    cdef shared_ptr[mteval.SegmentEvaluator]* scorer
+    # Metric that produced the evaluator; not freed here.
+    cdef mteval.EvaluationMetric* metric
+
+    def __dealloc__(self):
+        del self.scorer
+
+    def evaluate(self, sentence):
+        """Return the SufficientStats of ``sentence`` for this segment.
+
+        ``sentence`` is stripped and converted to word ids before being
+        passed to the C++ evaluator.  Raises ValueError when the evaluator
+        was never initialised (previously a NULL dereference).
+        """
+        cdef vector[WordID] hyp
+        cdef SufficientStats sf
+        if self.scorer == NULL:
+            raise ValueError('segment evaluator is not initialised')
+        sf = SufficientStats()
+        sf.metric = self.metric
+        sf.stats = new mteval.SufficientStats()
+        ConvertSentence(string(as_str(sentence.strip())), &hyp)
+        self.scorer.get().Evaluate(hyp, sf.stats)
+        return sf
+
+    def candidate_set(self):
+        """Return a new, empty CandidateSet bound to this evaluator."""
+        return CandidateSet(self)
+
+cdef class Scorer:
+    """A named evaluation metric that builds per-segment evaluators.
+
+    Calling a scorer with the reference translation(s) of a segment returns
+    a SegmentEvaluator for that segment.
+    """
+    # Owned copy of the metric name; NULL when constructed without a name.
+    cdef string* name
+    # Metric instance obtained for ``name``; NULL when constructed without one.
+    cdef mteval.EvaluationMetric* metric
+
+    def __cinit__(self, bytes name=None):
+        # Without a name both pointers stay NULL; Metric.__cinit__ relies on
+        # this and assigns them itself afterwards.
+        if name:
+            self.name = new string(name)
+            self.metric = mteval.MetricInstance(self.name[0])
+
+    def __dealloc__(self):
+        del self.name
+
+    def __call__(self, refs):
+        """Return a SegmentEvaluator for the given reference(s).
+
+        ``refs`` is either a single string or an iterable of strings; each
+        reference is stripped and converted to word ids.  Raises ValueError
+        when the scorer has no metric attached.
+        """
+        # Stack-allocated containers are destroyed automatically, including
+        # when converting a reference raises (the heap-allocated originals
+        # leaked in that case).
+        cdef vector[vector[WordID]] refsv
+        cdef vector[WordID] refv
+        cdef SegmentEvaluator evaluator
+        if self.metric == NULL:
+            raise ValueError('scorer has no metric')
+        if isinstance(refs, (unicode, str)):
+            refs = [refs]
+        for ref in refs:
+            # Start from an empty vector for every reference, as the original
+            # per-iteration allocation did.
+            refv.clear()
+            ConvertSentence(string(as_str(ref.strip())), &refv)
+            refsv.push_back(refv)
+        evaluator = SegmentEvaluator()
+        evaluator.metric = self.metric
+        evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator](
+            self.metric.CreateSegmentEvaluator(refsv))
+        # NOTE(review): as before, the reference vectors do not outlive this
+        # call; in theory they should be stored in the SegmentEvaluator in
+        # case the C++ evaluator keeps a reference to them -- confirm.
+        return evaluator
+
+    def __str__(self):
+        """Return the metric name, or an empty string when none was set."""
+        if self.name == NULL:
+            return ''
+        return self.name.c_str()
+
+# Scoring callback handed to mteval.PyMetricInstance by Metric.__cinit__.
+# metric_ is the Metric object passed as an opaque pointer; stats is the
+# statistics vector to score.  The fields are copied into a Python list and
+# passed to the Metric's score() method, whose result is returned as a float.
+cdef float _compute_score(void* metric_, mteval.SufficientStats* stats):
+    cdef Metric py_metric = <Metric> metric_
+    cdef list values = []
+    cdef unsigned idx
+    for idx in range(stats.size()):
+        values.append(stats[0][idx])
+    return py_metric.score(values)
+
+# Statistics callback handed to mteval.PyMetricInstance by Metric.__cinit__.
+# metric_ is the Metric object passed as an opaque pointer.  The hypothesis
+# and the references are passed to the Metric's evaluate() method as Python
+# strings; the list it returns is copied into out.fields, which is resized
+# to match.
+cdef void _compute_sufficient_stats(void* metric_,
+        string* hyp,
+        vector[string]* refs,
+        mteval.SufficientStats* out):
+    cdef Metric py_metric = <Metric> metric_
+    cdef list ref_strings = []
+    cdef list values
+    cdef unsigned idx
+    for idx in range(refs.size()):
+        ref_strings.append(refs[0][idx].c_str())
+    values = py_metric.evaluate(hyp.c_str(), ref_strings)
+    out.fields.resize(len(values))
+    for idx in range(len(values)):
+        out.fields[idx] = values[idx]
+
+cdef class Metric:
+    """Base class for evaluation metrics implemented in Python.
+
+    Subclasses override evaluate() and score().  The instance registers
+    itself with mteval.PyMetricInstance under its class name and is then
+    used like a Scorer: calling it with references returns a
+    SegmentEvaluator.
+    """
+    # Scorer wrapping the C++ metric that calls back into this object.
+    cdef Scorer scorer
+
+    def __cinit__(self):
+        self.scorer = Scorer()
+        self.scorer.name = new string(as_str(self.__class__.__name__))
+        # ``self`` is passed as a raw pointer without taking a reference.
+        # NOTE(review): confirm the C++ metric never outlives this object.
+        self.scorer.metric = mteval.PyMetricInstance(self.scorer.name[0],
+                <void*> self, _compute_sufficient_stats, _compute_score)
+
+    def __call__(self, refs):
+        """Return a SegmentEvaluator for ``refs`` (see Scorer.__call__)."""
+        return self.scorer(refs)
+
+    def score(self, stats):
+        """Return the score for ``stats``, a list of sufficient statistics.
+
+        The base implementation returns 0.  ``self`` was missing from the
+        original signature, so the method could not be called the way
+        _compute_score calls it (``metric.score(ss)`` with a plain list).
+        """
+        return 0
+
+    def evaluate(self, hyp, refs):
+        """Return the sufficient statistics of ``hyp`` against ``refs``.
+
+        The result is a list of numbers; the base implementation returns
+        an empty list.
+        """
+        return []
+
+# Module-level scorers; each name is passed to mteval.MetricInstance by
+# Scorer.__cinit__ to obtain the corresponding metric.
+BLEU = Scorer('IBM_BLEU')
+TER = Scorer('TER')
+CER = Scorer('CER')