cimport mteval cdef SufficientStats as_stats(x, y): if isinstance(x, SufficientStats): return x elif x == 0 and isinstance(y, SufficientStats): stats = SufficientStats() stats.stats = new mteval.SufficientStats() stats.metric = ( y).metric return stats cdef class Candidate: cdef mteval.const_Candidate* candidate cdef public float score property words: def __get__(self): return unicode(GetString(self.candidate.ewords).c_str(), encoding='utf8') property fmap: def __get__(self): cdef SparseVector fmap = SparseVector.__new__(SparseVector) fmap.vector = new FastSparseVector[weight_t](self.candidate.fmap) return fmap cdef class SufficientStats: cdef mteval.SufficientStats* stats cdef mteval.EvaluationMetric* metric def __dealloc__(self): del self.stats property score: def __get__(self): return self.metric.ComputeScore(self.stats[0]) property detail: def __get__(self): return self.metric.DetailedScore(self.stats[0]).c_str() def __len__(self): return self.stats.size() def __iter__(self): for i in range(len(self)): yield self[i] def __getitem__(self, int index): if not 0 <= index < len(self): raise IndexError('sufficient stats vector index out of range') return self.stats[0][index] def __iadd__(SufficientStats self, SufficientStats other): self.stats[0] += other.stats[0] return self def __add__(x, y): cdef SufficientStats sx = as_stats(x, y) cdef SufficientStats sy = as_stats(y, x) cdef SufficientStats result = SufficientStats() result.stats = new mteval.SufficientStats(mteval.add(sx.stats[0], sy.stats[0])) result.metric = sx.metric return result cdef class CandidateSet: cdef shared_ptr[mteval.SegmentEvaluator]* scorer cdef mteval.EvaluationMetric* metric cdef mteval.CandidateSet* cs def __cinit__(self, SegmentEvaluator evaluator): self.scorer = new shared_ptr[mteval.SegmentEvaluator](evaluator.scorer[0]) self.metric = evaluator.metric self.cs = new mteval.CandidateSet() def __dealloc__(self): del self.scorer del self.cs def __len__(self): return self.cs.size() def __getitem__(self,int k): if not 0 <= k < self.cs.size(): raise IndexError('candidate set index out of range') cdef Candidate candidate = Candidate() candidate.candidate = &self.cs[0][k] candidate.score = self.metric.ComputeScore(self.cs[0][k].eval_feats) return candidate def __iter__(self): cdef unsigned i for i in range(len(self)): yield self[i] def add_kbest(self, Hypergraph hypergraph, unsigned k): self.cs.AddKBestCandidates(hypergraph.hg[0], k, self.scorer.get()) cdef class SegmentEvaluator: cdef shared_ptr[mteval.SegmentEvaluator]* scorer cdef mteval.EvaluationMetric* metric def __dealloc__(self): del self.scorer def evaluate(self, sentence): cdef vector[WordID] hyp cdef SufficientStats sf = SufficientStats() sf.metric = self.metric sf.stats = new mteval.SufficientStats() ConvertSentence(string(as_str(sentence.strip())), &hyp) self.scorer.get().Evaluate(hyp, sf.stats) return sf def candidate_set(self): return CandidateSet(self) cdef class Scorer: cdef string* name cdef mteval.EvaluationMetric* metric def __cinit__(self, bytes name=None): if name: self.name = new string(name) self.metric = mteval.MetricInstance(self.name[0]) def __dealloc__(self): del self.name def __call__(self, refs): if isinstance(refs, unicode) or isinstance(refs, str): refs = [refs] cdef vector[vector[WordID]]* refsv = new vector[vector[WordID]]() cdef vector[WordID]* refv cdef bytes ref_str for ref in refs: refv = new vector[WordID]() ConvertSentence(string(as_str(ref.strip())), refv) refsv.push_back(refv[0]) del refv cdef unsigned i cdef SegmentEvaluator evaluator = SegmentEvaluator() evaluator.metric = self.metric evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator]( self.metric.CreateSegmentEvaluator(refsv[0])) del refsv # in theory should not delete but store in SegmentEvaluator return evaluator def __str__(self): return self.name.c_str() cdef float _compute_score(void* metric_, mteval.SufficientStats* stats): cdef Metric metric = metric_ cdef list ss = [] cdef unsigned i for i in range(stats.size()): ss.append(stats[0][i]) return metric.score(ss) cdef void _compute_sufficient_stats(void* metric_, string* hyp, vector[string]* refs, mteval.SufficientStats* out): cdef Metric metric = metric_ cdef list refs_ = [] cdef unsigned i for i in range(refs.size()): refs_.append(refs[0][i].c_str()) cdef list ss = metric.evaluate(hyp.c_str(), refs_) out.fields.resize(len(ss)) for i in range(len(ss)): out.fields[i] = ss[i] cdef class Metric: cdef Scorer scorer def __cinit__(self): self.scorer = Scorer() self.scorer.name = new string(as_str(self.__class__.__name__)) self.scorer.metric = mteval.PyMetricInstance(self.scorer.name[0], self, _compute_sufficient_stats, _compute_score) def __call__(self, refs): return self.scorer(refs) def score(SufficientStats stats): return 0 def evaluate(self, hyp, refs): return [] BLEU = Scorer('IBM_BLEU') TER = Scorer('TER') CER = Scorer('CER')