diff options
Diffstat (limited to 'python/src/mteval.pxi')
-rw-r--r-- | python/src/mteval.pxi | 198 |
1 files changed, 0 insertions, 198 deletions
diff --git a/python/src/mteval.pxi b/python/src/mteval.pxi deleted file mode 100644 index 436a1e01..00000000 --- a/python/src/mteval.pxi +++ /dev/null @@ -1,198 +0,0 @@ -cimport mteval - -cdef SufficientStats as_stats(x, y): - if isinstance(x, SufficientStats): - return x - elif x == 0 and isinstance(y, SufficientStats): - stats = SufficientStats() - stats.stats = new mteval.SufficientStats() - stats.metric = (<SufficientStats> y).metric - return stats - -cdef class Candidate: - cdef mteval.const_Candidate* candidate - cdef public float score - - property words: - def __get__(self): - return unicode(GetString(self.candidate.ewords).c_str(), encoding='utf8') - - property fmap: - def __get__(self): - cdef SparseVector fmap = SparseVector.__new__(SparseVector) - fmap.vector = new FastSparseVector[weight_t](self.candidate.fmap) - return fmap - -cdef class SufficientStats: - cdef mteval.SufficientStats* stats - cdef mteval.EvaluationMetric* metric - - def __dealloc__(self): - del self.stats - - property score: - def __get__(self): - return self.metric.ComputeScore(self.stats[0]) - - property detail: - def __get__(self): - return str(self.metric.DetailedScore(self.stats[0]).c_str()) - - def __len__(self): - return self.stats.size() - - def __iter__(self): - for i in range(len(self)): - yield self[i] - - def __getitem__(self, int index): - if not 0 <= index < len(self): - raise IndexError('sufficient stats vector index out of range') - return self.stats[0][index] - - def __iadd__(SufficientStats self, SufficientStats other): - self.stats[0] += other.stats[0] - return self - - def __add__(x, y): - cdef SufficientStats sx = as_stats(x, y) - cdef SufficientStats sy = as_stats(y, x) - cdef SufficientStats result = SufficientStats() - result.stats = new mteval.SufficientStats(mteval.add(sx.stats[0], sy.stats[0])) - result.metric = sx.metric - return result - -cdef class CandidateSet: - cdef shared_ptr[mteval.SegmentEvaluator]* scorer - cdef mteval.EvaluationMetric* metric - cdef mteval.CandidateSet* cs - - def __cinit__(self, SegmentEvaluator evaluator): - self.scorer = new shared_ptr[mteval.SegmentEvaluator](evaluator.scorer[0]) - self.metric = evaluator.metric - self.cs = new mteval.CandidateSet() - - def __dealloc__(self): - del self.scorer - del self.cs - - def __len__(self): - return self.cs.size() - - def __getitem__(self,int k): - if not 0 <= k < self.cs.size(): - raise IndexError('candidate set index out of range') - cdef Candidate candidate = Candidate() - candidate.candidate = &self.cs[0][k] - candidate.score = self.metric.ComputeScore(self.cs[0][k].eval_feats) - return candidate - - def __iter__(self): - cdef unsigned i - for i in range(len(self)): - yield self[i] - - def add_kbest(self, Hypergraph hypergraph, unsigned k): - """cs.add_kbest(Hypergraph hypergraph, int k) -> Extract K-best hypotheses - from the hypergraph and add them to the candidate set.""" - self.cs.AddKBestCandidates(hypergraph.hg[0], k, self.scorer.get()) - -cdef class SegmentEvaluator: - cdef shared_ptr[mteval.SegmentEvaluator]* scorer - cdef mteval.EvaluationMetric* metric - - def __dealloc__(self): - del self.scorer - - def evaluate(self, sentence): - """se.evaluate(sentence) -> SufficientStats for the given hypothesis.""" - cdef vector[WordID] hyp - cdef SufficientStats sf = SufficientStats() - sf.metric = self.metric - sf.stats = new mteval.SufficientStats() - ConvertSentence(as_str(sentence.strip()), &hyp) - self.scorer.get().Evaluate(hyp, sf.stats) - return sf - - def candidate_set(self): - """se.candidate_set() -> Candidate set using this segment evaluator for scoring.""" - return CandidateSet(self) - -cdef class Scorer: - cdef string* name - cdef mteval.EvaluationMetric* metric - - def __cinit__(self, bytes name=None): - if name: - self.name = new string(name) - self.metric = mteval.MetricInstance(self.name[0]) - - def __dealloc__(self): - del self.name - - def __call__(self, refs): - if isinstance(refs, basestring): - refs = [refs] - cdef vector[vector[WordID]]* refsv = new vector[vector[WordID]]() - cdef vector[WordID]* refv - for ref in refs: - refv = new vector[WordID]() - ConvertSentence(as_str(ref.strip()), refv) - refsv.push_back(refv[0]) - del refv - cdef unsigned i - cdef SegmentEvaluator evaluator = SegmentEvaluator() - evaluator.metric = self.metric - evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator]( - self.metric.CreateSegmentEvaluator(refsv[0])) - del refsv # in theory should not delete but store in SegmentEvaluator - return evaluator - - def __str__(self): - return str(self.name.c_str()) - -cdef float _compute_score(void* metric_, mteval.SufficientStats* stats): - cdef Metric metric = <Metric> metric_ - cdef list ss = [] - cdef unsigned i - for i in range(stats.size()): - ss.append(stats[0][i]) - return metric.score(ss) - -cdef void _compute_sufficient_stats(void* metric_, - string* hyp, - vector[string]* refs, - mteval.SufficientStats* out): - cdef Metric metric = <Metric> metric_ - cdef list refs_ = [] - cdef unsigned i - for i in range(refs.size()): - refs_.append(str(refs[0][i].c_str())) - cdef list ss = metric.evaluate(str(hyp.c_str()), refs_) - out.fields.resize(len(ss)) - for i in range(len(ss)): - out.fields[i] = ss[i] - -cdef class Metric: - cdef Scorer scorer - def __cinit__(self): - self.scorer = Scorer() - cdef bytes class_name = self.__class__.__name__ - self.scorer.name = new string(class_name) - self.scorer.metric = mteval.PyMetricInstance(self.scorer.name[0], - <void*> self, _compute_sufficient_stats, _compute_score) - - def __call__(self, refs): - return self.scorer(refs) - - def score(SufficientStats stats): - return 0 - - def evaluate(self, hyp, refs): - return [] - -BLEU = Scorer('IBM_BLEU') -QCRI = Scorer('QCRI_BLEU') -TER = Scorer('TER') -CER = Scorer('CER') -SSK = Scorer('SSK') |