diff options
author | Kenneth Heafield <github@kheafield.com> | 2012-08-03 07:46:54 -0400 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2012-08-03 07:46:54 -0400 |
commit | 122f46c31102b683eaab3ad81a3a98accbc694bb (patch) | |
tree | 8d499d789b159ebed25bb23b6983813d064a6296 /python/src/mteval.pxi | |
parent | ac664bdb0e481539cf77098a7dd0e1ec8d937ba0 (diff) | |
parent | 193d137056c3c4f73d66f8db84691d63307de894 (diff) |
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'python/src/mteval.pxi')
-rw-r--r-- | python/src/mteval.pxi | 192 |
1 files changed, 192 insertions, 0 deletions
diff --git a/python/src/mteval.pxi b/python/src/mteval.pxi new file mode 100644 index 00000000..cd1c3c81 --- /dev/null +++ b/python/src/mteval.pxi @@ -0,0 +1,192 @@ +cimport mteval + +cdef SufficientStats as_stats(x, y): + if isinstance(x, SufficientStats): + return x + elif x == 0 and isinstance(y, SufficientStats): + stats = SufficientStats() + stats.stats = new mteval.SufficientStats() + stats.metric = (<SufficientStats> y).metric + return stats + +cdef class Candidate: + cdef mteval.const_Candidate* candidate + cdef public float score + + property words: + def __get__(self): + return unicode(GetString(self.candidate.ewords).c_str(), encoding='utf8') + + property fmap: + def __get__(self): + cdef SparseVector fmap = SparseVector.__new__(SparseVector) + fmap.vector = new FastSparseVector[weight_t](self.candidate.fmap) + return fmap + +cdef class SufficientStats: + cdef mteval.SufficientStats* stats + cdef mteval.EvaluationMetric* metric + + def __dealloc__(self): + del self.stats + + property score: + def __get__(self): + return self.metric.ComputeScore(self.stats[0]) + + property detail: + def __get__(self): + return self.metric.DetailedScore(self.stats[0]).c_str() + + def __len__(self): + return self.stats.size() + + def __iter__(self): + for i in range(len(self)): + yield self[i] + + def __getitem__(self, int index): + if not 0 <= index < len(self): + raise IndexError('sufficient stats vector index out of range') + return self.stats[0][index] + + def __iadd__(SufficientStats self, SufficientStats other): + self.stats[0] += other.stats[0] + return self + + def __add__(x, y): + cdef SufficientStats sx = as_stats(x, y) + cdef SufficientStats sy = as_stats(y, x) + cdef SufficientStats result = SufficientStats() + result.stats = new mteval.SufficientStats(mteval.add(sx.stats[0], sy.stats[0])) + result.metric = sx.metric + return result + +cdef class CandidateSet: + cdef shared_ptr[mteval.SegmentEvaluator]* scorer + cdef mteval.EvaluationMetric* metric + cdef mteval.CandidateSet* cs + + def __cinit__(self, SegmentEvaluator evaluator): + self.scorer = new shared_ptr[mteval.SegmentEvaluator](evaluator.scorer[0]) + self.metric = evaluator.metric + self.cs = new mteval.CandidateSet() + + def __dealloc__(self): + del self.scorer + del self.cs + + def __len__(self): + return self.cs.size() + + def __getitem__(self,int k): + if not 0 <= k < self.cs.size(): + raise IndexError('candidate set index out of range') + cdef Candidate candidate = Candidate() + candidate.candidate = &self.cs[0][k] + candidate.score = self.metric.ComputeScore(self.cs[0][k].eval_feats) + return candidate + + def __iter__(self): + cdef unsigned i + for i in range(len(self)): + yield self[i] + + def add_kbest(self, Hypergraph hypergraph, unsigned k): + self.cs.AddKBestCandidates(hypergraph.hg[0], k, self.scorer.get()) + +cdef class SegmentEvaluator: + cdef shared_ptr[mteval.SegmentEvaluator]* scorer + cdef mteval.EvaluationMetric* metric + + def __dealloc__(self): + del self.scorer + + def evaluate(self, sentence): + cdef vector[WordID] hyp + cdef SufficientStats sf = SufficientStats() + sf.metric = self.metric + sf.stats = new mteval.SufficientStats() + ConvertSentence(string(as_str(sentence.strip())), &hyp) + self.scorer.get().Evaluate(hyp, sf.stats) + return sf + + def candidate_set(self): + return CandidateSet(self) + +cdef class Scorer: + cdef string* name + cdef mteval.EvaluationMetric* metric + + def __cinit__(self, bytes name=None): + if name: + self.name = new string(name) + self.metric = mteval.MetricInstance(self.name[0]) + + def __dealloc__(self): + del self.name + + def __call__(self, refs): + if isinstance(refs, unicode) or isinstance(refs, str): + refs = [refs] + cdef vector[vector[WordID]]* refsv = new vector[vector[WordID]]() + cdef vector[WordID]* refv + cdef bytes ref_str + for ref in refs: + refv = new vector[WordID]() + ConvertSentence(string(as_str(ref.strip())), refv) + refsv.push_back(refv[0]) + del refv + cdef unsigned i + cdef SegmentEvaluator evaluator = SegmentEvaluator() + evaluator.metric = self.metric + evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator]( + self.metric.CreateSegmentEvaluator(refsv[0])) + del refsv # in theory should not delete but store in SegmentEvaluator + return evaluator + + def __str__(self): + return self.name.c_str() + +cdef float _compute_score(void* metric_, mteval.SufficientStats* stats): + cdef Metric metric = <Metric> metric_ + cdef list ss = [] + cdef unsigned i + for i in range(stats.size()): + ss.append(stats[0][i]) + return metric.score(ss) + +cdef void _compute_sufficient_stats(void* metric_, + string* hyp, + vector[string]* refs, + mteval.SufficientStats* out): + cdef Metric metric = <Metric> metric_ + cdef list refs_ = [] + cdef unsigned i + for i in range(refs.size()): + refs_.append(refs[0][i].c_str()) + cdef list ss = metric.evaluate(hyp.c_str(), refs_) + out.fields.resize(len(ss)) + for i in range(len(ss)): + out.fields[i] = ss[i] + +cdef class Metric: + cdef Scorer scorer + def __cinit__(self): + self.scorer = Scorer() + self.scorer.name = new string(as_str(self.__class__.__name__)) + self.scorer.metric = mteval.PyMetricInstance(self.scorer.name[0], + <void*> self, _compute_sufficient_stats, _compute_score) + + def __call__(self, refs): + return self.scorer(refs) + + def score(SufficientStats stats): + return 0 + + def evaluate(self, hyp, refs): + return [] + +BLEU = Scorer('IBM_BLEU') +TER = Scorer('TER') +CER = Scorer('CER') |