summaryrefslogtreecommitdiff
path: root/python/src/mteval.pxi
blob: dabdf927157784e729ae81633fb7af718aca4711 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
cimport mteval

cdef char* as_str(sentence, error_msg='Cannot convert type %s to str'):
    cdef bytes ret
    if isinstance(sentence, unicode):
        ret = sentence.encode('utf8')
    elif isinstance(sentence, str):
        ret = sentence
    else:
        raise TypeError(error_msg % type(sentence))
    return ret

cdef class Candidate:
    cdef mteval.Candidate* candidate
    cdef public float score

    property words:
        def __get__(self):
            return unicode(GetString(self.candidate.ewords).c_str(), encoding='utf8')

    property fmap:
        def __get__(self):
            cdef SparseVector fmap = SparseVector()
            fmap.vector = new FastSparseVector[weight_t](self.candidate.fmap)
            return fmap

cdef class SufficientStats:
    cdef mteval.SufficientStats* stats
    cdef mteval.EvaluationMetric* metric

    def __dealloc__(self):
        del self.stats

    property score:
        def __get__(self):
            return self.metric.ComputeScore(self.stats[0])

    property detail:
        def __get__(self):
            return self.metric.DetailedScore(self.stats[0]).c_str()

    def __len__(self):
        return self.stats.size()

    def __iter__(self):
        for i in range(len(self)):
            yield self.stats[0][i]

    def __iadd__(SufficientStats self, SufficientStats other):
        self.stats[0] += other.stats[0]
        return self

    def __add__(SufficientStats x, SufficientStats y):
        cdef SufficientStats result = SufficientStats()
        result.stats = new mteval.SufficientStats(mteval.add(x.stats[0], y.stats[0]))
        result.metric = x.metric
        return result

cdef class SegmentEvaluator:
    cdef shared_ptr[mteval.SegmentEvaluator]* scorer
    cdef mteval.EvaluationMetric* metric
    
    def __dealloc__(self):
        del self.scorer

    def evaluate(self, sentence):
        cdef vector[WordID] hyp
        cdef SufficientStats sf = SufficientStats()
        sf.metric = self.metric
        sf.stats = new mteval.SufficientStats()
        ConvertSentence(string(as_str(sentence.strip())), &hyp)
        self.scorer.get().Evaluate(hyp, sf.stats)
        return sf

    def candidate_set(self, Hypergraph hypergraph, unsigned k):
        cdef mteval.CandidateSet* cs = new mteval.CandidateSet()
        cs.AddKBestCandidates(hypergraph.hg[0], k, self.scorer.get())
        cdef Candidate candidate
        cdef unsigned i
        for i in range(cs.size()):
            candidate = Candidate()
            candidate.candidate = &cs[0][i]
            candidate.score = self.metric.ComputeScore(cs[0][i].eval_feats)
            yield candidate
        del cs

cdef class Scorer:
    cdef string* name

    def __cinit__(self, char* name):
        self.name = new string(name)
    
    def __call__(self, refs):
        cdef mteval.EvaluationMetric* metric = mteval.Instance(self.name[0])
        if isinstance(refs, unicode) or isinstance(refs, str):
            refs = [refs]
        cdef vector[vector[WordID]]* refsv = new vector[vector[WordID]]()
        cdef vector[WordID]* refv
        cdef bytes ref_str
        for ref in refs:
            refv = new vector[WordID]()
            ConvertSentence(string(as_str(ref.strip())), refv)
            refsv.push_back(refv[0])
            del refv
        cdef unsigned i
        cdef SegmentEvaluator evaluator = SegmentEvaluator()
        evaluator.metric = metric
        evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator](metric.CreateSegmentEvaluator(refsv[0]))
        del refsv # in theory should not delete but store in SegmentEvaluator
        return evaluator

    def __str__(self):
        return self.name.c_str()

BLEU = Scorer('IBM_BLEU')
TER = Scorer('TER')