# python/src/sa/features.pxi
#
# Provenance: contents of the file created by commit
#   9c9213239263e8e8de2f154068cc3ad44e0c2100
#   From:    Victor Chahuneau
#   Date:    Tue, 14 Aug 2012 22:50:37 -0400
#   Subject: [cdec.sa] Explicit feature names in grammar extractor output
#            + sparse features in extractor + hg.intersect(string)
#            + basestring = str|unicode
#   (new file, 34 insertions, blob fcb93f26; recovered from the cgit patch view)
#
# Review change relative to that commit: Scorer.__init__ stores the
# (feature id, model) pairs as a list instead of a bare zip() result.

# Module-level feature dictionary.  In this file it is used to turn a model's
# __name__ into an integer id (FD.index) and an id back into a name (FD.word).
# StringMap itself is defined elsewhere in the project.
cdef StringMap FD = StringMap()

INITIAL_CAPACITY = 7  # default number of features
# Passed as the second constructor argument of IntList/FloatList below.
# NOTE(review): presumably the growth step, so the first resize doubles the
# initial capacity -- confirm against the IntList/FloatList implementation.
INCREMENT = INITIAL_CAPACITY


cdef class FeatureVector:
    """Sparse feature vector stored as two parallel lists.

    ``names`` holds feature ids and ``values`` holds the matching feature
    values; entry ``i`` of one list belongs to entry ``i`` of the other.

    The ``names`` and ``values`` attributes are not declared in this file;
    presumably they are declared in the accompanying .pxd -- verify.
    """

    def __cinit__(self):
        # Both lists start with the same capacity so they can grow in step.
        self.names = IntList(INITIAL_CAPACITY, INCREMENT)
        self.values = FloatList(INITIAL_CAPACITY, INCREMENT)

    def set(self, unsigned name, float value):
        """Append the pair (feature id ``name``, ``value``) to the vector.

        Despite the method name this always appends: calling it twice with
        the same ``name`` stores two entries rather than overwriting.
        ``value`` is coerced to a C ``float`` by the typed signature.
        """
        self.names.append(name)
        self.values.append(value)

    def __iter__(self):
        """Yield ``(feature name, value)`` pairs in insertion order.

        Ids are mapped back to names through the module-level ``FD``.
        """
        cdef unsigned i
        for i in range(self.names.len):
            yield (FD.word(self.names[i]), self.values[i])

    def __str__(self):
        """Return the features as space-separated ``name=value`` items."""
        return ' '.join('%s=%s' % feat for feat in self)


cdef class Scorer:
    """Applies a fixed set of feature functions ("models") to a phrase pair."""

    # List of (feature id, model callable) pairs, built once in __init__.
    cdef models

    def __init__(self, *models):
        """Register ``models``; each one is identified by its ``__name__``.

        Every model must therefore expose ``__name__`` (plain functions do)
        and is later called as
        ``model(fphrase, ephrase, paircount, fcount, fsample_count)``.
        """
        names = [FD.index(model.__name__) for model in models]
        # Materialise the pairs: score() iterates them on every call, and on
        # Python 3 a bare zip() is a one-shot iterator that would be empty
        # from the second call onwards.  On Python 2 this is a no-op copy.
        self.models = list(zip(names, models))

    cdef FeatureVector score(self, Phrase fphrase, Phrase ephrase,
            unsigned paircount, unsigned fcount, unsigned fsample_count):
        """Evaluate every registered model and collect the results.

        All five arguments are forwarded unchanged to each model.  Returns a
        new FeatureVector with one entry per model, in registration order.
        """
        cdef FeatureVector scores = FeatureVector()
        for name, model in self.models:
            scores.set(name, model(fphrase, ephrase, paircount, fcount, fsample_count))
        return scores