diff options
author | Victor Chahuneau <vchahune@cs.cmu.edu> | 2012-08-14 22:50:37 -0400 |
---|---|---|
committer | Victor Chahuneau <vchahune@cs.cmu.edu> | 2012-08-14 22:50:37 -0400 |
commit | 9c9213239263e8e8de2f154068cc3ad44e0c2100 (patch) | |
tree | a9ee2f722e4dc5705ae9f90f6fb3b67a278c5fd9 /python/src/sa/features.pxi | |
parent | 0823824b5fa1504b6b2c48328aa8fc8468017cba (diff) |
[cdec.sa] Explicit feature names in grammar extractor output
+ sparse features in extractor
+ hg.intersect(string)
+ basestring = str|unicode
Diffstat (limited to 'python/src/sa/features.pxi')
-rw-r--r-- | python/src/sa/features.pxi | 34 |
1 files changed, 34 insertions, 0 deletions
# Origin: python/src/sa/features.pxi -- added as a new file (mode 100644,
# blob fcb93f26); reconstructed here from the single-line diff payload.

# Module-wide feature dictionary: Scorer registers model names through
# FD.index() and FeatureVector turns the stored ids back into names with
# FD.word().
cdef StringMap FD = StringMap()

INITIAL_CAPACITY = 7  # default number of features
# Second constructor argument passed to IntList / FloatList below.
# Original note: "double size".
# NOTE(review): presumably the growth step of the backing arrays -- confirm
# against the IntList / FloatList definitions.
INCREMENT = INITIAL_CAPACITY


cdef class FeatureVector:
    """Ordered collection of (feature id, value) pairs.

    Ids and values are kept in two parallel lists, ``names`` and ``values``.
    NOTE(review): both attributes are not declared in this file; they are
    presumably declared in a matching .pxd -- confirm.
    """

    def __cinit__(self):
        self.names = IntList(INITIAL_CAPACITY, INCREMENT)
        self.values = FloatList(INITIAL_CAPACITY, INCREMENT)

    def set(self, unsigned name, float value):
        """Append the pair (name, value) to the vector.

        ``name`` is a feature id (as returned by ``FD.index``).  The pair is
        always appended: an earlier entry with the same id is neither looked
        up nor overwritten.
        """
        self.names.append(name)
        self.values.append(value)

    def __iter__(self):
        """Yield ``(FD.word(id), value)`` for every pair, in insertion order."""
        cdef unsigned i
        for i in range(self.names.len):
            yield (FD.word(self.names[i]), self.values[i])

    def __str__(self):
        """Return the pairs formatted as space-separated ``name=value`` items."""
        return ' '.join('%s=%s' % feat for feat in self)


cdef class Scorer:
    """Applies a fixed set of feature functions ("models") to a phrase pair."""

    # List of (feature id, model callable) pairs, built once in __init__.
    cdef models

    def __init__(self, *models):
        """Register each model under the feature id of its ``__name__``.

        Every model must expose ``__name__`` and be callable as
        ``model(fphrase, ephrase, paircount, fcount, fsample_count)``.
        """
        names = [FD.index(<char *>model.__name__) for model in models]
        # Materialize the pairs: on Python 3 zip() returns a one-shot
        # iterator, which the first score() call would exhaust, leaving every
        # later call with an empty FeatureVector.  On Python 2 zip() already
        # returns a list, so this is behavior-preserving there.
        self.models = list(zip(names, models))

    cdef FeatureVector score(self, Phrase fphrase, Phrase ephrase,
            unsigned paircount, unsigned fcount, unsigned fsample_count):
        """Evaluate every registered model on the given arguments.

        Returns a new FeatureVector holding one (feature id, value) pair per
        model, in registration order.  Each model's result is converted to a
        C float by FeatureVector.set.
        """
        cdef FeatureVector scores = FeatureVector()
        for name, model in self.models:
            scores.set(name, model(fphrase, ephrase, paircount, fcount, fsample_count))
        return scores