summaryrefslogtreecommitdiff
path: root/python/src/sa/features.pxi
diff options
context:
space:
mode:
author: Victor Chahuneau <vchahune@cs.cmu.edu> 2012-08-14 22:50:37 -0400
committer: Victor Chahuneau <vchahune@cs.cmu.edu> 2012-08-14 22:50:37 -0400
commit: 9c9213239263e8e8de2f154068cc3ad44e0c2100 (patch)
tree: a9ee2f722e4dc5705ae9f90f6fb3b67a278c5fd9 /python/src/sa/features.pxi
parent: 0823824b5fa1504b6b2c48328aa8fc8468017cba (diff)
[cdec.sa] Explicit feature names in grammar extractor output
+ sparse features in extractor + hg.intersect(string) + basestring = str|unicode
Diffstat (limited to 'python/src/sa/features.pxi')
-rw-r--r--python/src/sa/features.pxi34
1 files changed, 34 insertions, 0 deletions
diff --git a/python/src/sa/features.pxi b/python/src/sa/features.pxi
new file mode 100644
index 00000000..fcb93f26
--- /dev/null
+++ b/python/src/sa/features.pxi
@@ -0,0 +1,34 @@
+# Module-level StringMap shared by this file: feature names are turned into
+# integer ids with FD.index() and turned back into names with FD.word().
+cdef StringMap FD = StringMap()
+
+# Initial capacity handed to the IntList/FloatList backing each FeatureVector.
+INITIAL_CAPACITY = 7 # default number of features
+# Growth increment handed to the same lists; it equals the initial capacity
+# (presumably so the first growth doubles the storage -- confirm in IntList).
+INCREMENT = INITIAL_CAPACITY # double size
+
+cdef class FeatureVector:
+    """Append-only collection of (feature id, value) pairs.
+
+    The ids and the values live in two parallel lists. Iterating yields
+    (feature name, value) tuples, with names looked up through FD.word();
+    str() renders the pairs as space-separated "name=value" items.
+    """
+
+    def __cinit__(self):
+        # Parallel storage: names[k] is the feature id belonging to values[k].
+        self.names = IntList(INITIAL_CAPACITY, INCREMENT)
+        self.values = FloatList(INITIAL_CAPACITY, INCREMENT)
+
+    def set(self, unsigned name, float value):
+        """Append feature id `name` with `value`; existing entries are kept."""
+        self.names.append(name)
+        self.values.append(value)
+
+    def __iter__(self):
+        """Yield (feature name, value) tuples in insertion order."""
+        cdef unsigned pos
+        for pos in range(self.names.len):
+            feature_name = FD.word(self.names[pos])
+            yield feature_name, self.values[pos]
+
+    def __str__(self):
+        """Return the features formatted as "name=value", joined by spaces."""
+        rendered = ['%s=%s' % (label, weight) for label, weight in self]
+        return ' '.join(rendered)
+
+cdef class Scorer:
+    """Applies a fixed set of feature functions to a phrase pair.
+
+    Each model is a callable; its `__name__` is registered in FD and the
+    resulting id is used as the feature id for the value the model returns.
+    """
+    cdef models
+
+    def __init__(self, *models):
+        """Register every model's `__name__` in FD and remember the pairing."""
+        names = [FD.index(<char *>model.__name__) for model in models]
+        # Materialize the pairs as a list: under Python 3 zip() is a one-shot
+        # iterator, so a bare zip would be exhausted by the first score() call
+        # and every later call would return an empty FeatureVector.
+        self.models = list(zip(names, models))
+
+    cdef FeatureVector score(self, Phrase fphrase, Phrase ephrase,
+            unsigned paircount, unsigned fcount, unsigned fsample_count):
+        """Return a FeatureVector with one entry per registered model.
+
+        Every model is called with (fphrase, ephrase, paircount, fcount,
+        fsample_count) and its result is stored under the model's feature id,
+        in the order the models were passed to the constructor.
+        """
+        cdef FeatureVector scores = FeatureVector()
+        for name, model in self.models:
+            scores.set(name, model(fphrase, ephrase, paircount, fcount, fsample_count))
+        return scores