diff options
Diffstat (limited to 'python/src')
| -rw-r--r-- | python/src/sa/_sa.c | 102 | ||||
| -rw-r--r-- | python/src/sa/default_scorer.pxi | 74 | ||||
| -rw-r--r-- | python/src/sa/features.pxi | 6 | ||||
| -rw-r--r-- | python/src/sa/rulefactory.pxi | 20 | 
4 files changed, 124 insertions, 78 deletions
diff --git a/python/src/sa/_sa.c b/python/src/sa/_sa.c index d04a8f98..a1530dda 100644 --- a/python/src/sa/_sa.c +++ b/python/src/sa/_sa.c @@ -1,4 +1,4 @@ -/* Generated by Cython 0.17 on Wed Sep  5 10:20:00 2012 */ +/* Generated by Cython 0.17 on Wed Sep  5 12:38:10 2012 */  #define PY_SSIZE_T_CLEAN  #include "Python.h" @@ -54767,7 +54767,7 @@ static int __pyx_pf_3_sa_6Scorer___init__(struct __pyx_obj_3_sa_Scorer *__pyx_v_   *         names = [FD.index(<char *>model.__name__) for model in models]   *         self.models = zip(names, models)             # <<<<<<<<<<<<<<   *  - *     cdef FeatureVector score(self, c): + *     cdef FeatureVector score(self, ctx):   */    __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}    __Pyx_GOTREF(__pyx_t_1); @@ -54804,12 +54804,12 @@ static int __pyx_pf_3_sa_6Scorer___init__(struct __pyx_obj_3_sa_Scorer *__pyx_v_  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":29   *         self.models = zip(names, models)   *  - *     cdef FeatureVector score(self, c):             # <<<<<<<<<<<<<< + *     cdef FeatureVector score(self, ctx):             # <<<<<<<<<<<<<<   *         cdef FeatureVector scores = FeatureVector()   *         for name, model in self.models:   */ -static struct __pyx_obj_3_sa_FeatureVector *__pyx_f_3_sa_6Scorer_score(struct __pyx_obj_3_sa_Scorer *__pyx_v_self, PyObject *__pyx_v_c) { +static struct __pyx_obj_3_sa_FeatureVector *__pyx_f_3_sa_6Scorer_score(struct __pyx_obj_3_sa_Scorer *__pyx_v_self, PyObject *__pyx_v_ctx) {    struct __pyx_obj_3_sa_FeatureVector *__pyx_v_scores = 0;    PyObject *__pyx_v_name = NULL;    PyObject *__pyx_v_model = NULL; @@ -54823,9 +54823,6 @@ static struct __pyx_obj_3_sa_FeatureVector *__pyx_f_3_sa_6Scorer_score(struct __    PyObject *__pyx_t_6 = NULL;    PyObject *__pyx_t_7 = NULL;    PyObject *(*__pyx_t_8)(PyObject *); -  PyObject *__pyx_t_9 = NULL; -  PyObject *__pyx_t_10 = NULL; -  PyObject *__pyx_t_11 = NULL;    int __pyx_lineno = 0;    const char *__pyx_filename = NULL;    int __pyx_clineno = 0; @@ -54833,10 +54830,10 @@ static struct __pyx_obj_3_sa_FeatureVector *__pyx_f_3_sa_6Scorer_score(struct __    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":30   *  - *     cdef FeatureVector score(self, c): + *     cdef FeatureVector score(self, ctx):   *         cdef FeatureVector scores = FeatureVector()             # <<<<<<<<<<<<<<   *         for name, model in self.models: - *             scores.set(name, model(c.fphrase, c.ephrase, c.paircount, c.fcount, c.fsample_count)) + *             scores.set(name, model(ctx))   */    __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_FeatureVector)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}    __Pyx_GOTREF(__pyx_t_1); @@ -54844,10 +54841,10 @@ static struct __pyx_obj_3_sa_FeatureVector *__pyx_f_3_sa_6Scorer_score(struct __    __pyx_t_1 = 0;    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":31 - *     cdef FeatureVector score(self, c): + *     cdef FeatureVector score(self, ctx):   *         cdef FeatureVector scores = FeatureVector()   *         for name, model in self.models:             # <<<<<<<<<<<<<< - *             scores.set(name, model(c.fphrase, c.ephrase, c.paircount, c.fcount, c.fsample_count)) + *             scores.set(name, model(ctx))   *         return scores   */    if (PyList_CheckExact(__pyx_v_self->models) || PyTuple_CheckExact(__pyx_v_self->models)) { @@ -54943,60 +54940,38 @@ static struct __pyx_obj_3_sa_FeatureVector *__pyx_f_3_sa_6Scorer_score(struct __      /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":32   *         cdef FeatureVector scores = FeatureVector()   *         for name, model in self.models: - *             scores.set(name, model(c.fphrase, c.ephrase, c.paircount, c.fcount, c.fsample_count))             # <<<<<<<<<<<<<< + *             scores.set(name, model(ctx))             # <<<<<<<<<<<<<<   *         return scores   */      __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_scores), __pyx_n_s__set); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}      __Pyx_GOTREF(__pyx_t_4); -    __pyx_t_6 = PyObject_GetAttr(__pyx_v_c, __pyx_n_s__fphrase); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} +    __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}      __Pyx_GOTREF(__pyx_t_6); -    __pyx_t_5 = PyObject_GetAttr(__pyx_v_c, __pyx_n_s__ephrase); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} +    __Pyx_INCREF(__pyx_v_ctx); +    PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_v_ctx); +    __Pyx_GIVEREF(__pyx_v_ctx); +    __pyx_t_5 = PyObject_Call(__pyx_v_model, ((PyObject *)__pyx_t_6), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}      __Pyx_GOTREF(__pyx_t_5); -    __pyx_t_7 = PyObject_GetAttr(__pyx_v_c, __pyx_n_s__paircount); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} -    __Pyx_GOTREF(__pyx_t_7); -    __pyx_t_9 = PyObject_GetAttr(__pyx_v_c, __pyx_n_s__fcount); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} -    __Pyx_GOTREF(__pyx_t_9); -    __pyx_t_10 = PyObject_GetAttr(__pyx_v_c, __pyx_n_s__fsample_count); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} -    __Pyx_GOTREF(__pyx_t_10); -    __pyx_t_11 = PyTuple_New(5); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} -    __Pyx_GOTREF(__pyx_t_11); -    PyTuple_SET_ITEM(__pyx_t_11, 0, __pyx_t_6); -    __Pyx_GIVEREF(__pyx_t_6); -    PyTuple_SET_ITEM(__pyx_t_11, 1, __pyx_t_5); -    __Pyx_GIVEREF(__pyx_t_5); -    PyTuple_SET_ITEM(__pyx_t_11, 2, __pyx_t_7); -    __Pyx_GIVEREF(__pyx_t_7); -    PyTuple_SET_ITEM(__pyx_t_11, 3, __pyx_t_9); -    __Pyx_GIVEREF(__pyx_t_9); -    PyTuple_SET_ITEM(__pyx_t_11, 4, __pyx_t_10); -    __Pyx_GIVEREF(__pyx_t_10); -    __pyx_t_6 = 0; -    __pyx_t_5 = 0; -    __pyx_t_7 = 0; -    __pyx_t_9 = 0; -    __pyx_t_10 = 0; -    __pyx_t_10 = PyObject_Call(__pyx_v_model, ((PyObject *)__pyx_t_11), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} -    __Pyx_GOTREF(__pyx_t_10); -    __Pyx_DECREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0; -    __pyx_t_11 = PyTuple_New(2); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} -    __Pyx_GOTREF(__pyx_t_11); +    __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0; +    __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} +    __Pyx_GOTREF(__pyx_t_6);      __Pyx_INCREF(__pyx_v_name); -    PyTuple_SET_ITEM(__pyx_t_11, 0, __pyx_v_name); +    PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_v_name);      __Pyx_GIVEREF(__pyx_v_name); -    PyTuple_SET_ITEM(__pyx_t_11, 1, __pyx_t_10); -    __Pyx_GIVEREF(__pyx_t_10); -    __pyx_t_10 = 0; -    __pyx_t_10 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_11), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} -    __Pyx_GOTREF(__pyx_t_10); +    PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_t_5); +    __Pyx_GIVEREF(__pyx_t_5); +    __pyx_t_5 = 0; +    __pyx_t_5 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_6), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} +    __Pyx_GOTREF(__pyx_t_5);      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; -    __Pyx_DECREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0; -    __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; +    __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0; +    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;    }    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":33   *         for name, model in self.models: - *             scores.set(name, model(c.fphrase, c.ephrase, c.paircount, c.fcount, c.fsample_count)) + *             scores.set(name, model(ctx))   *         return scores             # <<<<<<<<<<<<<<   */    __Pyx_XDECREF(((PyObject *)__pyx_r)); @@ -55012,9 +54987,6 @@ static struct __pyx_obj_3_sa_FeatureVector *__pyx_f_3_sa_6Scorer_score(struct __    __Pyx_XDECREF(__pyx_t_5);    __Pyx_XDECREF(__pyx_t_6);    __Pyx_XDECREF(__pyx_t_7); -  __Pyx_XDECREF(__pyx_t_9); -  __Pyx_XDECREF(__pyx_t_10); -  __Pyx_XDECREF(__pyx_t_11);    __Pyx_AddTraceback("_sa.Scorer.score", __pyx_clineno, __pyx_lineno, __pyx_filename);    __pyx_r = 0;    __pyx_L0:; @@ -64732,7 +64704,7 @@ PyMODINIT_FUNC PyInit__sa(void)   *    * from collections import defaultdict, Counter, namedtuple             # <<<<<<<<<<<<<<   *  - * FeatureContext = namedtuple("FeatureContext", + * FeatureContext = namedtuple('FeatureContext',   */    __pyx_t_1 = PyList_New(3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;}    __Pyx_GOTREF(__pyx_t_1); @@ -64777,19 +64749,19 @@ PyMODINIT_FUNC PyInit__sa(void)    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":14   * from collections import defaultdict, Counter, namedtuple   *  - * FeatureContext = namedtuple("FeatureContext",             # <<<<<<<<<<<<<< - *   ["fphrase", - *    "ephrase", + * FeatureContext = namedtuple('FeatureContext',             # <<<<<<<<<<<<<< + *     ['fphrase', + *      'ephrase',   */    __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__namedtuple); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}    __Pyx_GOTREF(__pyx_t_2);    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":15   *  - * FeatureContext = namedtuple("FeatureContext", - *   ["fphrase",             # <<<<<<<<<<<<<< - *    "ephrase", - *    "paircount", + * FeatureContext = namedtuple('FeatureContext', + *     ['fphrase',             # <<<<<<<<<<<<<< + *      'ephrase', + *      'paircount',   */    __pyx_t_1 = PyList_New(8); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}    __Pyx_GOTREF(__pyx_t_1); @@ -64833,7 +64805,7 @@ PyMODINIT_FUNC PyInit__sa(void)    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":25 - *   ]) + *     ])   *    * cdef int PRECOMPUTE = 0             # <<<<<<<<<<<<<<   * cdef int MERGE = 1 @@ -64900,7 +64872,7 @@ PyMODINIT_FUNC PyInit__sa(void)    __pyx_t_3 = 0;    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":1 - * cdef StringMap FD = StringMap()             # <<<<<<<<<<<<<< + * cdef StringMap FD = StringMap() # Feature name dictionary             # <<<<<<<<<<<<<<   *    * INITIAL_CAPACITY = 7 # default number of features   */ @@ -64913,7 +64885,7 @@ PyMODINIT_FUNC PyInit__sa(void)    __pyx_t_3 = 0;    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":3 - * cdef StringMap FD = StringMap() + * cdef StringMap FD = StringMap() # Feature name dictionary   *    * INITIAL_CAPACITY = 7 # default number of features             # <<<<<<<<<<<<<<   * INCREMENT = INITIAL_CAPACITY # double size diff --git a/python/src/sa/default_scorer.pxi b/python/src/sa/default_scorer.pxi new file mode 100644 index 00000000..483f4743 --- /dev/null +++ b/python/src/sa/default_scorer.pxi @@ -0,0 +1,74 @@ +from libc.stdlib cimport malloc, realloc, free +from libc.math cimport log10 + +MAXSCORE = -99 +EgivenFCoherent = 0 +SampleCountF = 1 +CountEF = 2 +MaxLexFgivenE = 3 +MaxLexEgivenF = 4 +IsSingletonF = 5 +IsSingletonFE = 6 +NFEATURES = 7 + +cdef class DefaultScorer(Scorer): +    cdef BiLex ttable +    cdef int* fid + +    def __dealloc__(self): +        free(self.fid) + +    def __init__(self, BiLex ttable): +        self.ttable = ttable +        self.fid = <int*> malloc(NFEATURES*sizeof(int)) +        cdef unsigned i +        for i, fnames in enumerate(('EgivenFCoherent', 'SampleCountF', 'CountEF', +                'MaxLexFgivenE', 'MaxLexEgivenF', 'IsSingletonF', 'IsSingletonFE')): +            self.fid[i] = FD.index(fnames) + +    cdef FeatureVector score(self, Phrase fphrase, Phrase ephrase, +            unsigned paircount, unsigned fcount, unsigned fsample_count): +        cdef FeatureVector scores = FeatureVector() + +        #  EgivenFCoherent +        cdef float efc = <float>paircount/fsample_count +        scores.set(self.fid[EgivenFCoherent], -log10(efc) if efc > 0 else MAXSCORE) + +        # SampleCountF +        scores.set(self.fid[SampleCountF], log10(1 + fsample_count)) + +        # CountEF +        scores.set(self.fid[CountEF], log10(1 + paircount)) + +        # MaxLexFgivenE TODO typify +        ewords = ephrase.words +        ewords.append('NULL') +        cdef float mlfe = 0, max_score = -1 +        for f in fphrase.words: +            for e in ewords: +                score = self.ttable.get_score(f, e, 1) +                if score > max_score: +                    max_score = score +            mlfe += -log10(max_score) if max_score > 0 else MAXSCORE +        scores.set(self.fid[MaxLexFgivenE], mlfe) + +        # MaxLexEgivenF TODO same +        fwords = fphrase.words +        fwords.append('NULL') +        cdef float mlef = 0 +        max_score = -1 +        for e in ephrase.words: +            for f in fwords: +                score = self.ttable.get_score(f, e, 0) +                if score > max_score: +                    max_score = score +            mlef += -log10(max_score) if max_score > 0 else MAXSCORE +        scores.set(self.fid[MaxLexEgivenF], mlef) + +        # IsSingletonF +        scores.set(self.fid[IsSingletonF], (fcount == 1)) + +        # IsSingletonFE +        scores.set(self.fid[IsSingletonFE], (paircount == 1)) + +        return scores diff --git a/python/src/sa/features.pxi b/python/src/sa/features.pxi index eeef4feb..9b9ecf3c 100644 --- a/python/src/sa/features.pxi +++ b/python/src/sa/features.pxi @@ -1,4 +1,4 @@ -cdef StringMap FD = StringMap() +cdef StringMap FD = StringMap() # Feature name dictionary  INITIAL_CAPACITY = 7 # default number of features  INCREMENT = INITIAL_CAPACITY # double size @@ -26,8 +26,8 @@ cdef class Scorer:          names = [FD.index(<char *>model.__name__) for model in models]          self.models = zip(names, models) -    cdef FeatureVector score(self, c): +    cdef FeatureVector score(self, ctx):          cdef FeatureVector scores = FeatureVector()          for name, model in self.models: -            scores.set(name, model(c.fphrase, c.ephrase, c.paircount, c.fcount, c.fsample_count)) +            scores.set(name, model(ctx))          return scores diff --git a/python/src/sa/rulefactory.pxi b/python/src/sa/rulefactory.pxi index 69cadac9..287b9a67 100644 --- a/python/src/sa/rulefactory.pxi +++ b/python/src/sa/rulefactory.pxi @@ -11,16 +11,16 @@ from libc.math cimport fmod, ceil, floor, log  from collections import defaultdict, Counter, namedtuple -FeatureContext = namedtuple("FeatureContext", -  ["fphrase",  -   "ephrase",  -   "paircount",  -   "fcount",  -   "fsample_count", -   "input_span", -   "matches", -   "test_sentence" -  ]) +FeatureContext = namedtuple('FeatureContext', +    ['fphrase',  +     'ephrase',  +     'paircount',  +     'fcount',  +     'fsample_count', +     'input_span', +     'matches', +     'test_sentence' +    ])  cdef int PRECOMPUTE = 0  cdef int MERGE = 1  | 
