From 06f90d83a1feafad301d365a4a437e44f68be45b Mon Sep 17 00:00:00 2001 From: Victor Chahuneau Date: Sat, 21 Jul 2012 01:22:53 -0400 Subject: [python] Support for grammars - Translation rules can now be create programatically - Grammars = list of translation rules can be used for translation - Feature expectations on the hypergraph (inside_outside) --- python/cdec/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'python/cdec/__init__.py') diff --git a/python/cdec/__init__.py b/python/cdec/__init__.py index 0d7b8782..89c323ba 100644 --- a/python/cdec/__init__.py +++ b/python/cdec/__init__.py @@ -1,2 +1 @@ -from _cdec import Decoder, Lattice -import score +from _cdec import Decoder, Lattice, TRule, NT, NTRef -- cgit v1.2.3 From eeef93717fed2901f6fcd2e3fd11118e0e309af7 Mon Sep 17 00:00:00 2001 From: Victor Chahuneau Date: Mon, 23 Jul 2012 19:59:44 -0400 Subject: [python] Evaluation metrics in Python --- python/cdec/__init__.py | 2 +- python/cdec/score.py | 2 +- python/src/_cdec.cpp | 1685 +++++++++++++++++++++++++++++++++++++---------- python/src/decoder.pxd | 2 - python/src/grammar.pxd | 1 - python/src/mteval.pxd | 13 +- python/src/mteval.pxi | 59 +- python/src/observer.h | 1 - python/src/py_scorer.h | 44 ++ sa-extract/Makefile | 10 +- 10 files changed, 1462 insertions(+), 357 deletions(-) create mode 100644 python/src/py_scorer.h (limited to 'python/cdec/__init__.py') diff --git a/python/cdec/__init__.py b/python/cdec/__init__.py index 89c323ba..19058493 100644 --- a/python/cdec/__init__.py +++ b/python/cdec/__init__.py @@ -1 +1 @@ -from _cdec import Decoder, Lattice, TRule, NT, NTRef +from _cdec import Decoder, Lattice, TRule, NT, NTRef, ParseFailed, InvalidConfig diff --git a/python/cdec/score.py b/python/cdec/score.py index c107446f..d9486ef2 100644 --- a/python/cdec/score.py +++ b/python/cdec/score.py @@ -1 +1 @@ -from _cdec import BLEU, TER +from _cdec import BLEU, TER, Metric diff --git a/python/src/_cdec.cpp b/python/src/_cdec.cpp index 3c95ca50..6d704b1d 100644 --- a/python/src/_cdec.cpp +++ b/python/src/_cdec.cpp @@ -1,4 +1,4 @@ -/* Generated by Cython 0.16 on Fri Jul 20 18:16:48 2012 */ +/* Generated by Cython 0.16 on Mon Jul 23 19:56:55 2012 */ #define PY_SSIZE_T_CLEAN #include "Python.h" @@ -301,6 +301,7 @@ #include "observer.h" #include "decoder/kbest.h" #include "mteval/ns.h" +#include "py_scorer.h" #include "training/candidate_set.h" #ifdef _OPENMP #include @@ -423,6 +424,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_12___get__; struct __pyx_obj_5_cdec___pyx_scope_struct_21___iter__; struct __pyx_obj_5_cdec_DenseVector; struct __pyx_obj_5_cdec___pyx_scope_struct_7___iter__; +struct __pyx_obj_5_cdec_Metric; struct __pyx_obj_5_cdec_SufficientStats; struct __pyx_obj_5_cdec___pyx_scope_struct_8_kbest; struct __pyx_obj_5_cdec___pyx_scope_struct_10_kbest_features; @@ -450,16 +452,17 @@ struct __pyx_opt_args_5_cdec_as_str { PyObject *error_msg; }; -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":112 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":117 * return CandidateSet(self) * * cdef class Scorer: # <<<<<<<<<<<<<< * cdef string* name - * + * cdef mteval.EvaluationMetric* metric */ struct __pyx_obj_5_cdec_Scorer { PyObject_HEAD std::string *name; + EvaluationMetric *metric; }; @@ -561,7 +564,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_2__phrase { }; -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":60 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":65 * return result * * cdef class CandidateSet: # <<<<<<<<<<<<<< @@ -634,7 +637,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_3_genexpr { }; -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":93 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":98 * self.cs.AddKBestCandidates(hypergraph.hg[0], k, self.scorer.get()) * * cdef class SegmentEvaluator: # <<<<<<<<<<<<<< @@ -653,7 +656,7 @@ struct __pyx_obj_5_cdec_SegmentEvaluator { * * def __iter__(self): # <<<<<<<<<<<<<< * for i in range(len(self)): - * yield self.stats[0][i] + * yield self[i] */ struct __pyx_obj_5_cdec___pyx_scope_struct_20___iter__ { PyObject_HEAD @@ -850,7 +853,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_12___get__ { }; -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":85 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":90 * return candidate * * def __iter__(self): # <<<<<<<<<<<<<< @@ -898,6 +901,19 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_7___iter__ { }; +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":173 + * out.fields[i] = ss[i] + * + * cdef class Metric: # <<<<<<<<<<<<<< + * cdef Scorer scorer + * def __cinit__(self): + */ +struct __pyx_obj_5_cdec_Metric { + PyObject_HEAD + struct __pyx_obj_5_cdec_Scorer *scorer; +}; + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":26 * return fmap * @@ -1330,6 +1346,8 @@ static CYTHON_INLINE PyObject* __Pyx_PyBoolOrNull_FromLong(long b) { #define __Pyx_PyIter_Next(obj) __Pyx_PyIter_Next2(obj, NULL); static CYTHON_INLINE PyObject *__Pyx_PyIter_Next2(PyObject *, PyObject *); /*proto*/ +static CYTHON_INLINE int __Pyx_CheckKeywordStrings(PyObject *kwdict, const char* function_name, int kw_allowed); /*proto*/ + static double __Pyx__PyObject_AsDouble(PyObject* obj); /* proto */ #define __Pyx_PyObject_AsDouble(obj) \ ((likely(PyFloat_CheckExact(obj))) ? \ @@ -1523,6 +1541,7 @@ static PyTypeObject *__pyx_ptype_5_cdec_SufficientStats = 0; static PyTypeObject *__pyx_ptype_5_cdec_CandidateSet = 0; static PyTypeObject *__pyx_ptype_5_cdec_SegmentEvaluator = 0; static PyTypeObject *__pyx_ptype_5_cdec_Scorer = 0; +static PyTypeObject *__pyx_ptype_5_cdec_Metric = 0; static PyTypeObject *__pyx_ptype_5_cdec_Decoder = 0; static PyTypeObject *__pyx_ptype_5_cdec___pyx_scope_struct____iter__ = 0; static PyTypeObject *__pyx_ptype_5_cdec___pyx_scope_struct_1___iter__ = 0; @@ -1551,6 +1570,8 @@ static PyTypeObject *__pyx_ptype_5_cdec___pyx_scope_struct_23___cinit__ = 0; static PyTypeObject *__pyx_ptype_5_cdec___pyx_scope_struct_24_genexpr = 0; static char *__pyx_f_5_cdec_as_str(PyObject *, struct __pyx_opt_args_5_cdec_as_str *__pyx_optional_args); /*proto*/ static struct __pyx_obj_5_cdec_SufficientStats *__pyx_f_5_cdec_as_stats(PyObject *, PyObject *); /*proto*/ +static float __pyx_f_5_cdec__compute_score(void *, SufficientStats *); /*proto*/ +static void __pyx_f_5_cdec__compute_sufficient_stats(void *, std::string *, std::vector *, SufficientStats *); /*proto*/ #define __Pyx_MODULE_NAME "_cdec" int __pyx_module_is_main__cdec = 0; @@ -1676,8 +1697,9 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_5score___get__(struct __pyx_o static PyObject *__pyx_pf_5_cdec_15SufficientStats_6detail___get__(struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_self); /* proto */ static Py_ssize_t __pyx_pf_5_cdec_15SufficientStats_2__len__(struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_15SufficientStats_4__iter__(struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_5_cdec_15SufficientStats_7__iadd__(struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_self, struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_other); /* proto */ -static PyObject *__pyx_pf_5_cdec_15SufficientStats_9__add__(PyObject *__pyx_v_x, PyObject *__pyx_v_y); /* proto */ +static PyObject *__pyx_pf_5_cdec_15SufficientStats_7__getitem__(struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_self, int __pyx_v_index); /* proto */ +static PyObject *__pyx_pf_5_cdec_15SufficientStats_9__iadd__(struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_self, struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_other); /* proto */ +static PyObject *__pyx_pf_5_cdec_15SufficientStats_11__add__(PyObject *__pyx_v_x, PyObject *__pyx_v_y); /* proto */ static int __pyx_pf_5_cdec_12CandidateSet___cinit__(struct __pyx_obj_5_cdec_CandidateSet *__pyx_v_self, struct __pyx_obj_5_cdec_SegmentEvaluator *__pyx_v_evaluator); /* proto */ static void __pyx_pf_5_cdec_12CandidateSet_2__dealloc__(CYTHON_UNUSED struct __pyx_obj_5_cdec_CandidateSet *__pyx_v_self); /* proto */ static Py_ssize_t __pyx_pf_5_cdec_12CandidateSet_4__len__(struct __pyx_obj_5_cdec_CandidateSet *__pyx_v_self); /* proto */ @@ -1687,10 +1709,14 @@ static PyObject *__pyx_pf_5_cdec_12CandidateSet_11add_kbest(struct __pyx_obj_5_c static void __pyx_pf_5_cdec_16SegmentEvaluator___dealloc__(CYTHON_UNUSED struct __pyx_obj_5_cdec_SegmentEvaluator *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx_obj_5_cdec_SegmentEvaluator *__pyx_v_self, PyObject *__pyx_v_sentence); /* proto */ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_4candidate_set(struct __pyx_obj_5_cdec_SegmentEvaluator *__pyx_v_self); /* proto */ -static int __pyx_pf_5_cdec_6Scorer___cinit__(struct __pyx_obj_5_cdec_Scorer *__pyx_v_self, char *__pyx_v_name); /* proto */ +static int __pyx_pf_5_cdec_6Scorer___cinit__(struct __pyx_obj_5_cdec_Scorer *__pyx_v_self, PyObject *__pyx_v_name); /* proto */ static void __pyx_pf_5_cdec_6Scorer_2__dealloc__(CYTHON_UNUSED struct __pyx_obj_5_cdec_Scorer *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Scorer *__pyx_v_self, PyObject *__pyx_v_refs); /* proto */ static PyObject *__pyx_pf_5_cdec_6Scorer_6__str__(struct __pyx_obj_5_cdec_Scorer *__pyx_v_self); /* proto */ +static int __pyx_pf_5_cdec_6Metric___cinit__(struct __pyx_obj_5_cdec_Metric *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_5_cdec_6Metric_2__call__(struct __pyx_obj_5_cdec_Metric *__pyx_v_self, PyObject *__pyx_v_refs); /* proto */ +static PyObject *__pyx_pf_5_cdec_6Metric_4score(CYTHON_UNUSED struct __pyx_obj_5_cdec_Metric *__pyx_v_stats); /* proto */ +static PyObject *__pyx_pf_5_cdec_6Metric_6evaluate(CYTHON_UNUSED struct __pyx_obj_5_cdec_Metric *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v_hyp, CYTHON_UNUSED PyObject *__pyx_v_refs); /* proto */ static PyObject *__pyx_pf_5_cdec_2_make_config(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_config); /* proto */ static PyObject *__pyx_pf_5_cdec_7Decoder_9__cinit___genexpr(PyObject *__pyx_self); /* proto */ static int __pyx_pf_5_cdec_7Decoder___cinit__(struct __pyx_obj_5_cdec_Decoder *__pyx_v_self, PyObject *__pyx_v_config_str, PyObject *__pyx_v_config); /* proto */ @@ -1726,15 +1752,16 @@ static char __pyx_k_27[] = "digraph lattice {"; static char __pyx_k_35[] = "}"; static char __pyx_k_38[] = "/Users/vchahun/Sandbox/cdec/python/src/lattice.pxi"; static char __pyx_k_39[] = "\n"; -static char __pyx_k_41[] = "candidate set index out of range"; -static char __pyx_k_43[] = "%s %s"; -static char __pyx_k_44[] = "%s = %s"; -static char __pyx_k_46[] = "formalism \"%s\" unknown"; -static char __pyx_k_47[] = "cannot initialize weights with %s"; -static char __pyx_k_48[] = "#"; -static char __pyx_k_51[] = "Cannot translate input type %s"; -static char __pyx_k_54[] = "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi"; -static char __pyx_k_59[] = "/Users/vchahun/Sandbox/cdec/python/src/_cdec.pyx"; +static char __pyx_k_41[] = "sufficient stats vector index out of range"; +static char __pyx_k_43[] = "candidate set index out of range"; +static char __pyx_k_45[] = "%s %s"; +static char __pyx_k_46[] = "%s = %s"; +static char __pyx_k_48[] = "formalism \"%s\" unknown"; +static char __pyx_k_49[] = "cannot initialize weights with %s"; +static char __pyx_k_50[] = "#"; +static char __pyx_k_53[] = "Cannot translate input type %s"; +static char __pyx_k_56[] = "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi"; +static char __pyx_k_61[] = "/Users/vchahun/Sandbox/cdec/python/src/_cdec.pyx"; static char __pyx_k__a[] = "a"; static char __pyx_k__e[] = "e"; static char __pyx_k__f[] = "f"; @@ -1746,6 +1773,7 @@ static char __pyx_k__cat[] = "cat"; static char __pyx_k__dot[] = "dot"; static char __pyx_k__fst[] = "fst"; static char __pyx_k__get[] = "get"; +static char __pyx_k__hyp[] = "hyp"; static char __pyx_k__inp[] = "inp"; static char __pyx_k__key[] = "key"; static char __pyx_k__lhs[] = "lhs"; @@ -1769,6 +1797,7 @@ static char __pyx_k__label[] = "label"; static char __pyx_k__lines[] = "lines"; static char __pyx_k__range[] = "range"; static char __pyx_k__rules[] = "rules"; +static char __pyx_k__score[] = "score"; static char __pyx_k__split[] = "split"; static char __pyx_k__strip[] = "strip"; static char __pyx_k__value[] = "value"; @@ -1788,14 +1817,17 @@ static char __pyx_k__IBM_BLEU[] = "IBM_BLEU"; static char __pyx_k__KeyError[] = "KeyError"; static char __pyx_k____exit__[] = "__exit__"; static char __pyx_k____main__[] = "__main__"; +static char __pyx_k____name__[] = "__name__"; static char __pyx_k____test__[] = "__test__"; static char __pyx_k__encoding[] = "encoding"; +static char __pyx_k__evaluate[] = "evaluate"; static char __pyx_k__in_edges[] = "in_edges"; static char __pyx_k__lexalign[] = "lexalign"; static char __pyx_k__lextrans[] = "lextrans"; static char __pyx_k__sentence[] = "sentence"; static char __pyx_k__Exception[] = "Exception"; static char __pyx_k__TypeError[] = "TypeError"; +static char __pyx_k____class__[] = "__class__"; static char __pyx_k____enter__[] = "__enter__"; static char __pyx_k__enumerate[] = "enumerate"; static char __pyx_k__evaluator[] = "evaluator"; @@ -1835,13 +1867,14 @@ static PyObject *__pyx_kp_s_39; static PyObject *__pyx_kp_s_4; static PyObject *__pyx_kp_s_41; static PyObject *__pyx_kp_s_43; -static PyObject *__pyx_kp_s_44; +static PyObject *__pyx_kp_s_45; static PyObject *__pyx_kp_s_46; -static PyObject *__pyx_kp_s_47; static PyObject *__pyx_kp_s_48; -static PyObject *__pyx_kp_s_51; -static PyObject *__pyx_kp_s_54; -static PyObject *__pyx_kp_s_59; +static PyObject *__pyx_kp_s_49; +static PyObject *__pyx_kp_s_50; +static PyObject *__pyx_kp_s_53; +static PyObject *__pyx_kp_s_56; +static PyObject *__pyx_kp_s_61; static PyObject *__pyx_kp_s_7; static PyObject *__pyx_kp_s_8; static PyObject *__pyx_kp_s_9; @@ -1856,9 +1889,11 @@ static PyObject *__pyx_n_s__ParseFailed; static PyObject *__pyx_n_s__TER; static PyObject *__pyx_n_s__TypeError; static PyObject *__pyx_n_s__ValueError; +static PyObject *__pyx_n_s____class__; static PyObject *__pyx_n_s____enter__; static PyObject *__pyx_n_s____exit__; static PyObject *__pyx_n_s____main__; +static PyObject *__pyx_n_s____name__; static PyObject *__pyx_n_s____test__; static PyObject *__pyx_n_s___cdec; static PyObject *__pyx_n_s___make_config; @@ -1877,6 +1912,7 @@ static PyObject *__pyx_n_s__encode; static PyObject *__pyx_n_s__encoding; static PyObject *__pyx_n_s__enumerate; static PyObject *__pyx_n_s__eval; +static PyObject *__pyx_n_s__evaluate; static PyObject *__pyx_n_s__evaluator; static PyObject *__pyx_n_s__f; static PyObject *__pyx_n_s__formalism; @@ -1884,6 +1920,7 @@ static PyObject *__pyx_n_s__fst; static PyObject *__pyx_n_s__genexpr; static PyObject *__pyx_n_s__get; static PyObject *__pyx_n_s__grammar; +static PyObject *__pyx_n_s__hyp; static PyObject *__pyx_n_s__hypergraph; static PyObject *__pyx_n_s__i; static PyObject *__pyx_n_s__in_edges; @@ -1909,6 +1946,7 @@ static PyObject *__pyx_n_s__refs; static PyObject *__pyx_n_s__replace; static PyObject *__pyx_n_s__rules; static PyObject *__pyx_n_s__scfg; +static PyObject *__pyx_n_s__score; static PyObject *__pyx_n_s__scores; static PyObject *__pyx_n_s__self; static PyObject *__pyx_n_s__sentence; @@ -1936,16 +1974,17 @@ static PyObject *__pyx_k_tuple_33; static PyObject *__pyx_k_tuple_36; static PyObject *__pyx_k_tuple_40; static PyObject *__pyx_k_tuple_42; -static PyObject *__pyx_k_tuple_45; -static PyObject *__pyx_k_tuple_49; -static PyObject *__pyx_k_tuple_50; +static PyObject *__pyx_k_tuple_44; +static PyObject *__pyx_k_tuple_47; +static PyObject *__pyx_k_tuple_51; static PyObject *__pyx_k_tuple_52; -static PyObject *__pyx_k_tuple_55; -static PyObject *__pyx_k_tuple_56; +static PyObject *__pyx_k_tuple_54; static PyObject *__pyx_k_tuple_57; +static PyObject *__pyx_k_tuple_58; +static PyObject *__pyx_k_tuple_59; static PyObject *__pyx_k_codeobj_37; -static PyObject *__pyx_k_codeobj_53; -static PyObject *__pyx_k_codeobj_58; +static PyObject *__pyx_k_codeobj_55; +static PyObject *__pyx_k_codeobj_60; /* "_cdec.pyx":6 * cimport decoder @@ -7390,7 +7429,7 @@ static int __pyx_pf_5_cdec_11TextGrammar___cinit__(struct __pyx_obj_5_cdec_TextG * for trule in rules: * if not isinstance(trule, BaseTRule): # <<<<<<<<<<<<<< * raise ValueError('the grammar should contain TRule objects') - * _g.AddRule(( trule).rule[0]) + * _g.AddRule(( trule).rule[0]) */ __pyx_t_4 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_BaseTRule)); __Pyx_INCREF(__pyx_t_4); @@ -7403,7 +7442,7 @@ static int __pyx_pf_5_cdec_11TextGrammar___cinit__(struct __pyx_obj_5_cdec_TextG * for trule in rules: * if not isinstance(trule, BaseTRule): * raise ValueError('the grammar should contain TRule objects') # <<<<<<<<<<<<<< - * _g.AddRule(( trule).rule[0]) + * _g.AddRule(( trule).rule[0]) */ __pyx_t_4 = PyObject_Call(__pyx_builtin_ValueError, ((PyObject *)__pyx_k_tuple_14), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); @@ -7417,9 +7456,9 @@ static int __pyx_pf_5_cdec_11TextGrammar___cinit__(struct __pyx_obj_5_cdec_TextG /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":176 * if not isinstance(trule, BaseTRule): * raise ValueError('the grammar should contain TRule objects') - * _g.AddRule(( trule).rule[0]) # <<<<<<<<<<<<<< + * _g.AddRule(( trule).rule[0]) # <<<<<<<<<<<<<< */ - __pyx_v__g->AddRule((((struct __pyx_obj_5_cdec_TRule *)__pyx_v_trule)->__pyx_base.rule[0])); + __pyx_v__g->AddRule((((struct __pyx_obj_5_cdec_BaseTRule *)__pyx_v_trule)->rule[0])); } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -13071,7 +13110,7 @@ static PyObject *__pyx_pw_5_cdec_15SufficientStats_5__iter__(PyObject *__pyx_v_s * * def __iter__(self): # <<<<<<<<<<<<<< * for i in range(len(self)): - * yield self.stats[0][i] + * yield self[i] */ static PyObject *__pyx_pf_5_cdec_15SufficientStats_4__iter__(struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_self) { @@ -13118,7 +13157,6 @@ static PyObject *__pyx_gb_5_cdec_15SufficientStats_6generator14(__pyx_GeneratorO PyObject *__pyx_t_2 = NULL; PyObject *__pyx_t_3 = NULL; PyObject *(*__pyx_t_4)(PyObject *); - unsigned int __pyx_t_5; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("None", 0); switch (__pyx_generator->resume_label) { @@ -13135,7 +13173,7 @@ static PyObject *__pyx_gb_5_cdec_15SufficientStats_6generator14(__pyx_GeneratorO * * def __iter__(self): * for i in range(len(self)): # <<<<<<<<<<<<<< - * yield self.stats[0][i] + * yield self[i] * */ __pyx_t_1 = PyObject_Length(((PyObject *)__pyx_cur_scope->__pyx_v_self)); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -13185,12 +13223,11 @@ static PyObject *__pyx_gb_5_cdec_15SufficientStats_6generator14(__pyx_GeneratorO /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":46 * def __iter__(self): * for i in range(len(self)): - * yield self.stats[0][i] # <<<<<<<<<<<<<< + * yield self[i] # <<<<<<<<<<<<<< * - * def __iadd__(SufficientStats self, SufficientStats other): + * def __getitem__(self, int index): */ - __pyx_t_5 = __Pyx_PyInt_AsUnsignedInt(__pyx_cur_scope->__pyx_v_i); if (unlikely((__pyx_t_5 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = PyFloat_FromDouble(((__pyx_cur_scope->__pyx_v_self->stats[0])[__pyx_t_5])); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetItem(((PyObject *)__pyx_cur_scope->__pyx_v_self), __pyx_cur_scope->__pyx_v_i); if (!__pyx_t_2) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_r = __pyx_t_2; __pyx_t_2 = 0; @@ -13226,13 +13263,111 @@ static PyObject *__pyx_gb_5_cdec_15SufficientStats_6generator14(__pyx_GeneratorO } /* Python wrapper */ -static PyObject *__pyx_pw_5_cdec_15SufficientStats_8__iadd__(PyObject *__pyx_v_self, PyObject *__pyx_v_other); /*proto*/ -static PyObject *__pyx_pw_5_cdec_15SufficientStats_8__iadd__(PyObject *__pyx_v_self, PyObject *__pyx_v_other) { +static PyObject *__pyx_pw_5_cdec_15SufficientStats_8__getitem__(PyObject *__pyx_v_self, PyObject *__pyx_arg_index); /*proto*/ +static PyObject *__pyx_pw_5_cdec_15SufficientStats_8__getitem__(PyObject *__pyx_v_self, PyObject *__pyx_arg_index) { + int __pyx_v_index; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__getitem__ (wrapper)", 0); + assert(__pyx_arg_index); { + __pyx_v_index = __Pyx_PyInt_AsInt(__pyx_arg_index); if (unlikely((__pyx_v_index == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + __Pyx_AddTraceback("_cdec.SufficientStats.__getitem__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_5_cdec_15SufficientStats_7__getitem__(((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_v_self), ((int)__pyx_v_index)); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":48 + * yield self[i] + * + * def __getitem__(self, int index): # <<<<<<<<<<<<<< + * if not 0 <= index < len(self): + * raise IndexError('sufficient stats vector index out of range') + */ + +static PyObject *__pyx_pf_5_cdec_15SufficientStats_7__getitem__(struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_self, int __pyx_v_index) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + Py_ssize_t __pyx_t_2; + int __pyx_t_3; + PyObject *__pyx_t_4 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__getitem__", 0); + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":49 + * + * def __getitem__(self, int index): + * if not 0 <= index < len(self): # <<<<<<<<<<<<<< + * raise IndexError('sufficient stats vector index out of range') + * return self.stats[0][index] + */ + __pyx_t_1 = (0 <= __pyx_v_index); + if (__pyx_t_1) { + __pyx_t_2 = PyObject_Length(((PyObject *)__pyx_v_self)); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = (__pyx_v_index < __pyx_t_2); + } + __pyx_t_3 = (!__pyx_t_1); + if (__pyx_t_3) { + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":50 + * def __getitem__(self, int index): + * if not 0 <= index < len(self): + * raise IndexError('sufficient stats vector index out of range') # <<<<<<<<<<<<<< + * return self.stats[0][index] + * + */ + __pyx_t_4 = PyObject_Call(__pyx_builtin_IndexError, ((PyObject *)__pyx_k_tuple_42), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __Pyx_Raise(__pyx_t_4, 0, 0, 0); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + {__pyx_filename = __pyx_f[5]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + goto __pyx_L3; + } + __pyx_L3:; + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":51 + * if not 0 <= index < len(self): + * raise IndexError('sufficient stats vector index out of range') + * return self.stats[0][index] # <<<<<<<<<<<<<< + * + * def __iadd__(SufficientStats self, SufficientStats other): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_4 = PyFloat_FromDouble(((__pyx_v_self->stats[0])[__pyx_v_index])); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_r = __pyx_t_4; + __pyx_t_4 = 0; + goto __pyx_L0; + + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_4); + __Pyx_AddTraceback("_cdec.SufficientStats.__getitem__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5_cdec_15SufficientStats_10__iadd__(PyObject *__pyx_v_self, PyObject *__pyx_v_other); /*proto*/ +static PyObject *__pyx_pw_5_cdec_15SufficientStats_10__iadd__(PyObject *__pyx_v_self, PyObject *__pyx_v_other) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__iadd__ (wrapper)", 0); - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_other), __pyx_ptype_5_cdec_SufficientStats, 1, "other", 0))) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_r = __pyx_pf_5_cdec_15SufficientStats_7__iadd__(((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_v_self), ((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_v_other)); + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_other), __pyx_ptype_5_cdec_SufficientStats, 1, "other", 0))) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_r = __pyx_pf_5_cdec_15SufficientStats_9__iadd__(((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_v_self), ((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_v_other)); goto __pyx_L0; __pyx_L1_error:; __pyx_r = NULL; @@ -13241,20 +13376,20 @@ static PyObject *__pyx_pw_5_cdec_15SufficientStats_8__iadd__(PyObject *__pyx_v_s return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":48 - * yield self.stats[0][i] +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":53 + * return self.stats[0][index] * * def __iadd__(SufficientStats self, SufficientStats other): # <<<<<<<<<<<<<< * self.stats[0] += other.stats[0] * return self */ -static PyObject *__pyx_pf_5_cdec_15SufficientStats_7__iadd__(struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_self, struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_other) { +static PyObject *__pyx_pf_5_cdec_15SufficientStats_9__iadd__(struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_self, struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_other) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__iadd__", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":49 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":54 * * def __iadd__(SufficientStats self, SufficientStats other): * self.stats[0] += other.stats[0] # <<<<<<<<<<<<<< @@ -13263,7 +13398,7 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_7__iadd__(struct __pyx_obj_5_ */ (__pyx_v_self->stats[0]) += (__pyx_v_other->stats[0]); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":50 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":55 * def __iadd__(SufficientStats self, SufficientStats other): * self.stats[0] += other.stats[0] * return self # <<<<<<<<<<<<<< @@ -13283,17 +13418,17 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_7__iadd__(struct __pyx_obj_5_ } /* Python wrapper */ -static PyObject *__pyx_pw_5_cdec_15SufficientStats_10__add__(PyObject *__pyx_v_x, PyObject *__pyx_v_y); /*proto*/ -static PyObject *__pyx_pw_5_cdec_15SufficientStats_10__add__(PyObject *__pyx_v_x, PyObject *__pyx_v_y) { +static PyObject *__pyx_pw_5_cdec_15SufficientStats_12__add__(PyObject *__pyx_v_x, PyObject *__pyx_v_y); /*proto*/ +static PyObject *__pyx_pw_5_cdec_15SufficientStats_12__add__(PyObject *__pyx_v_x, PyObject *__pyx_v_y) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__add__ (wrapper)", 0); - __pyx_r = __pyx_pf_5_cdec_15SufficientStats_9__add__(((PyObject *)__pyx_v_x), ((PyObject *)__pyx_v_y)); + __pyx_r = __pyx_pf_5_cdec_15SufficientStats_11__add__(((PyObject *)__pyx_v_x), ((PyObject *)__pyx_v_y)); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":52 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":57 * return self * * def __add__(x, y): # <<<<<<<<<<<<<< @@ -13301,7 +13436,7 @@ static PyObject *__pyx_pw_5_cdec_15SufficientStats_10__add__(PyObject *__pyx_v_x * cdef SufficientStats sy = as_stats(y, x) */ -static PyObject *__pyx_pf_5_cdec_15SufficientStats_9__add__(PyObject *__pyx_v_x, PyObject *__pyx_v_y) { +static PyObject *__pyx_pf_5_cdec_15SufficientStats_11__add__(PyObject *__pyx_v_x, PyObject *__pyx_v_y) { struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_sx = 0; struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_sy = 0; struct __pyx_obj_5_cdec_SufficientStats *__pyx_v_result = 0; @@ -13313,43 +13448,43 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_9__add__(PyObject *__pyx_v_x, int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__add__", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":53 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":58 * * def __add__(x, y): * cdef SufficientStats sx = as_stats(x, y) # <<<<<<<<<<<<<< * cdef SufficientStats sy = as_stats(y, x) * cdef SufficientStats result = SufficientStats() */ - __pyx_t_1 = ((PyObject *)__pyx_f_5_cdec_as_stats(__pyx_v_x, __pyx_v_y)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = ((PyObject *)__pyx_f_5_cdec_as_stats(__pyx_v_x, __pyx_v_y)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_sx = ((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_t_1); __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":54 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":59 * def __add__(x, y): * cdef SufficientStats sx = as_stats(x, y) * cdef SufficientStats sy = as_stats(y, x) # <<<<<<<<<<<<<< * cdef SufficientStats result = SufficientStats() * result.stats = new mteval.SufficientStats(mteval.add(sx.stats[0], sy.stats[0])) */ - __pyx_t_1 = ((PyObject *)__pyx_f_5_cdec_as_stats(__pyx_v_y, __pyx_v_x)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = ((PyObject *)__pyx_f_5_cdec_as_stats(__pyx_v_y, __pyx_v_x)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_sy = ((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_t_1); __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":55 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":60 * cdef SufficientStats sx = as_stats(x, y) * cdef SufficientStats sy = as_stats(y, x) * cdef SufficientStats result = SufficientStats() # <<<<<<<<<<<<<< * result.stats = new mteval.SufficientStats(mteval.add(sx.stats[0], sy.stats[0])) * result.metric = sx.metric */ - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SufficientStats)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SufficientStats)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_result = ((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_t_1); __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":56 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":61 * cdef SufficientStats sy = as_stats(y, x) * cdef SufficientStats result = SufficientStats() * result.stats = new mteval.SufficientStats(mteval.add(sx.stats[0], sy.stats[0])) # <<<<<<<<<<<<<< @@ -13358,7 +13493,7 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_9__add__(PyObject *__pyx_v_x, */ __pyx_v_result->stats = new SufficientStats(operator+((__pyx_v_sx->stats[0]), (__pyx_v_sy->stats[0]))); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":57 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":62 * cdef SufficientStats result = SufficientStats() * result.stats = new mteval.SufficientStats(mteval.add(sx.stats[0], sy.stats[0])) * result.metric = sx.metric # <<<<<<<<<<<<<< @@ -13367,7 +13502,7 @@ static PyObject *__pyx_pf_5_cdec_15SufficientStats_9__add__(PyObject *__pyx_v_x, */ __pyx_v_result->metric = __pyx_v_sx->metric; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":58 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":63 * result.stats = new mteval.SufficientStats(mteval.add(sx.stats[0], sy.stats[0])) * result.metric = sx.metric * return result # <<<<<<<<<<<<<< @@ -13420,7 +13555,7 @@ static int __pyx_pw_5_cdec_12CandidateSet_1__cinit__(PyObject *__pyx_v_self, PyO else goto __pyx_L5_argtuple_error; } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 1) { goto __pyx_L5_argtuple_error; @@ -13431,13 +13566,13 @@ static int __pyx_pw_5_cdec_12CandidateSet_1__cinit__(PyObject *__pyx_v_self, PyO } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__cinit__", 1, 1, 1, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("__cinit__", 1, 1, 1, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_cdec.CandidateSet.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return -1; __pyx_L4_argument_unpacking_done:; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_evaluator), __pyx_ptype_5_cdec_SegmentEvaluator, 1, "evaluator", 0))) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_evaluator), __pyx_ptype_5_cdec_SegmentEvaluator, 1, "evaluator", 0))) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = __pyx_pf_5_cdec_12CandidateSet___cinit__(((struct __pyx_obj_5_cdec_CandidateSet *)__pyx_v_self), __pyx_v_evaluator); goto __pyx_L0; __pyx_L1_error:; @@ -13447,7 +13582,7 @@ static int __pyx_pw_5_cdec_12CandidateSet_1__cinit__(PyObject *__pyx_v_self, PyO return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":65 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":70 * cdef mteval.CandidateSet* cs * * def __cinit__(self, SegmentEvaluator evaluator): # <<<<<<<<<<<<<< @@ -13460,7 +13595,7 @@ static int __pyx_pf_5_cdec_12CandidateSet___cinit__(struct __pyx_obj_5_cdec_Cand __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":66 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":71 * * def __cinit__(self, SegmentEvaluator evaluator): * self.scorer = new shared_ptr[mteval.SegmentEvaluator](evaluator.scorer[0]) # <<<<<<<<<<<<<< @@ -13469,7 +13604,7 @@ static int __pyx_pf_5_cdec_12CandidateSet___cinit__(struct __pyx_obj_5_cdec_Cand */ __pyx_v_self->scorer = new boost::shared_ptr((__pyx_v_evaluator->scorer[0])); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":67 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":72 * def __cinit__(self, SegmentEvaluator evaluator): * self.scorer = new shared_ptr[mteval.SegmentEvaluator](evaluator.scorer[0]) * self.metric = evaluator.metric # <<<<<<<<<<<<<< @@ -13478,7 +13613,7 @@ static int __pyx_pf_5_cdec_12CandidateSet___cinit__(struct __pyx_obj_5_cdec_Cand */ __pyx_v_self->metric = __pyx_v_evaluator->metric; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":68 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":73 * self.scorer = new shared_ptr[mteval.SegmentEvaluator](evaluator.scorer[0]) * self.metric = evaluator.metric * self.cs = new mteval.CandidateSet() # <<<<<<<<<<<<<< @@ -13501,7 +13636,7 @@ static void __pyx_pw_5_cdec_12CandidateSet_3__dealloc__(PyObject *__pyx_v_self) __Pyx_RefNannyFinishContext(); } -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":70 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":75 * self.cs = new mteval.CandidateSet() * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -13513,7 +13648,7 @@ static void __pyx_pf_5_cdec_12CandidateSet_2__dealloc__(CYTHON_UNUSED struct __p __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":71 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":76 * * def __dealloc__(self): * del self.scorer # <<<<<<<<<<<<<< @@ -13522,7 +13657,7 @@ static void __pyx_pf_5_cdec_12CandidateSet_2__dealloc__(CYTHON_UNUSED struct __p */ delete __pyx_v_self->scorer; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":72 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":77 * def __dealloc__(self): * del self.scorer * del self.cs # <<<<<<<<<<<<<< @@ -13545,7 +13680,7 @@ static Py_ssize_t __pyx_pw_5_cdec_12CandidateSet_5__len__(PyObject *__pyx_v_self return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":74 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":79 * del self.cs * * def __len__(self): # <<<<<<<<<<<<<< @@ -13558,7 +13693,7 @@ static Py_ssize_t __pyx_pf_5_cdec_12CandidateSet_4__len__(struct __pyx_obj_5_cde __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__len__", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":75 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":80 * * def __len__(self): * return self.cs.size() # <<<<<<<<<<<<<< @@ -13582,7 +13717,7 @@ static PyObject *__pyx_pw_5_cdec_12CandidateSet_7__getitem__(PyObject *__pyx_v_s __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__getitem__ (wrapper)", 0); assert(__pyx_arg_k); { - __pyx_v_k = __Pyx_PyInt_AsInt(__pyx_arg_k); if (unlikely((__pyx_v_k == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_k = __Pyx_PyInt_AsInt(__pyx_arg_k); if (unlikely((__pyx_v_k == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 82; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L3_error:; @@ -13595,7 +13730,7 @@ static PyObject *__pyx_pw_5_cdec_12CandidateSet_7__getitem__(PyObject *__pyx_v_s return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":77 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":82 * return self.cs.size() * * def __getitem__(self,int k): # <<<<<<<<<<<<<< @@ -13615,7 +13750,7 @@ static PyObject *__pyx_pf_5_cdec_12CandidateSet_6__getitem__(struct __pyx_obj_5_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__getitem__", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":78 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":83 * * def __getitem__(self,int k): * if not 0 <= k < self.cs.size(): # <<<<<<<<<<<<<< @@ -13629,35 +13764,35 @@ static PyObject *__pyx_pf_5_cdec_12CandidateSet_6__getitem__(struct __pyx_obj_5_ __pyx_t_2 = (!__pyx_t_1); if (__pyx_t_2) { - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":79 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":84 * def __getitem__(self,int k): * if not 0 <= k < self.cs.size(): * raise IndexError('candidate set index out of range') # <<<<<<<<<<<<<< * cdef Candidate candidate = Candidate() * candidate.candidate = &self.cs[0][k] */ - __pyx_t_3 = PyObject_Call(__pyx_builtin_IndexError, ((PyObject *)__pyx_k_tuple_42), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(__pyx_builtin_IndexError, ((PyObject *)__pyx_k_tuple_44), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 84; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - {__pyx_filename = __pyx_f[5]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[5]; __pyx_lineno = 84; __pyx_clineno = __LINE__; goto __pyx_L1_error;} goto __pyx_L3; } __pyx_L3:; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":80 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":85 * if not 0 <= k < self.cs.size(): * raise IndexError('candidate set index out of range') * cdef Candidate candidate = Candidate() # <<<<<<<<<<<<<< * candidate.candidate = &self.cs[0][k] * candidate.score = self.metric.ComputeScore(self.cs[0][k].eval_feats) */ - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Candidate)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Candidate)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 85; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __pyx_v_candidate = ((struct __pyx_obj_5_cdec_Candidate *)__pyx_t_3); __pyx_t_3 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":81 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":86 * raise IndexError('candidate set index out of range') * cdef Candidate candidate = Candidate() * candidate.candidate = &self.cs[0][k] # <<<<<<<<<<<<<< @@ -13666,7 +13801,7 @@ static PyObject *__pyx_pf_5_cdec_12CandidateSet_6__getitem__(struct __pyx_obj_5_ */ __pyx_v_candidate->candidate = (&((__pyx_v_self->cs[0])[__pyx_v_k])); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":82 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":87 * cdef Candidate candidate = Candidate() * candidate.candidate = &self.cs[0][k] * candidate.score = self.metric.ComputeScore(self.cs[0][k].eval_feats) # <<<<<<<<<<<<<< @@ -13675,7 +13810,7 @@ static PyObject *__pyx_pf_5_cdec_12CandidateSet_6__getitem__(struct __pyx_obj_5_ */ __pyx_v_candidate->score = __pyx_v_self->metric->ComputeScore(((__pyx_v_self->cs[0])[__pyx_v_k]).eval_feats); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":83 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":88 * candidate.candidate = &self.cs[0][k] * candidate.score = self.metric.ComputeScore(self.cs[0][k].eval_feats) * return candidate # <<<<<<<<<<<<<< @@ -13712,7 +13847,7 @@ static PyObject *__pyx_pw_5_cdec_12CandidateSet_9__iter__(PyObject *__pyx_v_self return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":85 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":90 * return candidate * * def __iter__(self): # <<<<<<<<<<<<<< @@ -13738,7 +13873,7 @@ static PyObject *__pyx_pf_5_cdec_12CandidateSet_8__iter__(struct __pyx_obj_5_cde __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_12CandidateSet_10generator15, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 85; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_12CandidateSet_10generator15, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_cur_scope); __Pyx_RefNannyFinishContext(); return (PyObject *) gen; @@ -13773,27 +13908,27 @@ static PyObject *__pyx_gb_5_cdec_12CandidateSet_10generator15(__pyx_GeneratorObj return NULL; } __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 85; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":87 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":92 * def __iter__(self): * cdef unsigned i * for i in range(len(self)): # <<<<<<<<<<<<<< * yield self[i] * */ - __pyx_t_1 = PyObject_Length(((PyObject *)__pyx_cur_scope->__pyx_v_self)); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Length(((PyObject *)__pyx_cur_scope->__pyx_v_self)); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":88 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":93 * cdef unsigned i * for i in range(len(self)): * yield self[i] # <<<<<<<<<<<<<< * * def add_kbest(self, Hypergraph hypergraph, unsigned k): */ - __pyx_t_3 = __Pyx_GetItemInt(((PyObject *)__pyx_cur_scope->__pyx_v_self), __pyx_cur_scope->__pyx_v_i, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_GetItemInt(((PyObject *)__pyx_cur_scope->__pyx_v_self), __pyx_cur_scope->__pyx_v_i, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __pyx_r = __pyx_t_3; __pyx_t_3 = 0; @@ -13807,7 +13942,7 @@ static PyObject *__pyx_gb_5_cdec_12CandidateSet_10generator15(__pyx_GeneratorObj __pyx_L6_resume_from_yield:; __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } PyErr_SetNone(PyExc_StopIteration); goto __pyx_L0; @@ -13851,11 +13986,11 @@ static PyObject *__pyx_pw_5_cdec_12CandidateSet_12add_kbest(PyObject *__pyx_v_se values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__k); if (likely(values[1])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("add_kbest", 1, 2, 2, 1); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("add_kbest", 1, 2, 2, 1); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "add_kbest") < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "add_kbest") < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 2) { goto __pyx_L5_argtuple_error; @@ -13864,17 +13999,17 @@ static PyObject *__pyx_pw_5_cdec_12CandidateSet_12add_kbest(PyObject *__pyx_v_se values[1] = PyTuple_GET_ITEM(__pyx_args, 1); } __pyx_v_hypergraph = ((struct __pyx_obj_5_cdec_Hypergraph *)values[0]); - __pyx_v_k = __Pyx_PyInt_AsUnsignedInt(values[1]); if (unlikely((__pyx_v_k == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_k = __Pyx_PyInt_AsUnsignedInt(values[1]); if (unlikely((__pyx_v_k == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("add_kbest", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("add_kbest", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_cdec.CandidateSet.add_kbest", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_hypergraph), __pyx_ptype_5_cdec_Hypergraph, 1, "hypergraph", 0))) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_hypergraph), __pyx_ptype_5_cdec_Hypergraph, 1, "hypergraph", 0))) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = __pyx_pf_5_cdec_12CandidateSet_11add_kbest(((struct __pyx_obj_5_cdec_CandidateSet *)__pyx_v_self), __pyx_v_hypergraph, __pyx_v_k); goto __pyx_L0; __pyx_L1_error:; @@ -13884,7 +14019,7 @@ static PyObject *__pyx_pw_5_cdec_12CandidateSet_12add_kbest(PyObject *__pyx_v_se return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":90 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":95 * yield self[i] * * def add_kbest(self, Hypergraph hypergraph, unsigned k): # <<<<<<<<<<<<<< @@ -13897,7 +14032,7 @@ static PyObject *__pyx_pf_5_cdec_12CandidateSet_11add_kbest(struct __pyx_obj_5_c __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("add_kbest", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":91 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":96 * * def add_kbest(self, Hypergraph hypergraph, unsigned k): * self.cs.AddKBestCandidates(hypergraph.hg[0], k, self.scorer.get()) # <<<<<<<<<<<<<< @@ -13921,7 +14056,7 @@ static void __pyx_pw_5_cdec_16SegmentEvaluator_1__dealloc__(PyObject *__pyx_v_se __Pyx_RefNannyFinishContext(); } -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":97 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":102 * cdef mteval.EvaluationMetric* metric * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -13933,7 +14068,7 @@ static void __pyx_pf_5_cdec_16SegmentEvaluator___dealloc__(CYTHON_UNUSED struct __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":98 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":103 * * def __dealloc__(self): * del self.scorer # <<<<<<<<<<<<<< @@ -13956,7 +14091,7 @@ static PyObject *__pyx_pw_5_cdec_16SegmentEvaluator_3evaluate(PyObject *__pyx_v_ return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":100 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":105 * del self.scorer * * def evaluate(self, sentence): # <<<<<<<<<<<<<< @@ -13976,19 +14111,19 @@ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx_obj_5 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("evaluate", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":102 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":107 * def evaluate(self, sentence): * cdef vector[WordID] hyp * cdef SufficientStats sf = SufficientStats() # <<<<<<<<<<<<<< * sf.metric = self.metric * sf.stats = new mteval.SufficientStats() */ - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SufficientStats)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SufficientStats)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_sf = ((struct __pyx_obj_5_cdec_SufficientStats *)__pyx_t_1); __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":103 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":108 * cdef vector[WordID] hyp * cdef SufficientStats sf = SufficientStats() * sf.metric = self.metric # <<<<<<<<<<<<<< @@ -13997,7 +14132,7 @@ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx_obj_5 */ __pyx_v_sf->metric = __pyx_v_self->metric; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":104 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":109 * cdef SufficientStats sf = SufficientStats() * sf.metric = self.metric * sf.stats = new mteval.SufficientStats() # <<<<<<<<<<<<<< @@ -14006,22 +14141,22 @@ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx_obj_5 */ __pyx_v_sf->stats = new SufficientStats(); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":105 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":110 * sf.metric = self.metric * sf.stats = new mteval.SufficientStats() * ConvertSentence(string(as_str(sentence.strip())), &hyp) # <<<<<<<<<<<<<< * self.scorer.get().Evaluate(hyp, sf.stats) * return sf */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_sentence, __pyx_n_s__strip); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(__pyx_v_sentence, __pyx_n_s__strip); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 110; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 110; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; TD::ConvertSentence(std::string(__pyx_f_5_cdec_as_str(__pyx_t_2, NULL)), (&__pyx_v_hyp)); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":106 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":111 * sf.stats = new mteval.SufficientStats() * ConvertSentence(string(as_str(sentence.strip())), &hyp) * self.scorer.get().Evaluate(hyp, sf.stats) # <<<<<<<<<<<<<< @@ -14030,7 +14165,7 @@ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx_obj_5 */ __pyx_v_self->scorer->get()->Evaluate(__pyx_v_hyp, __pyx_v_sf->stats); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":107 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":112 * ConvertSentence(string(as_str(sentence.strip())), &hyp) * self.scorer.get().Evaluate(hyp, sf.stats) * return sf # <<<<<<<<<<<<<< @@ -14067,7 +14202,7 @@ static PyObject *__pyx_pw_5_cdec_16SegmentEvaluator_5candidate_set(PyObject *__p return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":109 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":114 * return sf * * def candidate_set(self): # <<<<<<<<<<<<<< @@ -14085,7 +14220,7 @@ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_4candidate_set(struct __pyx_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("candidate_set", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":110 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":115 * * def candidate_set(self): * return CandidateSet(self) # <<<<<<<<<<<<<< @@ -14093,12 +14228,12 @@ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_4candidate_set(struct __pyx_ * cdef class Scorer: */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 110; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(((PyObject *)__pyx_v_self)); PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_self)); __Pyx_GIVEREF(((PyObject *)__pyx_v_self)); - __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_CandidateSet)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 110; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_CandidateSet)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; __pyx_r = __pyx_t_2; @@ -14121,13 +14256,22 @@ static PyObject *__pyx_pf_5_cdec_16SegmentEvaluator_4candidate_set(struct __pyx_ /* Python wrapper */ static int __pyx_pw_5_cdec_6Scorer_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ static int __pyx_pw_5_cdec_6Scorer_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - char *__pyx_v_name; + PyObject *__pyx_v_name = 0; static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__name,0}; int __pyx_r; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0); { PyObject* values[1] = {0}; + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":121 + * cdef mteval.EvaluationMetric* metric + * + * def __cinit__(self, bytes name=None): # <<<<<<<<<<<<<< + * if name: + * self.name = new string(name) + */ + values[0] = ((PyObject*)Py_None); if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); @@ -14139,56 +14283,89 @@ static int __pyx_pw_5_cdec_6Scorer_1__cinit__(PyObject *__pyx_v_self, PyObject * kw_args = PyDict_Size(__pyx_kwds); switch (pos_args) { case 0: - values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__name); - if (likely(values[0])) kw_args--; - else goto __pyx_L5_argtuple_error; + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__name); + if (value) { values[0] = value; kw_args--; } + } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } - } else if (PyTuple_GET_SIZE(__pyx_args) != 1) { - goto __pyx_L5_argtuple_error; } else { - values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + switch (PyTuple_GET_SIZE(__pyx_args)) { + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + case 0: break; + default: goto __pyx_L5_argtuple_error; + } } - __pyx_v_name = PyBytes_AsString(values[0]); if (unlikely((!__pyx_v_name) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_name = ((PyObject*)values[0]); } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__cinit__", 1, 1, 1, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 0, 1, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_cdec.Scorer.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return -1; __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_name), (&PyBytes_Type), 1, "name", 1))) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = __pyx_pf_5_cdec_6Scorer___cinit__(((struct __pyx_obj_5_cdec_Scorer *)__pyx_v_self), __pyx_v_name); + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = -1; + __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":115 - * cdef string* name - * - * def __cinit__(self, char* name): # <<<<<<<<<<<<<< - * self.name = new string(name) - * - */ - -static int __pyx_pf_5_cdec_6Scorer___cinit__(struct __pyx_obj_5_cdec_Scorer *__pyx_v_self, char *__pyx_v_name) { +static int __pyx_pf_5_cdec_6Scorer___cinit__(struct __pyx_obj_5_cdec_Scorer *__pyx_v_self, PyObject *__pyx_v_name) { int __pyx_r; __Pyx_RefNannyDeclarations + int __pyx_t_1; + char *__pyx_t_2; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":116 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":122 + * + * def __cinit__(self, bytes name=None): + * if name: # <<<<<<<<<<<<<< + * self.name = new string(name) + * self.metric = mteval.MetricInstance(self.name[0]) + */ + __pyx_t_1 = (((PyObject *)__pyx_v_name) != Py_None) && (PyBytes_GET_SIZE(((PyObject *)__pyx_v_name)) != 0); + if (__pyx_t_1) { + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":123 + * def __cinit__(self, bytes name=None): + * if name: + * self.name = new string(name) # <<<<<<<<<<<<<< + * self.metric = mteval.MetricInstance(self.name[0]) * - * def __cinit__(self, char* name): - * self.name = new string(name) # <<<<<<<<<<<<<< + */ + __pyx_t_2 = PyBytes_AsString(((PyObject *)__pyx_v_name)); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 123; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_self->name = new std::string(__pyx_t_2); + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":124 + * if name: + * self.name = new string(name) + * self.metric = mteval.MetricInstance(self.name[0]) # <<<<<<<<<<<<<< * * def __dealloc__(self): */ - __pyx_v_self->name = new std::string(__pyx_v_name); + __pyx_v_self->metric = EvaluationMetric::Instance((__pyx_v_self->name[0])); + goto __pyx_L3; + } + __pyx_L3:; __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("_cdec.Scorer.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; } @@ -14202,8 +14379,8 @@ static void __pyx_pw_5_cdec_6Scorer_3__dealloc__(PyObject *__pyx_v_self) { __Pyx_RefNannyFinishContext(); } -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":118 - * self.name = new string(name) +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":126 + * self.metric = mteval.MetricInstance(self.name[0]) * * def __dealloc__(self): # <<<<<<<<<<<<<< * del self.name @@ -14214,7 +14391,7 @@ static void __pyx_pf_5_cdec_6Scorer_2__dealloc__(CYTHON_UNUSED struct __pyx_obj_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":119 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":127 * * def __dealloc__(self): * del self.name # <<<<<<<<<<<<<< @@ -14252,7 +14429,7 @@ static PyObject *__pyx_pw_5_cdec_6Scorer_5__call__(PyObject *__pyx_v_self, PyObj else goto __pyx_L5_argtuple_error; } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__call__") < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__call__") < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 129; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 1) { goto __pyx_L5_argtuple_error; @@ -14263,7 +14440,7 @@ static PyObject *__pyx_pw_5_cdec_6Scorer_5__call__(PyObject *__pyx_v_self, PyObj } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__call__", 1, 1, 1, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("__call__", 1, 1, 1, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 129; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_cdec.Scorer.__call__", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -14274,16 +14451,15 @@ static PyObject *__pyx_pw_5_cdec_6Scorer_5__call__(PyObject *__pyx_v_self, PyObj return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":121 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":129 * del self.name * * def __call__(self, refs): # <<<<<<<<<<<<<< - * cdef mteval.EvaluationMetric* metric = mteval.Instance(self.name[0]) * if isinstance(refs, unicode) or isinstance(refs, str): + * refs = [refs] */ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Scorer *__pyx_v_self, PyObject *__pyx_v_refs) { - EvaluationMetric *__pyx_v_metric; std::vector > *__pyx_v_refsv; std::vector *__pyx_v_refv; PyObject *__pyx_v_ref = NULL; @@ -14304,18 +14480,9 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score __Pyx_RefNannySetupContext("__call__", 0); __Pyx_INCREF(__pyx_v_refs); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":122 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":130 * * def __call__(self, refs): - * cdef mteval.EvaluationMetric* metric = mteval.Instance(self.name[0]) # <<<<<<<<<<<<<< - * if isinstance(refs, unicode) or isinstance(refs, str): - * refs = [refs] - */ - __pyx_v_metric = EvaluationMetric::Instance((__pyx_v_self->name[0])); - - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":123 - * def __call__(self, refs): - * cdef mteval.EvaluationMetric* metric = mteval.Instance(self.name[0]) * if isinstance(refs, unicode) or isinstance(refs, str): # <<<<<<<<<<<<<< * refs = [refs] * cdef vector[vector[WordID]]* refsv = new vector[vector[WordID]]() @@ -14335,14 +14502,14 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score } if (__pyx_t_4) { - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":124 - * cdef mteval.EvaluationMetric* metric = mteval.Instance(self.name[0]) + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":131 + * def __call__(self, refs): * if isinstance(refs, unicode) or isinstance(refs, str): * refs = [refs] # <<<<<<<<<<<<<< * cdef vector[vector[WordID]]* refsv = new vector[vector[WordID]]() * cdef vector[WordID]* refv */ - __pyx_t_1 = PyList_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_v_refs); PyList_SET_ITEM(__pyx_t_1, 0, __pyx_v_refs); @@ -14354,7 +14521,7 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score } __pyx_L3:; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":125 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":132 * if isinstance(refs, unicode) or isinstance(refs, str): * refs = [refs] * cdef vector[vector[WordID]]* refsv = new vector[vector[WordID]]() # <<<<<<<<<<<<<< @@ -14363,7 +14530,7 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score */ __pyx_v_refsv = new std::vector >(); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":128 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":135 * cdef vector[WordID]* refv * cdef bytes ref_str * for ref in refs: # <<<<<<<<<<<<<< @@ -14374,7 +14541,7 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score __pyx_t_1 = __pyx_v_refs; __Pyx_INCREF(__pyx_t_1); __pyx_t_5 = 0; __pyx_t_6 = NULL; } else { - __pyx_t_5 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_refs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_refs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_6 = Py_TYPE(__pyx_t_1)->tp_iternext; } @@ -14390,7 +14557,7 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score if (unlikely(!__pyx_t_7)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[5]; __pyx_lineno = 128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[5]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -14400,7 +14567,7 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score __pyx_v_ref = __pyx_t_7; __pyx_t_7 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":129 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":136 * cdef bytes ref_str * for ref in refs: * refv = new vector[WordID]() # <<<<<<<<<<<<<< @@ -14409,22 +14576,22 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score */ __pyx_v_refv = new std::vector(); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":130 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":137 * for ref in refs: * refv = new vector[WordID]() * ConvertSentence(string(as_str(ref.strip())), refv) # <<<<<<<<<<<<<< * refsv.push_back(refv[0]) * del refv */ - __pyx_t_7 = PyObject_GetAttr(__pyx_v_ref, __pyx_n_s__strip); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_GetAttr(__pyx_v_ref, __pyx_n_s__strip); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_8 = PyObject_Call(__pyx_t_7, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyObject_Call(__pyx_t_7, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; TD::ConvertSentence(std::string(__pyx_f_5_cdec_as_str(__pyx_t_8, NULL)), __pyx_v_refv); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":131 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":138 * refv = new vector[WordID]() * ConvertSentence(string(as_str(ref.strip())), refv) * refsv.push_back(refv[0]) # <<<<<<<<<<<<<< @@ -14433,7 +14600,7 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score */ __pyx_v_refsv->push_back((__pyx_v_refv[0])); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":132 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":139 * ConvertSentence(string(as_str(ref.strip())), refv) * refsv.push_back(refv[0]) * del refv # <<<<<<<<<<<<<< @@ -14444,47 +14611,47 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_4__call__(struct __pyx_obj_5_cdec_Score } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":134 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":141 * del refv * cdef unsigned i * cdef SegmentEvaluator evaluator = SegmentEvaluator() # <<<<<<<<<<<<<< - * evaluator.metric = metric - * evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator](metric.CreateSegmentEvaluator(refsv[0])) + * evaluator.metric = self.metric + * evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator]( */ - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SegmentEvaluator)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_SegmentEvaluator)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_evaluator = ((struct __pyx_obj_5_cdec_SegmentEvaluator *)__pyx_t_1); __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":135 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":142 * cdef unsigned i * cdef SegmentEvaluator evaluator = SegmentEvaluator() - * evaluator.metric = metric # <<<<<<<<<<<<<< - * evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator](metric.CreateSegmentEvaluator(refsv[0])) - * del refsv # in theory should not delete but store in SegmentEvaluator + * evaluator.metric = self.metric # <<<<<<<<<<<<<< + * evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator]( + * self.metric.CreateSegmentEvaluator(refsv[0])) */ - __pyx_v_evaluator->metric = __pyx_v_metric; + __pyx_v_evaluator->metric = __pyx_v_self->metric; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":136 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":143 * cdef SegmentEvaluator evaluator = SegmentEvaluator() - * evaluator.metric = metric - * evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator](metric.CreateSegmentEvaluator(refsv[0])) # <<<<<<<<<<<<<< + * evaluator.metric = self.metric + * evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator]( # <<<<<<<<<<<<<< + * self.metric.CreateSegmentEvaluator(refsv[0])) * del refsv # in theory should not delete but store in SegmentEvaluator - * return evaluator */ - __pyx_v_evaluator->scorer = new boost::shared_ptr(__pyx_v_metric->CreateSegmentEvaluator((__pyx_v_refsv[0]))); + __pyx_v_evaluator->scorer = new boost::shared_ptr(__pyx_v_self->metric->CreateSegmentEvaluator((__pyx_v_refsv[0]))); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":137 - * evaluator.metric = metric - * evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator](metric.CreateSegmentEvaluator(refsv[0])) + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":145 + * evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator]( + * self.metric.CreateSegmentEvaluator(refsv[0])) * del refsv # in theory should not delete but store in SegmentEvaluator # <<<<<<<<<<<<<< * return evaluator * */ delete __pyx_v_refsv; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":138 - * evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator](metric.CreateSegmentEvaluator(refsv[0])) + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":146 + * self.metric.CreateSegmentEvaluator(refsv[0])) * del refsv # in theory should not delete but store in SegmentEvaluator * return evaluator # <<<<<<<<<<<<<< * @@ -14523,7 +14690,7 @@ static PyObject *__pyx_pw_5_cdec_6Scorer_7__str__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":140 +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":148 * return evaluator * * def __str__(self): # <<<<<<<<<<<<<< @@ -14540,15 +14707,15 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_6__str__(struct __pyx_obj_5_cdec_Scorer int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__str__", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":141 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":149 * * def __str__(self): * return self.name.c_str() # <<<<<<<<<<<<<< * - * BLEU = Scorer('IBM_BLEU') + * cdef float _compute_score(void* metric_, mteval.SufficientStats* stats): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyBytes_FromString(__pyx_v_self->name->c_str()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyBytes_FromString(__pyx_v_self->name->c_str()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); __pyx_r = ((PyObject *)__pyx_t_1); __pyx_t_1 = 0; @@ -14565,71 +14732,661 @@ static PyObject *__pyx_pf_5_cdec_6Scorer_6__str__(struct __pyx_obj_5_cdec_Scorer __Pyx_RefNannyFinishContext(); return __pyx_r; } -static PyObject *__pyx_gb_5_cdec_4generator16(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ - -/* Python wrapper */ -static PyObject *__pyx_pw_5_cdec_3_make_config(PyObject *__pyx_self, PyObject *__pyx_v_config); /*proto*/ -static PyMethodDef __pyx_mdef_5_cdec_3_make_config = {__Pyx_NAMESTR("_make_config"), (PyCFunction)__pyx_pw_5_cdec_3_make_config, METH_O, __Pyx_DOCSTR(0)}; -static PyObject *__pyx_pw_5_cdec_3_make_config(PyObject *__pyx_self, PyObject *__pyx_v_config) { - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("_make_config (wrapper)", 0); - __pyx_self = __pyx_self; - __pyx_r = __pyx_pf_5_cdec_2_make_config(__pyx_self, ((PyObject *)__pyx_v_config)); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} -/* "_cdec.pyx":28 - * class ParseFailed(Exception): pass +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":151 + * return self.name.c_str() * - * def _make_config(config): # <<<<<<<<<<<<<< - * for key, value in config.items(): - * if isinstance(value, dict): + * cdef float _compute_score(void* metric_, mteval.SufficientStats* stats): # <<<<<<<<<<<<<< + * cdef Metric metric = metric_ + * cdef list ss = [] */ -static PyObject *__pyx_pf_5_cdec_2_make_config(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_config) { - struct __pyx_obj_5_cdec___pyx_scope_struct_22__make_config *__pyx_cur_scope; - PyObject *__pyx_r = NULL; +static float __pyx_f_5_cdec__compute_score(void *__pyx_v_metric_, SufficientStats *__pyx_v_stats) { + struct __pyx_obj_5_cdec_Metric *__pyx_v_metric = 0; + PyObject *__pyx_v_ss = 0; + unsigned int __pyx_v_i; + float __pyx_r; __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + unsigned int __pyx_t_2; + unsigned int __pyx_t_3; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + float __pyx_t_7; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("_make_config", 0); - __pyx_cur_scope = (struct __pyx_obj_5_cdec___pyx_scope_struct_22__make_config *)__pyx_ptype_5_cdec___pyx_scope_struct_22__make_config->tp_new(__pyx_ptype_5_cdec___pyx_scope_struct_22__make_config, __pyx_empty_tuple, NULL); - if (unlikely(!__pyx_cur_scope)) { - __Pyx_RefNannyFinishContext(); - return NULL; - } - __Pyx_GOTREF(__pyx_cur_scope); - __pyx_cur_scope->__pyx_v_config = __pyx_v_config; - __Pyx_INCREF(__pyx_cur_scope->__pyx_v_config); - __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_config); - { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_4generator16, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_cur_scope); - __Pyx_RefNannyFinishContext(); - return (PyObject *) gen; - } + __Pyx_RefNannySetupContext("_compute_score", 0); - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("_cdec._make_config", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_DECREF(((PyObject *)__pyx_cur_scope)); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":152 + * + * cdef float _compute_score(void* metric_, mteval.SufficientStats* stats): + * cdef Metric metric = metric_ # <<<<<<<<<<<<<< + * cdef list ss = [] + * cdef unsigned i + */ + __Pyx_INCREF(((PyObject *)((struct __pyx_obj_5_cdec_Metric *)__pyx_v_metric_))); + __pyx_v_metric = ((struct __pyx_obj_5_cdec_Metric *)__pyx_v_metric_); -static PyObject *__pyx_gb_5_cdec_4generator16(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ -{ - struct __pyx_obj_5_cdec___pyx_scope_struct_22__make_config *__pyx_cur_scope = ((struct __pyx_obj_5_cdec___pyx_scope_struct_22__make_config *)__pyx_generator->closure); - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":153 + * cdef float _compute_score(void* metric_, mteval.SufficientStats* stats): + * cdef Metric metric = metric_ + * cdef list ss = [] # <<<<<<<<<<<<<< + * cdef unsigned i + * for i in range(stats.size()): + */ + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_v_ss = __pyx_t_1; + __pyx_t_1 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":155 + * cdef list ss = [] + * cdef unsigned i + * for i in range(stats.size()): # <<<<<<<<<<<<<< + * ss.append(stats[0][i]) + * return metric.score(ss) + */ + __pyx_t_2 = __pyx_v_stats->size(); + for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { + __pyx_v_i = __pyx_t_3; + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":156 + * cdef unsigned i + * for i in range(stats.size()): + * ss.append(stats[0][i]) # <<<<<<<<<<<<<< + * return metric.score(ss) + * + */ + __pyx_t_1 = PyFloat_FromDouble(((__pyx_v_stats[0])[__pyx_v_i])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 156; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_4 = PyList_Append(__pyx_v_ss, __pyx_t_1); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 156; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + } + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":157 + * for i in range(stats.size()): + * ss.append(stats[0][i]) + * return metric.score(ss) # <<<<<<<<<<<<<< + * + * cdef void _compute_sufficient_stats(void* metric_, + */ + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_metric), __pyx_n_s__score); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __Pyx_INCREF(((PyObject *)__pyx_v_ss)); + PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_v_ss)); + __Pyx_GIVEREF(((PyObject *)__pyx_v_ss)); + __pyx_t_6 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; + __pyx_t_7 = __pyx_PyFloat_AsFloat(__pyx_t_6); if (unlikely((__pyx_t_7 == (float)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_r = __pyx_t_7; + goto __pyx_L0; + + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_WriteUnraisable("_cdec._compute_score", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF((PyObject *)__pyx_v_metric); + __Pyx_XDECREF(__pyx_v_ss); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":159 + * return metric.score(ss) + * + * cdef void _compute_sufficient_stats(void* metric_, # <<<<<<<<<<<<<< + * string* hyp, + * vector[string]* refs, + */ + +static void __pyx_f_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_, std::string *__pyx_v_hyp, std::vector *__pyx_v_refs, SufficientStats *__pyx_v_out) { + struct __pyx_obj_5_cdec_Metric *__pyx_v_metric = 0; + PyObject *__pyx_v_refs_ = 0; + unsigned int __pyx_v_i; + PyObject *__pyx_v_ss = 0; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + size_t __pyx_t_2; + unsigned int __pyx_t_3; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + Py_ssize_t __pyx_t_7; + float __pyx_t_8; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("_compute_sufficient_stats", 0); + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":163 + * vector[string]* refs, + * mteval.SufficientStats* out): + * cdef Metric metric = metric_ # <<<<<<<<<<<<<< + * cdef list refs_ = [] + * cdef unsigned i + */ + __Pyx_INCREF(((PyObject *)((struct __pyx_obj_5_cdec_Metric *)__pyx_v_metric_))); + __pyx_v_metric = ((struct __pyx_obj_5_cdec_Metric *)__pyx_v_metric_); + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":164 + * mteval.SufficientStats* out): + * cdef Metric metric = metric_ + * cdef list refs_ = [] # <<<<<<<<<<<<<< + * cdef unsigned i + * for i in range(refs.size()): + */ + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 164; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_v_refs_ = __pyx_t_1; + __pyx_t_1 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":166 + * cdef list refs_ = [] + * cdef unsigned i + * for i in range(refs.size()): # <<<<<<<<<<<<<< + * refs_.append(refs[0][i].c_str()) + * cdef list ss = metric.evaluate(hyp.c_str(), refs_) + */ + __pyx_t_2 = __pyx_v_refs->size(); + for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { + __pyx_v_i = __pyx_t_3; + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":167 + * cdef unsigned i + * for i in range(refs.size()): + * refs_.append(refs[0][i].c_str()) # <<<<<<<<<<<<<< + * cdef list ss = metric.evaluate(hyp.c_str(), refs_) + * out.fields.resize(len(ss)) + */ + __pyx_t_1 = PyBytes_FromString(((__pyx_v_refs[0])[__pyx_v_i]).c_str()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(((PyObject *)__pyx_t_1)); + __pyx_t_4 = PyList_Append(__pyx_v_refs_, ((PyObject *)__pyx_t_1)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; + } + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":168 + * for i in range(refs.size()): + * refs_.append(refs[0][i].c_str()) + * cdef list ss = metric.evaluate(hyp.c_str(), refs_) # <<<<<<<<<<<<<< + * out.fields.resize(len(ss)) + * for i in range(len(ss)): + */ + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_metric), __pyx_n_s__evaluate); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 168; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_5 = PyBytes_FromString(__pyx_v_hyp->c_str()); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 168; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(((PyObject *)__pyx_t_5)); + __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 168; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_t_5)); + __Pyx_GIVEREF(((PyObject *)__pyx_t_5)); + __Pyx_INCREF(((PyObject *)__pyx_v_refs_)); + PyTuple_SET_ITEM(__pyx_t_6, 1, ((PyObject *)__pyx_v_refs_)); + __Pyx_GIVEREF(((PyObject *)__pyx_v_refs_)); + __pyx_t_5 = 0; + __pyx_t_5 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_6), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 168; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0; + if (!(likely(PyList_CheckExact(__pyx_t_5))||((__pyx_t_5) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected list, got %.200s", Py_TYPE(__pyx_t_5)->tp_name), 0))) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 168; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_ss = ((PyObject*)__pyx_t_5); + __pyx_t_5 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":169 + * refs_.append(refs[0][i].c_str()) + * cdef list ss = metric.evaluate(hyp.c_str(), refs_) + * out.fields.resize(len(ss)) # <<<<<<<<<<<<<< + * for i in range(len(ss)): + * out.fields[i] = ss[i] + */ + if (unlikely(((PyObject *)__pyx_v_ss) == Py_None)) { + PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_t_7 = PyList_GET_SIZE(((PyObject *)__pyx_v_ss)); + __pyx_v_out->fields.resize(__pyx_t_7); + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":170 + * cdef list ss = metric.evaluate(hyp.c_str(), refs_) + * out.fields.resize(len(ss)) + * for i in range(len(ss)): # <<<<<<<<<<<<<< + * out.fields[i] = ss[i] + * + */ + if (unlikely(((PyObject *)__pyx_v_ss) == Py_None)) { + PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_t_7 = PyList_GET_SIZE(((PyObject *)__pyx_v_ss)); + for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_7; __pyx_t_3+=1) { + __pyx_v_i = __pyx_t_3; + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":171 + * out.fields.resize(len(ss)) + * for i in range(len(ss)): + * out.fields[i] = ss[i] # <<<<<<<<<<<<<< + * + * cdef class Metric: + */ + __pyx_t_5 = __Pyx_GetItemInt_List(((PyObject *)__pyx_v_ss), __pyx_v_i, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_5) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 171; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_8 = __pyx_PyFloat_AsFloat(__pyx_t_5); if (unlikely((__pyx_t_8 == (float)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 171; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + (__pyx_v_out->fields[__pyx_v_i]) = __pyx_t_8; + } + + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_WriteUnraisable("_cdec._compute_sufficient_stats", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_L0:; + __Pyx_XDECREF((PyObject *)__pyx_v_metric); + __Pyx_XDECREF(__pyx_v_refs_); + __Pyx_XDECREF(__pyx_v_ss); + __Pyx_RefNannyFinishContext(); +} + +/* Python wrapper */ +static int __pyx_pw_5_cdec_6Metric_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static int __pyx_pw_5_cdec_6Metric_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0); + if (unlikely(PyTuple_GET_SIZE(__pyx_args) > 0)) { + __Pyx_RaiseArgtupleInvalid("__cinit__", 1, 0, 0, PyTuple_GET_SIZE(__pyx_args)); return -1;} + if (unlikely(__pyx_kwds) && unlikely(PyDict_Size(__pyx_kwds) > 0) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__cinit__", 0))) return -1; + __pyx_r = __pyx_pf_5_cdec_6Metric___cinit__(((struct __pyx_obj_5_cdec_Metric *)__pyx_v_self)); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":175 + * cdef class Metric: + * cdef Scorer scorer + * def __cinit__(self): # <<<<<<<<<<<<<< + * self.scorer = Scorer() + * self.scorer.name = new string(as_str(self.__class__.__name__)) + */ + +static int __pyx_pf_5_cdec_6Metric___cinit__(struct __pyx_obj_5_cdec_Metric *__pyx_v_self) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__cinit__", 0); + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":176 + * cdef Scorer scorer + * def __cinit__(self): + * self.scorer = Scorer() # <<<<<<<<<<<<<< + * self.scorer.name = new string(as_str(self.__class__.__name__)) + * self.scorer.metric = mteval.PyMetricInstance(self.scorer.name[0], + */ + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Scorer)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 176; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_GIVEREF(__pyx_t_1); + __Pyx_GOTREF(__pyx_v_self->scorer); + __Pyx_DECREF(((PyObject *)__pyx_v_self->scorer)); + __pyx_v_self->scorer = ((struct __pyx_obj_5_cdec_Scorer *)__pyx_t_1); + __pyx_t_1 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":177 + * def __cinit__(self): + * self.scorer = Scorer() + * self.scorer.name = new string(as_str(self.__class__.__name__)) # <<<<<<<<<<<<<< + * self.scorer.metric = mteval.PyMetricInstance(self.scorer.name[0], + * self, _compute_sufficient_stats, _compute_score) + */ + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s____class__); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____name__); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_self->scorer->name = new std::string(__pyx_f_5_cdec_as_str(__pyx_t_2, NULL)); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":178 + * self.scorer = Scorer() + * self.scorer.name = new string(as_str(self.__class__.__name__)) + * self.scorer.metric = mteval.PyMetricInstance(self.scorer.name[0], # <<<<<<<<<<<<<< + * self, _compute_sufficient_stats, _compute_score) + * + */ + __pyx_v_self->scorer->metric = PythonEvaluationMetric::Instance((__pyx_v_self->scorer->name[0]), ((void *)__pyx_v_self), __pyx_f_5_cdec__compute_sufficient_stats, __pyx_f_5_cdec__compute_score); + + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("_cdec.Metric.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5_cdec_6Metric_3__call__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyObject *__pyx_pw_5_cdec_6Metric_3__call__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyObject *__pyx_v_refs = 0; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__refs,0}; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__call__ (wrapper)", 0); + { + PyObject* values[1] = {0}; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args; + const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); + switch (pos_args) { + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = PyDict_Size(__pyx_kwds); + switch (pos_args) { + case 0: + values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__refs); + if (likely(values[0])) kw_args--; + else goto __pyx_L5_argtuple_error; + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__call__") < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 181; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + } else if (PyTuple_GET_SIZE(__pyx_args) != 1) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + } + __pyx_v_refs = values[0]; + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("__call__", 1, 1, 1, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 181; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_L3_error:; + __Pyx_AddTraceback("_cdec.Metric.__call__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_5_cdec_6Metric_2__call__(((struct __pyx_obj_5_cdec_Metric *)__pyx_v_self), __pyx_v_refs); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":181 + * self, _compute_sufficient_stats, _compute_score) + * + * def __call__(self, refs): # <<<<<<<<<<<<<< + * return self.scorer(refs) + * + */ + +static PyObject *__pyx_pf_5_cdec_6Metric_2__call__(struct __pyx_obj_5_cdec_Metric *__pyx_v_self, PyObject *__pyx_v_refs) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__call__", 0); + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":182 + * + * def __call__(self, refs): + * return self.scorer(refs) # <<<<<<<<<<<<<< + * + * def score(SufficientStats stats): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 182; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_INCREF(__pyx_v_refs); + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_refs); + __Pyx_GIVEREF(__pyx_v_refs); + __pyx_t_2 = PyObject_Call(((PyObject *)__pyx_v_self->scorer), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 182; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("_cdec.Metric.__call__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5_cdec_6Metric_5score(PyObject *__pyx_v_stats, CYTHON_UNUSED PyObject *unused); /*proto*/ +static PyObject *__pyx_pw_5_cdec_6Metric_5score(PyObject *__pyx_v_stats, CYTHON_UNUSED PyObject *unused) { + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("score (wrapper)", 0); + __pyx_r = __pyx_pf_5_cdec_6Metric_4score(((struct __pyx_obj_5_cdec_Metric *)__pyx_v_stats)); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":184 + * return self.scorer(refs) + * + * def score(SufficientStats stats): # <<<<<<<<<<<<<< + * return 0 + * + */ + +static PyObject *__pyx_pf_5_cdec_6Metric_4score(CYTHON_UNUSED struct __pyx_obj_5_cdec_Metric *__pyx_v_stats) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("score", 0); + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":185 + * + * def score(SufficientStats stats): + * return 0 # <<<<<<<<<<<<<< + * + * def evaluate(self, hyp, refs): + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_int_0); + __pyx_r = __pyx_int_0; + goto __pyx_L0; + + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5_cdec_6Metric_7evaluate(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyObject *__pyx_pw_5_cdec_6Metric_7evaluate(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + CYTHON_UNUSED PyObject *__pyx_v_hyp = 0; + CYTHON_UNUSED PyObject *__pyx_v_refs = 0; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__hyp,&__pyx_n_s__refs,0}; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("evaluate (wrapper)", 0); + { + PyObject* values[2] = {0,0}; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args; + const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); + switch (pos_args) { + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = PyDict_Size(__pyx_kwds); + switch (pos_args) { + case 0: + values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__hyp); + if (likely(values[0])) kw_args--; + else goto __pyx_L5_argtuple_error; + case 1: + values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__refs); + if (likely(values[1])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("evaluate", 1, 2, 2, 1); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 187; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "evaluate") < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 187; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + } else if (PyTuple_GET_SIZE(__pyx_args) != 2) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + } + __pyx_v_hyp = values[0]; + __pyx_v_refs = values[1]; + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("evaluate", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[5]; __pyx_lineno = 187; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_L3_error:; + __Pyx_AddTraceback("_cdec.Metric.evaluate", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_5_cdec_6Metric_6evaluate(((struct __pyx_obj_5_cdec_Metric *)__pyx_v_self), __pyx_v_hyp, __pyx_v_refs); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":187 + * return 0 + * + * def evaluate(self, hyp, refs): # <<<<<<<<<<<<<< + * return [] + * + */ + +static PyObject *__pyx_pf_5_cdec_6Metric_6evaluate(CYTHON_UNUSED struct __pyx_obj_5_cdec_Metric *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v_hyp, CYTHON_UNUSED PyObject *__pyx_v_refs) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("evaluate", 0); + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":188 + * + * def evaluate(self, hyp, refs): + * return [] # <<<<<<<<<<<<<< + * + * BLEU = Scorer('IBM_BLEU') + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = ((PyObject *)__pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L0; + + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("_cdec.Metric.evaluate", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} +static PyObject *__pyx_gb_5_cdec_4generator16(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ + +/* Python wrapper */ +static PyObject *__pyx_pw_5_cdec_3_make_config(PyObject *__pyx_self, PyObject *__pyx_v_config); /*proto*/ +static PyMethodDef __pyx_mdef_5_cdec_3_make_config = {__Pyx_NAMESTR("_make_config"), (PyCFunction)__pyx_pw_5_cdec_3_make_config, METH_O, __Pyx_DOCSTR(0)}; +static PyObject *__pyx_pw_5_cdec_3_make_config(PyObject *__pyx_self, PyObject *__pyx_v_config) { + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("_make_config (wrapper)", 0); + __pyx_self = __pyx_self; + __pyx_r = __pyx_pf_5_cdec_2_make_config(__pyx_self, ((PyObject *)__pyx_v_config)); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "_cdec.pyx":28 + * class ParseFailed(Exception): pass + * + * def _make_config(config): # <<<<<<<<<<<<<< + * for key, value in config.items(): + * if isinstance(value, dict): + */ + +static PyObject *__pyx_pf_5_cdec_2_make_config(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_config) { + struct __pyx_obj_5_cdec___pyx_scope_struct_22__make_config *__pyx_cur_scope; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("_make_config", 0); + __pyx_cur_scope = (struct __pyx_obj_5_cdec___pyx_scope_struct_22__make_config *)__pyx_ptype_5_cdec___pyx_scope_struct_22__make_config->tp_new(__pyx_ptype_5_cdec___pyx_scope_struct_22__make_config, __pyx_empty_tuple, NULL); + if (unlikely(!__pyx_cur_scope)) { + __Pyx_RefNannyFinishContext(); + return NULL; + } + __Pyx_GOTREF(__pyx_cur_scope); + __pyx_cur_scope->__pyx_v_config = __pyx_v_config; + __Pyx_INCREF(__pyx_cur_scope->__pyx_v_config); + __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_config); + { + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_4generator16, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_cur_scope); + __Pyx_RefNannyFinishContext(); + return (PyObject *) gen; + } + + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("_cdec._make_config", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_DECREF(((PyObject *)__pyx_cur_scope)); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_gb_5_cdec_4generator16(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ +{ + struct __pyx_obj_5_cdec___pyx_scope_struct_22__make_config *__pyx_cur_scope = ((struct __pyx_obj_5_cdec___pyx_scope_struct_22__make_config *)__pyx_generator->closure); + PyObject *__pyx_r = NULL; + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; Py_ssize_t __pyx_t_3; PyObject *(*__pyx_t_4)(PyObject *); PyObject *__pyx_t_5 = NULL; @@ -14866,7 +15623,7 @@ static PyObject *__pyx_gb_5_cdec_4generator16(__pyx_GeneratorObject *__pyx_gener __Pyx_INCREF(__pyx_cur_scope->__pyx_v_info); PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_cur_scope->__pyx_v_info); __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_info); - __pyx_t_7 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_43), ((PyObject *)__pyx_t_6)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_45), ((PyObject *)__pyx_t_6)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_7)); __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0; __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -15245,7 +16002,7 @@ static PyObject *__pyx_gb_5_cdec_7Decoder_9__cinit___2generator20(__pyx_Generato __Pyx_GIVEREF(__pyx_t_3); __pyx_cur_scope->__pyx_v_kv = __pyx_t_3; __pyx_t_3 = 0; - __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_44), __pyx_cur_scope->__pyx_v_kv); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_46), __pyx_cur_scope->__pyx_v_kv); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_3)); __pyx_r = ((PyObject *)__pyx_t_3); __pyx_t_3 = 0; @@ -15336,7 +16093,7 @@ static int __pyx_pf_5_cdec_7Decoder___cinit__(struct __pyx_obj_5_cdec_Decoder *_ */ __pyx_t_2 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_config, __pyx_n_s__get); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_45), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_47), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_formalism = __pyx_t_3; @@ -15401,7 +16158,7 @@ static int __pyx_pf_5_cdec_7Decoder___cinit__(struct __pyx_obj_5_cdec_Decoder *_ */ __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s__InvalidConfig); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_46), __pyx_v_formalism); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_48), __pyx_v_formalism); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); @@ -15807,7 +16564,7 @@ static int __pyx_pf_5_cdec_7Decoder_7weights_2__set__(struct __pyx_obj_5_cdec_De * * property formalism: */ - __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_47), ((PyObject *)Py_TYPE(__pyx_v_weights))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_49), ((PyObject *)Py_TYPE(__pyx_v_weights))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); @@ -16038,7 +16795,7 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_4read_weights(struct __pyx_obj_5_cdec_ __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__startswith); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_49), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_51), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_10 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L7_error;} @@ -16207,7 +16964,7 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_4read_weights(struct __pyx_obj_5_cdec_ } /*finally:*/ { if (__pyx_t_3) { - __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_50, NULL); + __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_52, NULL); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); @@ -16412,7 +17169,7 @@ static PyObject *__pyx_pf_5_cdec_7Decoder_6translate(struct __pyx_obj_5_cdec_Dec * if grammar: * if isinstance(grammar, str) or isinstance(grammar, unicode): */ - __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_51), ((PyObject *)Py_TYPE(__pyx_v_sentence))); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_53), ((PyObject *)Py_TYPE(__pyx_v_sentence))); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_5)); __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -19213,6 +19970,13 @@ static void __pyx_tp_dealloc_5_cdec_SufficientStats(PyObject *o) { } (*Py_TYPE(o)->tp_free)(o); } +static PyObject *__pyx_sq_item_5_cdec_SufficientStats(PyObject *o, Py_ssize_t i) { + PyObject *r; + PyObject *x = PyInt_FromSsize_t(i); if(!x) return 0; + r = Py_TYPE(o)->tp_as_mapping->mp_subscript(o, x); + Py_DECREF(x); + return r; +} static PyObject *__pyx_getprop_5_cdec_15SufficientStats_score(PyObject *o, void *x) { return __pyx_pw_5_cdec_15SufficientStats_5score_1__get__(o); @@ -19233,7 +19997,7 @@ static struct PyGetSetDef __pyx_getsets_5_cdec_SufficientStats[] = { }; static PyNumberMethods __pyx_tp_as_number_SufficientStats = { - __pyx_pw_5_cdec_15SufficientStats_10__add__, /*nb_add*/ + __pyx_pw_5_cdec_15SufficientStats_12__add__, /*nb_add*/ 0, /*nb_subtract*/ 0, /*nb_multiply*/ #if PY_MAJOR_VERSION < 3 @@ -19268,7 +20032,7 @@ static PyNumberMethods __pyx_tp_as_number_SufficientStats = { #if PY_MAJOR_VERSION < 3 0, /*nb_hex*/ #endif - __pyx_pw_5_cdec_15SufficientStats_8__iadd__, /*nb_inplace_add*/ + __pyx_pw_5_cdec_15SufficientStats_10__iadd__, /*nb_inplace_add*/ 0, /*nb_inplace_subtract*/ 0, /*nb_inplace_multiply*/ #if PY_MAJOR_VERSION < 3 @@ -19294,7 +20058,7 @@ static PySequenceMethods __pyx_tp_as_sequence_SufficientStats = { __pyx_pw_5_cdec_15SufficientStats_3__len__, /*sq_length*/ 0, /*sq_concat*/ 0, /*sq_repeat*/ - 0, /*sq_item*/ + __pyx_sq_item_5_cdec_SufficientStats, /*sq_item*/ 0, /*sq_slice*/ 0, /*sq_ass_item*/ 0, /*sq_ass_slice*/ @@ -19305,7 +20069,7 @@ static PySequenceMethods __pyx_tp_as_sequence_SufficientStats = { static PyMappingMethods __pyx_tp_as_mapping_SufficientStats = { __pyx_pw_5_cdec_15SufficientStats_3__len__, /*mp_length*/ - 0, /*mp_subscript*/ + __pyx_pw_5_cdec_15SufficientStats_8__getitem__, /*mp_subscript*/ 0, /*mp_ass_subscript*/ }; @@ -19933,6 +20697,202 @@ static PyTypeObject __pyx_type_5_cdec_Scorer = { #endif }; +static PyObject *__pyx_tp_new_5_cdec_Metric(PyTypeObject *t, PyObject *a, PyObject *k) { + struct __pyx_obj_5_cdec_Metric *p; + PyObject *o = (*t->tp_alloc)(t, 0); + if (!o) return 0; + p = ((struct __pyx_obj_5_cdec_Metric *)o); + p->scorer = ((struct __pyx_obj_5_cdec_Scorer *)Py_None); Py_INCREF(Py_None); + if (__pyx_pw_5_cdec_6Metric_1__cinit__(o, __pyx_empty_tuple, NULL) < 0) { + Py_DECREF(o); o = 0; + } + return o; +} + +static void __pyx_tp_dealloc_5_cdec_Metric(PyObject *o) { + struct __pyx_obj_5_cdec_Metric *p = (struct __pyx_obj_5_cdec_Metric *)o; + Py_XDECREF(((PyObject *)p->scorer)); + (*Py_TYPE(o)->tp_free)(o); +} + +static int __pyx_tp_traverse_5_cdec_Metric(PyObject *o, visitproc v, void *a) { + int e; + struct __pyx_obj_5_cdec_Metric *p = (struct __pyx_obj_5_cdec_Metric *)o; + if (p->scorer) { + e = (*v)(((PyObject*)p->scorer), a); if (e) return e; + } + return 0; +} + +static int __pyx_tp_clear_5_cdec_Metric(PyObject *o) { + struct __pyx_obj_5_cdec_Metric *p = (struct __pyx_obj_5_cdec_Metric *)o; + PyObject* tmp; + tmp = ((PyObject*)p->scorer); + p->scorer = ((struct __pyx_obj_5_cdec_Scorer *)Py_None); Py_INCREF(Py_None); + Py_XDECREF(tmp); + return 0; +} + +static PyMethodDef __pyx_methods_5_cdec_Metric[] = { + {__Pyx_NAMESTR("score"), (PyCFunction)__pyx_pw_5_cdec_6Metric_5score, METH_NOARGS, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("evaluate"), (PyCFunction)__pyx_pw_5_cdec_6Metric_7evaluate, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, + {0, 0, 0, 0} +}; + +static PyNumberMethods __pyx_tp_as_number_Metric = { + 0, /*nb_add*/ + 0, /*nb_subtract*/ + 0, /*nb_multiply*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_divide*/ + #endif + 0, /*nb_remainder*/ + 0, /*nb_divmod*/ + 0, /*nb_power*/ + 0, /*nb_negative*/ + 0, /*nb_positive*/ + 0, /*nb_absolute*/ + 0, /*nb_nonzero*/ + 0, /*nb_invert*/ + 0, /*nb_lshift*/ + 0, /*nb_rshift*/ + 0, /*nb_and*/ + 0, /*nb_xor*/ + 0, /*nb_or*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_coerce*/ + #endif + 0, /*nb_int*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_long*/ + #else + 0, /*reserved*/ + #endif + 0, /*nb_float*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_oct*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*nb_hex*/ + #endif + 0, /*nb_inplace_add*/ + 0, /*nb_inplace_subtract*/ + 0, /*nb_inplace_multiply*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_inplace_divide*/ + #endif + 0, /*nb_inplace_remainder*/ + 0, /*nb_inplace_power*/ + 0, /*nb_inplace_lshift*/ + 0, /*nb_inplace_rshift*/ + 0, /*nb_inplace_and*/ + 0, /*nb_inplace_xor*/ + 0, /*nb_inplace_or*/ + 0, /*nb_floor_divide*/ + 0, /*nb_true_divide*/ + 0, /*nb_inplace_floor_divide*/ + 0, /*nb_inplace_true_divide*/ + #if PY_VERSION_HEX >= 0x02050000 + 0, /*nb_index*/ + #endif +}; + +static PySequenceMethods __pyx_tp_as_sequence_Metric = { + 0, /*sq_length*/ + 0, /*sq_concat*/ + 0, /*sq_repeat*/ + 0, /*sq_item*/ + 0, /*sq_slice*/ + 0, /*sq_ass_item*/ + 0, /*sq_ass_slice*/ + 0, /*sq_contains*/ + 0, /*sq_inplace_concat*/ + 0, /*sq_inplace_repeat*/ +}; + +static PyMappingMethods __pyx_tp_as_mapping_Metric = { + 0, /*mp_length*/ + 0, /*mp_subscript*/ + 0, /*mp_ass_subscript*/ +}; + +static PyBufferProcs __pyx_tp_as_buffer_Metric = { + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getreadbuffer*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getwritebuffer*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getsegcount*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getcharbuffer*/ + #endif + #if PY_VERSION_HEX >= 0x02060000 + 0, /*bf_getbuffer*/ + #endif + #if PY_VERSION_HEX >= 0x02060000 + 0, /*bf_releasebuffer*/ + #endif +}; + +static PyTypeObject __pyx_type_5_cdec_Metric = { + PyVarObject_HEAD_INIT(0, 0) + __Pyx_NAMESTR("_cdec.Metric"), /*tp_name*/ + sizeof(struct __pyx_obj_5_cdec_Metric), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + __pyx_tp_dealloc_5_cdec_Metric, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + #if PY_MAJOR_VERSION < 3 + 0, /*tp_compare*/ + #else + 0, /*reserved*/ + #endif + 0, /*tp_repr*/ + &__pyx_tp_as_number_Metric, /*tp_as_number*/ + &__pyx_tp_as_sequence_Metric, /*tp_as_sequence*/ + &__pyx_tp_as_mapping_Metric, /*tp_as_mapping*/ + 0, /*tp_hash*/ + __pyx_pw_5_cdec_6Metric_3__call__, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + &__pyx_tp_as_buffer_Metric, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + 0, /*tp_doc*/ + __pyx_tp_traverse_5_cdec_Metric, /*tp_traverse*/ + __pyx_tp_clear_5_cdec_Metric, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + __pyx_methods_5_cdec_Metric, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + 0, /*tp_init*/ + 0, /*tp_alloc*/ + __pyx_tp_new_5_cdec_Metric, /*tp_new*/ + 0, /*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, /*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + #if PY_VERSION_HEX >= 0x02060000 + 0, /*tp_version_tag*/ + #endif +}; + static PyObject *__pyx_tp_new_5_cdec_Decoder(PyTypeObject *t, PyObject *a, PyObject *k) { struct __pyx_obj_5_cdec_Decoder *p; PyObject *o = (*t->tp_alloc)(t, 0); @@ -25197,13 +26157,14 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_s_4, __pyx_k_4, sizeof(__pyx_k_4), 0, 0, 1, 0}, {&__pyx_kp_s_41, __pyx_k_41, sizeof(__pyx_k_41), 0, 0, 1, 0}, {&__pyx_kp_s_43, __pyx_k_43, sizeof(__pyx_k_43), 0, 0, 1, 0}, - {&__pyx_kp_s_44, __pyx_k_44, sizeof(__pyx_k_44), 0, 0, 1, 0}, + {&__pyx_kp_s_45, __pyx_k_45, sizeof(__pyx_k_45), 0, 0, 1, 0}, {&__pyx_kp_s_46, __pyx_k_46, sizeof(__pyx_k_46), 0, 0, 1, 0}, - {&__pyx_kp_s_47, __pyx_k_47, sizeof(__pyx_k_47), 0, 0, 1, 0}, {&__pyx_kp_s_48, __pyx_k_48, sizeof(__pyx_k_48), 0, 0, 1, 0}, - {&__pyx_kp_s_51, __pyx_k_51, sizeof(__pyx_k_51), 0, 0, 1, 0}, - {&__pyx_kp_s_54, __pyx_k_54, sizeof(__pyx_k_54), 0, 0, 1, 0}, - {&__pyx_kp_s_59, __pyx_k_59, sizeof(__pyx_k_59), 0, 0, 1, 0}, + {&__pyx_kp_s_49, __pyx_k_49, sizeof(__pyx_k_49), 0, 0, 1, 0}, + {&__pyx_kp_s_50, __pyx_k_50, sizeof(__pyx_k_50), 0, 0, 1, 0}, + {&__pyx_kp_s_53, __pyx_k_53, sizeof(__pyx_k_53), 0, 0, 1, 0}, + {&__pyx_kp_s_56, __pyx_k_56, sizeof(__pyx_k_56), 0, 0, 1, 0}, + {&__pyx_kp_s_61, __pyx_k_61, sizeof(__pyx_k_61), 0, 0, 1, 0}, {&__pyx_kp_s_7, __pyx_k_7, sizeof(__pyx_k_7), 0, 0, 1, 0}, {&__pyx_kp_s_8, __pyx_k_8, sizeof(__pyx_k_8), 0, 0, 1, 0}, {&__pyx_kp_s_9, __pyx_k_9, sizeof(__pyx_k_9), 0, 0, 1, 0}, @@ -25218,9 +26179,11 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s__TER, __pyx_k__TER, sizeof(__pyx_k__TER), 0, 0, 1, 1}, {&__pyx_n_s__TypeError, __pyx_k__TypeError, sizeof(__pyx_k__TypeError), 0, 0, 1, 1}, {&__pyx_n_s__ValueError, __pyx_k__ValueError, sizeof(__pyx_k__ValueError), 0, 0, 1, 1}, + {&__pyx_n_s____class__, __pyx_k____class__, sizeof(__pyx_k____class__), 0, 0, 1, 1}, {&__pyx_n_s____enter__, __pyx_k____enter__, sizeof(__pyx_k____enter__), 0, 0, 1, 1}, {&__pyx_n_s____exit__, __pyx_k____exit__, sizeof(__pyx_k____exit__), 0, 0, 1, 1}, {&__pyx_n_s____main__, __pyx_k____main__, sizeof(__pyx_k____main__), 0, 0, 1, 1}, + {&__pyx_n_s____name__, __pyx_k____name__, sizeof(__pyx_k____name__), 0, 0, 1, 1}, {&__pyx_n_s____test__, __pyx_k____test__, sizeof(__pyx_k____test__), 0, 0, 1, 1}, {&__pyx_n_s___cdec, __pyx_k___cdec, sizeof(__pyx_k___cdec), 0, 0, 1, 1}, {&__pyx_n_s___make_config, __pyx_k___make_config, sizeof(__pyx_k___make_config), 0, 0, 1, 1}, @@ -25239,6 +26202,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s__encoding, __pyx_k__encoding, sizeof(__pyx_k__encoding), 0, 0, 1, 1}, {&__pyx_n_s__enumerate, __pyx_k__enumerate, sizeof(__pyx_k__enumerate), 0, 0, 1, 1}, {&__pyx_n_s__eval, __pyx_k__eval, sizeof(__pyx_k__eval), 0, 0, 1, 1}, + {&__pyx_n_s__evaluate, __pyx_k__evaluate, sizeof(__pyx_k__evaluate), 0, 0, 1, 1}, {&__pyx_n_s__evaluator, __pyx_k__evaluator, sizeof(__pyx_k__evaluator), 0, 0, 1, 1}, {&__pyx_n_s__f, __pyx_k__f, sizeof(__pyx_k__f), 0, 0, 1, 1}, {&__pyx_n_s__formalism, __pyx_k__formalism, sizeof(__pyx_k__formalism), 0, 0, 1, 1}, @@ -25246,6 +26210,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s__genexpr, __pyx_k__genexpr, sizeof(__pyx_k__genexpr), 0, 0, 1, 1}, {&__pyx_n_s__get, __pyx_k__get, sizeof(__pyx_k__get), 0, 0, 1, 1}, {&__pyx_n_s__grammar, __pyx_k__grammar, sizeof(__pyx_k__grammar), 0, 0, 1, 1}, + {&__pyx_n_s__hyp, __pyx_k__hyp, sizeof(__pyx_k__hyp), 0, 0, 1, 1}, {&__pyx_n_s__hypergraph, __pyx_k__hypergraph, sizeof(__pyx_k__hypergraph), 0, 0, 1, 1}, {&__pyx_n_s__i, __pyx_k__i, sizeof(__pyx_k__i), 0, 0, 1, 1}, {&__pyx_n_s__in_edges, __pyx_k__in_edges, sizeof(__pyx_k__in_edges), 0, 0, 1, 1}, @@ -25271,6 +26236,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s__replace, __pyx_k__replace, sizeof(__pyx_k__replace), 0, 0, 1, 1}, {&__pyx_n_s__rules, __pyx_k__rules, sizeof(__pyx_k__rules), 0, 0, 1, 1}, {&__pyx_n_s__scfg, __pyx_k__scfg, sizeof(__pyx_k__scfg), 0, 0, 1, 1}, + {&__pyx_n_s__score, __pyx_k__score, sizeof(__pyx_k__score), 0, 0, 1, 1}, {&__pyx_n_s__scores, __pyx_k__scores, sizeof(__pyx_k__scores), 0, 0, 1, 1}, {&__pyx_n_s__self, __pyx_k__self, sizeof(__pyx_k__self), 0, 0, 1, 1}, {&__pyx_n_s__sentence, __pyx_k__sentence, sizeof(__pyx_k__sentence), 0, 0, 1, 1}, @@ -25350,7 +26316,7 @@ static int __Pyx_InitCachedConstants(void) { * for trule in rules: * if not isinstance(trule, BaseTRule): * raise ValueError('the grammar should contain TRule objects') # <<<<<<<<<<<<<< - * _g.AddRule(( trule).rule[0]) + * _g.AddRule(( trule).rule[0]) */ __pyx_k_tuple_14 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_14)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_k_tuple_14); @@ -25494,20 +26460,34 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_n_s__utf8)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_40)); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":79 - * def __getitem__(self,int k): - * if not 0 <= k < self.cs.size(): - * raise IndexError('candidate set index out of range') # <<<<<<<<<<<<<< - * cdef Candidate candidate = Candidate() - * candidate.candidate = &self.cs[0][k] + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":50 + * def __getitem__(self, int index): + * if not 0 <= index < len(self): + * raise IndexError('sufficient stats vector index out of range') # <<<<<<<<<<<<<< + * return self.stats[0][index] + * */ - __pyx_k_tuple_42 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_42)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_k_tuple_42 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_42)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_k_tuple_42); __Pyx_INCREF(((PyObject *)__pyx_kp_s_41)); PyTuple_SET_ITEM(__pyx_k_tuple_42, 0, ((PyObject *)__pyx_kp_s_41)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_41)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_42)); + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":84 + * def __getitem__(self,int k): + * if not 0 <= k < self.cs.size(): + * raise IndexError('candidate set index out of range') # <<<<<<<<<<<<<< + * cdef Candidate candidate = Candidate() + * candidate.candidate = &self.cs[0][k] + */ + __pyx_k_tuple_44 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_44)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 84; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_44); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_43)); + PyTuple_SET_ITEM(__pyx_k_tuple_44, 0, ((PyObject *)__pyx_kp_s_43)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_43)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_44)); + /* "_cdec.pyx":50 * """ * if config_str is None: @@ -25515,15 +26495,15 @@ static int __Pyx_InitCachedConstants(void) { * if formalism not in ('scfg', 'fst', 'lextrans', 'pb', * 'csplit', 'tagger', 'lexalign'): */ - __pyx_k_tuple_45 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_45)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_45); + __pyx_k_tuple_47 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_47)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_47); __Pyx_INCREF(((PyObject *)__pyx_n_s__formalism)); - PyTuple_SET_ITEM(__pyx_k_tuple_45, 0, ((PyObject *)__pyx_n_s__formalism)); + PyTuple_SET_ITEM(__pyx_k_tuple_47, 0, ((PyObject *)__pyx_n_s__formalism)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__formalism)); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_45, 1, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_47, 1, Py_None); __Pyx_GIVEREF(Py_None); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_45)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_47)); /* "_cdec.pyx":88 * with open(weights) as fp: @@ -25532,12 +26512,12 @@ static int __Pyx_InitCachedConstants(void) { * fname, value = line.split() * self.weights[fname.strip()] = float(value) */ - __pyx_k_tuple_49 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_49)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_49); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_48)); - PyTuple_SET_ITEM(__pyx_k_tuple_49, 0, ((PyObject *)__pyx_kp_s_48)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_48)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_49)); + __pyx_k_tuple_51 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_51)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_51); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_50)); + PyTuple_SET_ITEM(__pyx_k_tuple_51, 0, ((PyObject *)__pyx_kp_s_50)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_50)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_51)); /* "_cdec.pyx":86 * @@ -25546,18 +26526,18 @@ static int __Pyx_InitCachedConstants(void) { * for line in fp: * if line.strip().startswith('#'): continue */ - __pyx_k_tuple_50 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_50)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_50); + __pyx_k_tuple_52 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_52)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_52); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_50, 0, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_52, 0, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_50, 1, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_52, 1, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_50, 2, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_52, 2, Py_None); __Pyx_GIVEREF(Py_None); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_50)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_52)); /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":3 * cimport grammar @@ -25566,44 +26546,44 @@ static int __Pyx_InitCachedConstants(void) { * return ' '.join(w.encode('utf8') if isinstance(w, unicode) else str(w) for w in phrase) * */ - __pyx_k_tuple_52 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_52)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_52); + __pyx_k_tuple_54 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_54)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_54); __Pyx_INCREF(((PyObject *)__pyx_n_s__phrase)); - PyTuple_SET_ITEM(__pyx_k_tuple_52, 0, ((PyObject *)__pyx_n_s__phrase)); + PyTuple_SET_ITEM(__pyx_k_tuple_54, 0, ((PyObject *)__pyx_n_s__phrase)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__phrase)); __Pyx_INCREF(((PyObject *)__pyx_n_s__genexpr)); - PyTuple_SET_ITEM(__pyx_k_tuple_52, 1, ((PyObject *)__pyx_n_s__genexpr)); + PyTuple_SET_ITEM(__pyx_k_tuple_54, 1, ((PyObject *)__pyx_n_s__genexpr)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__genexpr)); __Pyx_INCREF(((PyObject *)__pyx_n_s__genexpr)); - PyTuple_SET_ITEM(__pyx_k_tuple_52, 2, ((PyObject *)__pyx_n_s__genexpr)); + PyTuple_SET_ITEM(__pyx_k_tuple_54, 2, ((PyObject *)__pyx_n_s__genexpr)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__genexpr)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_52)); - __pyx_k_codeobj_53 = (PyObject*)__Pyx_PyCode_New(1, 0, 3, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_52, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_54, __pyx_n_s___phrase, 3, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_53)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_54)); + __pyx_k_codeobj_55 = (PyObject*)__Pyx_PyCode_New(1, 0, 3, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_54, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_56, __pyx_n_s___phrase, 3, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_55)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":143 - * return self.name.c_str() + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":190 + * return [] * * BLEU = Scorer('IBM_BLEU') # <<<<<<<<<<<<<< * TER = Scorer('TER') */ - __pyx_k_tuple_55 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_55)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_55); + __pyx_k_tuple_57 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_57)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_57); __Pyx_INCREF(((PyObject *)__pyx_n_s__IBM_BLEU)); - PyTuple_SET_ITEM(__pyx_k_tuple_55, 0, ((PyObject *)__pyx_n_s__IBM_BLEU)); + PyTuple_SET_ITEM(__pyx_k_tuple_57, 0, ((PyObject *)__pyx_n_s__IBM_BLEU)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__IBM_BLEU)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_55)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_57)); - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":144 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":191 * * BLEU = Scorer('IBM_BLEU') * TER = Scorer('TER') # <<<<<<<<<<<<<< */ - __pyx_k_tuple_56 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_56)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_56); + __pyx_k_tuple_58 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_58)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_58); __Pyx_INCREF(((PyObject *)__pyx_n_s__TER)); - PyTuple_SET_ITEM(__pyx_k_tuple_56, 0, ((PyObject *)__pyx_n_s__TER)); + PyTuple_SET_ITEM(__pyx_k_tuple_58, 0, ((PyObject *)__pyx_n_s__TER)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__TER)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_56)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_58)); /* "_cdec.pyx":28 * class ParseFailed(Exception): pass @@ -25612,25 +26592,25 @@ static int __Pyx_InitCachedConstants(void) { * for key, value in config.items(): * if isinstance(value, dict): */ - __pyx_k_tuple_57 = PyTuple_New(5); if (unlikely(!__pyx_k_tuple_57)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_57); + __pyx_k_tuple_59 = PyTuple_New(5); if (unlikely(!__pyx_k_tuple_59)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_59); __Pyx_INCREF(((PyObject *)__pyx_n_s__config)); - PyTuple_SET_ITEM(__pyx_k_tuple_57, 0, ((PyObject *)__pyx_n_s__config)); + PyTuple_SET_ITEM(__pyx_k_tuple_59, 0, ((PyObject *)__pyx_n_s__config)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__config)); __Pyx_INCREF(((PyObject *)__pyx_n_s__key)); - PyTuple_SET_ITEM(__pyx_k_tuple_57, 1, ((PyObject *)__pyx_n_s__key)); + PyTuple_SET_ITEM(__pyx_k_tuple_59, 1, ((PyObject *)__pyx_n_s__key)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__key)); __Pyx_INCREF(((PyObject *)__pyx_n_s__value)); - PyTuple_SET_ITEM(__pyx_k_tuple_57, 2, ((PyObject *)__pyx_n_s__value)); + PyTuple_SET_ITEM(__pyx_k_tuple_59, 2, ((PyObject *)__pyx_n_s__value)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__value)); __Pyx_INCREF(((PyObject *)__pyx_n_s__name)); - PyTuple_SET_ITEM(__pyx_k_tuple_57, 3, ((PyObject *)__pyx_n_s__name)); + PyTuple_SET_ITEM(__pyx_k_tuple_59, 3, ((PyObject *)__pyx_n_s__name)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__name)); __Pyx_INCREF(((PyObject *)__pyx_n_s__info)); - PyTuple_SET_ITEM(__pyx_k_tuple_57, 4, ((PyObject *)__pyx_n_s__info)); + PyTuple_SET_ITEM(__pyx_k_tuple_59, 4, ((PyObject *)__pyx_n_s__info)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__info)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_57)); - __pyx_k_codeobj_58 = (PyObject*)__Pyx_PyCode_New(1, 0, 5, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_57, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_59, __pyx_n_s___make_config, 28, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_58)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_59)); + __pyx_k_codeobj_60 = (PyObject*)__Pyx_PyCode_New(1, 0, 5, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_59, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_61, __pyx_n_s___make_config, 28, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_60)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -25764,15 +26744,18 @@ PyMODINIT_FUNC PyInit__cdec(void) if (PyType_Ready(&__pyx_type_5_cdec_SufficientStats) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_SetAttrString(__pyx_m, "SufficientStats", (PyObject *)&__pyx_type_5_cdec_SufficientStats) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec_SufficientStats = &__pyx_type_5_cdec_SufficientStats; - if (PyType_Ready(&__pyx_type_5_cdec_CandidateSet) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__Pyx_SetAttrString(__pyx_m, "CandidateSet", (PyObject *)&__pyx_type_5_cdec_CandidateSet) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec_CandidateSet) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_SetAttrString(__pyx_m, "CandidateSet", (PyObject *)&__pyx_type_5_cdec_CandidateSet) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec_CandidateSet = &__pyx_type_5_cdec_CandidateSet; - if (PyType_Ready(&__pyx_type_5_cdec_SegmentEvaluator) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__Pyx_SetAttrString(__pyx_m, "SegmentEvaluator", (PyObject *)&__pyx_type_5_cdec_SegmentEvaluator) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec_SegmentEvaluator) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 98; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_SetAttrString(__pyx_m, "SegmentEvaluator", (PyObject *)&__pyx_type_5_cdec_SegmentEvaluator) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 98; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec_SegmentEvaluator = &__pyx_type_5_cdec_SegmentEvaluator; - if (PyType_Ready(&__pyx_type_5_cdec_Scorer) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 112; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__Pyx_SetAttrString(__pyx_m, "Scorer", (PyObject *)&__pyx_type_5_cdec_Scorer) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 112; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec_Scorer) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_SetAttrString(__pyx_m, "Scorer", (PyObject *)&__pyx_type_5_cdec_Scorer) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec_Scorer = &__pyx_type_5_cdec_Scorer; + if (PyType_Ready(&__pyx_type_5_cdec_Metric) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_SetAttrString(__pyx_m, "Metric", (PyObject *)&__pyx_type_5_cdec_Metric) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_ptype_5_cdec_Metric = &__pyx_type_5_cdec_Metric; if (PyType_Ready(&__pyx_type_5_cdec_Decoder) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_SetAttrString(__pyx_m, "Decoder", (PyObject *)&__pyx_type_5_cdec_Decoder) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec_Decoder = &__pyx_type_5_cdec_Decoder; @@ -25818,7 +26801,7 @@ PyMODINIT_FUNC PyInit__cdec(void) __pyx_ptype_5_cdec___pyx_scope_struct_19_lines = &__pyx_type_5_cdec___pyx_scope_struct_19_lines; if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_20___iter__) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_20___iter__ = &__pyx_type_5_cdec___pyx_scope_struct_20___iter__; - if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_21___iter__) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 85; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_21___iter__) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_21___iter__ = &__pyx_type_5_cdec___pyx_scope_struct_21___iter__; if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_22__make_config) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_22__make_config = &__pyx_type_5_cdec___pyx_scope_struct_22__make_config; @@ -25843,25 +26826,25 @@ PyMODINIT_FUNC PyInit__cdec(void) if (PyObject_SetAttr(__pyx_m, __pyx_n_s___phrase, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":143 - * return self.name.c_str() + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":190 + * return [] * * BLEU = Scorer('IBM_BLEU') # <<<<<<<<<<<<<< * TER = Scorer('TER') */ - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Scorer)), ((PyObject *)__pyx_k_tuple_55), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Scorer)), ((PyObject *)__pyx_k_tuple_57), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__BLEU, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyObject_SetAttr(__pyx_m, __pyx_n_s__BLEU, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":144 + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":191 * * BLEU = Scorer('IBM_BLEU') * TER = Scorer('TER') # <<<<<<<<<<<<<< */ - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Scorer)), ((PyObject *)__pyx_k_tuple_56), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Scorer)), ((PyObject *)__pyx_k_tuple_58), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyObject_SetAttr(__pyx_m, __pyx_n_s__TER, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyObject_SetAttr(__pyx_m, __pyx_n_s__TER, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "_cdec.pyx":22 @@ -26356,6 +27339,40 @@ static CYTHON_INLINE PyObject *__Pyx_PyIter_Next2(PyObject* iterator, PyObject* } } +static CYTHON_INLINE int __Pyx_CheckKeywordStrings( + PyObject *kwdict, + const char* function_name, + int kw_allowed) +{ + PyObject* key = 0; + Py_ssize_t pos = 0; + while (PyDict_Next(kwdict, &pos, &key, 0)) { + #if PY_MAJOR_VERSION < 3 + if (unlikely(!PyString_CheckExact(key)) && unlikely(!PyString_Check(key))) + #else + if (unlikely(!PyUnicode_Check(key))) + #endif + goto invalid_keyword_type; + } + if ((!kw_allowed) && unlikely(key)) + goto invalid_keyword; + return 1; +invalid_keyword_type: + PyErr_Format(PyExc_TypeError, + "%s() keywords must be strings", function_name); + return 0; +invalid_keyword: + PyErr_Format(PyExc_TypeError, + #if PY_MAJOR_VERSION < 3 + "%s() got an unexpected keyword argument '%s'", + function_name, PyString_AsString(key)); + #else + "%s() got an unexpected keyword argument '%U'", + function_name, key); + #endif + return 0; +} + static double __Pyx__PyObject_AsDouble(PyObject* obj) { PyObject* float_value; if (Py_TYPE(obj)->tp_as_number && Py_TYPE(obj)->tp_as_number->nb_float) { diff --git a/python/src/decoder.pxd b/python/src/decoder.pxd index d2065579..a66166a2 100644 --- a/python/src/decoder.pxd +++ b/python/src/decoder.pxd @@ -27,8 +27,6 @@ cdef extern from "decoder/decoder.h": variables_map& GetConf() # add grammar rules (currently only supported by SCFG decoders) - # that will be used on subsequent calls to Decode. rules should be in standard - # text format. This function does NOT read from a file. void AddSupplementalGrammarFromString(string& grammar_str) void AddSupplementalGrammar(shared_ptr[Grammar] grammar) diff --git a/python/src/grammar.pxd b/python/src/grammar.pxd index 43806f71..8853a614 100644 --- a/python/src/grammar.pxd +++ b/python/src/grammar.pxd @@ -3,7 +3,6 @@ from libcpp.string cimport string from utils cimport * cdef extern from "decoder/trule.h": - cdef cppclass AlignmentPoint: AlignmentPoint(int s, int t) AlignmentPoint Inverted() diff --git a/python/src/mteval.pxd b/python/src/mteval.pxd index 52af6297..27c2808d 100644 --- a/python/src/mteval.pxd +++ b/python/src/mteval.pxd @@ -10,6 +10,7 @@ cdef extern from "mteval/ns.h": unsigned size() float operator[](unsigned i) void swap(SufficientStats& other) + vector[float] fields SufficientStats add "operator+" (SufficientStats&, SufficientStats&) @@ -26,9 +27,15 @@ cdef extern from "mteval/ns.h": vector[WordID]& refs, SufficientStats* out) -cdef extern from "mteval/ns.h" namespace "EvaluationMetric": - EvaluationMetric* Instance(string& metric_id) - EvaluationMetric* Instance() # IBM_BLEU + cdef EvaluationMetric* MetricInstance "EvaluationMetric::Instance" (string& metric_id) + +cdef extern from "py_scorer.h": + ctypedef float (*MetricScoreCallback)(void*, SufficientStats* stats) + ctypedef void (*MetricStatsCallback)(void*, + string* hyp, vector[string]* refs, SufficientStats* out) + + cdef EvaluationMetric* PyMetricInstance "PythonEvaluationMetric::Instance"( + string& metric_id, void*, MetricStatsCallback, MetricScoreCallback) cdef extern from "training/candidate_set.h" namespace "training": cdef cppclass Candidate "const training::Candidate": diff --git a/python/src/mteval.pxi b/python/src/mteval.pxi index 4ba73168..f67f4f04 100644 --- a/python/src/mteval.pxi +++ b/python/src/mteval.pxi @@ -43,7 +43,12 @@ cdef class SufficientStats: def __iter__(self): for i in range(len(self)): - yield self.stats[0][i] + yield self[i] + + def __getitem__(self, int index): + if not 0 <= index < len(self): + raise IndexError('sufficient stats vector index out of range') + return self.stats[0][index] def __iadd__(SufficientStats self, SufficientStats other): self.stats[0] += other.stats[0] @@ -111,15 +116,17 @@ cdef class SegmentEvaluator: cdef class Scorer: cdef string* name + cdef mteval.EvaluationMetric* metric - def __cinit__(self, char* name): - self.name = new string(name) + def __cinit__(self, bytes name=None): + if name: + self.name = new string(name) + self.metric = mteval.MetricInstance(self.name[0]) def __dealloc__(self): del self.name def __call__(self, refs): - cdef mteval.EvaluationMetric* metric = mteval.Instance(self.name[0]) if isinstance(refs, unicode) or isinstance(refs, str): refs = [refs] cdef vector[vector[WordID]]* refsv = new vector[vector[WordID]]() @@ -132,13 +139,53 @@ cdef class Scorer: del refv cdef unsigned i cdef SegmentEvaluator evaluator = SegmentEvaluator() - evaluator.metric = metric - evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator](metric.CreateSegmentEvaluator(refsv[0])) + evaluator.metric = self.metric + evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator]( + self.metric.CreateSegmentEvaluator(refsv[0])) del refsv # in theory should not delete but store in SegmentEvaluator return evaluator def __str__(self): return self.name.c_str() +cdef float _compute_score(void* metric_, mteval.SufficientStats* stats): + cdef Metric metric = metric_ + cdef list ss = [] + cdef unsigned i + for i in range(stats.size()): + ss.append(stats[0][i]) + return metric.score(ss) + +cdef void _compute_sufficient_stats(void* metric_, + string* hyp, + vector[string]* refs, + mteval.SufficientStats* out): + cdef Metric metric = metric_ + cdef list refs_ = [] + cdef unsigned i + for i in range(refs.size()): + refs_.append(refs[0][i].c_str()) + cdef list ss = metric.evaluate(hyp.c_str(), refs_) + out.fields.resize(len(ss)) + for i in range(len(ss)): + out.fields[i] = ss[i] + +cdef class Metric: + cdef Scorer scorer + def __cinit__(self): + self.scorer = Scorer() + self.scorer.name = new string(as_str(self.__class__.__name__)) + self.scorer.metric = mteval.PyMetricInstance(self.scorer.name[0], + self, _compute_sufficient_stats, _compute_score) + + def __call__(self, refs): + return self.scorer(refs) + + def score(SufficientStats stats): + return 0 + + def evaluate(self, hyp, refs): + return [] + BLEU = Scorer('IBM_BLEU') TER = Scorer('TER') diff --git a/python/src/observer.h b/python/src/observer.h index d398f2f7..05f3c9be 100644 --- a/python/src/observer.h +++ b/python/src/observer.h @@ -1,6 +1,5 @@ #include "decoder/hg.h" #include "decoder/decoder.h" -#include struct BasicObserver: public DecoderObserver { Hypergraph* hypergraph; diff --git a/python/src/py_scorer.h b/python/src/py_scorer.h new file mode 100644 index 00000000..22dc9fee --- /dev/null +++ b/python/src/py_scorer.h @@ -0,0 +1,44 @@ +#include "ns.h" +#include "tdict.h" + +typedef float (*MetricScoreCallback)(void*, SufficientStats* stats); +typedef void (*MetricStatsCallback)(void*, + std::string *hyp, + std::vector *refs, + SufficientStats* out); + +struct PythonEvaluationMetric : public EvaluationMetric { + + PythonEvaluationMetric(const std::string& id) : EvaluationMetric(id) {} + + static EvaluationMetric* Instance(const std::string& id, + void* obj, + MetricStatsCallback statscb, + MetricScoreCallback scorecb) { + PythonEvaluationMetric* metric = new PythonEvaluationMetric(id); + metric->pymetric = obj; + metric->_compute_score = scorecb; + metric->_compute_sufficient_stats = statscb; + return metric; + } + + float ComputeScore(const SufficientStats& stats) const { + SufficientStats stats_(stats); + return _compute_score(pymetric, &stats_); + } + + void ComputeSufficientStatistics(const std::vector& hyp, + const std::vector >& refs, + SufficientStats* out) const { + std::string hyp_(TD::GetString(hyp)); + std::vector refs_; + for(unsigned i = 0; i < refs.size(); ++i) { + refs_.push_back(TD::GetString(refs[i])); + } + _compute_sufficient_stats(pymetric, &hyp_, &refs_, out); + } + + void* pymetric; + MetricStatsCallback _compute_sufficient_stats; + MetricScoreCallback _compute_score; +}; diff --git a/sa-extract/Makefile b/sa-extract/Makefile index 7b39ae4d..3145bbfe 100644 --- a/sa-extract/Makefile +++ b/sa-extract/Makefile @@ -1,17 +1,11 @@ -PYVER=python2.7 -PYDIR=/usr/local/Cellar/python/2.7.2 -PYINCLUDE=$(PYDIR)/include/$(PYVER) -CYTHON=/usr/local/share/python/cython -PYTHON=$(PYDIR)/bin/python - %.c: %.pyx - $(CYTHON) $< -o $@ + cython $< -o $@ %.o: %.cc g++ -O6 -g -fPIC -c $< all: cstrmap.c strmap.cc rule.c sym.c cdat.c cintlist.c cfloatlist.c calignment.c csuf.c clex.c rulefactory.c cveb.c lcp.c precomputation.c - $(PYTHON) setup.py build + python setup.py build clean: rm -f cdat.c cstrmap.c sym.c rule.c cintlist.c cfloatlist.c calignment.c csuf.c clex.c rulefactory.c cveb.c lcp.c precomputation.c *.so *.o *.cxx *~ *.pyc -- cgit v1.2.3 From ee5e376e263d9aeabdeee6968b4457f53d3fc772 Mon Sep 17 00:00:00 2001 From: Victor Chahuneau Date: Fri, 27 Jul 2012 23:33:45 -0400 Subject: [python] Move python files to avoid pythonpath conflicts --- python/cdec/__init__.py | 1 - python/cdec/configobj.py | 2468 --------------------------------------- python/cdec/sa/__init__.py | 4 - python/cdec/sa/compile.py | 94 -- python/cdec/sa/extract.py | 31 - python/cdec/sa/extractor.py | 78 -- python/cdec/sa/features.py | 57 - python/cdec/score.py | 1 - python/pkg/cdec/__init__.py | 1 + python/pkg/cdec/configobj.py | 2468 +++++++++++++++++++++++++++++++++++++++ python/pkg/cdec/sa/__init__.py | 4 + python/pkg/cdec/sa/compile.py | 94 ++ python/pkg/cdec/sa/extract.py | 31 + python/pkg/cdec/sa/extractor.py | 78 ++ python/pkg/cdec/sa/features.py | 57 + python/pkg/cdec/score.py | 1 + python/setup.py | 3 +- python/src/sa/_sa.c | 39 +- python/src/sa/sym.pxi | 2 +- 19 files changed, 2753 insertions(+), 2759 deletions(-) delete mode 100644 python/cdec/__init__.py delete mode 100644 python/cdec/configobj.py delete mode 100644 python/cdec/sa/__init__.py delete mode 100644 python/cdec/sa/compile.py delete mode 100644 python/cdec/sa/extract.py delete mode 100644 python/cdec/sa/extractor.py delete mode 100644 python/cdec/sa/features.py delete mode 100644 python/cdec/score.py create mode 100644 python/pkg/cdec/__init__.py create mode 100644 python/pkg/cdec/configobj.py create mode 100644 python/pkg/cdec/sa/__init__.py create mode 100644 python/pkg/cdec/sa/compile.py create mode 100644 python/pkg/cdec/sa/extract.py create mode 100644 python/pkg/cdec/sa/extractor.py create mode 100644 python/pkg/cdec/sa/features.py create mode 100644 python/pkg/cdec/score.py (limited to 'python/cdec/__init__.py') diff --git a/python/cdec/__init__.py b/python/cdec/__init__.py deleted file mode 100644 index 19058493..00000000 --- a/python/cdec/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from _cdec import Decoder, Lattice, TRule, NT, NTRef, ParseFailed, InvalidConfig diff --git a/python/cdec/configobj.py b/python/cdec/configobj.py deleted file mode 100644 index c1f6e6df..00000000 --- a/python/cdec/configobj.py +++ /dev/null @@ -1,2468 +0,0 @@ -# configobj.py -# A config file reader/writer that supports nested sections in config files. -# Copyright (C) 2005-2010 Michael Foord, Nicola Larosa -# E-mail: fuzzyman AT voidspace DOT org DOT uk -# nico AT tekNico DOT net - -# ConfigObj 4 -# http://www.voidspace.org.uk/python/configobj.html - -# Released subject to the BSD License -# Please see http://www.voidspace.org.uk/python/license.shtml - -# Scripts maintained at http://www.voidspace.org.uk/python/index.shtml -# For information about bugfixes, updates and support, please join the -# ConfigObj mailing list: -# http://lists.sourceforge.net/lists/listinfo/configobj-develop -# Comments, suggestions and bug reports welcome. - -from __future__ import generators - -import os -import re -import sys - -from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF16_BE, BOM_UTF16_LE - - -# imported lazily to avoid startup performance hit if it isn't used -compiler = None - -# A dictionary mapping BOM to -# the encoding to decode with, and what to set the -# encoding attribute to. -BOMS = { - BOM_UTF8: ('utf_8', None), - BOM_UTF16_BE: ('utf16_be', 'utf_16'), - BOM_UTF16_LE: ('utf16_le', 'utf_16'), - BOM_UTF16: ('utf_16', 'utf_16'), - } -# All legal variants of the BOM codecs. -# TODO: the list of aliases is not meant to be exhaustive, is there a -# better way ? -BOM_LIST = { - 'utf_16': 'utf_16', - 'u16': 'utf_16', - 'utf16': 'utf_16', - 'utf-16': 'utf_16', - 'utf16_be': 'utf16_be', - 'utf_16_be': 'utf16_be', - 'utf-16be': 'utf16_be', - 'utf16_le': 'utf16_le', - 'utf_16_le': 'utf16_le', - 'utf-16le': 'utf16_le', - 'utf_8': 'utf_8', - 'u8': 'utf_8', - 'utf': 'utf_8', - 'utf8': 'utf_8', - 'utf-8': 'utf_8', - } - -# Map of encodings to the BOM to write. -BOM_SET = { - 'utf_8': BOM_UTF8, - 'utf_16': BOM_UTF16, - 'utf16_be': BOM_UTF16_BE, - 'utf16_le': BOM_UTF16_LE, - None: BOM_UTF8 - } - - -def match_utf8(encoding): - return BOM_LIST.get(encoding.lower()) == 'utf_8' - - -# Quote strings used for writing values -squot = "'%s'" -dquot = '"%s"' -noquot = "%s" -wspace_plus = ' \r\n\v\t\'"' -tsquot = '"""%s"""' -tdquot = "'''%s'''" - -# Sentinel for use in getattr calls to replace hasattr -MISSING = object() - -__version__ = '4.7.2' - -try: - any -except NameError: - def any(iterable): - for entry in iterable: - if entry: - return True - return False - - -__all__ = ( - '__version__', - 'DEFAULT_INDENT_TYPE', - 'DEFAULT_INTERPOLATION', - 'ConfigObjError', - 'NestingError', - 'ParseError', - 'DuplicateError', - 'ConfigspecError', - 'ConfigObj', - 'SimpleVal', - 'InterpolationError', - 'InterpolationLoopError', - 'MissingInterpolationOption', - 'RepeatSectionError', - 'ReloadError', - 'UnreprError', - 'UnknownType', - 'flatten_errors', - 'get_extra_values' -) - -DEFAULT_INTERPOLATION = 'configparser' -DEFAULT_INDENT_TYPE = ' ' -MAX_INTERPOL_DEPTH = 10 - -OPTION_DEFAULTS = { - 'interpolation': True, - 'raise_errors': False, - 'list_values': True, - 'create_empty': False, - 'file_error': False, - 'configspec': None, - 'stringify': True, - # option may be set to one of ('', ' ', '\t') - 'indent_type': None, - 'encoding': None, - 'default_encoding': None, - 'unrepr': False, - 'write_empty_values': False, -} - - - -def getObj(s): - global compiler - if compiler is None: - import compiler - s = "a=" + s - p = compiler.parse(s) - return p.getChildren()[1].getChildren()[0].getChildren()[1] - - -class UnknownType(Exception): - pass - - -class Builder(object): - - def build(self, o): - m = getattr(self, 'build_' + o.__class__.__name__, None) - if m is None: - raise UnknownType(o.__class__.__name__) - return m(o) - - def build_List(self, o): - return map(self.build, o.getChildren()) - - def build_Const(self, o): - return o.value - - def build_Dict(self, o): - d = {} - i = iter(map(self.build, o.getChildren())) - for el in i: - d[el] = i.next() - return d - - def build_Tuple(self, o): - return tuple(self.build_List(o)) - - def build_Name(self, o): - if o.name == 'None': - return None - if o.name == 'True': - return True - if o.name == 'False': - return False - - # An undefined Name - raise UnknownType('Undefined Name') - - def build_Add(self, o): - real, imag = map(self.build_Const, o.getChildren()) - try: - real = float(real) - except TypeError: - raise UnknownType('Add') - if not isinstance(imag, complex) or imag.real != 0.0: - raise UnknownType('Add') - return real+imag - - def build_Getattr(self, o): - parent = self.build(o.expr) - return getattr(parent, o.attrname) - - def build_UnarySub(self, o): - return -self.build_Const(o.getChildren()[0]) - - def build_UnaryAdd(self, o): - return self.build_Const(o.getChildren()[0]) - - -_builder = Builder() - - -def unrepr(s): - if not s: - return s - return _builder.build(getObj(s)) - - - -class ConfigObjError(SyntaxError): - """ - This is the base class for all errors that ConfigObj raises. - It is a subclass of SyntaxError. - """ - def __init__(self, message='', line_number=None, line=''): - self.line = line - self.line_number = line_number - SyntaxError.__init__(self, message) - - -class NestingError(ConfigObjError): - """ - This error indicates a level of nesting that doesn't match. - """ - - -class ParseError(ConfigObjError): - """ - This error indicates that a line is badly written. - It is neither a valid ``key = value`` line, - nor a valid section marker line. - """ - - -class ReloadError(IOError): - """ - A 'reload' operation failed. - This exception is a subclass of ``IOError``. - """ - def __init__(self): - IOError.__init__(self, 'reload failed, filename is not set.') - - -class DuplicateError(ConfigObjError): - """ - The keyword or section specified already exists. - """ - - -class ConfigspecError(ConfigObjError): - """ - An error occured whilst parsing a configspec. - """ - - -class InterpolationError(ConfigObjError): - """Base class for the two interpolation errors.""" - - -class InterpolationLoopError(InterpolationError): - """Maximum interpolation depth exceeded in string interpolation.""" - - def __init__(self, option): - InterpolationError.__init__( - self, - 'interpolation loop detected in value "%s".' % option) - - -class RepeatSectionError(ConfigObjError): - """ - This error indicates additional sections in a section with a - ``__many__`` (repeated) section. - """ - - -class MissingInterpolationOption(InterpolationError): - """A value specified for interpolation was missing.""" - def __init__(self, option): - msg = 'missing option "%s" in interpolation.' % option - InterpolationError.__init__(self, msg) - - -class UnreprError(ConfigObjError): - """An error parsing in unrepr mode.""" - - - -class InterpolationEngine(object): - """ - A helper class to help perform string interpolation. - - This class is an abstract base class; its descendants perform - the actual work. - """ - - # compiled regexp to use in self.interpolate() - _KEYCRE = re.compile(r"%\(([^)]*)\)s") - _cookie = '%' - - def __init__(self, section): - # the Section instance that "owns" this engine - self.section = section - - - def interpolate(self, key, value): - # short-cut - if not self._cookie in value: - return value - - def recursive_interpolate(key, value, section, backtrail): - """The function that does the actual work. - - ``value``: the string we're trying to interpolate. - ``section``: the section in which that string was found - ``backtrail``: a dict to keep track of where we've been, - to detect and prevent infinite recursion loops - - This is similar to a depth-first-search algorithm. - """ - # Have we been here already? - if (key, section.name) in backtrail: - # Yes - infinite loop detected - raise InterpolationLoopError(key) - # Place a marker on our backtrail so we won't come back here again - backtrail[(key, section.name)] = 1 - - # Now start the actual work - match = self._KEYCRE.search(value) - while match: - # The actual parsing of the match is implementation-dependent, - # so delegate to our helper function - k, v, s = self._parse_match(match) - if k is None: - # That's the signal that no further interpolation is needed - replacement = v - else: - # Further interpolation may be needed to obtain final value - replacement = recursive_interpolate(k, v, s, backtrail) - # Replace the matched string with its final value - start, end = match.span() - value = ''.join((value[:start], replacement, value[end:])) - new_search_start = start + len(replacement) - # Pick up the next interpolation key, if any, for next time - # through the while loop - match = self._KEYCRE.search(value, new_search_start) - - # Now safe to come back here again; remove marker from backtrail - del backtrail[(key, section.name)] - - return value - - # Back in interpolate(), all we have to do is kick off the recursive - # function with appropriate starting values - value = recursive_interpolate(key, value, self.section, {}) - return value - - - def _fetch(self, key): - """Helper function to fetch values from owning section. - - Returns a 2-tuple: the value, and the section where it was found. - """ - # switch off interpolation before we try and fetch anything ! - save_interp = self.section.main.interpolation - self.section.main.interpolation = False - - # Start at section that "owns" this InterpolationEngine - current_section = self.section - while True: - # try the current section first - val = current_section.get(key) - if val is not None and not isinstance(val, Section): - break - # try "DEFAULT" next - val = current_section.get('DEFAULT', {}).get(key) - if val is not None and not isinstance(val, Section): - break - # move up to parent and try again - # top-level's parent is itself - if current_section.parent is current_section: - # reached top level, time to give up - break - current_section = current_section.parent - - # restore interpolation to previous value before returning - self.section.main.interpolation = save_interp - if val is None: - raise MissingInterpolationOption(key) - return val, current_section - - - def _parse_match(self, match): - """Implementation-dependent helper function. - - Will be passed a match object corresponding to the interpolation - key we just found (e.g., "%(foo)s" or "$foo"). Should look up that - key in the appropriate config file section (using the ``_fetch()`` - helper function) and return a 3-tuple: (key, value, section) - - ``key`` is the name of the key we're looking for - ``value`` is the value found for that key - ``section`` is a reference to the section where it was found - - ``key`` and ``section`` should be None if no further - interpolation should be performed on the resulting value - (e.g., if we interpolated "$$" and returned "$"). - """ - raise NotImplementedError() - - - -class ConfigParserInterpolation(InterpolationEngine): - """Behaves like ConfigParser.""" - _cookie = '%' - _KEYCRE = re.compile(r"%\(([^)]*)\)s") - - def _parse_match(self, match): - key = match.group(1) - value, section = self._fetch(key) - return key, value, section - - - -class TemplateInterpolation(InterpolationEngine): - """Behaves like string.Template.""" - _cookie = '$' - _delimiter = '$' - _KEYCRE = re.compile(r""" - \$(?: - (?P\$) | # Two $ signs - (?P[_a-z][_a-z0-9]*) | # $name format - {(?P[^}]*)} # ${name} format - ) - """, re.IGNORECASE | re.VERBOSE) - - def _parse_match(self, match): - # Valid name (in or out of braces): fetch value from section - key = match.group('named') or match.group('braced') - if key is not None: - value, section = self._fetch(key) - return key, value, section - # Escaped delimiter (e.g., $$): return single delimiter - if match.group('escaped') is not None: - # Return None for key and section to indicate it's time to stop - return None, self._delimiter, None - # Anything else: ignore completely, just return it unchanged - return None, match.group(), None - - -interpolation_engines = { - 'configparser': ConfigParserInterpolation, - 'template': TemplateInterpolation, -} - - -def __newobj__(cls, *args): - # Hack for pickle - return cls.__new__(cls, *args) - -class Section(dict): - """ - A dictionary-like object that represents a section in a config file. - - It does string interpolation if the 'interpolation' attribute - of the 'main' object is set to True. - - Interpolation is tried first from this object, then from the 'DEFAULT' - section of this object, next from the parent and its 'DEFAULT' section, - and so on until the main object is reached. - - A Section will behave like an ordered dictionary - following the - order of the ``scalars`` and ``sections`` attributes. - You can use this to change the order of members. - - Iteration follows the order: scalars, then sections. - """ - - - def __setstate__(self, state): - dict.update(self, state[0]) - self.__dict__.update(state[1]) - - def __reduce__(self): - state = (dict(self), self.__dict__) - return (__newobj__, (self.__class__,), state) - - - def __init__(self, parent, depth, main, indict=None, name=None): - """ - * parent is the section above - * depth is the depth level of this section - * main is the main ConfigObj - * indict is a dictionary to initialise the section with - """ - if indict is None: - indict = {} - dict.__init__(self) - # used for nesting level *and* interpolation - self.parent = parent - # used for the interpolation attribute - self.main = main - # level of nesting depth of this Section - self.depth = depth - # purely for information - self.name = name - # - self._initialise() - # we do this explicitly so that __setitem__ is used properly - # (rather than just passing to ``dict.__init__``) - for entry, value in indict.iteritems(): - self[entry] = value - - - def _initialise(self): - # the sequence of scalar values in this Section - self.scalars = [] - # the sequence of sections in this Section - self.sections = [] - # for comments :-) - self.comments = {} - self.inline_comments = {} - # the configspec - self.configspec = None - # for defaults - self.defaults = [] - self.default_values = {} - self.extra_values = [] - self._created = False - - - def _interpolate(self, key, value): - try: - # do we already have an interpolation engine? - engine = self._interpolation_engine - except AttributeError: - # not yet: first time running _interpolate(), so pick the engine - name = self.main.interpolation - if name == True: # note that "if name:" would be incorrect here - # backwards-compatibility: interpolation=True means use default - name = DEFAULT_INTERPOLATION - name = name.lower() # so that "Template", "template", etc. all work - class_ = interpolation_engines.get(name, None) - if class_ is None: - # invalid value for self.main.interpolation - self.main.interpolation = False - return value - else: - # save reference to engine so we don't have to do this again - engine = self._interpolation_engine = class_(self) - # let the engine do the actual work - return engine.interpolate(key, value) - - - def __getitem__(self, key): - """Fetch the item and do string interpolation.""" - val = dict.__getitem__(self, key) - if self.main.interpolation: - if isinstance(val, basestring): - return self._interpolate(key, val) - if isinstance(val, list): - def _check(entry): - if isinstance(entry, basestring): - return self._interpolate(key, entry) - return entry - new = [_check(entry) for entry in val] - if new != val: - return new - return val - - - def __setitem__(self, key, value, unrepr=False): - """ - Correctly set a value. - - Making dictionary values Section instances. - (We have to special case 'Section' instances - which are also dicts) - - Keys must be strings. - Values need only be strings (or lists of strings) if - ``main.stringify`` is set. - - ``unrepr`` must be set when setting a value to a dictionary, without - creating a new sub-section. - """ - if not isinstance(key, basestring): - raise ValueError('The key "%s" is not a string.' % key) - - # add the comment - if key not in self.comments: - self.comments[key] = [] - self.inline_comments[key] = '' - # remove the entry from defaults - if key in self.defaults: - self.defaults.remove(key) - # - if isinstance(value, Section): - if key not in self: - self.sections.append(key) - dict.__setitem__(self, key, value) - elif isinstance(value, dict) and not unrepr: - # First create the new depth level, - # then create the section - if key not in self: - self.sections.append(key) - new_depth = self.depth + 1 - dict.__setitem__( - self, - key, - Section( - self, - new_depth, - self.main, - indict=value, - name=key)) - else: - if key not in self: - self.scalars.append(key) - if not self.main.stringify: - if isinstance(value, basestring): - pass - elif isinstance(value, (list, tuple)): - for entry in value: - if not isinstance(entry, basestring): - raise TypeError('Value is not a string "%s".' % entry) - else: - raise TypeError('Value is not a string "%s".' % value) - dict.__setitem__(self, key, value) - - - def __delitem__(self, key): - """Remove items from the sequence when deleting.""" - dict. __delitem__(self, key) - if key in self.scalars: - self.scalars.remove(key) - else: - self.sections.remove(key) - del self.comments[key] - del self.inline_comments[key] - - - def get(self, key, default=None): - """A version of ``get`` that doesn't bypass string interpolation.""" - try: - return self[key] - except KeyError: - return default - - - def update(self, indict): - """ - A version of update that uses our ``__setitem__``. - """ - for entry in indict: - self[entry] = indict[entry] - - - def pop(self, key, default=MISSING): - """ - 'D.pop(k[,d]) -> v, remove specified key and return the corresponding value. - If key is not found, d is returned if given, otherwise KeyError is raised' - """ - try: - val = self[key] - except KeyError: - if default is MISSING: - raise - val = default - else: - del self[key] - return val - - - def popitem(self): - """Pops the first (key,val)""" - sequence = (self.scalars + self.sections) - if not sequence: - raise KeyError(": 'popitem(): dictionary is empty'") - key = sequence[0] - val = self[key] - del self[key] - return key, val - - - def clear(self): - """ - A version of clear that also affects scalars/sections - Also clears comments and configspec. - - Leaves other attributes alone : - depth/main/parent are not affected - """ - dict.clear(self) - self.scalars = [] - self.sections = [] - self.comments = {} - self.inline_comments = {} - self.configspec = None - self.defaults = [] - self.extra_values = [] - - - def setdefault(self, key, default=None): - """A version of setdefault that sets sequence if appropriate.""" - try: - return self[key] - except KeyError: - self[key] = default - return self[key] - - - def items(self): - """D.items() -> list of D's (key, value) pairs, as 2-tuples""" - return zip((self.scalars + self.sections), self.values()) - - - def keys(self): - """D.keys() -> list of D's keys""" - return (self.scalars + self.sections) - - - def values(self): - """D.values() -> list of D's values""" - return [self[key] for key in (self.scalars + self.sections)] - - - def iteritems(self): - """D.iteritems() -> an iterator over the (key, value) items of D""" - return iter(self.items()) - - - def iterkeys(self): - """D.iterkeys() -> an iterator over the keys of D""" - return iter((self.scalars + self.sections)) - - __iter__ = iterkeys - - - def itervalues(self): - """D.itervalues() -> an iterator over the values of D""" - return iter(self.values()) - - - def __repr__(self): - """x.__repr__() <==> repr(x)""" - def _getval(key): - try: - return self[key] - except MissingInterpolationOption: - return dict.__getitem__(self, key) - return '{%s}' % ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) - for key in (self.scalars + self.sections)]) - - __str__ = __repr__ - __str__.__doc__ = "x.__str__() <==> str(x)" - - - # Extra methods - not in a normal dictionary - - def dict(self): - """ - Return a deepcopy of self as a dictionary. - - All members that are ``Section`` instances are recursively turned to - ordinary dictionaries - by calling their ``dict`` method. - - >>> n = a.dict() - >>> n == a - 1 - >>> n is a - 0 - """ - newdict = {} - for entry in self: - this_entry = self[entry] - if isinstance(this_entry, Section): - this_entry = this_entry.dict() - elif isinstance(this_entry, list): - # create a copy rather than a reference - this_entry = list(this_entry) - elif isinstance(this_entry, tuple): - # create a copy rather than a reference - this_entry = tuple(this_entry) - newdict[entry] = this_entry - return newdict - - - def merge(self, indict): - """ - A recursive update - useful for merging config files. - - >>> a = '''[section1] - ... option1 = True - ... [[subsection]] - ... more_options = False - ... # end of file'''.splitlines() - >>> b = '''# File is user.ini - ... [section1] - ... option1 = False - ... # end of file'''.splitlines() - >>> c1 = ConfigObj(b) - >>> c2 = ConfigObj(a) - >>> c2.merge(c1) - >>> c2 - ConfigObj({'section1': {'option1': 'False', 'subsection': {'more_options': 'False'}}}) - """ - for key, val in indict.items(): - if (key in self and isinstance(self[key], dict) and - isinstance(val, dict)): - self[key].merge(val) - else: - self[key] = val - - - def rename(self, oldkey, newkey): - """ - Change a keyname to another, without changing position in sequence. - - Implemented so that transformations can be made on keys, - as well as on values. (used by encode and decode) - - Also renames comments. - """ - if oldkey in self.scalars: - the_list = self.scalars - elif oldkey in self.sections: - the_list = self.sections - else: - raise KeyError('Key "%s" not found.' % oldkey) - pos = the_list.index(oldkey) - # - val = self[oldkey] - dict.__delitem__(self, oldkey) - dict.__setitem__(self, newkey, val) - the_list.remove(oldkey) - the_list.insert(pos, newkey) - comm = self.comments[oldkey] - inline_comment = self.inline_comments[oldkey] - del self.comments[oldkey] - del self.inline_comments[oldkey] - self.comments[newkey] = comm - self.inline_comments[newkey] = inline_comment - - - def walk(self, function, raise_errors=True, - call_on_sections=False, **keywargs): - """ - Walk every member and call a function on the keyword and value. - - Return a dictionary of the return values - - If the function raises an exception, raise the errror - unless ``raise_errors=False``, in which case set the return value to - ``False``. - - Any unrecognised keyword arguments you pass to walk, will be pased on - to the function you pass in. - - Note: if ``call_on_sections`` is ``True`` then - on encountering a - subsection, *first* the function is called for the *whole* subsection, - and then recurses into it's members. This means your function must be - able to handle strings, dictionaries and lists. This allows you - to change the key of subsections as well as for ordinary members. The - return value when called on the whole subsection has to be discarded. - - See the encode and decode methods for examples, including functions. - - .. admonition:: caution - - You can use ``walk`` to transform the names of members of a section - but you mustn't add or delete members. - - >>> config = '''[XXXXsection] - ... XXXXkey = XXXXvalue'''.splitlines() - >>> cfg = ConfigObj(config) - >>> cfg - ConfigObj({'XXXXsection': {'XXXXkey': 'XXXXvalue'}}) - >>> def transform(section, key): - ... val = section[key] - ... newkey = key.replace('XXXX', 'CLIENT1') - ... section.rename(key, newkey) - ... if isinstance(val, (tuple, list, dict)): - ... pass - ... else: - ... val = val.replace('XXXX', 'CLIENT1') - ... section[newkey] = val - >>> cfg.walk(transform, call_on_sections=True) - {'CLIENT1section': {'CLIENT1key': None}} - >>> cfg - ConfigObj({'CLIENT1section': {'CLIENT1key': 'CLIENT1value'}}) - """ - out = {} - # scalars first - for i in range(len(self.scalars)): - entry = self.scalars[i] - try: - val = function(self, entry, **keywargs) - # bound again in case name has changed - entry = self.scalars[i] - out[entry] = val - except Exception: - if raise_errors: - raise - else: - entry = self.scalars[i] - out[entry] = False - # then sections - for i in range(len(self.sections)): - entry = self.sections[i] - if call_on_sections: - try: - function(self, entry, **keywargs) - except Exception: - if raise_errors: - raise - else: - entry = self.sections[i] - out[entry] = False - # bound again in case name has changed - entry = self.sections[i] - # previous result is discarded - out[entry] = self[entry].walk( - function, - raise_errors=raise_errors, - call_on_sections=call_on_sections, - **keywargs) - return out - - - def as_bool(self, key): - """ - Accepts a key as input. The corresponding value must be a string or - the objects (``True`` or 1) or (``False`` or 0). We allow 0 and 1 to - retain compatibility with Python 2.2. - - If the string is one of ``True``, ``On``, ``Yes``, or ``1`` it returns - ``True``. - - If the string is one of ``False``, ``Off``, ``No``, or ``0`` it returns - ``False``. - - ``as_bool`` is not case sensitive. - - Any other input will raise a ``ValueError``. - - >>> a = ConfigObj() - >>> a['a'] = 'fish' - >>> a.as_bool('a') - Traceback (most recent call last): - ValueError: Value "fish" is neither True nor False - >>> a['b'] = 'True' - >>> a.as_bool('b') - 1 - >>> a['b'] = 'off' - >>> a.as_bool('b') - 0 - """ - val = self[key] - if val == True: - return True - elif val == False: - return False - else: - try: - if not isinstance(val, basestring): - # TODO: Why do we raise a KeyError here? - raise KeyError() - else: - return self.main._bools[val.lower()] - except KeyError: - raise ValueError('Value "%s" is neither True nor False' % val) - - - def as_int(self, key): - """ - A convenience method which coerces the specified value to an integer. - - If the value is an invalid literal for ``int``, a ``ValueError`` will - be raised. - - >>> a = ConfigObj() - >>> a['a'] = 'fish' - >>> a.as_int('a') - Traceback (most recent call last): - ValueError: invalid literal for int() with base 10: 'fish' - >>> a['b'] = '1' - >>> a.as_int('b') - 1 - >>> a['b'] = '3.2' - >>> a.as_int('b') - Traceback (most recent call last): - ValueError: invalid literal for int() with base 10: '3.2' - """ - return int(self[key]) - - - def as_float(self, key): - """ - A convenience method which coerces the specified value to a float. - - If the value is an invalid literal for ``float``, a ``ValueError`` will - be raised. - - >>> a = ConfigObj() - >>> a['a'] = 'fish' - >>> a.as_float('a') - Traceback (most recent call last): - ValueError: invalid literal for float(): fish - >>> a['b'] = '1' - >>> a.as_float('b') - 1.0 - >>> a['b'] = '3.2' - >>> a.as_float('b') - 3.2000000000000002 - """ - return float(self[key]) - - - def as_list(self, key): - """ - A convenience method which fetches the specified value, guaranteeing - that it is a list. - - >>> a = ConfigObj() - >>> a['a'] = 1 - >>> a.as_list('a') - [1] - >>> a['a'] = (1,) - >>> a.as_list('a') - [1] - >>> a['a'] = [1] - >>> a.as_list('a') - [1] - """ - result = self[key] - if isinstance(result, (tuple, list)): - return list(result) - return [result] - - - def restore_default(self, key): - """ - Restore (and return) default value for the specified key. - - This method will only work for a ConfigObj that was created - with a configspec and has been validated. - - If there is no default value for this key, ``KeyError`` is raised. - """ - default = self.default_values[key] - dict.__setitem__(self, key, default) - if key not in self.defaults: - self.defaults.append(key) - return default - - - def restore_defaults(self): - """ - Recursively restore default values to all members - that have them. - - This method will only work for a ConfigObj that was created - with a configspec and has been validated. - - It doesn't delete or modify entries without default values. - """ - for key in self.default_values: - self.restore_default(key) - - for section in self.sections: - self[section].restore_defaults() - - -class ConfigObj(Section): - """An object to read, create, and write config files.""" - - _keyword = re.compile(r'''^ # line start - (\s*) # indentation - ( # keyword - (?:".*?")| # double quotes - (?:'.*?')| # single quotes - (?:[^'"=].*?) # no quotes - ) - \s*=\s* # divider - (.*) # value (including list values and comments) - $ # line end - ''', - re.VERBOSE) - - _sectionmarker = re.compile(r'''^ - (\s*) # 1: indentation - ((?:\[\s*)+) # 2: section marker open - ( # 3: section name open - (?:"\s*\S.*?\s*")| # at least one non-space with double quotes - (?:'\s*\S.*?\s*')| # at least one non-space with single quotes - (?:[^'"\s].*?) # at least one non-space unquoted - ) # section name close - ((?:\s*\])+) # 4: section marker close - \s*(\#.*)? # 5: optional comment - $''', - re.VERBOSE) - - # this regexp pulls list values out as a single string - # or single values and comments - # FIXME: this regex adds a '' to the end of comma terminated lists - # workaround in ``_handle_value`` - _valueexp = re.compile(r'''^ - (?: - (?: - ( - (?: - (?: - (?:".*?")| # double quotes - (?:'.*?')| # single quotes - (?:[^'",\#][^,\#]*?) # unquoted - ) - \s*,\s* # comma - )* # match all list items ending in a comma (if any) - ) - ( - (?:".*?")| # double quotes - (?:'.*?')| # single quotes - (?:[^'",\#\s][^,]*?)| # unquoted - (?:(? 1: - msg = "Parsing failed with several errors.\nFirst error %s" % info - error = ConfigObjError(msg) - else: - error = self._errors[0] - # set the errors attribute; it's a list of tuples: - # (error_type, message, line_number) - error.errors = self._errors - # set the config attribute - error.config = self - raise error - # delete private attributes - del self._errors - - if configspec is None: - self.configspec = None - else: - self._handle_configspec(configspec) - - - def _initialise(self, options=None): - if options is None: - options = OPTION_DEFAULTS - - # initialise a few variables - self.filename = None - self._errors = [] - self.raise_errors = options['raise_errors'] - self.interpolation = options['interpolation'] - self.list_values = options['list_values'] - self.create_empty = options['create_empty'] - self.file_error = options['file_error'] - self.stringify = options['stringify'] - self.indent_type = options['indent_type'] - self.encoding = options['encoding'] - self.default_encoding = options['default_encoding'] - self.BOM = False - self.newlines = None - self.write_empty_values = options['write_empty_values'] - self.unrepr = options['unrepr'] - - self.initial_comment = [] - self.final_comment = [] - self.configspec = None - - if self._inspec: - self.list_values = False - - # Clear section attributes as well - Section._initialise(self) - - - def __repr__(self): - def _getval(key): - try: - return self[key] - except MissingInterpolationOption: - return dict.__getitem__(self, key) - return ('ConfigObj({%s})' % - ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) - for key in (self.scalars + self.sections)])) - - - def _handle_bom(self, infile): - """ - Handle any BOM, and decode if necessary. - - If an encoding is specified, that *must* be used - but the BOM should - still be removed (and the BOM attribute set). - - (If the encoding is wrongly specified, then a BOM for an alternative - encoding won't be discovered or removed.) - - If an encoding is not specified, UTF8 or UTF16 BOM will be detected and - removed. The BOM attribute will be set. UTF16 will be decoded to - unicode. - - NOTE: This method must not be called with an empty ``infile``. - - Specifying the *wrong* encoding is likely to cause a - ``UnicodeDecodeError``. - - ``infile`` must always be returned as a list of lines, but may be - passed in as a single string. - """ - if ((self.encoding is not None) and - (self.encoding.lower() not in BOM_LIST)): - # No need to check for a BOM - # the encoding specified doesn't have one - # just decode - return self._decode(infile, self.encoding) - - if isinstance(infile, (list, tuple)): - line = infile[0] - else: - line = infile - if self.encoding is not None: - # encoding explicitly supplied - # And it could have an associated BOM - # TODO: if encoding is just UTF16 - we ought to check for both - # TODO: big endian and little endian versions. - enc = BOM_LIST[self.encoding.lower()] - if enc == 'utf_16': - # For UTF16 we try big endian and little endian - for BOM, (encoding, final_encoding) in BOMS.items(): - if not final_encoding: - # skip UTF8 - continue - if infile.startswith(BOM): - ### BOM discovered - ##self.BOM = True - # Don't need to remove BOM - return self._decode(infile, encoding) - - # If we get this far, will *probably* raise a DecodeError - # As it doesn't appear to start with a BOM - return self._decode(infile, self.encoding) - - # Must be UTF8 - BOM = BOM_SET[enc] - if not line.startswith(BOM): - return self._decode(infile, self.encoding) - - newline = line[len(BOM):] - - # BOM removed - if isinstance(infile, (list, tuple)): - infile[0] = newline - else: - infile = newline - self.BOM = True - return self._decode(infile, self.encoding) - - # No encoding specified - so we need to check for UTF8/UTF16 - for BOM, (encoding, final_encoding) in BOMS.items(): - if not line.startswith(BOM): - continue - else: - # BOM discovered - self.encoding = final_encoding - if not final_encoding: - self.BOM = True - # UTF8 - # remove BOM - newline = line[len(BOM):] - if isinstance(infile, (list, tuple)): - infile[0] = newline - else: - infile = newline - # UTF8 - don't decode - if isinstance(infile, basestring): - return infile.splitlines(True) - else: - return infile - # UTF16 - have to decode - return self._decode(infile, encoding) - - # No BOM discovered and no encoding specified, just return - if isinstance(infile, basestring): - # infile read from a file will be a single string - return infile.splitlines(True) - return infile - - - def _a_to_u(self, aString): - """Decode ASCII strings to unicode if a self.encoding is specified.""" - if self.encoding: - return aString.decode('ascii') - else: - return aString - - - def _decode(self, infile, encoding): - """ - Decode infile to unicode. Using the specified encoding. - - if is a string, it also needs converting to a list. - """ - if isinstance(infile, basestring): - # can't be unicode - # NOTE: Could raise a ``UnicodeDecodeError`` - return infile.decode(encoding).splitlines(True) - for i, line in enumerate(infile): - if not isinstance(line, unicode): - # NOTE: The isinstance test here handles mixed lists of unicode/string - # NOTE: But the decode will break on any non-string values - # NOTE: Or could raise a ``UnicodeDecodeError`` - infile[i] = line.decode(encoding) - return infile - - - def _decode_element(self, line): - """Decode element to unicode if necessary.""" - if not self.encoding: - return line - if isinstance(line, str) and self.default_encoding: - return line.decode(self.default_encoding) - return line - - - def _str(self, value): - """ - Used by ``stringify`` within validate, to turn non-string values - into strings. - """ - if not isinstance(value, basestring): - return str(value) - else: - return value - - - def _parse(self, infile): - """Actually parse the config file.""" - temp_list_values = self.list_values - if self.unrepr: - self.list_values = False - - comment_list = [] - done_start = False - this_section = self - maxline = len(infile) - 1 - cur_index = -1 - reset_comment = False - - while cur_index < maxline: - if reset_comment: - comment_list = [] - cur_index += 1 - line = infile[cur_index] - sline = line.strip() - # do we have anything on the line ? - if not sline or sline.startswith('#'): - reset_comment = False - comment_list.append(line) - continue - - if not done_start: - # preserve initial comment - self.initial_comment = comment_list - comment_list = [] - done_start = True - - reset_comment = True - # first we check if it's a section marker - mat = self._sectionmarker.match(line) - if mat is not None: - # is a section line - (indent, sect_open, sect_name, sect_close, comment) = mat.groups() - if indent and (self.indent_type is None): - self.indent_type = indent - cur_depth = sect_open.count('[') - if cur_depth != sect_close.count(']'): - self._handle_error("Cannot compute the section depth at line %s.", - NestingError, infile, cur_index) - continue - - if cur_depth < this_section.depth: - # the new section is dropping back to a previous level - try: - parent = self._match_depth(this_section, - cur_depth).parent - except SyntaxError: - self._handle_error("Cannot compute nesting level at line %s.", - NestingError, infile, cur_index) - continue - elif cur_depth == this_section.depth: - # the new section is a sibling of the current section - parent = this_section.parent - elif cur_depth == this_section.depth + 1: - # the new section is a child the current section - parent = this_section - else: - self._handle_error("Section too nested at line %s.", - NestingError, infile, cur_index) - - sect_name = self._unquote(sect_name) - if sect_name in parent: - self._handle_error('Duplicate section name at line %s.', - DuplicateError, infile, cur_index) - continue - - # create the new section - this_section = Section( - parent, - cur_depth, - self, - name=sect_name) - parent[sect_name] = this_section - parent.inline_comments[sect_name] = comment - parent.comments[sect_name] = comment_list - continue - # - # it's not a section marker, - # so it should be a valid ``key = value`` line - mat = self._keyword.match(line) - if mat is None: - # it neither matched as a keyword - # or a section marker - self._handle_error( - 'Invalid line at line "%s".', - ParseError, infile, cur_index) - else: - # is a keyword value - # value will include any inline comment - (indent, key, value) = mat.groups() - if indent and (self.indent_type is None): - self.indent_type = indent - # check for a multiline value - if value[:3] in ['"""', "'''"]: - try: - value, comment, cur_index = self._multiline( - value, infile, cur_index, maxline) - except SyntaxError: - self._handle_error( - 'Parse error in value at line %s.', - ParseError, infile, cur_index) - continue - else: - if self.unrepr: - comment = '' - try: - value = unrepr(value) - except Exception, e: - if type(e) == UnknownType: - msg = 'Unknown name or type in value at line %s.' - else: - msg = 'Parse error in value at line %s.' - self._handle_error(msg, UnreprError, infile, - cur_index) - continue - else: - if self.unrepr: - comment = '' - try: - value = unrepr(value) - except Exception, e: - if isinstance(e, UnknownType): - msg = 'Unknown name or type in value at line %s.' - else: - msg = 'Parse error in value at line %s.' - self._handle_error(msg, UnreprError, infile, - cur_index) - continue - else: - # extract comment and lists - try: - (value, comment) = self._handle_value(value) - except SyntaxError: - self._handle_error( - 'Parse error in value at line %s.', - ParseError, infile, cur_index) - continue - # - key = self._unquote(key) - if key in this_section: - self._handle_error( - 'Duplicate keyword name at line %s.', - DuplicateError, infile, cur_index) - continue - # add the key. - # we set unrepr because if we have got this far we will never - # be creating a new section - this_section.__setitem__(key, value, unrepr=True) - this_section.inline_comments[key] = comment - this_section.comments[key] = comment_list - continue - # - if self.indent_type is None: - # no indentation used, set the type accordingly - self.indent_type = '' - - # preserve the final comment - if not self and not self.initial_comment: - self.initial_comment = comment_list - elif not reset_comment: - self.final_comment = comment_list - self.list_values = temp_list_values - - - def _match_depth(self, sect, depth): - """ - Given a section and a depth level, walk back through the sections - parents to see if the depth level matches a previous section. - - Return a reference to the right section, - or raise a SyntaxError. - """ - while depth < sect.depth: - if sect is sect.parent: - # we've reached the top level already - raise SyntaxError() - sect = sect.parent - if sect.depth == depth: - return sect - # shouldn't get here - raise SyntaxError() - - - def _handle_error(self, text, ErrorClass, infile, cur_index): - """ - Handle an error according to the error settings. - - Either raise the error or store it. - The error will have occured at ``cur_index`` - """ - line = infile[cur_index] - cur_index += 1 - message = text % cur_index - error = ErrorClass(message, cur_index, line) - if self.raise_errors: - # raise the error - parsing stops here - raise error - # store the error - # reraise when parsing has finished - self._errors.append(error) - - - def _unquote(self, value): - """Return an unquoted version of a value""" - if not value: - # should only happen during parsing of lists - raise SyntaxError - if (value[0] == value[-1]) and (value[0] in ('"', "'")): - value = value[1:-1] - return value - - - def _quote(self, value, multiline=True): - """ - Return a safely quoted version of a value. - - Raise a ConfigObjError if the value cannot be safely quoted. - If multiline is ``True`` (default) then use triple quotes - if necessary. - - * Don't quote values that don't need it. - * Recursively quote members of a list and return a comma joined list. - * Multiline is ``False`` for lists. - * Obey list syntax for empty and single member lists. - - If ``list_values=False`` then the value is only quoted if it contains - a ``\\n`` (is multiline) or '#'. - - If ``write_empty_values`` is set, and the value is an empty string, it - won't be quoted. - """ - if multiline and self.write_empty_values and value == '': - # Only if multiline is set, so that it is used for values not - # keys, and not values that are part of a list - return '' - - if multiline and isinstance(value, (list, tuple)): - if not value: - return ',' - elif len(value) == 1: - return self._quote(value[0], multiline=False) + ',' - return ', '.join([self._quote(val, multiline=False) - for val in value]) - if not isinstance(value, basestring): - if self.stringify: - value = str(value) - else: - raise TypeError('Value "%s" is not a string.' % value) - - if not value: - return '""' - - no_lists_no_quotes = not self.list_values and '\n' not in value and '#' not in value - need_triple = multiline and ((("'" in value) and ('"' in value)) or ('\n' in value )) - hash_triple_quote = multiline and not need_triple and ("'" in value) and ('"' in value) and ('#' in value) - check_for_single = (no_lists_no_quotes or not need_triple) and not hash_triple_quote - - if check_for_single: - if not self.list_values: - # we don't quote if ``list_values=False`` - quot = noquot - # for normal values either single or double quotes will do - elif '\n' in value: - # will only happen if multiline is off - e.g. '\n' in key - raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) - elif ((value[0] not in wspace_plus) and - (value[-1] not in wspace_plus) and - (',' not in value)): - quot = noquot - else: - quot = self._get_single_quote(value) - else: - # if value has '\n' or "'" *and* '"', it will need triple quotes - quot = self._get_triple_quote(value) - - if quot == noquot and '#' in value and self.list_values: - quot = self._get_single_quote(value) - - return quot % value - - - def _get_single_quote(self, value): - if ("'" in value) and ('"' in value): - raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) - elif '"' in value: - quot = squot - else: - quot = dquot - return quot - - - def _get_triple_quote(self, value): - if (value.find('"""') != -1) and (value.find("'''") != -1): - raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) - if value.find('"""') == -1: - quot = tdquot - else: - quot = tsquot - return quot - - - def _handle_value(self, value): - """ - Given a value string, unquote, remove comment, - handle lists. (including empty and single member lists) - """ - if self._inspec: - # Parsing a configspec so don't handle comments - return (value, '') - # do we look for lists in values ? - if not self.list_values: - mat = self._nolistvalue.match(value) - if mat is None: - raise SyntaxError() - # NOTE: we don't unquote here - return mat.groups() - # - mat = self._valueexp.match(value) - if mat is None: - # the value is badly constructed, probably badly quoted, - # or an invalid list - raise SyntaxError() - (list_values, single, empty_list, comment) = mat.groups() - if (list_values == '') and (single is None): - # change this if you want to accept empty values - raise SyntaxError() - # NOTE: note there is no error handling from here if the regex - # is wrong: then incorrect values will slip through - if empty_list is not None: - # the single comma - meaning an empty list - return ([], comment) - if single is not None: - # handle empty values - if list_values and not single: - # FIXME: the '' is a workaround because our regex now matches - # '' at the end of a list if it has a trailing comma - single = None - else: - single = single or '""' - single = self._unquote(single) - if list_values == '': - # not a list value - return (single, comment) - the_list = self._listvalueexp.findall(list_values) - the_list = [self._unquote(val) for val in the_list] - if single is not None: - the_list += [single] - return (the_list, comment) - - - def _multiline(self, value, infile, cur_index, maxline): - """Extract the value, where we are in a multiline situation.""" - quot = value[:3] - newvalue = value[3:] - single_line = self._triple_quote[quot][0] - multi_line = self._triple_quote[quot][1] - mat = single_line.match(value) - if mat is not None: - retval = list(mat.groups()) - retval.append(cur_index) - return retval - elif newvalue.find(quot) != -1: - # somehow the triple quote is missing - raise SyntaxError() - # - while cur_index < maxline: - cur_index += 1 - newvalue += '\n' - line = infile[cur_index] - if line.find(quot) == -1: - newvalue += line - else: - # end of multiline, process it - break - else: - # we've got to the end of the config, oops... - raise SyntaxError() - mat = multi_line.match(line) - if mat is None: - # a badly formed line - raise SyntaxError() - (value, comment) = mat.groups() - return (newvalue + value, comment, cur_index) - - - def _handle_configspec(self, configspec): - """Parse the configspec.""" - # FIXME: Should we check that the configspec was created with the - # correct settings ? (i.e. ``list_values=False``) - if not isinstance(configspec, ConfigObj): - try: - configspec = ConfigObj(configspec, - raise_errors=True, - file_error=True, - _inspec=True) - except ConfigObjError, e: - # FIXME: Should these errors have a reference - # to the already parsed ConfigObj ? - raise ConfigspecError('Parsing configspec failed: %s' % e) - except IOError, e: - raise IOError('Reading configspec failed: %s' % e) - - self.configspec = configspec - - - - def _set_configspec(self, section, copy): - """ - Called by validate. Handles setting the configspec on subsections - including sections to be validated by __many__ - """ - configspec = section.configspec - many = configspec.get('__many__') - if isinstance(many, dict): - for entry in section.sections: - if entry not in configspec: - section[entry].configspec = many - - for entry in configspec.sections: - if entry == '__many__': - continue - if entry not in section: - section[entry] = {} - section[entry]._created = True - if copy: - # copy comments - section.comments[entry] = configspec.comments.get(entry, []) - section.inline_comments[entry] = configspec.inline_comments.get(entry, '') - - # Could be a scalar when we expect a section - if isinstance(section[entry], Section): - section[entry].configspec = configspec[entry] - - - def _write_line(self, indent_string, entry, this_entry, comment): - """Write an individual line, for the write method""" - # NOTE: the calls to self._quote here handles non-StringType values. - if not self.unrepr: - val = self._decode_element(self._quote(this_entry)) - else: - val = repr(this_entry) - return '%s%s%s%s%s' % (indent_string, - self._decode_element(self._quote(entry, multiline=False)), - self._a_to_u(' = '), - val, - self._decode_element(comment)) - - - def _write_marker(self, indent_string, depth, entry, comment): - """Write a section marker line""" - return '%s%s%s%s%s' % (indent_string, - self._a_to_u('[' * depth), - self._quote(self._decode_element(entry), multiline=False), - self._a_to_u(']' * depth), - self._decode_element(comment)) - - - def _handle_comment(self, comment): - """Deal with a comment.""" - if not comment: - return '' - start = self.indent_type - if not comment.startswith('#'): - start += self._a_to_u(' # ') - return (start + comment) - - - # Public methods - - def write(self, outfile=None, section=None): - """ - Write the current ConfigObj as a file - - tekNico: FIXME: use StringIO instead of real files - - >>> filename = a.filename - >>> a.filename = 'test.ini' - >>> a.write() - >>> a.filename = filename - >>> a == ConfigObj('test.ini', raise_errors=True) - 1 - >>> import os - >>> os.remove('test.ini') - """ - if self.indent_type is None: - # this can be true if initialised from a dictionary - self.indent_type = DEFAULT_INDENT_TYPE - - out = [] - cs = self._a_to_u('#') - csp = self._a_to_u('# ') - if section is None: - int_val = self.interpolation - self.interpolation = False - section = self - for line in self.initial_comment: - line = self._decode_element(line) - stripped_line = line.strip() - if stripped_line and not stripped_line.startswith(cs): - line = csp + line - out.append(line) - - indent_string = self.indent_type * section.depth - for entry in (section.scalars + section.sections): - if entry in section.defaults: - # don't write out default values - continue - for comment_line in section.comments[entry]: - comment_line = self._decode_element(comment_line.lstrip()) - if comment_line and not comment_line.startswith(cs): - comment_line = csp + comment_line - out.append(indent_string + comment_line) - this_entry = section[entry] - comment = self._handle_comment(section.inline_comments[entry]) - - if isinstance(this_entry, dict): - # a section - out.append(self._write_marker( - indent_string, - this_entry.depth, - entry, - comment)) - out.extend(self.write(section=this_entry)) - else: - out.append(self._write_line( - indent_string, - entry, - this_entry, - comment)) - - if section is self: - for line in self.final_comment: - line = self._decode_element(line) - stripped_line = line.strip() - if stripped_line and not stripped_line.startswith(cs): - line = csp + line - out.append(line) - self.interpolation = int_val - - if section is not self: - return out - - if (self.filename is None) and (outfile is None): - # output a list of lines - # might need to encode - # NOTE: This will *screw* UTF16, each line will start with the BOM - if self.encoding: - out = [l.encode(self.encoding) for l in out] - if (self.BOM and ((self.encoding is None) or - (BOM_LIST.get(self.encoding.lower()) == 'utf_8'))): - # Add the UTF8 BOM - if not out: - out.append('') - out[0] = BOM_UTF8 + out[0] - return out - - # Turn the list to a string, joined with correct newlines - newline = self.newlines or os.linesep - if (getattr(outfile, 'mode', None) is not None and outfile.mode == 'w' - and sys.platform == 'win32' and newline == '\r\n'): - # Windows specific hack to avoid writing '\r\r\n' - newline = '\n' - output = self._a_to_u(newline).join(out) - if self.encoding: - output = output.encode(self.encoding) - if self.BOM and ((self.encoding is None) or match_utf8(self.encoding)): - # Add the UTF8 BOM - output = BOM_UTF8 + output - - if not output.endswith(newline): - output += newline - if outfile is not None: - outfile.write(output) - else: - h = open(self.filename, 'wb') - h.write(output) - h.close() - - - def validate(self, validator, preserve_errors=False, copy=False, - section=None): - """ - Test the ConfigObj against a configspec. - - It uses the ``validator`` object from *validate.py*. - - To run ``validate`` on the current ConfigObj, call: :: - - test = config.validate(validator) - - (Normally having previously passed in the configspec when the ConfigObj - was created - you can dynamically assign a dictionary of checks to the - ``configspec`` attribute of a section though). - - It returns ``True`` if everything passes, or a dictionary of - pass/fails (True/False). If every member of a subsection passes, it - will just have the value ``True``. (It also returns ``False`` if all - members fail). - - In addition, it converts the values from strings to their native - types if their checks pass (and ``stringify`` is set). - - If ``preserve_errors`` is ``True`` (``False`` is default) then instead - of a marking a fail with a ``False``, it will preserve the actual - exception object. This can contain info about the reason for failure. - For example the ``VdtValueTooSmallError`` indicates that the value - supplied was too small. If a value (or section) is missing it will - still be marked as ``False``. - - You must have the validate module to use ``preserve_errors=True``. - - You can then use the ``flatten_errors`` function to turn your nested - results dictionary into a flattened list of failures - useful for - displaying meaningful error messages. - """ - if section is None: - if self.configspec is None: - raise ValueError('No configspec supplied.') - if preserve_errors: - # We do this once to remove a top level dependency on the validate module - # Which makes importing configobj faster - from validate import VdtMissingValue - self._vdtMissingValue = VdtMissingValue - - section = self - - if copy: - section.initial_comment = section.configspec.initial_comment - section.final_comment = section.configspec.final_comment - section.encoding = section.configspec.encoding - section.BOM = section.configspec.BOM - section.newlines = section.configspec.newlines - section.indent_type = section.configspec.indent_type - - # - # section.default_values.clear() #?? - configspec = section.configspec - self._set_configspec(section, copy) - - - def validate_entry(entry, spec, val, missing, ret_true, ret_false): - section.default_values.pop(entry, None) - - try: - section.default_values[entry] = validator.get_default_value(configspec[entry]) - except (KeyError, AttributeError, validator.baseErrorClass): - # No default, bad default or validator has no 'get_default_value' - # (e.g. SimpleVal) - pass - - try: - check = validator.check(spec, - val, - missing=missing - ) - except validator.baseErrorClass, e: - if not preserve_errors or isinstance(e, self._vdtMissingValue): - out[entry] = False - else: - # preserve the error - out[entry] = e - ret_false = False - ret_true = False - else: - ret_false = False - out[entry] = True - if self.stringify or missing: - # if we are doing type conversion - # or the value is a supplied default - if not self.stringify: - if isinstance(check, (list, tuple)): - # preserve lists - check = [self._str(item) for item in check] - elif missing and check is None: - # convert the None from a default to a '' - check = '' - else: - check = self._str(check) - if (check != val) or missing: - section[entry] = check - if not copy and missing and entry not in section.defaults: - section.defaults.append(entry) - return ret_true, ret_false - - # - out = {} - ret_true = True - ret_false = True - - unvalidated = [k for k in section.scalars if k not in configspec] - incorrect_sections = [k for k in configspec.sections if k in section.scalars] - incorrect_scalars = [k for k in configspec.scalars if k in section.sections] - - for entry in configspec.scalars: - if entry in ('__many__', '___many___'): - # reserved names - continue - if (not entry in section.scalars) or (entry in section.defaults): - # missing entries - # or entries from defaults - missing = True - val = None - if copy and entry not in section.scalars: - # copy comments - section.comments[entry] = ( - configspec.comments.get(entry, [])) - section.inline_comments[entry] = ( - configspec.inline_comments.get(entry, '')) - # - else: - missing = False - val = section[entry] - - ret_true, ret_false = validate_entry(entry, configspec[entry], val, - missing, ret_true, ret_false) - - many = None - if '__many__' in configspec.scalars: - many = configspec['__many__'] - elif '___many___' in configspec.scalars: - many = configspec['___many___'] - - if many is not None: - for entry in unvalidated: - val = section[entry] - ret_true, ret_false = validate_entry(entry, many, val, False, - ret_true, ret_false) - unvalidated = [] - - for entry in incorrect_scalars: - ret_true = False - if not preserve_errors: - out[entry] = False - else: - ret_false = False - msg = 'Value %r was provided as a section' % entry - out[entry] = validator.baseErrorClass(msg) - for entry in incorrect_sections: - ret_true = False - if not preserve_errors: - out[entry] = False - else: - ret_false = False - msg = 'Section %r was provided as a single value' % entry - out[entry] = validator.baseErrorClass(msg) - - # Missing sections will have been created as empty ones when the - # configspec was read. - for entry in section.sections: - # FIXME: this means DEFAULT is not copied in copy mode - if section is self and entry == 'DEFAULT': - continue - if section[entry].configspec is None: - unvalidated.append(entry) - continue - if copy: - section.comments[entry] = configspec.comments.get(entry, []) - section.inline_comments[entry] = configspec.inline_comments.get(entry, '') - check = self.validate(validator, preserve_errors=preserve_errors, copy=copy, section=section[entry]) - out[entry] = check - if check == False: - ret_true = False - elif check == True: - ret_false = False - else: - ret_true = False - - section.extra_values = unvalidated - if preserve_errors and not section._created: - # If the section wasn't created (i.e. it wasn't missing) - # then we can't return False, we need to preserve errors - ret_false = False - # - if ret_false and preserve_errors and out: - # If we are preserving errors, but all - # the failures are from missing sections / values - # then we can return False. Otherwise there is a - # real failure that we need to preserve. - ret_false = not any(out.values()) - if ret_true: - return True - elif ret_false: - return False - return out - - - def reset(self): - """Clear ConfigObj instance and restore to 'freshly created' state.""" - self.clear() - self._initialise() - # FIXME: Should be done by '_initialise', but ConfigObj constructor (and reload) - # requires an empty dictionary - self.configspec = None - # Just to be sure ;-) - self._original_configspec = None - - - def reload(self): - """ - Reload a ConfigObj from file. - - This method raises a ``ReloadError`` if the ConfigObj doesn't have - a filename attribute pointing to a file. - """ - if not isinstance(self.filename, basestring): - raise ReloadError() - - filename = self.filename - current_options = {} - for entry in OPTION_DEFAULTS: - if entry == 'configspec': - continue - current_options[entry] = getattr(self, entry) - - configspec = self._original_configspec - current_options['configspec'] = configspec - - self.clear() - self._initialise(current_options) - self._load(filename, configspec) - - - -class SimpleVal(object): - """ - A simple validator. - Can be used to check that all members expected are present. - - To use it, provide a configspec with all your members in (the value given - will be ignored). Pass an instance of ``SimpleVal`` to the ``validate`` - method of your ``ConfigObj``. ``validate`` will return ``True`` if all - members are present, or a dictionary with True/False meaning - present/missing. (Whole missing sections will be replaced with ``False``) - """ - - def __init__(self): - self.baseErrorClass = ConfigObjError - - def check(self, check, member, missing=False): - """A dummy check method, always returns the value unchanged.""" - if missing: - raise self.baseErrorClass() - return member - - -def flatten_errors(cfg, res, levels=None, results=None): - """ - An example function that will turn a nested dictionary of results - (as returned by ``ConfigObj.validate``) into a flat list. - - ``cfg`` is the ConfigObj instance being checked, ``res`` is the results - dictionary returned by ``validate``. - - (This is a recursive function, so you shouldn't use the ``levels`` or - ``results`` arguments - they are used by the function.) - - Returns a list of keys that failed. Each member of the list is a tuple:: - - ([list of sections...], key, result) - - If ``validate`` was called with ``preserve_errors=False`` (the default) - then ``result`` will always be ``False``. - - *list of sections* is a flattened list of sections that the key was found - in. - - If the section was missing (or a section was expected and a scalar provided - - or vice-versa) then key will be ``None``. - - If the value (or section) was missing then ``result`` will be ``False``. - - If ``validate`` was called with ``preserve_errors=True`` and a value - was present, but failed the check, then ``result`` will be the exception - object returned. You can use this as a string that describes the failure. - - For example *The value "3" is of the wrong type*. - """ - if levels is None: - # first time called - levels = [] - results = [] - if res == True: - return results - if res == False or isinstance(res, Exception): - results.append((levels[:], None, res)) - if levels: - levels.pop() - return results - for (key, val) in res.items(): - if val == True: - continue - if isinstance(cfg.get(key), dict): - # Go down one level - levels.append(key) - flatten_errors(cfg[key], val, levels, results) - continue - results.append((levels[:], key, val)) - # - # Go up one level - if levels: - levels.pop() - # - return results - - -def get_extra_values(conf, _prepend=()): - """ - Find all the values and sections not in the configspec from a validated - ConfigObj. - - ``get_extra_values`` returns a list of tuples where each tuple represents - either an extra section, or an extra value. - - The tuples contain two values, a tuple representing the section the value - is in and the name of the extra values. For extra values in the top level - section the first member will be an empty tuple. For values in the 'foo' - section the first member will be ``('foo',)``. For members in the 'bar' - subsection of the 'foo' section the first member will be ``('foo', 'bar')``. - - NOTE: If you call ``get_extra_values`` on a ConfigObj instance that hasn't - been validated it will return an empty list. - """ - out = [] - - out.extend([(_prepend, name) for name in conf.extra_values]) - for name in conf.sections: - if name not in conf.extra_values: - out.extend(get_extra_values(conf[name], _prepend + (name,))) - return out - - -"""*A programming language is a medium of expression.* - Paul Graham""" diff --git a/python/cdec/sa/__init__.py b/python/cdec/sa/__init__.py deleted file mode 100644 index 8645e837..00000000 --- a/python/cdec/sa/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from _sa import sym_fromstring,\ - SuffixArray, DataArray, LCP, Precomputation, Alignment, BiLex,\ - HieroCachingRuleFactory, Sampler -from extractor import GrammarExtractor diff --git a/python/cdec/sa/compile.py b/python/cdec/sa/compile.py deleted file mode 100644 index 30e605a6..00000000 --- a/python/cdec/sa/compile.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python -import argparse -import os -import logging -import cdec.configobj -import cdec.sa - -MAX_PHRASE_LENGTH = 4 -def precompute(f_sa, max_len, max_nt, max_size, min_gap, rank1, rank2): - lcp = cdec.sa.LCP(f_sa) - stats = sorted(lcp.compute_stats(MAX_PHRASE_LENGTH), reverse=True) - precomp = cdec.sa.Precomputation(from_stats=stats, - fsarray=f_sa, - precompute_rank=rank1, - precompute_secondary_rank=rank2, - max_length=max_len, - max_nonterminals=max_nt, - train_max_initial_size=max_size, - train_min_gap_size=min_gap) - return precomp - -def main(): - logging.basicConfig(level=logging.INFO) - logger = logging.getLogger('cdec.sa.compile') - parser = argparse.ArgumentParser(description='Compile a corpus into a suffix array.') - parser.add_argument('--maxnt', '-n', type=int, default=2, - help='Maximum number of non-terminal symbols') - parser.add_argument('--maxlen', '-l', type=int, default=5, - help='Maximum number of terminals') - parser.add_argument('--maxsize', '-s', type=int, default=15, - help='Maximum rule span') - parser.add_argument('--mingap', '-g', type=int, default=1, - help='Minimum gap size') - parser.add_argument('--rank1', '-r1', type=int, default=100, - help='Number of pre-computed frequent patterns') - parser.add_argument('--rank2', '-r2', type=int, default=10, - help='Number of pre-computed super-frequent patterns)') - parser.add_argument('-c', '--config', default='/dev/stdout', - help='Output configuration') - parser.add_argument('-o', '--output', required=True, - help='Output path') - parser.add_argument('-f', '--source', required=True, - help='Source language corpus') - parser.add_argument('-e', '--target', required=True, - help='Target language corpus') - parser.add_argument('-a', '--alignment', required=True, - help='Bitext word alignment') - args = parser.parse_args() - - param_names = ("max_len", "max_nt", "max_size", "min_gap", "rank1", "rank2") - params = (args.maxlen, args.maxnt, args.maxsize, args.mingap, args.rank1, args.rank2) - - if not os.path.exists(args.output): - os.mkdir(args.output) - - f_sa_bin = os.path.join(args.output, 'f.sa.bin') - e_bin = os.path.join(args.output, 'e.bin') - precomp_file = 'precomp.{0}.{1}.{2}.{3}.{4}.{5}.bin'.format(*params) - precomp_bin = os.path.join(args.output, precomp_file) - a_bin = os.path.join(args.output, 'a.bin') - lex_bin = os.path.join(args.output, 'lex.bin') - - logger.info('Compiling source suffix array') - f_sa = cdec.sa.SuffixArray(from_text=args.source) - f_sa.write_binary(f_sa_bin) - - logger.info('Compiling target data array') - e = cdec.sa.DataArray(from_text=args.target) - e.write_binary(e_bin) - - logger.info('Precomputing frequent phrases') - precompute(f_sa, *params).write_binary(precomp_bin) - - logger.info('Compiling alignment') - a = cdec.sa.Alignment(from_text=args.alignment) - a.write_binary(a_bin) - - logger.info('Compiling bilexical dictionary') - lex = cdec.sa.BiLex(from_data=True, alignment=a, earray=e, fsarray=f_sa) - lex.write_binary(lex_bin) - - # Write configuration - config = cdec.configobj.ConfigObj(args.config, unrepr=True) - config['f_sa_file'] = f_sa_bin - config['e_file'] = e_bin - config['a_file'] = a_bin - config['lex_file'] = lex_bin - config['precompute_file'] = precomp_bin - for name, value in zip(param_names, params): - config[name] = value - config.write() - -if __name__ == '__main__': - main() diff --git a/python/cdec/sa/extract.py b/python/cdec/sa/extract.py deleted file mode 100644 index 918aa3bb..00000000 --- a/python/cdec/sa/extract.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python -import sys -import os -import argparse -import logging -import cdec.sa - -def main(): - logging.basicConfig(level=logging.INFO) - parser = argparse.ArgumentParser(description='Extract grammars from a compiled corpus.') - parser.add_argument('-c', '--config', required=True, - help='Extractor configuration') - parser.add_argument('-g', '--grammars', required=True, - help='Grammar output path') - args = parser.parse_args() - - if not os.path.exists(args.grammars): - os.mkdir(args.grammars) - - extractor = cdec.sa.GrammarExtractor(args.config) - for i, sentence in enumerate(sys.stdin): - sentence = sentence[:-1] - grammar_file = os.path.join(args.grammars, 'grammar.{0}'.format(i)) - with open(grammar_file, 'w') as output: - for rule in extractor.grammar(sentence): - output.write(str(rule)+'\n') - grammar_file = os.path.abspath(grammar_file) - print('{1}'.format(grammar_file, sentence)) - -if __name__ == '__main__': - main() diff --git a/python/cdec/sa/extractor.py b/python/cdec/sa/extractor.py deleted file mode 100644 index bb912e16..00000000 --- a/python/cdec/sa/extractor.py +++ /dev/null @@ -1,78 +0,0 @@ -from itertools import chain -import os -import cdec.configobj -from cdec.sa.features import EgivenFCoherent, SampleCountF, CountEF,\ - MaxLexEgivenF, MaxLexFgivenE, IsSingletonF, IsSingletonFE -import cdec.sa - -# maximum span of a grammar rule in TEST DATA -MAX_INITIAL_SIZE = 15 - -class GrammarExtractor: - def __init__(self, config): - if isinstance(config, str) or isinstance(config, unicode): - if not os.path.exists(config): - raise IOError('cannot read configuration from {0}'.format(config)) - config = cdec.configobj.ConfigObj(config, unrepr=True) - alignment = cdec.sa.Alignment(from_binary=config['a_file']) - self.factory = cdec.sa.HieroCachingRuleFactory( - # compiled alignment object (REQUIRED) - alignment, - # name of generic nonterminal used by Hiero - category="[X]", - # maximum number of contiguous chunks of terminal symbols in RHS of a rule - max_chunks=config['max_nt']+1, - # maximum span of a grammar rule in TEST DATA - max_initial_size=MAX_INITIAL_SIZE, - # maximum number of symbols (both T and NT) allowed in a rule - max_length=config['max_len'], - # maximum number of nonterminals allowed in a rule (set >2 at your own risk) - max_nonterminals=config['max_nt'], - # maximum number of contiguous chunks of terminal symbols - # in target-side RHS of a rule. - max_target_chunks=config['max_nt']+1, - # maximum number of target side symbols (both T and NT) allowed in a rule. - max_target_length=MAX_INITIAL_SIZE, - # minimum span of a nonterminal in the RHS of a rule in TEST DATA - min_gap_size=1, - # filename of file containing precomputed collocations - precompute_file=config['precompute_file'], - # maximum frequency rank of patterns used to compute triples (< 20) - precompute_secondary_rank=config['rank2'], - # maximum frequency rank of patterns used to compute collocations (< 300) - precompute_rank=config['rank1'], - # require extracted rules to have at least one aligned word - require_aligned_terminal=True, - # require each contiguous chunk of extracted rules - # to have at least one aligned word - require_aligned_chunks=False, - # maximum span of a grammar rule extracted from TRAINING DATA - train_max_initial_size=config['max_size'], - # minimum span of an RHS nonterminal in a rule extracted from TRAINING DATA - train_min_gap_size=config['min_gap'], - # True if phrases should be tight, False otherwise (better but slower) - tight_phrases=True, - ) - - # lexical weighting tables - tt = cdec.sa.BiLex(from_binary=config['lex_file']) - - self.models = (EgivenFCoherent, SampleCountF, CountEF, - MaxLexFgivenE(tt), MaxLexEgivenF(tt), IsSingletonF, IsSingletonFE) - - fsarray = cdec.sa.SuffixArray(from_binary=config['f_sa_file']) - edarray = cdec.sa.DataArray(from_binary=config['e_file']) - - # lower=faster, higher=better; improvements level off above 200-300 range, - # -1 = don't sample, use all data (VERY SLOW!) - sampler = cdec.sa.Sampler(300, fsarray) - - self.factory.configure(fsarray, edarray, sampler) - - def grammar(self, sentence): - if isinstance(sentence, unicode): - sentence = sentence.encode('utf8') - cnet = chain(('',), sentence.split(), ('',)) - cnet = (cdec.sa.sym_fromstring(word, terminal=True) for word in cnet) - cnet = tuple(((word, None, 1), ) for word in cnet) - return self.factory.input(cnet, self.models) diff --git a/python/cdec/sa/features.py b/python/cdec/sa/features.py deleted file mode 100644 index 325b9e13..00000000 --- a/python/cdec/sa/features.py +++ /dev/null @@ -1,57 +0,0 @@ -from __future__ import division -import math - -MAXSCORE = 99 - -def EgivenF(fphrase, ephrase, paircount, fcount, fsample_count): # p(e|f) - return -math.log10(paircount/fcount) - -def CountEF(fphrase, ephrase, paircount, fcount, fsample_count): - return math.log10(1 + paircount) - -def SampleCountF(fphrase, ephrase, paircount, fcount, fsample_count): - return math.log10(1 + fsample_count) - -def EgivenFCoherent(fphrase, ephrase, paircount, fcount, fsample_count): - prob = paircount/fsample_count - return -math.log10(prob) if prob > 0 else MAXSCORE - -def CoherenceProb(fphrase, ephrase, paircount, fcount, fsample_count): - return -math.log10(fcount/fsample_count) - -def MaxLexEgivenF(ttable): - def feature(fphrase, ephrase, paircount, fcount, fsample_count): - fwords = fphrase.words - fwords.append('NULL') - def score(): - for e in ephrase.words: - maxScore = max(ttable.get_score(f, e, 0) for f in fwords) - yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE - return sum(score()) - return feature - -def MaxLexFgivenE(ttable): - def feature(fphrase, ephrase, paircount, fcount, fsample_count): - ewords = ephrase.words - ewords.append('NULL') - def score(): - for f in fphrase.words: - maxScore = max(ttable.get_score(f, e, 1) for e in ewords) - yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE - return sum(score()) - return feature - -def IsSingletonF(fphrase, ephrase, paircount, fcount, fsample_count): - return (fcount == 1) - -def IsSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count): - return (paircount == 1) - -def IsNotSingletonF(fphrase, ephrase, paircount, fcount, fsample_count): - return (fcount > 1) - -def IsNotSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count): - return (paircount > 1) - -def IsFEGreaterThanZero(fphrase, ephrase, paircount, fcount, fsample_count): - return (paircount > 0.01) diff --git a/python/cdec/score.py b/python/cdec/score.py deleted file mode 100644 index 22257774..00000000 --- a/python/cdec/score.py +++ /dev/null @@ -1 +0,0 @@ -from _cdec import BLEU, TER, CER, Metric diff --git a/python/pkg/cdec/__init__.py b/python/pkg/cdec/__init__.py new file mode 100644 index 00000000..503ac787 --- /dev/null +++ b/python/pkg/cdec/__init__.py @@ -0,0 +1 @@ +from cdec._cdec import Decoder, Lattice, TRule, NT, NTRef, ParseFailed, InvalidConfig diff --git a/python/pkg/cdec/configobj.py b/python/pkg/cdec/configobj.py new file mode 100644 index 00000000..c1f6e6df --- /dev/null +++ b/python/pkg/cdec/configobj.py @@ -0,0 +1,2468 @@ +# configobj.py +# A config file reader/writer that supports nested sections in config files. +# Copyright (C) 2005-2010 Michael Foord, Nicola Larosa +# E-mail: fuzzyman AT voidspace DOT org DOT uk +# nico AT tekNico DOT net + +# ConfigObj 4 +# http://www.voidspace.org.uk/python/configobj.html + +# Released subject to the BSD License +# Please see http://www.voidspace.org.uk/python/license.shtml + +# Scripts maintained at http://www.voidspace.org.uk/python/index.shtml +# For information about bugfixes, updates and support, please join the +# ConfigObj mailing list: +# http://lists.sourceforge.net/lists/listinfo/configobj-develop +# Comments, suggestions and bug reports welcome. + +from __future__ import generators + +import os +import re +import sys + +from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF16_BE, BOM_UTF16_LE + + +# imported lazily to avoid startup performance hit if it isn't used +compiler = None + +# A dictionary mapping BOM to +# the encoding to decode with, and what to set the +# encoding attribute to. +BOMS = { + BOM_UTF8: ('utf_8', None), + BOM_UTF16_BE: ('utf16_be', 'utf_16'), + BOM_UTF16_LE: ('utf16_le', 'utf_16'), + BOM_UTF16: ('utf_16', 'utf_16'), + } +# All legal variants of the BOM codecs. +# TODO: the list of aliases is not meant to be exhaustive, is there a +# better way ? +BOM_LIST = { + 'utf_16': 'utf_16', + 'u16': 'utf_16', + 'utf16': 'utf_16', + 'utf-16': 'utf_16', + 'utf16_be': 'utf16_be', + 'utf_16_be': 'utf16_be', + 'utf-16be': 'utf16_be', + 'utf16_le': 'utf16_le', + 'utf_16_le': 'utf16_le', + 'utf-16le': 'utf16_le', + 'utf_8': 'utf_8', + 'u8': 'utf_8', + 'utf': 'utf_8', + 'utf8': 'utf_8', + 'utf-8': 'utf_8', + } + +# Map of encodings to the BOM to write. +BOM_SET = { + 'utf_8': BOM_UTF8, + 'utf_16': BOM_UTF16, + 'utf16_be': BOM_UTF16_BE, + 'utf16_le': BOM_UTF16_LE, + None: BOM_UTF8 + } + + +def match_utf8(encoding): + return BOM_LIST.get(encoding.lower()) == 'utf_8' + + +# Quote strings used for writing values +squot = "'%s'" +dquot = '"%s"' +noquot = "%s" +wspace_plus = ' \r\n\v\t\'"' +tsquot = '"""%s"""' +tdquot = "'''%s'''" + +# Sentinel for use in getattr calls to replace hasattr +MISSING = object() + +__version__ = '4.7.2' + +try: + any +except NameError: + def any(iterable): + for entry in iterable: + if entry: + return True + return False + + +__all__ = ( + '__version__', + 'DEFAULT_INDENT_TYPE', + 'DEFAULT_INTERPOLATION', + 'ConfigObjError', + 'NestingError', + 'ParseError', + 'DuplicateError', + 'ConfigspecError', + 'ConfigObj', + 'SimpleVal', + 'InterpolationError', + 'InterpolationLoopError', + 'MissingInterpolationOption', + 'RepeatSectionError', + 'ReloadError', + 'UnreprError', + 'UnknownType', + 'flatten_errors', + 'get_extra_values' +) + +DEFAULT_INTERPOLATION = 'configparser' +DEFAULT_INDENT_TYPE = ' ' +MAX_INTERPOL_DEPTH = 10 + +OPTION_DEFAULTS = { + 'interpolation': True, + 'raise_errors': False, + 'list_values': True, + 'create_empty': False, + 'file_error': False, + 'configspec': None, + 'stringify': True, + # option may be set to one of ('', ' ', '\t') + 'indent_type': None, + 'encoding': None, + 'default_encoding': None, + 'unrepr': False, + 'write_empty_values': False, +} + + + +def getObj(s): + global compiler + if compiler is None: + import compiler + s = "a=" + s + p = compiler.parse(s) + return p.getChildren()[1].getChildren()[0].getChildren()[1] + + +class UnknownType(Exception): + pass + + +class Builder(object): + + def build(self, o): + m = getattr(self, 'build_' + o.__class__.__name__, None) + if m is None: + raise UnknownType(o.__class__.__name__) + return m(o) + + def build_List(self, o): + return map(self.build, o.getChildren()) + + def build_Const(self, o): + return o.value + + def build_Dict(self, o): + d = {} + i = iter(map(self.build, o.getChildren())) + for el in i: + d[el] = i.next() + return d + + def build_Tuple(self, o): + return tuple(self.build_List(o)) + + def build_Name(self, o): + if o.name == 'None': + return None + if o.name == 'True': + return True + if o.name == 'False': + return False + + # An undefined Name + raise UnknownType('Undefined Name') + + def build_Add(self, o): + real, imag = map(self.build_Const, o.getChildren()) + try: + real = float(real) + except TypeError: + raise UnknownType('Add') + if not isinstance(imag, complex) or imag.real != 0.0: + raise UnknownType('Add') + return real+imag + + def build_Getattr(self, o): + parent = self.build(o.expr) + return getattr(parent, o.attrname) + + def build_UnarySub(self, o): + return -self.build_Const(o.getChildren()[0]) + + def build_UnaryAdd(self, o): + return self.build_Const(o.getChildren()[0]) + + +_builder = Builder() + + +def unrepr(s): + if not s: + return s + return _builder.build(getObj(s)) + + + +class ConfigObjError(SyntaxError): + """ + This is the base class for all errors that ConfigObj raises. + It is a subclass of SyntaxError. + """ + def __init__(self, message='', line_number=None, line=''): + self.line = line + self.line_number = line_number + SyntaxError.__init__(self, message) + + +class NestingError(ConfigObjError): + """ + This error indicates a level of nesting that doesn't match. + """ + + +class ParseError(ConfigObjError): + """ + This error indicates that a line is badly written. + It is neither a valid ``key = value`` line, + nor a valid section marker line. + """ + + +class ReloadError(IOError): + """ + A 'reload' operation failed. + This exception is a subclass of ``IOError``. + """ + def __init__(self): + IOError.__init__(self, 'reload failed, filename is not set.') + + +class DuplicateError(ConfigObjError): + """ + The keyword or section specified already exists. + """ + + +class ConfigspecError(ConfigObjError): + """ + An error occured whilst parsing a configspec. + """ + + +class InterpolationError(ConfigObjError): + """Base class for the two interpolation errors.""" + + +class InterpolationLoopError(InterpolationError): + """Maximum interpolation depth exceeded in string interpolation.""" + + def __init__(self, option): + InterpolationError.__init__( + self, + 'interpolation loop detected in value "%s".' % option) + + +class RepeatSectionError(ConfigObjError): + """ + This error indicates additional sections in a section with a + ``__many__`` (repeated) section. + """ + + +class MissingInterpolationOption(InterpolationError): + """A value specified for interpolation was missing.""" + def __init__(self, option): + msg = 'missing option "%s" in interpolation.' % option + InterpolationError.__init__(self, msg) + + +class UnreprError(ConfigObjError): + """An error parsing in unrepr mode.""" + + + +class InterpolationEngine(object): + """ + A helper class to help perform string interpolation. + + This class is an abstract base class; its descendants perform + the actual work. + """ + + # compiled regexp to use in self.interpolate() + _KEYCRE = re.compile(r"%\(([^)]*)\)s") + _cookie = '%' + + def __init__(self, section): + # the Section instance that "owns" this engine + self.section = section + + + def interpolate(self, key, value): + # short-cut + if not self._cookie in value: + return value + + def recursive_interpolate(key, value, section, backtrail): + """The function that does the actual work. + + ``value``: the string we're trying to interpolate. + ``section``: the section in which that string was found + ``backtrail``: a dict to keep track of where we've been, + to detect and prevent infinite recursion loops + + This is similar to a depth-first-search algorithm. + """ + # Have we been here already? + if (key, section.name) in backtrail: + # Yes - infinite loop detected + raise InterpolationLoopError(key) + # Place a marker on our backtrail so we won't come back here again + backtrail[(key, section.name)] = 1 + + # Now start the actual work + match = self._KEYCRE.search(value) + while match: + # The actual parsing of the match is implementation-dependent, + # so delegate to our helper function + k, v, s = self._parse_match(match) + if k is None: + # That's the signal that no further interpolation is needed + replacement = v + else: + # Further interpolation may be needed to obtain final value + replacement = recursive_interpolate(k, v, s, backtrail) + # Replace the matched string with its final value + start, end = match.span() + value = ''.join((value[:start], replacement, value[end:])) + new_search_start = start + len(replacement) + # Pick up the next interpolation key, if any, for next time + # through the while loop + match = self._KEYCRE.search(value, new_search_start) + + # Now safe to come back here again; remove marker from backtrail + del backtrail[(key, section.name)] + + return value + + # Back in interpolate(), all we have to do is kick off the recursive + # function with appropriate starting values + value = recursive_interpolate(key, value, self.section, {}) + return value + + + def _fetch(self, key): + """Helper function to fetch values from owning section. + + Returns a 2-tuple: the value, and the section where it was found. + """ + # switch off interpolation before we try and fetch anything ! + save_interp = self.section.main.interpolation + self.section.main.interpolation = False + + # Start at section that "owns" this InterpolationEngine + current_section = self.section + while True: + # try the current section first + val = current_section.get(key) + if val is not None and not isinstance(val, Section): + break + # try "DEFAULT" next + val = current_section.get('DEFAULT', {}).get(key) + if val is not None and not isinstance(val, Section): + break + # move up to parent and try again + # top-level's parent is itself + if current_section.parent is current_section: + # reached top level, time to give up + break + current_section = current_section.parent + + # restore interpolation to previous value before returning + self.section.main.interpolation = save_interp + if val is None: + raise MissingInterpolationOption(key) + return val, current_section + + + def _parse_match(self, match): + """Implementation-dependent helper function. + + Will be passed a match object corresponding to the interpolation + key we just found (e.g., "%(foo)s" or "$foo"). Should look up that + key in the appropriate config file section (using the ``_fetch()`` + helper function) and return a 3-tuple: (key, value, section) + + ``key`` is the name of the key we're looking for + ``value`` is the value found for that key + ``section`` is a reference to the section where it was found + + ``key`` and ``section`` should be None if no further + interpolation should be performed on the resulting value + (e.g., if we interpolated "$$" and returned "$"). + """ + raise NotImplementedError() + + + +class ConfigParserInterpolation(InterpolationEngine): + """Behaves like ConfigParser.""" + _cookie = '%' + _KEYCRE = re.compile(r"%\(([^)]*)\)s") + + def _parse_match(self, match): + key = match.group(1) + value, section = self._fetch(key) + return key, value, section + + + +class TemplateInterpolation(InterpolationEngine): + """Behaves like string.Template.""" + _cookie = '$' + _delimiter = '$' + _KEYCRE = re.compile(r""" + \$(?: + (?P\$) | # Two $ signs + (?P[_a-z][_a-z0-9]*) | # $name format + {(?P[^}]*)} # ${name} format + ) + """, re.IGNORECASE | re.VERBOSE) + + def _parse_match(self, match): + # Valid name (in or out of braces): fetch value from section + key = match.group('named') or match.group('braced') + if key is not None: + value, section = self._fetch(key) + return key, value, section + # Escaped delimiter (e.g., $$): return single delimiter + if match.group('escaped') is not None: + # Return None for key and section to indicate it's time to stop + return None, self._delimiter, None + # Anything else: ignore completely, just return it unchanged + return None, match.group(), None + + +interpolation_engines = { + 'configparser': ConfigParserInterpolation, + 'template': TemplateInterpolation, +} + + +def __newobj__(cls, *args): + # Hack for pickle + return cls.__new__(cls, *args) + +class Section(dict): + """ + A dictionary-like object that represents a section in a config file. + + It does string interpolation if the 'interpolation' attribute + of the 'main' object is set to True. + + Interpolation is tried first from this object, then from the 'DEFAULT' + section of this object, next from the parent and its 'DEFAULT' section, + and so on until the main object is reached. + + A Section will behave like an ordered dictionary - following the + order of the ``scalars`` and ``sections`` attributes. + You can use this to change the order of members. + + Iteration follows the order: scalars, then sections. + """ + + + def __setstate__(self, state): + dict.update(self, state[0]) + self.__dict__.update(state[1]) + + def __reduce__(self): + state = (dict(self), self.__dict__) + return (__newobj__, (self.__class__,), state) + + + def __init__(self, parent, depth, main, indict=None, name=None): + """ + * parent is the section above + * depth is the depth level of this section + * main is the main ConfigObj + * indict is a dictionary to initialise the section with + """ + if indict is None: + indict = {} + dict.__init__(self) + # used for nesting level *and* interpolation + self.parent = parent + # used for the interpolation attribute + self.main = main + # level of nesting depth of this Section + self.depth = depth + # purely for information + self.name = name + # + self._initialise() + # we do this explicitly so that __setitem__ is used properly + # (rather than just passing to ``dict.__init__``) + for entry, value in indict.iteritems(): + self[entry] = value + + + def _initialise(self): + # the sequence of scalar values in this Section + self.scalars = [] + # the sequence of sections in this Section + self.sections = [] + # for comments :-) + self.comments = {} + self.inline_comments = {} + # the configspec + self.configspec = None + # for defaults + self.defaults = [] + self.default_values = {} + self.extra_values = [] + self._created = False + + + def _interpolate(self, key, value): + try: + # do we already have an interpolation engine? + engine = self._interpolation_engine + except AttributeError: + # not yet: first time running _interpolate(), so pick the engine + name = self.main.interpolation + if name == True: # note that "if name:" would be incorrect here + # backwards-compatibility: interpolation=True means use default + name = DEFAULT_INTERPOLATION + name = name.lower() # so that "Template", "template", etc. all work + class_ = interpolation_engines.get(name, None) + if class_ is None: + # invalid value for self.main.interpolation + self.main.interpolation = False + return value + else: + # save reference to engine so we don't have to do this again + engine = self._interpolation_engine = class_(self) + # let the engine do the actual work + return engine.interpolate(key, value) + + + def __getitem__(self, key): + """Fetch the item and do string interpolation.""" + val = dict.__getitem__(self, key) + if self.main.interpolation: + if isinstance(val, basestring): + return self._interpolate(key, val) + if isinstance(val, list): + def _check(entry): + if isinstance(entry, basestring): + return self._interpolate(key, entry) + return entry + new = [_check(entry) for entry in val] + if new != val: + return new + return val + + + def __setitem__(self, key, value, unrepr=False): + """ + Correctly set a value. + + Making dictionary values Section instances. + (We have to special case 'Section' instances - which are also dicts) + + Keys must be strings. + Values need only be strings (or lists of strings) if + ``main.stringify`` is set. + + ``unrepr`` must be set when setting a value to a dictionary, without + creating a new sub-section. + """ + if not isinstance(key, basestring): + raise ValueError('The key "%s" is not a string.' % key) + + # add the comment + if key not in self.comments: + self.comments[key] = [] + self.inline_comments[key] = '' + # remove the entry from defaults + if key in self.defaults: + self.defaults.remove(key) + # + if isinstance(value, Section): + if key not in self: + self.sections.append(key) + dict.__setitem__(self, key, value) + elif isinstance(value, dict) and not unrepr: + # First create the new depth level, + # then create the section + if key not in self: + self.sections.append(key) + new_depth = self.depth + 1 + dict.__setitem__( + self, + key, + Section( + self, + new_depth, + self.main, + indict=value, + name=key)) + else: + if key not in self: + self.scalars.append(key) + if not self.main.stringify: + if isinstance(value, basestring): + pass + elif isinstance(value, (list, tuple)): + for entry in value: + if not isinstance(entry, basestring): + raise TypeError('Value is not a string "%s".' % entry) + else: + raise TypeError('Value is not a string "%s".' % value) + dict.__setitem__(self, key, value) + + + def __delitem__(self, key): + """Remove items from the sequence when deleting.""" + dict. __delitem__(self, key) + if key in self.scalars: + self.scalars.remove(key) + else: + self.sections.remove(key) + del self.comments[key] + del self.inline_comments[key] + + + def get(self, key, default=None): + """A version of ``get`` that doesn't bypass string interpolation.""" + try: + return self[key] + except KeyError: + return default + + + def update(self, indict): + """ + A version of update that uses our ``__setitem__``. + """ + for entry in indict: + self[entry] = indict[entry] + + + def pop(self, key, default=MISSING): + """ + 'D.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised' + """ + try: + val = self[key] + except KeyError: + if default is MISSING: + raise + val = default + else: + del self[key] + return val + + + def popitem(self): + """Pops the first (key,val)""" + sequence = (self.scalars + self.sections) + if not sequence: + raise KeyError(": 'popitem(): dictionary is empty'") + key = sequence[0] + val = self[key] + del self[key] + return key, val + + + def clear(self): + """ + A version of clear that also affects scalars/sections + Also clears comments and configspec. + + Leaves other attributes alone : + depth/main/parent are not affected + """ + dict.clear(self) + self.scalars = [] + self.sections = [] + self.comments = {} + self.inline_comments = {} + self.configspec = None + self.defaults = [] + self.extra_values = [] + + + def setdefault(self, key, default=None): + """A version of setdefault that sets sequence if appropriate.""" + try: + return self[key] + except KeyError: + self[key] = default + return self[key] + + + def items(self): + """D.items() -> list of D's (key, value) pairs, as 2-tuples""" + return zip((self.scalars + self.sections), self.values()) + + + def keys(self): + """D.keys() -> list of D's keys""" + return (self.scalars + self.sections) + + + def values(self): + """D.values() -> list of D's values""" + return [self[key] for key in (self.scalars + self.sections)] + + + def iteritems(self): + """D.iteritems() -> an iterator over the (key, value) items of D""" + return iter(self.items()) + + + def iterkeys(self): + """D.iterkeys() -> an iterator over the keys of D""" + return iter((self.scalars + self.sections)) + + __iter__ = iterkeys + + + def itervalues(self): + """D.itervalues() -> an iterator over the values of D""" + return iter(self.values()) + + + def __repr__(self): + """x.__repr__() <==> repr(x)""" + def _getval(key): + try: + return self[key] + except MissingInterpolationOption: + return dict.__getitem__(self, key) + return '{%s}' % ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) + for key in (self.scalars + self.sections)]) + + __str__ = __repr__ + __str__.__doc__ = "x.__str__() <==> str(x)" + + + # Extra methods - not in a normal dictionary + + def dict(self): + """ + Return a deepcopy of self as a dictionary. + + All members that are ``Section`` instances are recursively turned to + ordinary dictionaries - by calling their ``dict`` method. + + >>> n = a.dict() + >>> n == a + 1 + >>> n is a + 0 + """ + newdict = {} + for entry in self: + this_entry = self[entry] + if isinstance(this_entry, Section): + this_entry = this_entry.dict() + elif isinstance(this_entry, list): + # create a copy rather than a reference + this_entry = list(this_entry) + elif isinstance(this_entry, tuple): + # create a copy rather than a reference + this_entry = tuple(this_entry) + newdict[entry] = this_entry + return newdict + + + def merge(self, indict): + """ + A recursive update - useful for merging config files. + + >>> a = '''[section1] + ... option1 = True + ... [[subsection]] + ... more_options = False + ... # end of file'''.splitlines() + >>> b = '''# File is user.ini + ... [section1] + ... option1 = False + ... # end of file'''.splitlines() + >>> c1 = ConfigObj(b) + >>> c2 = ConfigObj(a) + >>> c2.merge(c1) + >>> c2 + ConfigObj({'section1': {'option1': 'False', 'subsection': {'more_options': 'False'}}}) + """ + for key, val in indict.items(): + if (key in self and isinstance(self[key], dict) and + isinstance(val, dict)): + self[key].merge(val) + else: + self[key] = val + + + def rename(self, oldkey, newkey): + """ + Change a keyname to another, without changing position in sequence. + + Implemented so that transformations can be made on keys, + as well as on values. (used by encode and decode) + + Also renames comments. + """ + if oldkey in self.scalars: + the_list = self.scalars + elif oldkey in self.sections: + the_list = self.sections + else: + raise KeyError('Key "%s" not found.' % oldkey) + pos = the_list.index(oldkey) + # + val = self[oldkey] + dict.__delitem__(self, oldkey) + dict.__setitem__(self, newkey, val) + the_list.remove(oldkey) + the_list.insert(pos, newkey) + comm = self.comments[oldkey] + inline_comment = self.inline_comments[oldkey] + del self.comments[oldkey] + del self.inline_comments[oldkey] + self.comments[newkey] = comm + self.inline_comments[newkey] = inline_comment + + + def walk(self, function, raise_errors=True, + call_on_sections=False, **keywargs): + """ + Walk every member and call a function on the keyword and value. + + Return a dictionary of the return values + + If the function raises an exception, raise the errror + unless ``raise_errors=False``, in which case set the return value to + ``False``. + + Any unrecognised keyword arguments you pass to walk, will be pased on + to the function you pass in. + + Note: if ``call_on_sections`` is ``True`` then - on encountering a + subsection, *first* the function is called for the *whole* subsection, + and then recurses into it's members. This means your function must be + able to handle strings, dictionaries and lists. This allows you + to change the key of subsections as well as for ordinary members. The + return value when called on the whole subsection has to be discarded. + + See the encode and decode methods for examples, including functions. + + .. admonition:: caution + + You can use ``walk`` to transform the names of members of a section + but you mustn't add or delete members. + + >>> config = '''[XXXXsection] + ... XXXXkey = XXXXvalue'''.splitlines() + >>> cfg = ConfigObj(config) + >>> cfg + ConfigObj({'XXXXsection': {'XXXXkey': 'XXXXvalue'}}) + >>> def transform(section, key): + ... val = section[key] + ... newkey = key.replace('XXXX', 'CLIENT1') + ... section.rename(key, newkey) + ... if isinstance(val, (tuple, list, dict)): + ... pass + ... else: + ... val = val.replace('XXXX', 'CLIENT1') + ... section[newkey] = val + >>> cfg.walk(transform, call_on_sections=True) + {'CLIENT1section': {'CLIENT1key': None}} + >>> cfg + ConfigObj({'CLIENT1section': {'CLIENT1key': 'CLIENT1value'}}) + """ + out = {} + # scalars first + for i in range(len(self.scalars)): + entry = self.scalars[i] + try: + val = function(self, entry, **keywargs) + # bound again in case name has changed + entry = self.scalars[i] + out[entry] = val + except Exception: + if raise_errors: + raise + else: + entry = self.scalars[i] + out[entry] = False + # then sections + for i in range(len(self.sections)): + entry = self.sections[i] + if call_on_sections: + try: + function(self, entry, **keywargs) + except Exception: + if raise_errors: + raise + else: + entry = self.sections[i] + out[entry] = False + # bound again in case name has changed + entry = self.sections[i] + # previous result is discarded + out[entry] = self[entry].walk( + function, + raise_errors=raise_errors, + call_on_sections=call_on_sections, + **keywargs) + return out + + + def as_bool(self, key): + """ + Accepts a key as input. The corresponding value must be a string or + the objects (``True`` or 1) or (``False`` or 0). We allow 0 and 1 to + retain compatibility with Python 2.2. + + If the string is one of ``True``, ``On``, ``Yes``, or ``1`` it returns + ``True``. + + If the string is one of ``False``, ``Off``, ``No``, or ``0`` it returns + ``False``. + + ``as_bool`` is not case sensitive. + + Any other input will raise a ``ValueError``. + + >>> a = ConfigObj() + >>> a['a'] = 'fish' + >>> a.as_bool('a') + Traceback (most recent call last): + ValueError: Value "fish" is neither True nor False + >>> a['b'] = 'True' + >>> a.as_bool('b') + 1 + >>> a['b'] = 'off' + >>> a.as_bool('b') + 0 + """ + val = self[key] + if val == True: + return True + elif val == False: + return False + else: + try: + if not isinstance(val, basestring): + # TODO: Why do we raise a KeyError here? + raise KeyError() + else: + return self.main._bools[val.lower()] + except KeyError: + raise ValueError('Value "%s" is neither True nor False' % val) + + + def as_int(self, key): + """ + A convenience method which coerces the specified value to an integer. + + If the value is an invalid literal for ``int``, a ``ValueError`` will + be raised. + + >>> a = ConfigObj() + >>> a['a'] = 'fish' + >>> a.as_int('a') + Traceback (most recent call last): + ValueError: invalid literal for int() with base 10: 'fish' + >>> a['b'] = '1' + >>> a.as_int('b') + 1 + >>> a['b'] = '3.2' + >>> a.as_int('b') + Traceback (most recent call last): + ValueError: invalid literal for int() with base 10: '3.2' + """ + return int(self[key]) + + + def as_float(self, key): + """ + A convenience method which coerces the specified value to a float. + + If the value is an invalid literal for ``float``, a ``ValueError`` will + be raised. + + >>> a = ConfigObj() + >>> a['a'] = 'fish' + >>> a.as_float('a') + Traceback (most recent call last): + ValueError: invalid literal for float(): fish + >>> a['b'] = '1' + >>> a.as_float('b') + 1.0 + >>> a['b'] = '3.2' + >>> a.as_float('b') + 3.2000000000000002 + """ + return float(self[key]) + + + def as_list(self, key): + """ + A convenience method which fetches the specified value, guaranteeing + that it is a list. + + >>> a = ConfigObj() + >>> a['a'] = 1 + >>> a.as_list('a') + [1] + >>> a['a'] = (1,) + >>> a.as_list('a') + [1] + >>> a['a'] = [1] + >>> a.as_list('a') + [1] + """ + result = self[key] + if isinstance(result, (tuple, list)): + return list(result) + return [result] + + + def restore_default(self, key): + """ + Restore (and return) default value for the specified key. + + This method will only work for a ConfigObj that was created + with a configspec and has been validated. + + If there is no default value for this key, ``KeyError`` is raised. + """ + default = self.default_values[key] + dict.__setitem__(self, key, default) + if key not in self.defaults: + self.defaults.append(key) + return default + + + def restore_defaults(self): + """ + Recursively restore default values to all members + that have them. + + This method will only work for a ConfigObj that was created + with a configspec and has been validated. + + It doesn't delete or modify entries without default values. + """ + for key in self.default_values: + self.restore_default(key) + + for section in self.sections: + self[section].restore_defaults() + + +class ConfigObj(Section): + """An object to read, create, and write config files.""" + + _keyword = re.compile(r'''^ # line start + (\s*) # indentation + ( # keyword + (?:".*?")| # double quotes + (?:'.*?')| # single quotes + (?:[^'"=].*?) # no quotes + ) + \s*=\s* # divider + (.*) # value (including list values and comments) + $ # line end + ''', + re.VERBOSE) + + _sectionmarker = re.compile(r'''^ + (\s*) # 1: indentation + ((?:\[\s*)+) # 2: section marker open + ( # 3: section name open + (?:"\s*\S.*?\s*")| # at least one non-space with double quotes + (?:'\s*\S.*?\s*')| # at least one non-space with single quotes + (?:[^'"\s].*?) # at least one non-space unquoted + ) # section name close + ((?:\s*\])+) # 4: section marker close + \s*(\#.*)? # 5: optional comment + $''', + re.VERBOSE) + + # this regexp pulls list values out as a single string + # or single values and comments + # FIXME: this regex adds a '' to the end of comma terminated lists + # workaround in ``_handle_value`` + _valueexp = re.compile(r'''^ + (?: + (?: + ( + (?: + (?: + (?:".*?")| # double quotes + (?:'.*?')| # single quotes + (?:[^'",\#][^,\#]*?) # unquoted + ) + \s*,\s* # comma + )* # match all list items ending in a comma (if any) + ) + ( + (?:".*?")| # double quotes + (?:'.*?')| # single quotes + (?:[^'",\#\s][^,]*?)| # unquoted + (?:(? 1: + msg = "Parsing failed with several errors.\nFirst error %s" % info + error = ConfigObjError(msg) + else: + error = self._errors[0] + # set the errors attribute; it's a list of tuples: + # (error_type, message, line_number) + error.errors = self._errors + # set the config attribute + error.config = self + raise error + # delete private attributes + del self._errors + + if configspec is None: + self.configspec = None + else: + self._handle_configspec(configspec) + + + def _initialise(self, options=None): + if options is None: + options = OPTION_DEFAULTS + + # initialise a few variables + self.filename = None + self._errors = [] + self.raise_errors = options['raise_errors'] + self.interpolation = options['interpolation'] + self.list_values = options['list_values'] + self.create_empty = options['create_empty'] + self.file_error = options['file_error'] + self.stringify = options['stringify'] + self.indent_type = options['indent_type'] + self.encoding = options['encoding'] + self.default_encoding = options['default_encoding'] + self.BOM = False + self.newlines = None + self.write_empty_values = options['write_empty_values'] + self.unrepr = options['unrepr'] + + self.initial_comment = [] + self.final_comment = [] + self.configspec = None + + if self._inspec: + self.list_values = False + + # Clear section attributes as well + Section._initialise(self) + + + def __repr__(self): + def _getval(key): + try: + return self[key] + except MissingInterpolationOption: + return dict.__getitem__(self, key) + return ('ConfigObj({%s})' % + ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) + for key in (self.scalars + self.sections)])) + + + def _handle_bom(self, infile): + """ + Handle any BOM, and decode if necessary. + + If an encoding is specified, that *must* be used - but the BOM should + still be removed (and the BOM attribute set). + + (If the encoding is wrongly specified, then a BOM for an alternative + encoding won't be discovered or removed.) + + If an encoding is not specified, UTF8 or UTF16 BOM will be detected and + removed. The BOM attribute will be set. UTF16 will be decoded to + unicode. + + NOTE: This method must not be called with an empty ``infile``. + + Specifying the *wrong* encoding is likely to cause a + ``UnicodeDecodeError``. + + ``infile`` must always be returned as a list of lines, but may be + passed in as a single string. + """ + if ((self.encoding is not None) and + (self.encoding.lower() not in BOM_LIST)): + # No need to check for a BOM + # the encoding specified doesn't have one + # just decode + return self._decode(infile, self.encoding) + + if isinstance(infile, (list, tuple)): + line = infile[0] + else: + line = infile + if self.encoding is not None: + # encoding explicitly supplied + # And it could have an associated BOM + # TODO: if encoding is just UTF16 - we ought to check for both + # TODO: big endian and little endian versions. + enc = BOM_LIST[self.encoding.lower()] + if enc == 'utf_16': + # For UTF16 we try big endian and little endian + for BOM, (encoding, final_encoding) in BOMS.items(): + if not final_encoding: + # skip UTF8 + continue + if infile.startswith(BOM): + ### BOM discovered + ##self.BOM = True + # Don't need to remove BOM + return self._decode(infile, encoding) + + # If we get this far, will *probably* raise a DecodeError + # As it doesn't appear to start with a BOM + return self._decode(infile, self.encoding) + + # Must be UTF8 + BOM = BOM_SET[enc] + if not line.startswith(BOM): + return self._decode(infile, self.encoding) + + newline = line[len(BOM):] + + # BOM removed + if isinstance(infile, (list, tuple)): + infile[0] = newline + else: + infile = newline + self.BOM = True + return self._decode(infile, self.encoding) + + # No encoding specified - so we need to check for UTF8/UTF16 + for BOM, (encoding, final_encoding) in BOMS.items(): + if not line.startswith(BOM): + continue + else: + # BOM discovered + self.encoding = final_encoding + if not final_encoding: + self.BOM = True + # UTF8 + # remove BOM + newline = line[len(BOM):] + if isinstance(infile, (list, tuple)): + infile[0] = newline + else: + infile = newline + # UTF8 - don't decode + if isinstance(infile, basestring): + return infile.splitlines(True) + else: + return infile + # UTF16 - have to decode + return self._decode(infile, encoding) + + # No BOM discovered and no encoding specified, just return + if isinstance(infile, basestring): + # infile read from a file will be a single string + return infile.splitlines(True) + return infile + + + def _a_to_u(self, aString): + """Decode ASCII strings to unicode if a self.encoding is specified.""" + if self.encoding: + return aString.decode('ascii') + else: + return aString + + + def _decode(self, infile, encoding): + """ + Decode infile to unicode. Using the specified encoding. + + if is a string, it also needs converting to a list. + """ + if isinstance(infile, basestring): + # can't be unicode + # NOTE: Could raise a ``UnicodeDecodeError`` + return infile.decode(encoding).splitlines(True) + for i, line in enumerate(infile): + if not isinstance(line, unicode): + # NOTE: The isinstance test here handles mixed lists of unicode/string + # NOTE: But the decode will break on any non-string values + # NOTE: Or could raise a ``UnicodeDecodeError`` + infile[i] = line.decode(encoding) + return infile + + + def _decode_element(self, line): + """Decode element to unicode if necessary.""" + if not self.encoding: + return line + if isinstance(line, str) and self.default_encoding: + return line.decode(self.default_encoding) + return line + + + def _str(self, value): + """ + Used by ``stringify`` within validate, to turn non-string values + into strings. + """ + if not isinstance(value, basestring): + return str(value) + else: + return value + + + def _parse(self, infile): + """Actually parse the config file.""" + temp_list_values = self.list_values + if self.unrepr: + self.list_values = False + + comment_list = [] + done_start = False + this_section = self + maxline = len(infile) - 1 + cur_index = -1 + reset_comment = False + + while cur_index < maxline: + if reset_comment: + comment_list = [] + cur_index += 1 + line = infile[cur_index] + sline = line.strip() + # do we have anything on the line ? + if not sline or sline.startswith('#'): + reset_comment = False + comment_list.append(line) + continue + + if not done_start: + # preserve initial comment + self.initial_comment = comment_list + comment_list = [] + done_start = True + + reset_comment = True + # first we check if it's a section marker + mat = self._sectionmarker.match(line) + if mat is not None: + # is a section line + (indent, sect_open, sect_name, sect_close, comment) = mat.groups() + if indent and (self.indent_type is None): + self.indent_type = indent + cur_depth = sect_open.count('[') + if cur_depth != sect_close.count(']'): + self._handle_error("Cannot compute the section depth at line %s.", + NestingError, infile, cur_index) + continue + + if cur_depth < this_section.depth: + # the new section is dropping back to a previous level + try: + parent = self._match_depth(this_section, + cur_depth).parent + except SyntaxError: + self._handle_error("Cannot compute nesting level at line %s.", + NestingError, infile, cur_index) + continue + elif cur_depth == this_section.depth: + # the new section is a sibling of the current section + parent = this_section.parent + elif cur_depth == this_section.depth + 1: + # the new section is a child the current section + parent = this_section + else: + self._handle_error("Section too nested at line %s.", + NestingError, infile, cur_index) + + sect_name = self._unquote(sect_name) + if sect_name in parent: + self._handle_error('Duplicate section name at line %s.', + DuplicateError, infile, cur_index) + continue + + # create the new section + this_section = Section( + parent, + cur_depth, + self, + name=sect_name) + parent[sect_name] = this_section + parent.inline_comments[sect_name] = comment + parent.comments[sect_name] = comment_list + continue + # + # it's not a section marker, + # so it should be a valid ``key = value`` line + mat = self._keyword.match(line) + if mat is None: + # it neither matched as a keyword + # or a section marker + self._handle_error( + 'Invalid line at line "%s".', + ParseError, infile, cur_index) + else: + # is a keyword value + # value will include any inline comment + (indent, key, value) = mat.groups() + if indent and (self.indent_type is None): + self.indent_type = indent + # check for a multiline value + if value[:3] in ['"""', "'''"]: + try: + value, comment, cur_index = self._multiline( + value, infile, cur_index, maxline) + except SyntaxError: + self._handle_error( + 'Parse error in value at line %s.', + ParseError, infile, cur_index) + continue + else: + if self.unrepr: + comment = '' + try: + value = unrepr(value) + except Exception, e: + if type(e) == UnknownType: + msg = 'Unknown name or type in value at line %s.' + else: + msg = 'Parse error in value at line %s.' + self._handle_error(msg, UnreprError, infile, + cur_index) + continue + else: + if self.unrepr: + comment = '' + try: + value = unrepr(value) + except Exception, e: + if isinstance(e, UnknownType): + msg = 'Unknown name or type in value at line %s.' + else: + msg = 'Parse error in value at line %s.' + self._handle_error(msg, UnreprError, infile, + cur_index) + continue + else: + # extract comment and lists + try: + (value, comment) = self._handle_value(value) + except SyntaxError: + self._handle_error( + 'Parse error in value at line %s.', + ParseError, infile, cur_index) + continue + # + key = self._unquote(key) + if key in this_section: + self._handle_error( + 'Duplicate keyword name at line %s.', + DuplicateError, infile, cur_index) + continue + # add the key. + # we set unrepr because if we have got this far we will never + # be creating a new section + this_section.__setitem__(key, value, unrepr=True) + this_section.inline_comments[key] = comment + this_section.comments[key] = comment_list + continue + # + if self.indent_type is None: + # no indentation used, set the type accordingly + self.indent_type = '' + + # preserve the final comment + if not self and not self.initial_comment: + self.initial_comment = comment_list + elif not reset_comment: + self.final_comment = comment_list + self.list_values = temp_list_values + + + def _match_depth(self, sect, depth): + """ + Given a section and a depth level, walk back through the sections + parents to see if the depth level matches a previous section. + + Return a reference to the right section, + or raise a SyntaxError. + """ + while depth < sect.depth: + if sect is sect.parent: + # we've reached the top level already + raise SyntaxError() + sect = sect.parent + if sect.depth == depth: + return sect + # shouldn't get here + raise SyntaxError() + + + def _handle_error(self, text, ErrorClass, infile, cur_index): + """ + Handle an error according to the error settings. + + Either raise the error or store it. + The error will have occured at ``cur_index`` + """ + line = infile[cur_index] + cur_index += 1 + message = text % cur_index + error = ErrorClass(message, cur_index, line) + if self.raise_errors: + # raise the error - parsing stops here + raise error + # store the error + # reraise when parsing has finished + self._errors.append(error) + + + def _unquote(self, value): + """Return an unquoted version of a value""" + if not value: + # should only happen during parsing of lists + raise SyntaxError + if (value[0] == value[-1]) and (value[0] in ('"', "'")): + value = value[1:-1] + return value + + + def _quote(self, value, multiline=True): + """ + Return a safely quoted version of a value. + + Raise a ConfigObjError if the value cannot be safely quoted. + If multiline is ``True`` (default) then use triple quotes + if necessary. + + * Don't quote values that don't need it. + * Recursively quote members of a list and return a comma joined list. + * Multiline is ``False`` for lists. + * Obey list syntax for empty and single member lists. + + If ``list_values=False`` then the value is only quoted if it contains + a ``\\n`` (is multiline) or '#'. + + If ``write_empty_values`` is set, and the value is an empty string, it + won't be quoted. + """ + if multiline and self.write_empty_values and value == '': + # Only if multiline is set, so that it is used for values not + # keys, and not values that are part of a list + return '' + + if multiline and isinstance(value, (list, tuple)): + if not value: + return ',' + elif len(value) == 1: + return self._quote(value[0], multiline=False) + ',' + return ', '.join([self._quote(val, multiline=False) + for val in value]) + if not isinstance(value, basestring): + if self.stringify: + value = str(value) + else: + raise TypeError('Value "%s" is not a string.' % value) + + if not value: + return '""' + + no_lists_no_quotes = not self.list_values and '\n' not in value and '#' not in value + need_triple = multiline and ((("'" in value) and ('"' in value)) or ('\n' in value )) + hash_triple_quote = multiline and not need_triple and ("'" in value) and ('"' in value) and ('#' in value) + check_for_single = (no_lists_no_quotes or not need_triple) and not hash_triple_quote + + if check_for_single: + if not self.list_values: + # we don't quote if ``list_values=False`` + quot = noquot + # for normal values either single or double quotes will do + elif '\n' in value: + # will only happen if multiline is off - e.g. '\n' in key + raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) + elif ((value[0] not in wspace_plus) and + (value[-1] not in wspace_plus) and + (',' not in value)): + quot = noquot + else: + quot = self._get_single_quote(value) + else: + # if value has '\n' or "'" *and* '"', it will need triple quotes + quot = self._get_triple_quote(value) + + if quot == noquot and '#' in value and self.list_values: + quot = self._get_single_quote(value) + + return quot % value + + + def _get_single_quote(self, value): + if ("'" in value) and ('"' in value): + raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) + elif '"' in value: + quot = squot + else: + quot = dquot + return quot + + + def _get_triple_quote(self, value): + if (value.find('"""') != -1) and (value.find("'''") != -1): + raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) + if value.find('"""') == -1: + quot = tdquot + else: + quot = tsquot + return quot + + + def _handle_value(self, value): + """ + Given a value string, unquote, remove comment, + handle lists. (including empty and single member lists) + """ + if self._inspec: + # Parsing a configspec so don't handle comments + return (value, '') + # do we look for lists in values ? + if not self.list_values: + mat = self._nolistvalue.match(value) + if mat is None: + raise SyntaxError() + # NOTE: we don't unquote here + return mat.groups() + # + mat = self._valueexp.match(value) + if mat is None: + # the value is badly constructed, probably badly quoted, + # or an invalid list + raise SyntaxError() + (list_values, single, empty_list, comment) = mat.groups() + if (list_values == '') and (single is None): + # change this if you want to accept empty values + raise SyntaxError() + # NOTE: note there is no error handling from here if the regex + # is wrong: then incorrect values will slip through + if empty_list is not None: + # the single comma - meaning an empty list + return ([], comment) + if single is not None: + # handle empty values + if list_values and not single: + # FIXME: the '' is a workaround because our regex now matches + # '' at the end of a list if it has a trailing comma + single = None + else: + single = single or '""' + single = self._unquote(single) + if list_values == '': + # not a list value + return (single, comment) + the_list = self._listvalueexp.findall(list_values) + the_list = [self._unquote(val) for val in the_list] + if single is not None: + the_list += [single] + return (the_list, comment) + + + def _multiline(self, value, infile, cur_index, maxline): + """Extract the value, where we are in a multiline situation.""" + quot = value[:3] + newvalue = value[3:] + single_line = self._triple_quote[quot][0] + multi_line = self._triple_quote[quot][1] + mat = single_line.match(value) + if mat is not None: + retval = list(mat.groups()) + retval.append(cur_index) + return retval + elif newvalue.find(quot) != -1: + # somehow the triple quote is missing + raise SyntaxError() + # + while cur_index < maxline: + cur_index += 1 + newvalue += '\n' + line = infile[cur_index] + if line.find(quot) == -1: + newvalue += line + else: + # end of multiline, process it + break + else: + # we've got to the end of the config, oops... + raise SyntaxError() + mat = multi_line.match(line) + if mat is None: + # a badly formed line + raise SyntaxError() + (value, comment) = mat.groups() + return (newvalue + value, comment, cur_index) + + + def _handle_configspec(self, configspec): + """Parse the configspec.""" + # FIXME: Should we check that the configspec was created with the + # correct settings ? (i.e. ``list_values=False``) + if not isinstance(configspec, ConfigObj): + try: + configspec = ConfigObj(configspec, + raise_errors=True, + file_error=True, + _inspec=True) + except ConfigObjError, e: + # FIXME: Should these errors have a reference + # to the already parsed ConfigObj ? + raise ConfigspecError('Parsing configspec failed: %s' % e) + except IOError, e: + raise IOError('Reading configspec failed: %s' % e) + + self.configspec = configspec + + + + def _set_configspec(self, section, copy): + """ + Called by validate. Handles setting the configspec on subsections + including sections to be validated by __many__ + """ + configspec = section.configspec + many = configspec.get('__many__') + if isinstance(many, dict): + for entry in section.sections: + if entry not in configspec: + section[entry].configspec = many + + for entry in configspec.sections: + if entry == '__many__': + continue + if entry not in section: + section[entry] = {} + section[entry]._created = True + if copy: + # copy comments + section.comments[entry] = configspec.comments.get(entry, []) + section.inline_comments[entry] = configspec.inline_comments.get(entry, '') + + # Could be a scalar when we expect a section + if isinstance(section[entry], Section): + section[entry].configspec = configspec[entry] + + + def _write_line(self, indent_string, entry, this_entry, comment): + """Write an individual line, for the write method""" + # NOTE: the calls to self._quote here handles non-StringType values. + if not self.unrepr: + val = self._decode_element(self._quote(this_entry)) + else: + val = repr(this_entry) + return '%s%s%s%s%s' % (indent_string, + self._decode_element(self._quote(entry, multiline=False)), + self._a_to_u(' = '), + val, + self._decode_element(comment)) + + + def _write_marker(self, indent_string, depth, entry, comment): + """Write a section marker line""" + return '%s%s%s%s%s' % (indent_string, + self._a_to_u('[' * depth), + self._quote(self._decode_element(entry), multiline=False), + self._a_to_u(']' * depth), + self._decode_element(comment)) + + + def _handle_comment(self, comment): + """Deal with a comment.""" + if not comment: + return '' + start = self.indent_type + if not comment.startswith('#'): + start += self._a_to_u(' # ') + return (start + comment) + + + # Public methods + + def write(self, outfile=None, section=None): + """ + Write the current ConfigObj as a file + + tekNico: FIXME: use StringIO instead of real files + + >>> filename = a.filename + >>> a.filename = 'test.ini' + >>> a.write() + >>> a.filename = filename + >>> a == ConfigObj('test.ini', raise_errors=True) + 1 + >>> import os + >>> os.remove('test.ini') + """ + if self.indent_type is None: + # this can be true if initialised from a dictionary + self.indent_type = DEFAULT_INDENT_TYPE + + out = [] + cs = self._a_to_u('#') + csp = self._a_to_u('# ') + if section is None: + int_val = self.interpolation + self.interpolation = False + section = self + for line in self.initial_comment: + line = self._decode_element(line) + stripped_line = line.strip() + if stripped_line and not stripped_line.startswith(cs): + line = csp + line + out.append(line) + + indent_string = self.indent_type * section.depth + for entry in (section.scalars + section.sections): + if entry in section.defaults: + # don't write out default values + continue + for comment_line in section.comments[entry]: + comment_line = self._decode_element(comment_line.lstrip()) + if comment_line and not comment_line.startswith(cs): + comment_line = csp + comment_line + out.append(indent_string + comment_line) + this_entry = section[entry] + comment = self._handle_comment(section.inline_comments[entry]) + + if isinstance(this_entry, dict): + # a section + out.append(self._write_marker( + indent_string, + this_entry.depth, + entry, + comment)) + out.extend(self.write(section=this_entry)) + else: + out.append(self._write_line( + indent_string, + entry, + this_entry, + comment)) + + if section is self: + for line in self.final_comment: + line = self._decode_element(line) + stripped_line = line.strip() + if stripped_line and not stripped_line.startswith(cs): + line = csp + line + out.append(line) + self.interpolation = int_val + + if section is not self: + return out + + if (self.filename is None) and (outfile is None): + # output a list of lines + # might need to encode + # NOTE: This will *screw* UTF16, each line will start with the BOM + if self.encoding: + out = [l.encode(self.encoding) for l in out] + if (self.BOM and ((self.encoding is None) or + (BOM_LIST.get(self.encoding.lower()) == 'utf_8'))): + # Add the UTF8 BOM + if not out: + out.append('') + out[0] = BOM_UTF8 + out[0] + return out + + # Turn the list to a string, joined with correct newlines + newline = self.newlines or os.linesep + if (getattr(outfile, 'mode', None) is not None and outfile.mode == 'w' + and sys.platform == 'win32' and newline == '\r\n'): + # Windows specific hack to avoid writing '\r\r\n' + newline = '\n' + output = self._a_to_u(newline).join(out) + if self.encoding: + output = output.encode(self.encoding) + if self.BOM and ((self.encoding is None) or match_utf8(self.encoding)): + # Add the UTF8 BOM + output = BOM_UTF8 + output + + if not output.endswith(newline): + output += newline + if outfile is not None: + outfile.write(output) + else: + h = open(self.filename, 'wb') + h.write(output) + h.close() + + + def validate(self, validator, preserve_errors=False, copy=False, + section=None): + """ + Test the ConfigObj against a configspec. + + It uses the ``validator`` object from *validate.py*. + + To run ``validate`` on the current ConfigObj, call: :: + + test = config.validate(validator) + + (Normally having previously passed in the configspec when the ConfigObj + was created - you can dynamically assign a dictionary of checks to the + ``configspec`` attribute of a section though). + + It returns ``True`` if everything passes, or a dictionary of + pass/fails (True/False). If every member of a subsection passes, it + will just have the value ``True``. (It also returns ``False`` if all + members fail). + + In addition, it converts the values from strings to their native + types if their checks pass (and ``stringify`` is set). + + If ``preserve_errors`` is ``True`` (``False`` is default) then instead + of a marking a fail with a ``False``, it will preserve the actual + exception object. This can contain info about the reason for failure. + For example the ``VdtValueTooSmallError`` indicates that the value + supplied was too small. If a value (or section) is missing it will + still be marked as ``False``. + + You must have the validate module to use ``preserve_errors=True``. + + You can then use the ``flatten_errors`` function to turn your nested + results dictionary into a flattened list of failures - useful for + displaying meaningful error messages. + """ + if section is None: + if self.configspec is None: + raise ValueError('No configspec supplied.') + if preserve_errors: + # We do this once to remove a top level dependency on the validate module + # Which makes importing configobj faster + from validate import VdtMissingValue + self._vdtMissingValue = VdtMissingValue + + section = self + + if copy: + section.initial_comment = section.configspec.initial_comment + section.final_comment = section.configspec.final_comment + section.encoding = section.configspec.encoding + section.BOM = section.configspec.BOM + section.newlines = section.configspec.newlines + section.indent_type = section.configspec.indent_type + + # + # section.default_values.clear() #?? + configspec = section.configspec + self._set_configspec(section, copy) + + + def validate_entry(entry, spec, val, missing, ret_true, ret_false): + section.default_values.pop(entry, None) + + try: + section.default_values[entry] = validator.get_default_value(configspec[entry]) + except (KeyError, AttributeError, validator.baseErrorClass): + # No default, bad default or validator has no 'get_default_value' + # (e.g. SimpleVal) + pass + + try: + check = validator.check(spec, + val, + missing=missing + ) + except validator.baseErrorClass, e: + if not preserve_errors or isinstance(e, self._vdtMissingValue): + out[entry] = False + else: + # preserve the error + out[entry] = e + ret_false = False + ret_true = False + else: + ret_false = False + out[entry] = True + if self.stringify or missing: + # if we are doing type conversion + # or the value is a supplied default + if not self.stringify: + if isinstance(check, (list, tuple)): + # preserve lists + check = [self._str(item) for item in check] + elif missing and check is None: + # convert the None from a default to a '' + check = '' + else: + check = self._str(check) + if (check != val) or missing: + section[entry] = check + if not copy and missing and entry not in section.defaults: + section.defaults.append(entry) + return ret_true, ret_false + + # + out = {} + ret_true = True + ret_false = True + + unvalidated = [k for k in section.scalars if k not in configspec] + incorrect_sections = [k for k in configspec.sections if k in section.scalars] + incorrect_scalars = [k for k in configspec.scalars if k in section.sections] + + for entry in configspec.scalars: + if entry in ('__many__', '___many___'): + # reserved names + continue + if (not entry in section.scalars) or (entry in section.defaults): + # missing entries + # or entries from defaults + missing = True + val = None + if copy and entry not in section.scalars: + # copy comments + section.comments[entry] = ( + configspec.comments.get(entry, [])) + section.inline_comments[entry] = ( + configspec.inline_comments.get(entry, '')) + # + else: + missing = False + val = section[entry] + + ret_true, ret_false = validate_entry(entry, configspec[entry], val, + missing, ret_true, ret_false) + + many = None + if '__many__' in configspec.scalars: + many = configspec['__many__'] + elif '___many___' in configspec.scalars: + many = configspec['___many___'] + + if many is not None: + for entry in unvalidated: + val = section[entry] + ret_true, ret_false = validate_entry(entry, many, val, False, + ret_true, ret_false) + unvalidated = [] + + for entry in incorrect_scalars: + ret_true = False + if not preserve_errors: + out[entry] = False + else: + ret_false = False + msg = 'Value %r was provided as a section' % entry + out[entry] = validator.baseErrorClass(msg) + for entry in incorrect_sections: + ret_true = False + if not preserve_errors: + out[entry] = False + else: + ret_false = False + msg = 'Section %r was provided as a single value' % entry + out[entry] = validator.baseErrorClass(msg) + + # Missing sections will have been created as empty ones when the + # configspec was read. + for entry in section.sections: + # FIXME: this means DEFAULT is not copied in copy mode + if section is self and entry == 'DEFAULT': + continue + if section[entry].configspec is None: + unvalidated.append(entry) + continue + if copy: + section.comments[entry] = configspec.comments.get(entry, []) + section.inline_comments[entry] = configspec.inline_comments.get(entry, '') + check = self.validate(validator, preserve_errors=preserve_errors, copy=copy, section=section[entry]) + out[entry] = check + if check == False: + ret_true = False + elif check == True: + ret_false = False + else: + ret_true = False + + section.extra_values = unvalidated + if preserve_errors and not section._created: + # If the section wasn't created (i.e. it wasn't missing) + # then we can't return False, we need to preserve errors + ret_false = False + # + if ret_false and preserve_errors and out: + # If we are preserving errors, but all + # the failures are from missing sections / values + # then we can return False. Otherwise there is a + # real failure that we need to preserve. + ret_false = not any(out.values()) + if ret_true: + return True + elif ret_false: + return False + return out + + + def reset(self): + """Clear ConfigObj instance and restore to 'freshly created' state.""" + self.clear() + self._initialise() + # FIXME: Should be done by '_initialise', but ConfigObj constructor (and reload) + # requires an empty dictionary + self.configspec = None + # Just to be sure ;-) + self._original_configspec = None + + + def reload(self): + """ + Reload a ConfigObj from file. + + This method raises a ``ReloadError`` if the ConfigObj doesn't have + a filename attribute pointing to a file. + """ + if not isinstance(self.filename, basestring): + raise ReloadError() + + filename = self.filename + current_options = {} + for entry in OPTION_DEFAULTS: + if entry == 'configspec': + continue + current_options[entry] = getattr(self, entry) + + configspec = self._original_configspec + current_options['configspec'] = configspec + + self.clear() + self._initialise(current_options) + self._load(filename, configspec) + + + +class SimpleVal(object): + """ + A simple validator. + Can be used to check that all members expected are present. + + To use it, provide a configspec with all your members in (the value given + will be ignored). Pass an instance of ``SimpleVal`` to the ``validate`` + method of your ``ConfigObj``. ``validate`` will return ``True`` if all + members are present, or a dictionary with True/False meaning + present/missing. (Whole missing sections will be replaced with ``False``) + """ + + def __init__(self): + self.baseErrorClass = ConfigObjError + + def check(self, check, member, missing=False): + """A dummy check method, always returns the value unchanged.""" + if missing: + raise self.baseErrorClass() + return member + + +def flatten_errors(cfg, res, levels=None, results=None): + """ + An example function that will turn a nested dictionary of results + (as returned by ``ConfigObj.validate``) into a flat list. + + ``cfg`` is the ConfigObj instance being checked, ``res`` is the results + dictionary returned by ``validate``. + + (This is a recursive function, so you shouldn't use the ``levels`` or + ``results`` arguments - they are used by the function.) + + Returns a list of keys that failed. Each member of the list is a tuple:: + + ([list of sections...], key, result) + + If ``validate`` was called with ``preserve_errors=False`` (the default) + then ``result`` will always be ``False``. + + *list of sections* is a flattened list of sections that the key was found + in. + + If the section was missing (or a section was expected and a scalar provided + - or vice-versa) then key will be ``None``. + + If the value (or section) was missing then ``result`` will be ``False``. + + If ``validate`` was called with ``preserve_errors=True`` and a value + was present, but failed the check, then ``result`` will be the exception + object returned. You can use this as a string that describes the failure. + + For example *The value "3" is of the wrong type*. + """ + if levels is None: + # first time called + levels = [] + results = [] + if res == True: + return results + if res == False or isinstance(res, Exception): + results.append((levels[:], None, res)) + if levels: + levels.pop() + return results + for (key, val) in res.items(): + if val == True: + continue + if isinstance(cfg.get(key), dict): + # Go down one level + levels.append(key) + flatten_errors(cfg[key], val, levels, results) + continue + results.append((levels[:], key, val)) + # + # Go up one level + if levels: + levels.pop() + # + return results + + +def get_extra_values(conf, _prepend=()): + """ + Find all the values and sections not in the configspec from a validated + ConfigObj. + + ``get_extra_values`` returns a list of tuples where each tuple represents + either an extra section, or an extra value. + + The tuples contain two values, a tuple representing the section the value + is in and the name of the extra values. For extra values in the top level + section the first member will be an empty tuple. For values in the 'foo' + section the first member will be ``('foo',)``. For members in the 'bar' + subsection of the 'foo' section the first member will be ``('foo', 'bar')``. + + NOTE: If you call ``get_extra_values`` on a ConfigObj instance that hasn't + been validated it will return an empty list. + """ + out = [] + + out.extend([(_prepend, name) for name in conf.extra_values]) + for name in conf.sections: + if name not in conf.extra_values: + out.extend(get_extra_values(conf[name], _prepend + (name,))) + return out + + +"""*A programming language is a medium of expression.* - Paul Graham""" diff --git a/python/pkg/cdec/sa/__init__.py b/python/pkg/cdec/sa/__init__.py new file mode 100644 index 00000000..fd4a4148 --- /dev/null +++ b/python/pkg/cdec/sa/__init__.py @@ -0,0 +1,4 @@ +from cdec.sa._sa import sym_fromstring,\ + SuffixArray, DataArray, LCP, Precomputation, Alignment, BiLex,\ + HieroCachingRuleFactory, Sampler +from cdec.sa.extractor import GrammarExtractor diff --git a/python/pkg/cdec/sa/compile.py b/python/pkg/cdec/sa/compile.py new file mode 100644 index 00000000..30e605a6 --- /dev/null +++ b/python/pkg/cdec/sa/compile.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python +import argparse +import os +import logging +import cdec.configobj +import cdec.sa + +MAX_PHRASE_LENGTH = 4 +def precompute(f_sa, max_len, max_nt, max_size, min_gap, rank1, rank2): + lcp = cdec.sa.LCP(f_sa) + stats = sorted(lcp.compute_stats(MAX_PHRASE_LENGTH), reverse=True) + precomp = cdec.sa.Precomputation(from_stats=stats, + fsarray=f_sa, + precompute_rank=rank1, + precompute_secondary_rank=rank2, + max_length=max_len, + max_nonterminals=max_nt, + train_max_initial_size=max_size, + train_min_gap_size=min_gap) + return precomp + +def main(): + logging.basicConfig(level=logging.INFO) + logger = logging.getLogger('cdec.sa.compile') + parser = argparse.ArgumentParser(description='Compile a corpus into a suffix array.') + parser.add_argument('--maxnt', '-n', type=int, default=2, + help='Maximum number of non-terminal symbols') + parser.add_argument('--maxlen', '-l', type=int, default=5, + help='Maximum number of terminals') + parser.add_argument('--maxsize', '-s', type=int, default=15, + help='Maximum rule span') + parser.add_argument('--mingap', '-g', type=int, default=1, + help='Minimum gap size') + parser.add_argument('--rank1', '-r1', type=int, default=100, + help='Number of pre-computed frequent patterns') + parser.add_argument('--rank2', '-r2', type=int, default=10, + help='Number of pre-computed super-frequent patterns)') + parser.add_argument('-c', '--config', default='/dev/stdout', + help='Output configuration') + parser.add_argument('-o', '--output', required=True, + help='Output path') + parser.add_argument('-f', '--source', required=True, + help='Source language corpus') + parser.add_argument('-e', '--target', required=True, + help='Target language corpus') + parser.add_argument('-a', '--alignment', required=True, + help='Bitext word alignment') + args = parser.parse_args() + + param_names = ("max_len", "max_nt", "max_size", "min_gap", "rank1", "rank2") + params = (args.maxlen, args.maxnt, args.maxsize, args.mingap, args.rank1, args.rank2) + + if not os.path.exists(args.output): + os.mkdir(args.output) + + f_sa_bin = os.path.join(args.output, 'f.sa.bin') + e_bin = os.path.join(args.output, 'e.bin') + precomp_file = 'precomp.{0}.{1}.{2}.{3}.{4}.{5}.bin'.format(*params) + precomp_bin = os.path.join(args.output, precomp_file) + a_bin = os.path.join(args.output, 'a.bin') + lex_bin = os.path.join(args.output, 'lex.bin') + + logger.info('Compiling source suffix array') + f_sa = cdec.sa.SuffixArray(from_text=args.source) + f_sa.write_binary(f_sa_bin) + + logger.info('Compiling target data array') + e = cdec.sa.DataArray(from_text=args.target) + e.write_binary(e_bin) + + logger.info('Precomputing frequent phrases') + precompute(f_sa, *params).write_binary(precomp_bin) + + logger.info('Compiling alignment') + a = cdec.sa.Alignment(from_text=args.alignment) + a.write_binary(a_bin) + + logger.info('Compiling bilexical dictionary') + lex = cdec.sa.BiLex(from_data=True, alignment=a, earray=e, fsarray=f_sa) + lex.write_binary(lex_bin) + + # Write configuration + config = cdec.configobj.ConfigObj(args.config, unrepr=True) + config['f_sa_file'] = f_sa_bin + config['e_file'] = e_bin + config['a_file'] = a_bin + config['lex_file'] = lex_bin + config['precompute_file'] = precomp_bin + for name, value in zip(param_names, params): + config[name] = value + config.write() + +if __name__ == '__main__': + main() diff --git a/python/pkg/cdec/sa/extract.py b/python/pkg/cdec/sa/extract.py new file mode 100644 index 00000000..918aa3bb --- /dev/null +++ b/python/pkg/cdec/sa/extract.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python +import sys +import os +import argparse +import logging +import cdec.sa + +def main(): + logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser(description='Extract grammars from a compiled corpus.') + parser.add_argument('-c', '--config', required=True, + help='Extractor configuration') + parser.add_argument('-g', '--grammars', required=True, + help='Grammar output path') + args = parser.parse_args() + + if not os.path.exists(args.grammars): + os.mkdir(args.grammars) + + extractor = cdec.sa.GrammarExtractor(args.config) + for i, sentence in enumerate(sys.stdin): + sentence = sentence[:-1] + grammar_file = os.path.join(args.grammars, 'grammar.{0}'.format(i)) + with open(grammar_file, 'w') as output: + for rule in extractor.grammar(sentence): + output.write(str(rule)+'\n') + grammar_file = os.path.abspath(grammar_file) + print('{1}'.format(grammar_file, sentence)) + +if __name__ == '__main__': + main() diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py new file mode 100644 index 00000000..bb912e16 --- /dev/null +++ b/python/pkg/cdec/sa/extractor.py @@ -0,0 +1,78 @@ +from itertools import chain +import os +import cdec.configobj +from cdec.sa.features import EgivenFCoherent, SampleCountF, CountEF,\ + MaxLexEgivenF, MaxLexFgivenE, IsSingletonF, IsSingletonFE +import cdec.sa + +# maximum span of a grammar rule in TEST DATA +MAX_INITIAL_SIZE = 15 + +class GrammarExtractor: + def __init__(self, config): + if isinstance(config, str) or isinstance(config, unicode): + if not os.path.exists(config): + raise IOError('cannot read configuration from {0}'.format(config)) + config = cdec.configobj.ConfigObj(config, unrepr=True) + alignment = cdec.sa.Alignment(from_binary=config['a_file']) + self.factory = cdec.sa.HieroCachingRuleFactory( + # compiled alignment object (REQUIRED) + alignment, + # name of generic nonterminal used by Hiero + category="[X]", + # maximum number of contiguous chunks of terminal symbols in RHS of a rule + max_chunks=config['max_nt']+1, + # maximum span of a grammar rule in TEST DATA + max_initial_size=MAX_INITIAL_SIZE, + # maximum number of symbols (both T and NT) allowed in a rule + max_length=config['max_len'], + # maximum number of nonterminals allowed in a rule (set >2 at your own risk) + max_nonterminals=config['max_nt'], + # maximum number of contiguous chunks of terminal symbols + # in target-side RHS of a rule. + max_target_chunks=config['max_nt']+1, + # maximum number of target side symbols (both T and NT) allowed in a rule. + max_target_length=MAX_INITIAL_SIZE, + # minimum span of a nonterminal in the RHS of a rule in TEST DATA + min_gap_size=1, + # filename of file containing precomputed collocations + precompute_file=config['precompute_file'], + # maximum frequency rank of patterns used to compute triples (< 20) + precompute_secondary_rank=config['rank2'], + # maximum frequency rank of patterns used to compute collocations (< 300) + precompute_rank=config['rank1'], + # require extracted rules to have at least one aligned word + require_aligned_terminal=True, + # require each contiguous chunk of extracted rules + # to have at least one aligned word + require_aligned_chunks=False, + # maximum span of a grammar rule extracted from TRAINING DATA + train_max_initial_size=config['max_size'], + # minimum span of an RHS nonterminal in a rule extracted from TRAINING DATA + train_min_gap_size=config['min_gap'], + # True if phrases should be tight, False otherwise (better but slower) + tight_phrases=True, + ) + + # lexical weighting tables + tt = cdec.sa.BiLex(from_binary=config['lex_file']) + + self.models = (EgivenFCoherent, SampleCountF, CountEF, + MaxLexFgivenE(tt), MaxLexEgivenF(tt), IsSingletonF, IsSingletonFE) + + fsarray = cdec.sa.SuffixArray(from_binary=config['f_sa_file']) + edarray = cdec.sa.DataArray(from_binary=config['e_file']) + + # lower=faster, higher=better; improvements level off above 200-300 range, + # -1 = don't sample, use all data (VERY SLOW!) + sampler = cdec.sa.Sampler(300, fsarray) + + self.factory.configure(fsarray, edarray, sampler) + + def grammar(self, sentence): + if isinstance(sentence, unicode): + sentence = sentence.encode('utf8') + cnet = chain(('',), sentence.split(), ('',)) + cnet = (cdec.sa.sym_fromstring(word, terminal=True) for word in cnet) + cnet = tuple(((word, None, 1), ) for word in cnet) + return self.factory.input(cnet, self.models) diff --git a/python/pkg/cdec/sa/features.py b/python/pkg/cdec/sa/features.py new file mode 100644 index 00000000..325b9e13 --- /dev/null +++ b/python/pkg/cdec/sa/features.py @@ -0,0 +1,57 @@ +from __future__ import division +import math + +MAXSCORE = 99 + +def EgivenF(fphrase, ephrase, paircount, fcount, fsample_count): # p(e|f) + return -math.log10(paircount/fcount) + +def CountEF(fphrase, ephrase, paircount, fcount, fsample_count): + return math.log10(1 + paircount) + +def SampleCountF(fphrase, ephrase, paircount, fcount, fsample_count): + return math.log10(1 + fsample_count) + +def EgivenFCoherent(fphrase, ephrase, paircount, fcount, fsample_count): + prob = paircount/fsample_count + return -math.log10(prob) if prob > 0 else MAXSCORE + +def CoherenceProb(fphrase, ephrase, paircount, fcount, fsample_count): + return -math.log10(fcount/fsample_count) + +def MaxLexEgivenF(ttable): + def feature(fphrase, ephrase, paircount, fcount, fsample_count): + fwords = fphrase.words + fwords.append('NULL') + def score(): + for e in ephrase.words: + maxScore = max(ttable.get_score(f, e, 0) for f in fwords) + yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE + return sum(score()) + return feature + +def MaxLexFgivenE(ttable): + def feature(fphrase, ephrase, paircount, fcount, fsample_count): + ewords = ephrase.words + ewords.append('NULL') + def score(): + for f in fphrase.words: + maxScore = max(ttable.get_score(f, e, 1) for e in ewords) + yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE + return sum(score()) + return feature + +def IsSingletonF(fphrase, ephrase, paircount, fcount, fsample_count): + return (fcount == 1) + +def IsSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count): + return (paircount == 1) + +def IsNotSingletonF(fphrase, ephrase, paircount, fcount, fsample_count): + return (fcount > 1) + +def IsNotSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count): + return (paircount > 1) + +def IsFEGreaterThanZero(fphrase, ephrase, paircount, fcount, fsample_count): + return (paircount > 0.01) diff --git a/python/pkg/cdec/score.py b/python/pkg/cdec/score.py new file mode 100644 index 00000000..22257774 --- /dev/null +++ b/python/pkg/cdec/score.py @@ -0,0 +1 @@ +from _cdec import BLEU, TER, CER, Metric diff --git a/python/setup.py b/python/setup.py index 1d1d7e45..7be976e8 100644 --- a/python/setup.py +++ b/python/setup.py @@ -48,5 +48,6 @@ setup( name='cdec', ext_modules=ext_modules, requires=['configobj'], - packages=['cdec', 'cdec.sa'] + packages=['cdec', 'cdec.sa'], + package_dir={'': 'pkg'} ) diff --git a/python/src/sa/_sa.c b/python/src/sa/_sa.c index 34f170bf..b7f3627a 100644 --- a/python/src/sa/_sa.c +++ b/python/src/sa/_sa.c @@ -1,4 +1,4 @@ -/* Generated by Cython 0.17.beta1 on Fri Jul 27 22:15:31 2012 */ +/* Generated by Cython 0.17.beta1 on Fri Jul 27 23:31:04 2012 */ #define PY_SSIZE_T_CLEAN #include "Python.h" @@ -2013,7 +2013,7 @@ static int __pyx_pf_3_sa_8Alphabet___cinit__(struct __pyx_obj_3_sa_Alphabet *__p static void __pyx_pf_3_sa_8Alphabet_2__dealloc__(CYTHON_UNUSED struct __pyx_obj_3_sa_Alphabet *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_3_sa_8Alphabet_9terminals___get__(struct __pyx_obj_3_sa_Alphabet *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_3_sa_8Alphabet_12nonterminals___get__(struct __pyx_obj_3_sa_Alphabet *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_3_sa_2sym_fromstring(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string, int __pyx_v_terminal); /* proto */ +static PyObject *__pyx_pf_3_sa_2sym_fromstring(CYTHON_UNUSED PyObject *__pyx_self, char *__pyx_v_string, int __pyx_v_terminal); /* proto */ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v_self, PyObject *__pyx_v_words); /* proto */ static void __pyx_pf_3_sa_6Phrase_2__dealloc__(struct __pyx_obj_3_sa_Phrase *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_3_sa_6Phrase_4__str__(struct __pyx_obj_3_sa_Phrase *__pyx_v_self); /* proto */ @@ -21750,7 +21750,7 @@ static int __pyx_f_3_sa_sym_setindex(int __pyx_v_sym, int __pyx_v_id) { * cdef int sym_setindex(int sym, int id): * return ALPHABET.setindex(sym, id) # <<<<<<<<<<<<<< * - * def sym_fromstring(bytes string, bint terminal): + * def sym_fromstring(char* string, bint terminal): */ __pyx_r = ((struct __pyx_vtabstruct_3_sa_Alphabet *)__pyx_v_3_sa_ALPHABET->__pyx_vtab)->setindex(__pyx_v_3_sa_ALPHABET, __pyx_v_sym, __pyx_v_id); goto __pyx_L0; @@ -21765,7 +21765,7 @@ static int __pyx_f_3_sa_sym_setindex(int __pyx_v_sym, int __pyx_v_id) { static PyObject *__pyx_pw_3_sa_3sym_fromstring(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ static PyMethodDef __pyx_mdef_3_sa_3sym_fromstring = {__Pyx_NAMESTR("sym_fromstring"), (PyCFunction)__pyx_pw_3_sa_3sym_fromstring, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}; static PyObject *__pyx_pw_3_sa_3sym_fromstring(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_string = 0; + char *__pyx_v_string; int __pyx_v_terminal; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations @@ -21802,7 +21802,7 @@ static PyObject *__pyx_pw_3_sa_3sym_fromstring(PyObject *__pyx_self, PyObject *_ values[0] = PyTuple_GET_ITEM(__pyx_args, 0); values[1] = PyTuple_GET_ITEM(__pyx_args, 1); } - __pyx_v_string = ((PyObject*)values[0]); + __pyx_v_string = PyBytes_AsString(values[0]); if (unlikely((!__pyx_v_string) && PyErr_Occurred())) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_terminal = __Pyx_PyObject_IsTrue(values[1]); if (unlikely((__pyx_v_terminal == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; @@ -21813,12 +21813,7 @@ static PyObject *__pyx_pw_3_sa_3sym_fromstring(PyObject *__pyx_self, PyObject *_ __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_string), (&PyBytes_Type), 1, "string", 1))) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = __pyx_pf_3_sa_2sym_fromstring(__pyx_self, __pyx_v_string, __pyx_v_terminal); - goto __pyx_L0; - __pyx_L1_error:; - __pyx_r = NULL; - __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; } @@ -21826,15 +21821,14 @@ static PyObject *__pyx_pw_3_sa_3sym_fromstring(PyObject *__pyx_self, PyObject *_ /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":104 * return ALPHABET.setindex(sym, id) * - * def sym_fromstring(bytes string, bint terminal): # <<<<<<<<<<<<<< + * def sym_fromstring(char* string, bint terminal): # <<<<<<<<<<<<<< * return ALPHABET.fromstring(string, terminal) */ -static PyObject *__pyx_pf_3_sa_2sym_fromstring(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string, int __pyx_v_terminal) { +static PyObject *__pyx_pf_3_sa_2sym_fromstring(CYTHON_UNUSED PyObject *__pyx_self, char *__pyx_v_string, int __pyx_v_terminal) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations - char *__pyx_t_1; - PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_1 = NULL; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; @@ -21842,21 +21836,20 @@ static PyObject *__pyx_pf_3_sa_2sym_fromstring(CYTHON_UNUSED PyObject *__pyx_sel /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":105 * - * def sym_fromstring(bytes string, bint terminal): + * def sym_fromstring(char* string, bint terminal): * return ALPHABET.fromstring(string, terminal) # <<<<<<<<<<<<<< */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyBytes_AsString(((PyObject *)__pyx_v_string)); if (unlikely((!__pyx_t_1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = PyInt_FromLong(((struct __pyx_vtabstruct_3_sa_Alphabet *)__pyx_v_3_sa_ALPHABET->__pyx_vtab)->fromstring(__pyx_v_3_sa_ALPHABET, __pyx_t_1, __pyx_v_terminal)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; + __pyx_t_1 = PyInt_FromLong(((struct __pyx_vtabstruct_3_sa_Alphabet *)__pyx_v_3_sa_ALPHABET->__pyx_vtab)->fromstring(__pyx_v_3_sa_ALPHABET, __pyx_v_string, __pyx_v_terminal)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; goto __pyx_L0; __pyx_r = Py_None; __Pyx_INCREF(Py_None); goto __pyx_L0; __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_1); __Pyx_AddTraceback("_sa.sym_fromstring", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; @@ -60511,7 +60504,7 @@ static int __Pyx_InitCachedConstants(void) { /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":104 * return ALPHABET.setindex(sym, id) * - * def sym_fromstring(bytes string, bint terminal): # <<<<<<<<<<<<<< + * def sym_fromstring(char* string, bint terminal): # <<<<<<<<<<<<<< * return ALPHABET.fromstring(string, terminal) */ __pyx_k_tuple_137 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_137)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -60933,7 +60926,7 @@ PyMODINIT_FUNC PyInit__sa(void) /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":104 * return ALPHABET.setindex(sym, id) * - * def sym_fromstring(bytes string, bint terminal): # <<<<<<<<<<<<<< + * def sym_fromstring(char* string, bint terminal): # <<<<<<<<<<<<<< * return ALPHABET.fromstring(string, terminal) */ __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_3_sa_3sym_fromstring, NULL, __pyx_n_s___sa); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} diff --git a/python/src/sa/sym.pxi b/python/src/sa/sym.pxi index 4b41886f..132925f6 100644 --- a/python/src/sa/sym.pxi +++ b/python/src/sa/sym.pxi @@ -101,5 +101,5 @@ cdef int sym_getindex(int sym): cdef int sym_setindex(int sym, int id): return ALPHABET.setindex(sym, id) -def sym_fromstring(bytes string, bint terminal): +def sym_fromstring(char* string, bint terminal): return ALPHABET.fromstring(string, terminal) -- cgit v1.2.3