diff options
70 files changed, 2775 insertions, 3482 deletions
@@ -76,7 +76,6 @@ dpmert/scorer_test dpmert/sentclient dpmert/sentserver dpmert/union_forests -dtrain/dtrain extools/build_lexical_translation extools/extractor extools/extractor_monolingual @@ -226,6 +225,7 @@ training/dpmert/mr_dpmert_reduce training/dpmert/sentclient training/dpmert/sentserver training/dtrain/dtrain +training/dtrain/example/ training/latent_svm/latent_svm training/minrisk/minrisk_optimize training/mira/ada_opt_sm diff --git a/CMakeLists.txt b/CMakeLists.txt index 19e3cfd4..708ab28f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project(cdec) add_definitions(-DKENLM_MAX_ORDER=6 -DHAVE_CONFIG_H) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) -set(CMAKE_CXX_FLAGS "-Wall -std=c++11 -O3 -g") +set(CMAKE_CXX_FLAGS "-Wall -std=c++11 -O3 -g -fPIC") set(METEOR_JAR "" CACHE FILEPATH "Path to meteor.jar") enable_testing() @@ -43,6 +43,9 @@ endif(LIBLZMA_FOUND) # for pycdec find_package(PythonInterp 2.7 REQUIRED) +# for pthread +find_package(Threads REQUIRED) + # generate config.h configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h) diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm index 29a1cbea..f39e6000 100644 --- a/environment/LocalConfig.pm +++ b/environment/LocalConfig.pm @@ -69,10 +69,10 @@ my $CCONFIG = { # 'DefaultJobs' => 12, # }, 'cluster.cl.uni-heidelberg.de' => { - 'HOST_REGEXP' => qr/(node\d\d\.cluster\.lan|cluster\.cl\.uni-heidelberg\.de)/i, + 'HOST_REGEXP' => qr/^(CLuster|node\d\d\.cluster\.lan|cluster\.cl\.uni-heidelberg\.de)$/, 'JobControl' => 'qsub', - 'QSubMemFlag' => '-l mem_free=', - 'DefaultJobs' => 14, + 'QSubMemFlag' => '-l h_vmem=', + 'DefaultJobs' => 16, }, 'LOCAL' => { # LOCAL must be last in the list!!! 'HOST_REGEXP' => qr//, diff --git a/extractor/extract.cc b/extractor/extract.cc index 08f209cc..b16a4e1c 100644 --- a/extractor/extract.cc +++ b/extractor/extract.cc @@ -14,7 +14,6 @@ const unsigned omp_get_num_threads() { return 1; } #endif -#include "filelib.h" #include "alignment.h" #include "data_array.h" #include "features/count_source_target.h" @@ -25,6 +24,7 @@ #include "features/max_lex_target_given_source.h" #include "features/sample_source_count.h" #include "features/target_given_source_coherent.h" +#include "filelib.h" #include "grammar.h" #include "grammar_extractor.h" #include "precomputation.h" diff --git a/extractor/run_extractor.cc b/extractor/run_extractor.cc index 00564a36..81d0d8be 100644 --- a/extractor/run_extractor.cc +++ b/extractor/run_extractor.cc @@ -24,6 +24,7 @@ #include "features/max_lex_target_given_source.h" #include "features/sample_source_count.h" #include "features/target_given_source_coherent.h" +#include "filelib.h" #include "grammar.h" #include "grammar_extractor.h" #include "precomputation.h" @@ -41,8 +42,8 @@ using namespace extractor; using namespace features; // Returns the file path in which a given grammar should be written. -fs::path GetGrammarFilePath(const fs::path& grammar_path, int file_number) { - string file_name = "grammar." + to_string(file_number); +fs::path GetGrammarFilePath(const fs::path& grammar_path, int file_number, bool use_zip) { + string file_name = "grammar." + to_string(file_number) + (use_zip ? ".gz" : ""); return grammar_path / file_name; } @@ -61,6 +62,7 @@ int main(int argc, char** argv) { ("bitext,b", po::value<string>(), "Parallel text (source ||| target)") ("alignment,a", po::value<string>()->required(), "Bitext word alignment") ("grammars,g", po::value<string>()->required(), "Grammars output path") + ("gzip,z", "Gzip grammars") ("threads,t", po::value<int>()->default_value(1), threads_option.c_str()) ("frequent", po::value<int>()->default_value(100), "Number of precomputed frequent patterns") @@ -205,6 +207,7 @@ int main(int argc, char** argv) { vm["max_rule_symbols"].as<int>(), vm["max_samples"].as<int>(), vm["tight_phrases"].as<bool>()); + const bool use_zip = vm.count("gzip"); // Creates the grammars directory if it doesn't exist. fs::path grammar_path = vm["grammars"].as<string>(); @@ -239,12 +242,12 @@ int main(int argc, char** argv) { } Grammar grammar = extractor.GetGrammar( sentences[i], blacklisted_sentence_ids); - ofstream output(GetGrammarFilePath(grammar_path, i).c_str()); - output << grammar; + WriteFile wf(GetGrammarFilePath(grammar_path, i, use_zip).c_str()); + *wf.stream() << grammar; } for (size_t i = 0; i < sentences.size(); ++i) { - cout << "<seg grammar=" << GetGrammarFilePath(grammar_path, i) << " id=\"" + cout << "<seg grammar=" << GetGrammarFilePath(grammar_path, i, use_zip) << " id=\"" << i << "\"> " << sentences[i] << " </seg> " << suffixes[i] << endl; } diff --git a/klm/lm/builder/CMakeLists.txt b/klm/lm/builder/CMakeLists.txt index e52875cb..5da01261 100644 --- a/klm/lm/builder/CMakeLists.txt +++ b/klm/lm/builder/CMakeLists.txt @@ -24,8 +24,9 @@ set(lmplz_SRCS sort.hh) add_executable(lmplz ${lmplz_SRCS}) -target_link_libraries(lmplz klm klm_util_double klm_util_stream klm_util ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${BZIP2_LIBRARIES} ${LIBLZMA_LIBRARIES}) +target_link_libraries(lmplz klm klm_util_double klm_util_stream klm_util ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${BZIP2_LIBRARIES} ${LIBLZMA_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) set(dump_counts_SRCS print.cc dump_counts_main.cc) add_executable(dump_counts ${dump_counts_SRCS}) -target_link_libraries(dump_counts klm klm_util_double klm_util_stream klm_util ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${BZIP2_LIBRARIES} ${LIBLZMA_LIBRARIES}) +target_link_libraries(dump_counts klm klm_util_double klm_util_stream klm_util ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${BZIP2_LIBRARIES} ${LIBLZMA_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) + diff --git a/python/README.md b/python/README.md index 03d9f31d..75207125 100644 --- a/python/README.md +++ b/python/README.md @@ -16,6 +16,10 @@ To re-build pycdec from the cython source, modify setup.py in the following ways Then just build and install normally, as described above. +To rebuild cdec/\_cdec.cpp, run: + + cython --cplus \_cdec.pyx + ## Grammar extractor Compile a parallel corpus and a word alignment into a suffix array representation: diff --git a/python/cdec/_cdec.cpp b/python/cdec/_cdec.cpp index e76eb468..8bfeb540 100644 --- a/python/cdec/_cdec.cpp +++ b/python/cdec/_cdec.cpp @@ -1,4 +1,4 @@ -/* Generated by Cython 0.21 */ +/* Generated by Cython 0.22 */ #define PY_SSIZE_T_CLEAN #ifndef CYTHON_USE_PYLONG_INTERNALS @@ -19,7 +19,7 @@ #elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03020000) #error Cython requires Python 2.6+ or Python 3.2+. #else -#define CYTHON_ABI "0_21" +#define CYTHON_ABI "0_22" #include <stddef.h> #ifndef offsetof #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) @@ -54,7 +54,7 @@ #define CYTHON_COMPILING_IN_PYPY 0 #define CYTHON_COMPILING_IN_CPYTHON 1 #endif -#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 +#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag) #define Py_OptimizeFlag 0 #endif #define __PYX_BUILD_PY_SSIZE_T "n" @@ -73,8 +73,6 @@ #if PY_MAJOR_VERSION >= 3 #define Py_TPFLAGS_CHECKTYPES 0 #define Py_TPFLAGS_HAVE_INDEX 0 -#endif -#if PY_MAJOR_VERSION >= 3 #define Py_TPFLAGS_HAVE_NEWBUFFER 0 #endif #if PY_VERSION_HEX < 0x030400a1 && !defined(Py_TPFLAGS_HAVE_FINALIZE) @@ -101,10 +99,12 @@ #if CYTHON_COMPILING_IN_PYPY #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) + #define __Pyx_PyFrozenSet_Size(s) PyObject_Size(s) #else #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ? \ PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) + #define __Pyx_PyFrozenSet_Size(s) PySet_Size(s) #endif #define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) #define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) @@ -151,6 +151,11 @@ #if PY_MAJOR_VERSION >= 3 #define PyBoolObject PyLongObject #endif +#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY + #ifndef PyUnicode_InternFromString + #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) + #endif +#endif #if PY_VERSION_HEX < 0x030200A4 typedef long Py_hash_t; #define __Pyx_PyInt_FromHash_t PyInt_FromLong @@ -160,7 +165,9 @@ #define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t #endif #if PY_MAJOR_VERSION >= 3 - #define PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func)) + #define __Pyx_PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func)) +#else + #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass) #endif #ifndef CYTHON_INLINE #if defined(__GNUC__) @@ -196,11 +203,22 @@ static CYTHON_INLINE float __PYX_NAN() { return value; } #endif +#define __Pyx_void_to_None(void_result) (void_result, Py_INCREF(Py_None), Py_None) #ifdef __cplusplus template<typename T> void __Pyx_call_destructor(T* x) { x->~T(); } +template<typename T> +class __Pyx_FakeReference { + public: + __Pyx_FakeReference() : ptr(NULL) { } + __Pyx_FakeReference(T& ref) : ptr(&ref) { } + T *operator->() { return ptr; } + operator T&() { return *ptr; } + private: + T *ptr; +}; #endif @@ -320,11 +338,11 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); #endif #define __Pyx_PyObject_AsSString(s) ((signed char*) __Pyx_PyObject_AsString(s)) #define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_FromUString(s) __Pyx_PyObject_FromString((const char*)s) -#define __Pyx_PyBytes_FromUString(s) __Pyx_PyBytes_FromString((const char*)s) -#define __Pyx_PyByteArray_FromUString(s) __Pyx_PyByteArray_FromString((const char*)s) -#define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((const char*)s) -#define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((const char*)s) +#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) +#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) +#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) +#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) +#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) #if PY_MAJOR_VERSION < 3 static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) { @@ -360,7 +378,7 @@ static int __Pyx_init_sys_getdefaultencoding_params(void) { const char* default_encoding_c; sys = PyImport_ImportModule("sys"); if (!sys) goto bad; - default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); + default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); Py_DECREF(sys); if (!default_encoding) goto bad; default_encoding_c = PyBytes_AsString(default_encoding); @@ -453,7 +471,7 @@ static const char *__pyx_f[] = { "cdec/hypergraph.pxi", "cdec/lattice.pxi", "cdec/mteval.pxi", - "stringsource", + "cdec/stringsource", "cdec/sa/_sa.pxd", }; @@ -609,7 +627,7 @@ struct __pyx_obj_4cdec_2sa_3_sa_Rule { }; -/* "cdec/vectors.pxi":3 +/* "vectors.pxi":3 * from cython.operator cimport preincrement as pinc * * cdef class DenseVector: # <<<<<<<<<<<<<< @@ -623,7 +641,7 @@ struct __pyx_obj_4cdec_5_cdec_DenseVector { }; -/* "cdec/vectors.pxi":48 +/* "vectors.pxi":48 * return sparse * * cdef class SparseVector: # <<<<<<<<<<<<<< @@ -636,7 +654,7 @@ struct __pyx_obj_4cdec_5_cdec_SparseVector { }; -/* "cdec/grammar.pxi":8 +/* "grammar.pxi":8 * return ' '.join(w.encode('utf8') if isinstance(w, unicode) else str(w) for w in phrase) * * cdef class NT: # <<<<<<<<<<<<<< @@ -650,7 +668,7 @@ struct __pyx_obj_4cdec_5_cdec_NT { }; -/* "cdec/grammar.pxi":21 +/* "grammar.pxi":21 * return '[%s]' % self.cat * * cdef class NTRef: # <<<<<<<<<<<<<< @@ -663,7 +681,7 @@ struct __pyx_obj_4cdec_5_cdec_NTRef { }; -/* "cdec/grammar.pxi":49 +/* "grammar.pxi":49 * return TRule(lhs, f, e, scores, a) * * cdef class TRule: # <<<<<<<<<<<<<< @@ -676,7 +694,7 @@ struct __pyx_obj_4cdec_5_cdec_TRule { }; -/* "cdec/grammar.pxi":183 +/* "grammar.pxi":183 * _phrase(self.f), _phrase(self.e), scores) * * cdef class MRule(TRule): # <<<<<<<<<<<<<< @@ -688,7 +706,7 @@ struct __pyx_obj_4cdec_5_cdec_MRule { }; -/* "cdec/grammar.pxi":199 +/* "grammar.pxi":199 * super(MRule, self).__init__(lhs, rhs, e, scores, None) * * cdef class Grammar: # <<<<<<<<<<<<<< @@ -701,7 +719,7 @@ struct __pyx_obj_4cdec_5_cdec_Grammar { }; -/* "cdec/grammar.pxi":223 +/* "grammar.pxi":223 * self.grammar.get().SetGrammarName(name) * * cdef class TextGrammar(Grammar): # <<<<<<<<<<<<<< @@ -713,7 +731,7 @@ struct __pyx_obj_4cdec_5_cdec_TextGrammar { }; -/* "cdec/hypergraph.pxi":4 +/* "hypergraph.pxi":4 * cimport kbest * * cdef class Hypergraph: # <<<<<<<<<<<<<< @@ -728,7 +746,7 @@ struct __pyx_obj_4cdec_5_cdec_Hypergraph { }; -/* "cdec/hypergraph.pxi":259 +/* "hypergraph.pxi":259 * return vector * * cdef class HypergraphEdge: # <<<<<<<<<<<<<< @@ -744,7 +762,7 @@ struct __pyx_obj_4cdec_5_cdec_HypergraphEdge { }; -/* "cdec/hypergraph.pxi":309 +/* "hypergraph.pxi":309 * raise NotImplemented('comparison not implemented for HypergraphEdge') * * cdef class HypergraphNode: # <<<<<<<<<<<<<< @@ -759,7 +777,7 @@ struct __pyx_obj_4cdec_5_cdec_HypergraphNode { }; -/* "cdec/lattice.pxi":3 +/* "lattice.pxi":3 * cimport lattice * * cdef class Lattice: # <<<<<<<<<<<<<< @@ -772,7 +790,7 @@ struct __pyx_obj_4cdec_5_cdec_Lattice { }; -/* "cdec/mteval.pxi":12 +/* "mteval.pxi":12 * return stats * * cdef class Candidate: # <<<<<<<<<<<<<< @@ -786,7 +804,7 @@ struct __pyx_obj_4cdec_5_cdec_Candidate { }; -/* "cdec/mteval.pxi":26 +/* "mteval.pxi":26 * return fmap * * cdef class SufficientStats: # <<<<<<<<<<<<<< @@ -800,7 +818,7 @@ struct __pyx_obj_4cdec_5_cdec_SufficientStats { }; -/* "cdec/mteval.pxi":68 +/* "mteval.pxi":68 * return result * * cdef class CandidateSet: # <<<<<<<<<<<<<< @@ -815,7 +833,7 @@ struct __pyx_obj_4cdec_5_cdec_CandidateSet { }; -/* "cdec/mteval.pxi":103 +/* "mteval.pxi":103 * self.cs.AddKBestCandidates(hypergraph.hg[0], k, self.scorer.get()) * * cdef class SegmentEvaluator: # <<<<<<<<<<<<<< @@ -829,7 +847,7 @@ struct __pyx_obj_4cdec_5_cdec_SegmentEvaluator { }; -/* "cdec/mteval.pxi":124 +/* "mteval.pxi":124 * return CandidateSet(self) * * cdef class Scorer: # <<<<<<<<<<<<<< @@ -843,7 +861,7 @@ struct __pyx_obj_4cdec_5_cdec_Scorer { }; -/* "cdec/mteval.pxi":179 +/* "mteval.pxi":179 * out.fields[i] = ss[i] * * cdef class Metric: # <<<<<<<<<<<<<< @@ -870,7 +888,7 @@ struct __pyx_obj_4cdec_5_cdec_Decoder { }; -/* "cdec/vectors.pxi":32 +/* "vectors.pxi":32 * self.vector[0][fid] = value * * def __iter__(self): # <<<<<<<<<<<<<< @@ -886,7 +904,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct____iter__ { }; -/* "cdec/vectors.pxi":72 +/* "vectors.pxi":72 * self.vector.set_value(fid, value) * * def __iter__(self): # <<<<<<<<<<<<<< @@ -903,7 +921,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_1___iter__ { }; -/* "cdec/grammar.pxi":5 +/* "grammar.pxi":5 * import cdec.sa._sa as _sa * * def _phrase(phrase): # <<<<<<<<<<<<<< @@ -916,7 +934,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_2__phrase { }; -/* "cdec/grammar.pxi":6 +/* "grammar.pxi":6 * * def _phrase(phrase): * return ' '.join(w.encode('utf8') if isinstance(w, unicode) else str(w) for w in phrase) # <<<<<<<<<<<<<< @@ -933,7 +951,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_3_genexpr { }; -/* "cdec/grammar.pxi":137 +/* "grammar.pxi":137 * * property a: * def __get__(self): # <<<<<<<<<<<<<< @@ -950,7 +968,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_4___get__ { }; -/* "cdec/grammar.pxi":178 +/* "grammar.pxi":178 * self.rule.get().lhs_ = -TDConvert((<NT> lhs).cat) * * def __str__(self): # <<<<<<<<<<<<<< @@ -963,7 +981,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_5___str__ { }; -/* "cdec/grammar.pxi":179 +/* "grammar.pxi":179 * * def __str__(self): * scores = ' '.join('%s=%s' % feat for feat in self.scores) # <<<<<<<<<<<<<< @@ -980,7 +998,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_6_genexpr { }; -/* "cdec/grammar.pxi":205 +/* "grammar.pxi":205 * del self.grammar * * def __iter__(self): # <<<<<<<<<<<<<< @@ -999,7 +1017,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_7___iter__ { }; -/* "cdec/hypergraph.pxi":49 +/* "hypergraph.pxi":49 * return unicode(hypergraph.JoshuaVisualizationString(self.hg[0]).c_str(), 'utf8') * * def kbest(self, size): # <<<<<<<<<<<<<< @@ -1018,7 +1036,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_8_kbest { }; -/* "cdec/hypergraph.pxi":62 +/* "hypergraph.pxi":62 * del derivations * * def kbest_trees(self, size): # <<<<<<<<<<<<<< @@ -1041,7 +1059,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_9_kbest_trees { }; -/* "cdec/hypergraph.pxi":81 +/* "hypergraph.pxi":81 * del e_derivations * * def kbest_features(self, size): # <<<<<<<<<<<<<< @@ -1061,7 +1079,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_10_kbest_features { }; -/* "cdec/hypergraph.pxi":97 +/* "hypergraph.pxi":97 * del derivations * * def unique_kbest(self, size): # <<<<<<<<<<<<<< @@ -1080,7 +1098,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_11_unique_kbest { }; -/* "cdec/hypergraph.pxi":110 +/* "hypergraph.pxi":110 * del derivations * * def unique_kbest_trees(self, size): # <<<<<<<<<<<<<< @@ -1103,7 +1121,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_12_unique_kbest_trees { }; -/* "cdec/hypergraph.pxi":129 +/* "hypergraph.pxi":129 * del e_derivations * * def unique_kbest_features(self, size): # <<<<<<<<<<<<<< @@ -1123,7 +1141,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_13_unique_kbest_features { }; -/* "cdec/hypergraph.pxi":145 +/* "hypergraph.pxi":145 * del derivations * * def sample(self, unsigned n): # <<<<<<<<<<<<<< @@ -1141,7 +1159,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_14_sample { }; -/* "cdec/hypergraph.pxi":156 +/* "hypergraph.pxi":156 * del hypos * * def sample_hypotheses(self, unsigned n): # <<<<<<<<<<<<<< @@ -1160,7 +1178,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_15_sample_hypotheses { }; -/* "cdec/hypergraph.pxi":171 +/* "hypergraph.pxi":171 * del hypos * * def sample_trees(self, unsigned n): # <<<<<<<<<<<<<< @@ -1178,7 +1196,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_16_sample_trees { }; -/* "cdec/hypergraph.pxi":224 +/* "hypergraph.pxi":224 * * property edges: * def __get__(self): # <<<<<<<<<<<<<< @@ -1194,7 +1212,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_17___get__ { }; -/* "cdec/hypergraph.pxi":230 +/* "hypergraph.pxi":230 * * property nodes: * def __get__(self): # <<<<<<<<<<<<<< @@ -1210,7 +1228,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_18___get__ { }; -/* "cdec/hypergraph.pxi":279 +/* "hypergraph.pxi":279 * * property tail_nodes: * def __get__(self): # <<<<<<<<<<<<<< @@ -1226,7 +1244,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_19___get__ { }; -/* "cdec/hypergraph.pxi":323 +/* "hypergraph.pxi":323 * * property in_edges: * def __get__(self): # <<<<<<<<<<<<<< @@ -1242,7 +1260,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_20___get__ { }; -/* "cdec/hypergraph.pxi":329 +/* "hypergraph.pxi":329 * * property out_edges: * def __get__(self): # <<<<<<<<<<<<<< @@ -1258,7 +1276,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_21___get__ { }; -/* "cdec/lattice.pxi":56 +/* "lattice.pxi":64 * return unicode(str(self), 'utf8') * * def __iter__(self): # <<<<<<<<<<<<<< @@ -1274,7 +1292,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_22___iter__ { }; -/* "cdec/lattice.pxi":61 +/* "lattice.pxi":69 * yield self[i] * * def todot(self): # <<<<<<<<<<<<<< @@ -1287,7 +1305,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_23_todot { }; -/* "cdec/lattice.pxi":63 +/* "lattice.pxi":71 * def todot(self): * """lattice.todot() -> Representation of the lattice in GraphViz dot format.""" * def lines(): # <<<<<<<<<<<<<< @@ -1310,7 +1328,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_24_lines { }; -/* "cdec/mteval.pxi":47 +/* "mteval.pxi":47 * return self.stats.size() * * def __iter__(self): # <<<<<<<<<<<<<< @@ -1326,7 +1344,7 @@ struct __pyx_obj_4cdec_5_cdec___pyx_scope_struct_25___iter__ { }; -/* "cdec/mteval.pxi":93 +/* "mteval.pxi":93 * return candidate * * def __iter__(self): # <<<<<<<<<<<<<< @@ -1447,7 +1465,7 @@ struct __pyx_vtabstruct_4cdec_2sa_3_sa_Phrase { static struct __pyx_vtabstruct_4cdec_2sa_3_sa_Phrase *__pyx_vtabptr_4cdec_2sa_3_sa_Phrase; -/* "cdec/hypergraph.pxi":4 +/* "hypergraph.pxi":4 * cimport kbest * * cdef class Hypergraph: # <<<<<<<<<<<<<< @@ -1461,7 +1479,7 @@ struct __pyx_vtabstruct_4cdec_5_cdec_Hypergraph { static struct __pyx_vtabstruct_4cdec_5_cdec_Hypergraph *__pyx_vtabptr_4cdec_5_cdec_Hypergraph; -/* "cdec/hypergraph.pxi":259 +/* "hypergraph.pxi":259 * return vector * * cdef class HypergraphEdge: # <<<<<<<<<<<<<< @@ -1475,7 +1493,7 @@ struct __pyx_vtabstruct_4cdec_5_cdec_HypergraphEdge { static struct __pyx_vtabstruct_4cdec_5_cdec_HypergraphEdge *__pyx_vtabptr_4cdec_5_cdec_HypergraphEdge; -/* "cdec/hypergraph.pxi":309 +/* "hypergraph.pxi":309 * raise NotImplemented('comparison not implemented for HypergraphEdge') * * cdef class HypergraphNode: # <<<<<<<<<<<<<< @@ -1487,6 +1505,8 @@ struct __pyx_vtabstruct_4cdec_5_cdec_HypergraphNode { PyObject *(*init)(struct __pyx_obj_4cdec_5_cdec_HypergraphNode *, Hypergraph *, unsigned int); }; static struct __pyx_vtabstruct_4cdec_5_cdec_HypergraphNode *__pyx_vtabptr_4cdec_5_cdec_HypergraphNode; + +/* --- Runtime support code (head) --- */ #ifndef CYTHON_REFNANNY #define CYTHON_REFNANNY 0 #endif @@ -1845,14 +1865,12 @@ static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); static void __Pyx_AddTraceback(const char *funcname, int c_line, int py_line, const char *filename); -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value); - -static CYTHON_INLINE unsigned int __Pyx_PyInt_As_unsigned_int(PyObject *); - static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); +static CYTHON_INLINE unsigned int __Pyx_PyInt_As_unsigned_int(PyObject *); + #ifndef __Pyx_CppExn2PyErr #include <new> #include <typeinfo> @@ -1892,6 +1910,8 @@ static void __Pyx_CppExn2PyErr() { } #endif +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value); + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); @@ -2049,7 +2069,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st static struct __pyx_obj_4cdec_5_cdec_SufficientStats *__pyx_f_4cdec_5_cdec_as_stats(PyObject *, PyObject *); /*proto*/ static float __pyx_f_4cdec_5_cdec__compute_score(void *, SufficientStats *); /*proto*/ static void __pyx_f_4cdec_5_cdec__compute_sufficient_stats(void *, std::string *, std::vector<std::string> *, SufficientStats *); /*proto*/ -static std::string __pyx_convert_string_from_py_(PyObject *); /*proto*/ +static std::string __pyx_convert_string_from_py_std__in_string(PyObject *); /*proto*/ #define __Pyx_MODULE_NAME "cdec._cdec" int __pyx_module_is_main_cdec___cdec = 0; @@ -2440,11 +2460,9 @@ static char __pyx_k_Hypergraph_sample_hypotheses[] = "Hypergraph.sample_hypothes static char __pyx_k_cannot_create_lattice_from_s[] = "cannot create lattice from %s"; static char __pyx_k_Cannot_translate_input_type_s[] = "Cannot translate input type %s"; static char __pyx_k_Hypergraph_unique_kbest_trees[] = "Hypergraph.unique_kbest_trees"; -static char __pyx_k_Users_waziz_workspace_mtm14_my[] = "/Users/waziz/workspace/mtm14/my-cdec/python/cdec/lattice.pxi"; static char __pyx_k_cannot_reweight_hypergraph_with[] = "cannot reweight hypergraph with %s"; +static char __pyx_k_home_pks_src_cdec_dtrain_python[] = "/home/pks/src/cdec-dtrain/python/cdec/lattice.pxi"; static char __pyx_k_Hypergraph_unique_kbest_features[] = "Hypergraph.unique_kbest_features"; -static char __pyx_k_Users_waziz_workspace_mtm14_my_2[] = "/Users/waziz/workspace/mtm14/my-cdec/python/cdec/grammar.pxi"; -static char __pyx_k_Users_waziz_workspace_mtm14_my_3[] = "/Users/waziz/workspace/mtm14/my-cdec/python/cdec/_cdec.pyx"; static char __pyx_k_candidate_set_index_out_of_range[] = "candidate set index out of range"; static char __pyx_k_cannot_initialize_weights_with_s[] = "cannot initialize weights with %s"; static char __pyx_k_cannot_intersect_hypergraph_with[] = "cannot intersect hypergraph with %s"; @@ -2453,6 +2471,8 @@ static char __pyx_k_comparison_not_implemented_for_H[] = "comparison not impleme static char __pyx_k_comparison_not_implemented_for_S[] = "comparison not implemented for SparseVector"; static char __pyx_k_sufficient_stats_vector_index_ou[] = "sufficient stats vector index out of range"; static char __pyx_k_the_grammar_should_contain_TRule[] = "the grammar should contain TRule objects"; +static char __pyx_k_home_pks_src_cdec_dtrain_python_2[] = "/home/pks/src/cdec-dtrain/python/cdec/grammar.pxi"; +static char __pyx_k_home_pks_src_cdec_dtrain_python_3[] = "/home/pks/src/cdec-dtrain/python/cdec/_cdec.pyx"; static char __pyx_k_comparison_not_implemented_for_H_2[] = "comparison not implemented for HypergraphNode"; static PyObject *__pyx_n_s_BLEU; static PyObject *__pyx_n_s_CER; @@ -2488,9 +2508,6 @@ static PyObject *__pyx_n_s_SufficientStats___iter; static PyObject *__pyx_n_s_TER; static PyObject *__pyx_n_s_TRule___get; static PyObject *__pyx_n_s_TypeError; -static PyObject *__pyx_kp_s_Users_waziz_workspace_mtm14_my; -static PyObject *__pyx_kp_s_Users_waziz_workspace_mtm14_my_2; -static PyObject *__pyx_kp_s_Users_waziz_workspace_mtm14_my_3; static PyObject *__pyx_n_s_ValueError; static PyObject *__pyx_kp_s__10; static PyObject *__pyx_kp_s__11; @@ -2548,6 +2565,9 @@ static PyObject *__pyx_n_s_genexpr; static PyObject *__pyx_n_s_get; static PyObject *__pyx_n_s_get_2; static PyObject *__pyx_n_s_grammar; +static PyObject *__pyx_kp_s_home_pks_src_cdec_dtrain_python; +static PyObject *__pyx_kp_s_home_pks_src_cdec_dtrain_python_2; +static PyObject *__pyx_kp_s_home_pks_src_cdec_dtrain_python_3; static PyObject *__pyx_n_s_hyp; static PyObject *__pyx_n_s_hypergraph; static PyObject *__pyx_n_s_i; @@ -2836,7 +2856,7 @@ static PyObject *__pyx_f_4cdec_5_cdec_as_str(PyObject *__pyx_v_data, struct __py return __pyx_r; } -/* "cdec/vectors.pxi":7 +/* "vectors.pxi":7 * cdef bint owned # if True, do not manage memory * * def __init__(self): # <<<<<<<<<<<<<< @@ -2873,7 +2893,7 @@ static int __pyx_pf_4cdec_5_cdec_11DenseVector___init__(struct __pyx_obj_4cdec_5 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__init__", 0); - /* "cdec/vectors.pxi":9 + /* "vectors.pxi":9 * def __init__(self): * """DenseVector() -> Dense weight/feature vector.""" * self.vector = new vector[weight_t]() # <<<<<<<<<<<<<< @@ -2888,7 +2908,7 @@ static int __pyx_pf_4cdec_5_cdec_11DenseVector___init__(struct __pyx_obj_4cdec_5 } __pyx_v_self->vector = __pyx_t_1; - /* "cdec/vectors.pxi":10 + /* "vectors.pxi":10 * """DenseVector() -> Dense weight/feature vector.""" * self.vector = new vector[weight_t]() * self.owned = False # <<<<<<<<<<<<<< @@ -2897,7 +2917,7 @@ static int __pyx_pf_4cdec_5_cdec_11DenseVector___init__(struct __pyx_obj_4cdec_5 */ __pyx_v_self->owned = 0; - /* "cdec/vectors.pxi":7 + /* "vectors.pxi":7 * cdef bint owned # if True, do not manage memory * * def __init__(self): # <<<<<<<<<<<<<< @@ -2916,7 +2936,7 @@ static int __pyx_pf_4cdec_5_cdec_11DenseVector___init__(struct __pyx_obj_4cdec_5 return __pyx_r; } -/* "cdec/vectors.pxi":12 +/* "vectors.pxi":12 * self.owned = False * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -2940,7 +2960,7 @@ static void __pyx_pf_4cdec_5_cdec_11DenseVector_2__dealloc__(struct __pyx_obj_4c int __pyx_t_1; __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "cdec/vectors.pxi":13 + /* "vectors.pxi":13 * * def __dealloc__(self): * if not self.owned: # <<<<<<<<<<<<<< @@ -2950,7 +2970,7 @@ static void __pyx_pf_4cdec_5_cdec_11DenseVector_2__dealloc__(struct __pyx_obj_4c __pyx_t_1 = ((!(__pyx_v_self->owned != 0)) != 0); if (__pyx_t_1) { - /* "cdec/vectors.pxi":14 + /* "vectors.pxi":14 * def __dealloc__(self): * if not self.owned: * del self.vector # <<<<<<<<<<<<<< @@ -2962,7 +2982,7 @@ static void __pyx_pf_4cdec_5_cdec_11DenseVector_2__dealloc__(struct __pyx_obj_4c } __pyx_L3:; - /* "cdec/vectors.pxi":12 + /* "vectors.pxi":12 * self.owned = False * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -2974,7 +2994,7 @@ static void __pyx_pf_4cdec_5_cdec_11DenseVector_2__dealloc__(struct __pyx_obj_4c __Pyx_RefNannyFinishContext(); } -/* "cdec/vectors.pxi":16 +/* "vectors.pxi":16 * del self.vector * * def __len__(self): # <<<<<<<<<<<<<< @@ -3000,7 +3020,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_11DenseVector_4__len__(struct __pyx_obj_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__len__", 0); - /* "cdec/vectors.pxi":17 + /* "vectors.pxi":17 * * def __len__(self): * return self.vector.size() # <<<<<<<<<<<<<< @@ -3010,7 +3030,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_11DenseVector_4__len__(struct __pyx_obj_ __pyx_r = __pyx_v_self->vector->size(); goto __pyx_L0; - /* "cdec/vectors.pxi":16 + /* "vectors.pxi":16 * del self.vector * * def __len__(self): # <<<<<<<<<<<<<< @@ -3024,7 +3044,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_11DenseVector_4__len__(struct __pyx_obj_ return __pyx_r; } -/* "cdec/vectors.pxi":19 +/* "vectors.pxi":19 * return self.vector.size() * * def __getitem__(self, char* fname): # <<<<<<<<<<<<<< @@ -3071,7 +3091,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_11DenseVector_6__getitem__(struct __pyx_o int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__getitem__", 0); - /* "cdec/vectors.pxi":20 + /* "vectors.pxi":20 * * def __getitem__(self, char* fname): * cdef int fid = FDConvert(fname) # <<<<<<<<<<<<<< @@ -3080,7 +3100,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_11DenseVector_6__getitem__(struct __pyx_o */ __pyx_v_fid = FD::Convert(__pyx_v_fname); - /* "cdec/vectors.pxi":21 + /* "vectors.pxi":21 * def __getitem__(self, char* fname): * cdef int fid = FDConvert(fname) * if 0 <= fid < self.vector.size(): # <<<<<<<<<<<<<< @@ -3094,7 +3114,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_11DenseVector_6__getitem__(struct __pyx_o __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "cdec/vectors.pxi":22 + /* "vectors.pxi":22 * cdef int fid = FDConvert(fname) * if 0 <= fid < self.vector.size(): * return self.vector[0][fid] # <<<<<<<<<<<<<< @@ -3109,7 +3129,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_11DenseVector_6__getitem__(struct __pyx_o goto __pyx_L0; } - /* "cdec/vectors.pxi":23 + /* "vectors.pxi":23 * if 0 <= fid < self.vector.size(): * return self.vector[0][fid] * raise KeyError(fname) # <<<<<<<<<<<<<< @@ -3130,7 +3150,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_11DenseVector_6__getitem__(struct __pyx_o __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; {__pyx_filename = __pyx_f[1]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/vectors.pxi":19 + /* "vectors.pxi":19 * return self.vector.size() * * def __getitem__(self, char* fname): # <<<<<<<<<<<<<< @@ -3150,7 +3170,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_11DenseVector_6__getitem__(struct __pyx_o return __pyx_r; } -/* "cdec/vectors.pxi":25 +/* "vectors.pxi":25 * raise KeyError(fname) * * def __setitem__(self, char* fname, float value): # <<<<<<<<<<<<<< @@ -3200,7 +3220,7 @@ static int __pyx_pf_4cdec_5_cdec_11DenseVector_8__setitem__(struct __pyx_obj_4cd int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__setitem__", 0); - /* "cdec/vectors.pxi":26 + /* "vectors.pxi":26 * * def __setitem__(self, char* fname, float value): * cdef int fid = FDConvert(fname) # <<<<<<<<<<<<<< @@ -3209,7 +3229,7 @@ static int __pyx_pf_4cdec_5_cdec_11DenseVector_8__setitem__(struct __pyx_obj_4cd */ __pyx_v_fid = FD::Convert(__pyx_v_fname); - /* "cdec/vectors.pxi":27 + /* "vectors.pxi":27 * def __setitem__(self, char* fname, float value): * cdef int fid = FDConvert(fname) * if fid < 0: raise KeyError(fname) # <<<<<<<<<<<<<< @@ -3233,7 +3253,7 @@ static int __pyx_pf_4cdec_5_cdec_11DenseVector_8__setitem__(struct __pyx_obj_4cd {__pyx_filename = __pyx_f[1]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/vectors.pxi":28 + /* "vectors.pxi":28 * cdef int fid = FDConvert(fname) * if fid < 0: raise KeyError(fname) * if self.vector.size() <= fid: # <<<<<<<<<<<<<< @@ -3243,19 +3263,24 @@ static int __pyx_pf_4cdec_5_cdec_11DenseVector_8__setitem__(struct __pyx_obj_4cd __pyx_t_1 = ((__pyx_v_self->vector->size() <= __pyx_v_fid) != 0); if (__pyx_t_1) { - /* "cdec/vectors.pxi":29 + /* "vectors.pxi":29 * if fid < 0: raise KeyError(fname) * if self.vector.size() <= fid: * self.vector.resize(fid + 1) # <<<<<<<<<<<<<< * self.vector[0][fid] = value * */ - __pyx_v_self->vector->resize((__pyx_v_fid + 1)); + try { + __pyx_v_self->vector->resize((__pyx_v_fid + 1)); + } catch(...) { + __Pyx_CppExn2PyErr(); + {__pyx_filename = __pyx_f[1]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } goto __pyx_L4; } __pyx_L4:; - /* "cdec/vectors.pxi":30 + /* "vectors.pxi":30 * if self.vector.size() <= fid: * self.vector.resize(fid + 1) * self.vector[0][fid] = value # <<<<<<<<<<<<<< @@ -3264,7 +3289,7 @@ static int __pyx_pf_4cdec_5_cdec_11DenseVector_8__setitem__(struct __pyx_obj_4cd */ ((__pyx_v_self->vector[0])[__pyx_v_fid]) = __pyx_v_value; - /* "cdec/vectors.pxi":25 + /* "vectors.pxi":25 * raise KeyError(fname) * * def __setitem__(self, char* fname, float value): # <<<<<<<<<<<<<< @@ -3286,7 +3311,7 @@ static int __pyx_pf_4cdec_5_cdec_11DenseVector_8__setitem__(struct __pyx_obj_4cd } static PyObject *__pyx_gb_4cdec_5_cdec_11DenseVector_12generator(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/vectors.pxi":32 +/* "vectors.pxi":32 * self.vector[0][fid] = value * * def __iter__(self): # <<<<<<<<<<<<<< @@ -3365,7 +3390,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_11DenseVector_12generator(__pyx_Generator __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/vectors.pxi":34 + /* "vectors.pxi":34 * def __iter__(self): * cdef unsigned fid * for fid in range(1, self.vector.size()): # <<<<<<<<<<<<<< @@ -3376,7 +3401,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_11DenseVector_12generator(__pyx_Generator for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_fid = __pyx_t_2; - /* "cdec/vectors.pxi":35 + /* "vectors.pxi":35 * cdef unsigned fid * for fid in range(1, self.vector.size()): * yield str(FDConvert(fid).c_str()), self.vector[0][fid] # <<<<<<<<<<<<<< @@ -3418,7 +3443,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_11DenseVector_12generator(__pyx_Generator if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/vectors.pxi":32 + /* "vectors.pxi":32 * self.vector[0][fid] = value * * def __iter__(self): # <<<<<<<<<<<<<< @@ -3442,7 +3467,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_11DenseVector_12generator(__pyx_Generator return NULL; } -/* "cdec/vectors.pxi":37 +/* "vectors.pxi":37 * yield str(FDConvert(fid).c_str()), self.vector[0][fid] * * def dot(self, SparseVector other): # <<<<<<<<<<<<<< @@ -3484,7 +3509,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_11DenseVector_13dot(struct __pyx_obj_4cde int __pyx_clineno = 0; __Pyx_RefNannySetupContext("dot", 0); - /* "cdec/vectors.pxi":39 + /* "vectors.pxi":39 * def dot(self, SparseVector other): * """vector.dot(SparseVector other) -> Dot product of the two vectors.""" * return other.dot(self) # <<<<<<<<<<<<<< @@ -3523,7 +3548,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_11DenseVector_13dot(struct __pyx_obj_4cde __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/vectors.pxi":37 + /* "vectors.pxi":37 * yield str(FDConvert(fid).c_str()), self.vector[0][fid] * * def dot(self, SparseVector other): # <<<<<<<<<<<<<< @@ -3545,7 +3570,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_11DenseVector_13dot(struct __pyx_obj_4cde return __pyx_r; } -/* "cdec/vectors.pxi":41 +/* "vectors.pxi":41 * return other.dot(self) * * def tosparse(self): # <<<<<<<<<<<<<< @@ -3577,7 +3602,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_11DenseVector_15tosparse(struct __pyx_obj int __pyx_clineno = 0; __Pyx_RefNannySetupContext("tosparse", 0); - /* "cdec/vectors.pxi":43 + /* "vectors.pxi":43 * def tosparse(self): * """vector.tosparse() -> Equivalent SparseVector.""" * cdef SparseVector sparse = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< @@ -3590,7 +3615,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_11DenseVector_15tosparse(struct __pyx_obj __pyx_v_sparse = ((struct __pyx_obj_4cdec_5_cdec_SparseVector *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/vectors.pxi":44 + /* "vectors.pxi":44 * """vector.tosparse() -> Equivalent SparseVector.""" * cdef SparseVector sparse = SparseVector.__new__(SparseVector) * sparse.vector = new FastSparseVector[weight_t]() # <<<<<<<<<<<<<< @@ -3599,7 +3624,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_11DenseVector_15tosparse(struct __pyx_obj */ __pyx_v_sparse->vector = new FastSparseVector<weight_t> (); - /* "cdec/vectors.pxi":45 + /* "vectors.pxi":45 * cdef SparseVector sparse = SparseVector.__new__(SparseVector) * sparse.vector = new FastSparseVector[weight_t]() * InitSparseVector(self.vector[0], sparse.vector) # <<<<<<<<<<<<<< @@ -3608,7 +3633,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_11DenseVector_15tosparse(struct __pyx_obj */ Weights::InitSparseVector((__pyx_v_self->vector[0]), __pyx_v_sparse->vector); - /* "cdec/vectors.pxi":46 + /* "vectors.pxi":46 * sparse.vector = new FastSparseVector[weight_t]() * InitSparseVector(self.vector[0], sparse.vector) * return sparse # <<<<<<<<<<<<<< @@ -3620,7 +3645,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_11DenseVector_15tosparse(struct __pyx_obj __pyx_r = ((PyObject *)__pyx_v_sparse); goto __pyx_L0; - /* "cdec/vectors.pxi":41 + /* "vectors.pxi":41 * return other.dot(self) * * def tosparse(self): # <<<<<<<<<<<<<< @@ -3640,7 +3665,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_11DenseVector_15tosparse(struct __pyx_obj return __pyx_r; } -/* "cdec/vectors.pxi":51 +/* "vectors.pxi":51 * cdef FastSparseVector[weight_t]* vector * * def __init__(self): # <<<<<<<<<<<<<< @@ -3673,7 +3698,7 @@ static int __pyx_pf_4cdec_5_cdec_12SparseVector___init__(struct __pyx_obj_4cdec_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__init__", 0); - /* "cdec/vectors.pxi":53 + /* "vectors.pxi":53 * def __init__(self): * """SparseVector() -> Sparse feature/weight vector.""" * self.vector = new FastSparseVector[weight_t]() # <<<<<<<<<<<<<< @@ -3682,7 +3707,7 @@ static int __pyx_pf_4cdec_5_cdec_12SparseVector___init__(struct __pyx_obj_4cdec_ */ __pyx_v_self->vector = new FastSparseVector<weight_t> (); - /* "cdec/vectors.pxi":51 + /* "vectors.pxi":51 * cdef FastSparseVector[weight_t]* vector * * def __init__(self): # <<<<<<<<<<<<<< @@ -3696,7 +3721,7 @@ static int __pyx_pf_4cdec_5_cdec_12SparseVector___init__(struct __pyx_obj_4cdec_ return __pyx_r; } -/* "cdec/vectors.pxi":55 +/* "vectors.pxi":55 * self.vector = new FastSparseVector[weight_t]() * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -3719,7 +3744,7 @@ static void __pyx_pf_4cdec_5_cdec_12SparseVector_2__dealloc__(struct __pyx_obj_4 __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "cdec/vectors.pxi":56 + /* "vectors.pxi":56 * * def __dealloc__(self): * del self.vector # <<<<<<<<<<<<<< @@ -3728,7 +3753,7 @@ static void __pyx_pf_4cdec_5_cdec_12SparseVector_2__dealloc__(struct __pyx_obj_4 */ delete __pyx_v_self->vector; - /* "cdec/vectors.pxi":55 + /* "vectors.pxi":55 * self.vector = new FastSparseVector[weight_t]() * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -3740,7 +3765,7 @@ static void __pyx_pf_4cdec_5_cdec_12SparseVector_2__dealloc__(struct __pyx_obj_4 __Pyx_RefNannyFinishContext(); } -/* "cdec/vectors.pxi":58 +/* "vectors.pxi":58 * del self.vector * * def copy(self): # <<<<<<<<<<<<<< @@ -3771,7 +3796,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_4copy(struct __pyx_obj_4cd int __pyx_clineno = 0; __Pyx_RefNannySetupContext("copy", 0); - /* "cdec/vectors.pxi":60 + /* "vectors.pxi":60 * def copy(self): * """vector.copy() -> SparseVector copy.""" * return self * 1 # <<<<<<<<<<<<<< @@ -3785,7 +3810,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_4copy(struct __pyx_obj_4cd __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/vectors.pxi":58 + /* "vectors.pxi":58 * del self.vector * * def copy(self): # <<<<<<<<<<<<<< @@ -3804,7 +3829,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_4copy(struct __pyx_obj_4cd return __pyx_r; } -/* "cdec/vectors.pxi":62 +/* "vectors.pxi":62 * return self * 1 * * def __getitem__(self, char* fname): # <<<<<<<<<<<<<< @@ -3850,7 +3875,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_6__getitem__(struct __pyx_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__getitem__", 0); - /* "cdec/vectors.pxi":63 + /* "vectors.pxi":63 * * def __getitem__(self, char* fname): * cdef int fid = FDConvert(fname) # <<<<<<<<<<<<<< @@ -3859,7 +3884,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_6__getitem__(struct __pyx_ */ __pyx_v_fid = FD::Convert(__pyx_v_fname); - /* "cdec/vectors.pxi":64 + /* "vectors.pxi":64 * def __getitem__(self, char* fname): * cdef int fid = FDConvert(fname) * if fid < 0: raise KeyError(fname) # <<<<<<<<<<<<<< @@ -3883,7 +3908,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_6__getitem__(struct __pyx_ {__pyx_filename = __pyx_f[1]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/vectors.pxi":65 + /* "vectors.pxi":65 * cdef int fid = FDConvert(fname) * if fid < 0: raise KeyError(fname) * return self.vector.value(fid) # <<<<<<<<<<<<<< @@ -3897,7 +3922,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_6__getitem__(struct __pyx_ __pyx_t_2 = 0; goto __pyx_L0; - /* "cdec/vectors.pxi":62 + /* "vectors.pxi":62 * return self * 1 * * def __getitem__(self, char* fname): # <<<<<<<<<<<<<< @@ -3917,7 +3942,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_6__getitem__(struct __pyx_ return __pyx_r; } -/* "cdec/vectors.pxi":67 +/* "vectors.pxi":67 * return self.vector.value(fid) * * def __setitem__(self, char* fname, float value): # <<<<<<<<<<<<<< @@ -3967,7 +3992,7 @@ static int __pyx_pf_4cdec_5_cdec_12SparseVector_8__setitem__(struct __pyx_obj_4c int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__setitem__", 0); - /* "cdec/vectors.pxi":68 + /* "vectors.pxi":68 * * def __setitem__(self, char* fname, float value): * cdef int fid = FDConvert(fname) # <<<<<<<<<<<<<< @@ -3976,7 +4001,7 @@ static int __pyx_pf_4cdec_5_cdec_12SparseVector_8__setitem__(struct __pyx_obj_4c */ __pyx_v_fid = FD::Convert(__pyx_v_fname); - /* "cdec/vectors.pxi":69 + /* "vectors.pxi":69 * def __setitem__(self, char* fname, float value): * cdef int fid = FDConvert(fname) * if fid < 0: raise KeyError(fname) # <<<<<<<<<<<<<< @@ -4000,7 +4025,7 @@ static int __pyx_pf_4cdec_5_cdec_12SparseVector_8__setitem__(struct __pyx_obj_4c {__pyx_filename = __pyx_f[1]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/vectors.pxi":70 + /* "vectors.pxi":70 * cdef int fid = FDConvert(fname) * if fid < 0: raise KeyError(fname) * self.vector.set_value(fid, value) # <<<<<<<<<<<<<< @@ -4009,7 +4034,7 @@ static int __pyx_pf_4cdec_5_cdec_12SparseVector_8__setitem__(struct __pyx_obj_4c */ __pyx_v_self->vector->set_value(__pyx_v_fid, __pyx_v_value); - /* "cdec/vectors.pxi":67 + /* "vectors.pxi":67 * return self.vector.value(fid) * * def __setitem__(self, char* fname, float value): # <<<<<<<<<<<<<< @@ -4031,7 +4056,7 @@ static int __pyx_pf_4cdec_5_cdec_12SparseVector_8__setitem__(struct __pyx_obj_4c } static PyObject *__pyx_gb_4cdec_5_cdec_12SparseVector_12generator1(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/vectors.pxi":72 +/* "vectors.pxi":72 * self.vector.set_value(fid, value) * * def __iter__(self): # <<<<<<<<<<<<<< @@ -4119,7 +4144,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_12SparseVector_12generator1(__pyx_Generat __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/vectors.pxi":73 + /* "vectors.pxi":73 * * def __iter__(self): * cdef FastSparseVector[weight_t].const_iterator* it = new FastSparseVector[weight_t].const_iterator(self.vector[0], False) # <<<<<<<<<<<<<< @@ -4128,7 +4153,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_12SparseVector_12generator1(__pyx_Generat */ __pyx_cur_scope->__pyx_v_it = new FastSparseVector<weight_t> ::const_iterator((__pyx_cur_scope->__pyx_v_self->vector[0]), 0); - /* "cdec/vectors.pxi":75 + /* "vectors.pxi":75 * cdef FastSparseVector[weight_t].const_iterator* it = new FastSparseVector[weight_t].const_iterator(self.vector[0], False) * cdef unsigned i * try: # <<<<<<<<<<<<<< @@ -4137,7 +4162,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_12SparseVector_12generator1(__pyx_Generat */ /*try:*/ { - /* "cdec/vectors.pxi":76 + /* "vectors.pxi":76 * cdef unsigned i * try: * for i in range(self.vector.size()): # <<<<<<<<<<<<<< @@ -4148,7 +4173,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_12SparseVector_12generator1(__pyx_Generat for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "cdec/vectors.pxi":77 + /* "vectors.pxi":77 * try: * for i in range(self.vector.size()): * yield (str(FDConvert(it[0].ptr().first).c_str()), it[0].ptr().second) # <<<<<<<<<<<<<< @@ -4189,7 +4214,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_12SparseVector_12generator1(__pyx_Generat __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L5_error;} - /* "cdec/vectors.pxi":78 + /* "vectors.pxi":78 * for i in range(self.vector.size()): * yield (str(FDConvert(it[0].ptr().first).c_str()), it[0].ptr().second) * pinc(it[0]) # ++it # <<<<<<<<<<<<<< @@ -4200,7 +4225,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_12SparseVector_12generator1(__pyx_Generat } } - /* "cdec/vectors.pxi":80 + /* "vectors.pxi":80 * pinc(it[0]) # ++it * finally: * del it # <<<<<<<<<<<<<< @@ -4247,7 +4272,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_12SparseVector_12generator1(__pyx_Generat __pyx_L6:; } - /* "cdec/vectors.pxi":72 + /* "vectors.pxi":72 * self.vector.set_value(fid, value) * * def __iter__(self): # <<<<<<<<<<<<<< @@ -4271,7 +4296,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_12SparseVector_12generator1(__pyx_Generat return NULL; } -/* "cdec/vectors.pxi":82 +/* "vectors.pxi":82 * del it * * def dot(self, other): # <<<<<<<<<<<<<< @@ -4305,7 +4330,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_13dot(struct __pyx_obj_4cd int __pyx_clineno = 0; __Pyx_RefNannySetupContext("dot", 0); - /* "cdec/vectors.pxi":84 + /* "vectors.pxi":84 * def dot(self, other): * """vector.dot(SparseVector/DenseVector other) -> Dot product of the two vectors.""" * if isinstance(other, DenseVector): # <<<<<<<<<<<<<< @@ -4316,7 +4341,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_13dot(struct __pyx_obj_4cd __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "cdec/vectors.pxi":85 + /* "vectors.pxi":85 * """vector.dot(SparseVector/DenseVector other) -> Dot product of the two vectors.""" * if isinstance(other, DenseVector): * return self.vector.dot((<DenseVector> other).vector[0]) # <<<<<<<<<<<<<< @@ -4331,7 +4356,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_13dot(struct __pyx_obj_4cd goto __pyx_L0; } - /* "cdec/vectors.pxi":86 + /* "vectors.pxi":86 * if isinstance(other, DenseVector): * return self.vector.dot((<DenseVector> other).vector[0]) * elif isinstance(other, SparseVector): # <<<<<<<<<<<<<< @@ -4342,7 +4367,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_13dot(struct __pyx_obj_4cd __pyx_t_1 = (__pyx_t_2 != 0); if (__pyx_t_1) { - /* "cdec/vectors.pxi":87 + /* "vectors.pxi":87 * return self.vector.dot((<DenseVector> other).vector[0]) * elif isinstance(other, SparseVector): * return self.vector.dot((<SparseVector> other).vector[0]) # <<<<<<<<<<<<<< @@ -4357,7 +4382,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_13dot(struct __pyx_obj_4cd goto __pyx_L0; } - /* "cdec/vectors.pxi":88 + /* "vectors.pxi":88 * elif isinstance(other, SparseVector): * return self.vector.dot((<SparseVector> other).vector[0]) * raise TypeError('cannot take the dot product of %s and SparseVector' % type(other)) # <<<<<<<<<<<<<< @@ -4378,7 +4403,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_13dot(struct __pyx_obj_4cd __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; {__pyx_filename = __pyx_f[1]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/vectors.pxi":82 + /* "vectors.pxi":82 * del it * * def dot(self, other): # <<<<<<<<<<<<<< @@ -4398,7 +4423,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_13dot(struct __pyx_obj_4cd return __pyx_r; } -/* "cdec/vectors.pxi":90 +/* "vectors.pxi":90 * raise TypeError('cannot take the dot product of %s and SparseVector' % type(other)) * * def __richcmp__(SparseVector x, SparseVector y, int op): # <<<<<<<<<<<<<< @@ -4438,7 +4463,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_15__richcmp__(struct __pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__richcmp__", 0); - /* "cdec/vectors.pxi":93 + /* "vectors.pxi":93 * if op == 2: # == * return x.vector[0] == y.vector[0] * elif op == 3: # != # <<<<<<<<<<<<<< @@ -4447,7 +4472,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_15__richcmp__(struct __pyx */ switch (__pyx_v_op) { - /* "cdec/vectors.pxi":91 + /* "vectors.pxi":91 * * def __richcmp__(SparseVector x, SparseVector y, int op): * if op == 2: # == # <<<<<<<<<<<<<< @@ -4456,7 +4481,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_15__richcmp__(struct __pyx */ case 2: - /* "cdec/vectors.pxi":92 + /* "vectors.pxi":92 * def __richcmp__(SparseVector x, SparseVector y, int op): * if op == 2: # == * return x.vector[0] == y.vector[0] # <<<<<<<<<<<<<< @@ -4471,7 +4496,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_15__richcmp__(struct __pyx goto __pyx_L0; break; - /* "cdec/vectors.pxi":93 + /* "vectors.pxi":93 * if op == 2: # == * return x.vector[0] == y.vector[0] * elif op == 3: # != # <<<<<<<<<<<<<< @@ -4480,7 +4505,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_15__richcmp__(struct __pyx */ case 3: - /* "cdec/vectors.pxi":94 + /* "vectors.pxi":94 * return x.vector[0] == y.vector[0] * elif op == 3: # != * return not (x == y) # <<<<<<<<<<<<<< @@ -4500,7 +4525,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_15__richcmp__(struct __pyx default: break; } - /* "cdec/vectors.pxi":95 + /* "vectors.pxi":95 * elif op == 3: # != * return not (x == y) * raise NotImplemented('comparison not implemented for SparseVector') # <<<<<<<<<<<<<< @@ -4513,7 +4538,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_15__richcmp__(struct __pyx __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; {__pyx_filename = __pyx_f[1]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/vectors.pxi":90 + /* "vectors.pxi":90 * raise TypeError('cannot take the dot product of %s and SparseVector' % type(other)) * * def __richcmp__(SparseVector x, SparseVector y, int op): # <<<<<<<<<<<<<< @@ -4532,7 +4557,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_15__richcmp__(struct __pyx return __pyx_r; } -/* "cdec/vectors.pxi":97 +/* "vectors.pxi":97 * raise NotImplemented('comparison not implemented for SparseVector') * * def __len__(self): # <<<<<<<<<<<<<< @@ -4558,7 +4583,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_12SparseVector_17__len__(struct __pyx_ob __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__len__", 0); - /* "cdec/vectors.pxi":98 + /* "vectors.pxi":98 * * def __len__(self): * return self.vector.size() # <<<<<<<<<<<<<< @@ -4568,7 +4593,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_12SparseVector_17__len__(struct __pyx_ob __pyx_r = __pyx_v_self->vector->size(); goto __pyx_L0; - /* "cdec/vectors.pxi":97 + /* "vectors.pxi":97 * raise NotImplemented('comparison not implemented for SparseVector') * * def __len__(self): # <<<<<<<<<<<<<< @@ -4582,7 +4607,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_12SparseVector_17__len__(struct __pyx_ob return __pyx_r; } -/* "cdec/vectors.pxi":100 +/* "vectors.pxi":100 * return self.vector.size() * * def __contains__(self, char* fname): # <<<<<<<<<<<<<< @@ -4621,7 +4646,7 @@ static int __pyx_pf_4cdec_5_cdec_12SparseVector_19__contains__(struct __pyx_obj_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__contains__", 0); - /* "cdec/vectors.pxi":101 + /* "vectors.pxi":101 * * def __contains__(self, char* fname): * return self.vector.nonzero(FDConvert(fname)) # <<<<<<<<<<<<<< @@ -4631,7 +4656,7 @@ static int __pyx_pf_4cdec_5_cdec_12SparseVector_19__contains__(struct __pyx_obj_ __pyx_r = __pyx_v_self->vector->nonzero(FD::Convert(__pyx_v_fname)); goto __pyx_L0; - /* "cdec/vectors.pxi":100 + /* "vectors.pxi":100 * return self.vector.size() * * def __contains__(self, char* fname): # <<<<<<<<<<<<<< @@ -4645,7 +4670,7 @@ static int __pyx_pf_4cdec_5_cdec_12SparseVector_19__contains__(struct __pyx_obj_ return __pyx_r; } -/* "cdec/vectors.pxi":103 +/* "vectors.pxi":103 * return self.vector.nonzero(FDConvert(fname)) * * def __neg__(self): # <<<<<<<<<<<<<< @@ -4676,7 +4701,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_21__neg__(struct __pyx_obj int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__neg__", 0); - /* "cdec/vectors.pxi":104 + /* "vectors.pxi":104 * * def __neg__(self): * cdef SparseVector result = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< @@ -4689,7 +4714,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_21__neg__(struct __pyx_obj __pyx_v_result = ((struct __pyx_obj_4cdec_5_cdec_SparseVector *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/vectors.pxi":105 + /* "vectors.pxi":105 * def __neg__(self): * cdef SparseVector result = SparseVector.__new__(SparseVector) * result.vector = new FastSparseVector[weight_t](self.vector[0]) # <<<<<<<<<<<<<< @@ -4698,7 +4723,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_21__neg__(struct __pyx_obj */ __pyx_v_result->vector = new FastSparseVector<weight_t> ((__pyx_v_self->vector[0])); - /* "cdec/vectors.pxi":106 + /* "vectors.pxi":106 * cdef SparseVector result = SparseVector.__new__(SparseVector) * result.vector = new FastSparseVector[weight_t](self.vector[0]) * result.vector[0] *= -1.0 # <<<<<<<<<<<<<< @@ -4707,7 +4732,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_21__neg__(struct __pyx_obj */ (__pyx_v_result->vector[0]) *= -1.0; - /* "cdec/vectors.pxi":107 + /* "vectors.pxi":107 * result.vector = new FastSparseVector[weight_t](self.vector[0]) * result.vector[0] *= -1.0 * return result # <<<<<<<<<<<<<< @@ -4719,7 +4744,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_21__neg__(struct __pyx_obj __pyx_r = ((PyObject *)__pyx_v_result); goto __pyx_L0; - /* "cdec/vectors.pxi":103 + /* "vectors.pxi":103 * return self.vector.nonzero(FDConvert(fname)) * * def __neg__(self): # <<<<<<<<<<<<<< @@ -4739,7 +4764,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_21__neg__(struct __pyx_obj return __pyx_r; } -/* "cdec/vectors.pxi":109 +/* "vectors.pxi":109 * return result * * def __iadd__(SparseVector self, SparseVector other): # <<<<<<<<<<<<<< @@ -4773,7 +4798,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_23__iadd__(struct __pyx_ob __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__iadd__", 0); - /* "cdec/vectors.pxi":110 + /* "vectors.pxi":110 * * def __iadd__(SparseVector self, SparseVector other): * self.vector[0] += other.vector[0] # <<<<<<<<<<<<<< @@ -4782,7 +4807,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_23__iadd__(struct __pyx_ob */ (__pyx_v_self->vector[0]) += (__pyx_v_other->vector[0]); - /* "cdec/vectors.pxi":111 + /* "vectors.pxi":111 * def __iadd__(SparseVector self, SparseVector other): * self.vector[0] += other.vector[0] * return self # <<<<<<<<<<<<<< @@ -4794,7 +4819,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_23__iadd__(struct __pyx_ob __pyx_r = ((PyObject *)__pyx_v_self); goto __pyx_L0; - /* "cdec/vectors.pxi":109 + /* "vectors.pxi":109 * return result * * def __iadd__(SparseVector self, SparseVector other): # <<<<<<<<<<<<<< @@ -4809,7 +4834,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_23__iadd__(struct __pyx_ob return __pyx_r; } -/* "cdec/vectors.pxi":113 +/* "vectors.pxi":113 * return self * * def __isub__(SparseVector self, SparseVector other): # <<<<<<<<<<<<<< @@ -4843,7 +4868,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_25__isub__(struct __pyx_ob __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__isub__", 0); - /* "cdec/vectors.pxi":114 + /* "vectors.pxi":114 * * def __isub__(SparseVector self, SparseVector other): * self.vector[0] -= other.vector[0] # <<<<<<<<<<<<<< @@ -4852,7 +4877,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_25__isub__(struct __pyx_ob */ (__pyx_v_self->vector[0]) -= (__pyx_v_other->vector[0]); - /* "cdec/vectors.pxi":115 + /* "vectors.pxi":115 * def __isub__(SparseVector self, SparseVector other): * self.vector[0] -= other.vector[0] * return self # <<<<<<<<<<<<<< @@ -4864,7 +4889,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_25__isub__(struct __pyx_ob __pyx_r = ((PyObject *)__pyx_v_self); goto __pyx_L0; - /* "cdec/vectors.pxi":113 + /* "vectors.pxi":113 * return self * * def __isub__(SparseVector self, SparseVector other): # <<<<<<<<<<<<<< @@ -4879,7 +4904,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_25__isub__(struct __pyx_ob return __pyx_r; } -/* "cdec/vectors.pxi":117 +/* "vectors.pxi":117 * return self * * def __imul__(SparseVector self, float scalar): # <<<<<<<<<<<<<< @@ -4918,7 +4943,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_27__imul__(struct __pyx_ob __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__imul__", 0); - /* "cdec/vectors.pxi":118 + /* "vectors.pxi":118 * * def __imul__(SparseVector self, float scalar): * self.vector[0] *= scalar # <<<<<<<<<<<<<< @@ -4927,7 +4952,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_27__imul__(struct __pyx_ob */ (__pyx_v_self->vector[0]) *= __pyx_v_scalar; - /* "cdec/vectors.pxi":119 + /* "vectors.pxi":119 * def __imul__(SparseVector self, float scalar): * self.vector[0] *= scalar * return self # <<<<<<<<<<<<<< @@ -4939,7 +4964,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_27__imul__(struct __pyx_ob __pyx_r = ((PyObject *)__pyx_v_self); goto __pyx_L0; - /* "cdec/vectors.pxi":117 + /* "vectors.pxi":117 * return self * * def __imul__(SparseVector self, float scalar): # <<<<<<<<<<<<<< @@ -4954,7 +4979,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_27__imul__(struct __pyx_ob return __pyx_r; } -/* "cdec/vectors.pxi":121 +/* "vectors.pxi":121 * return self * * def __idiv__(SparseVector self, float scalar): # <<<<<<<<<<<<<< @@ -4996,7 +5021,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_29__idiv__(struct __pyx_ob __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__idiv__", 0); - /* "cdec/vectors.pxi":122 + /* "vectors.pxi":122 * * def __idiv__(SparseVector self, float scalar): * self.vector[0] /= scalar # <<<<<<<<<<<<<< @@ -5005,7 +5030,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_29__idiv__(struct __pyx_ob */ (__pyx_v_self->vector[0]) /= __pyx_v_scalar; - /* "cdec/vectors.pxi":123 + /* "vectors.pxi":123 * def __idiv__(SparseVector self, float scalar): * self.vector[0] /= scalar * return self # <<<<<<<<<<<<<< @@ -5017,7 +5042,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_29__idiv__(struct __pyx_ob __pyx_r = ((PyObject *)__pyx_v_self); goto __pyx_L0; - /* "cdec/vectors.pxi":121 + /* "vectors.pxi":121 * return self * * def __idiv__(SparseVector self, float scalar): # <<<<<<<<<<<<<< @@ -5033,7 +5058,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_29__idiv__(struct __pyx_ob } #endif /*!(#if PY_MAJOR_VERSION < 3)*/ -/* "cdec/vectors.pxi":125 +/* "vectors.pxi":125 * return self * * def __add__(SparseVector x, SparseVector y): # <<<<<<<<<<<<<< @@ -5073,7 +5098,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_31__add__(struct __pyx_obj int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__add__", 0); - /* "cdec/vectors.pxi":126 + /* "vectors.pxi":126 * * def __add__(SparseVector x, SparseVector y): * cdef SparseVector result = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< @@ -5086,7 +5111,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_31__add__(struct __pyx_obj __pyx_v_result = ((struct __pyx_obj_4cdec_5_cdec_SparseVector *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/vectors.pxi":127 + /* "vectors.pxi":127 * def __add__(SparseVector x, SparseVector y): * cdef SparseVector result = SparseVector.__new__(SparseVector) * result.vector = new FastSparseVector[weight_t](x.vector[0] + y.vector[0]) # <<<<<<<<<<<<<< @@ -5095,7 +5120,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_31__add__(struct __pyx_obj */ __pyx_v_result->vector = new FastSparseVector<weight_t> (((__pyx_v_x->vector[0]) + (__pyx_v_y->vector[0]))); - /* "cdec/vectors.pxi":128 + /* "vectors.pxi":128 * cdef SparseVector result = SparseVector.__new__(SparseVector) * result.vector = new FastSparseVector[weight_t](x.vector[0] + y.vector[0]) * return result # <<<<<<<<<<<<<< @@ -5107,7 +5132,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_31__add__(struct __pyx_obj __pyx_r = ((PyObject *)__pyx_v_result); goto __pyx_L0; - /* "cdec/vectors.pxi":125 + /* "vectors.pxi":125 * return self * * def __add__(SparseVector x, SparseVector y): # <<<<<<<<<<<<<< @@ -5127,7 +5152,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_31__add__(struct __pyx_obj return __pyx_r; } -/* "cdec/vectors.pxi":130 +/* "vectors.pxi":130 * return result * * def __sub__(SparseVector x, SparseVector y): # <<<<<<<<<<<<<< @@ -5167,7 +5192,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_33__sub__(struct __pyx_obj int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__sub__", 0); - /* "cdec/vectors.pxi":131 + /* "vectors.pxi":131 * * def __sub__(SparseVector x, SparseVector y): * cdef SparseVector result = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< @@ -5180,7 +5205,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_33__sub__(struct __pyx_obj __pyx_v_result = ((struct __pyx_obj_4cdec_5_cdec_SparseVector *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/vectors.pxi":132 + /* "vectors.pxi":132 * def __sub__(SparseVector x, SparseVector y): * cdef SparseVector result = SparseVector.__new__(SparseVector) * result.vector = new FastSparseVector[weight_t](x.vector[0] - y.vector[0]) # <<<<<<<<<<<<<< @@ -5189,7 +5214,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_33__sub__(struct __pyx_obj */ __pyx_v_result->vector = new FastSparseVector<weight_t> (((__pyx_v_x->vector[0]) - (__pyx_v_y->vector[0]))); - /* "cdec/vectors.pxi":133 + /* "vectors.pxi":133 * cdef SparseVector result = SparseVector.__new__(SparseVector) * result.vector = new FastSparseVector[weight_t](x.vector[0] - y.vector[0]) * return result # <<<<<<<<<<<<<< @@ -5201,7 +5226,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_33__sub__(struct __pyx_obj __pyx_r = ((PyObject *)__pyx_v_result); goto __pyx_L0; - /* "cdec/vectors.pxi":130 + /* "vectors.pxi":130 * return result * * def __sub__(SparseVector x, SparseVector y): # <<<<<<<<<<<<<< @@ -5221,7 +5246,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_33__sub__(struct __pyx_obj return __pyx_r; } -/* "cdec/vectors.pxi":135 +/* "vectors.pxi":135 * return result * * def __mul__(x, y): # <<<<<<<<<<<<<< @@ -5257,7 +5282,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_35__mul__(PyObject *__pyx_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__mul__", 0); - /* "cdec/vectors.pxi":138 + /* "vectors.pxi":138 * cdef SparseVector vector * cdef float scalar * if isinstance(x, SparseVector): vector, scalar = x, y # <<<<<<<<<<<<<< @@ -5278,7 +5303,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_35__mul__(PyObject *__pyx_ } /*else*/ { - /* "cdec/vectors.pxi":139 + /* "vectors.pxi":139 * cdef float scalar * if isinstance(x, SparseVector): vector, scalar = x, y * else: vector, scalar = y, x # <<<<<<<<<<<<<< @@ -5295,7 +5320,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_35__mul__(PyObject *__pyx_ } __pyx_L3:; - /* "cdec/vectors.pxi":140 + /* "vectors.pxi":140 * if isinstance(x, SparseVector): vector, scalar = x, y * else: vector, scalar = y, x * cdef SparseVector result = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< @@ -5308,7 +5333,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_35__mul__(PyObject *__pyx_ __pyx_v_result = ((struct __pyx_obj_4cdec_5_cdec_SparseVector *)__pyx_t_3); __pyx_t_3 = 0; - /* "cdec/vectors.pxi":141 + /* "vectors.pxi":141 * else: vector, scalar = y, x * cdef SparseVector result = SparseVector.__new__(SparseVector) * result.vector = new FastSparseVector[weight_t](vector.vector[0] * scalar) # <<<<<<<<<<<<<< @@ -5317,7 +5342,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_35__mul__(PyObject *__pyx_ */ __pyx_v_result->vector = new FastSparseVector<weight_t> (((__pyx_v_vector->vector[0]) * __pyx_v_scalar)); - /* "cdec/vectors.pxi":142 + /* "vectors.pxi":142 * cdef SparseVector result = SparseVector.__new__(SparseVector) * result.vector = new FastSparseVector[weight_t](vector.vector[0] * scalar) * return result # <<<<<<<<<<<<<< @@ -5329,7 +5354,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_35__mul__(PyObject *__pyx_ __pyx_r = ((PyObject *)__pyx_v_result); goto __pyx_L0; - /* "cdec/vectors.pxi":135 + /* "vectors.pxi":135 * return result * * def __mul__(x, y): # <<<<<<<<<<<<<< @@ -5350,7 +5375,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_35__mul__(PyObject *__pyx_ return __pyx_r; } -/* "cdec/vectors.pxi":144 +/* "vectors.pxi":144 * return result * * def __div__(x, y): # <<<<<<<<<<<<<< @@ -5389,7 +5414,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_37__div__(PyObject *__pyx_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__div__", 0); - /* "cdec/vectors.pxi":147 + /* "vectors.pxi":147 * cdef SparseVector vector * cdef float scalar * if isinstance(x, SparseVector): vector, scalar = x, y # <<<<<<<<<<<<<< @@ -5410,7 +5435,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_37__div__(PyObject *__pyx_ } /*else*/ { - /* "cdec/vectors.pxi":148 + /* "vectors.pxi":148 * cdef float scalar * if isinstance(x, SparseVector): vector, scalar = x, y * else: vector, scalar = y, x # <<<<<<<<<<<<<< @@ -5427,7 +5452,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_37__div__(PyObject *__pyx_ } __pyx_L3:; - /* "cdec/vectors.pxi":149 + /* "vectors.pxi":149 * if isinstance(x, SparseVector): vector, scalar = x, y * else: vector, scalar = y, x * cdef SparseVector result = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< @@ -5440,7 +5465,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_37__div__(PyObject *__pyx_ __pyx_v_result = ((struct __pyx_obj_4cdec_5_cdec_SparseVector *)__pyx_t_3); __pyx_t_3 = 0; - /* "cdec/vectors.pxi":150 + /* "vectors.pxi":150 * else: vector, scalar = y, x * cdef SparseVector result = SparseVector.__new__(SparseVector) * result.vector = new FastSparseVector[weight_t](vector.vector[0] / scalar) # <<<<<<<<<<<<<< @@ -5448,7 +5473,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_37__div__(PyObject *__pyx_ */ __pyx_v_result->vector = new FastSparseVector<weight_t> (((__pyx_v_vector->vector[0]) / __pyx_v_scalar)); - /* "cdec/vectors.pxi":151 + /* "vectors.pxi":151 * cdef SparseVector result = SparseVector.__new__(SparseVector) * result.vector = new FastSparseVector[weight_t](vector.vector[0] / scalar) * return result # <<<<<<<<<<<<<< @@ -5458,7 +5483,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_37__div__(PyObject *__pyx_ __pyx_r = ((PyObject *)__pyx_v_result); goto __pyx_L0; - /* "cdec/vectors.pxi":144 + /* "vectors.pxi":144 * return result * * def __div__(x, y): # <<<<<<<<<<<<<< @@ -5480,7 +5505,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12SparseVector_37__div__(PyObject *__pyx_ } #endif /*!(#if PY_MAJOR_VERSION < 3)*/ -/* "cdec/grammar.pxi":5 +/* "grammar.pxi":5 * import cdec.sa._sa as _sa * * def _phrase(phrase): # <<<<<<<<<<<<<< @@ -5503,7 +5528,7 @@ static PyObject *__pyx_pw_4cdec_5_cdec_1_phrase(PyObject *__pyx_self, PyObject * } static PyObject *__pyx_gb_4cdec_5_cdec_7_phrase_2generator22(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/grammar.pxi":6 +/* "grammar.pxi":6 * * def _phrase(phrase): * return ' '.join(w.encode('utf8') if isinstance(w, unicode) else str(w) for w in phrase) # <<<<<<<<<<<<<< @@ -5671,7 +5696,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7_phrase_2generator22(__pyx_GeneratorObje return NULL; } -/* "cdec/grammar.pxi":5 +/* "grammar.pxi":5 * import cdec.sa._sa as _sa * * def _phrase(phrase): # <<<<<<<<<<<<<< @@ -5699,7 +5724,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec__phrase(CYTHON_UNUSED PyObject *__pyx_sel __Pyx_INCREF(__pyx_cur_scope->__pyx_v_phrase); __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_phrase); - /* "cdec/grammar.pxi":6 + /* "grammar.pxi":6 * * def _phrase(phrase): * return ' '.join(w.encode('utf8') if isinstance(w, unicode) else str(w) for w in phrase) # <<<<<<<<<<<<<< @@ -5716,7 +5741,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec__phrase(CYTHON_UNUSED PyObject *__pyx_sel __pyx_t_2 = 0; goto __pyx_L0; - /* "cdec/grammar.pxi":5 + /* "grammar.pxi":5 * import cdec.sa._sa as _sa * * def _phrase(phrase): # <<<<<<<<<<<<<< @@ -5737,7 +5762,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec__phrase(CYTHON_UNUSED PyObject *__pyx_sel return __pyx_r; } -/* "cdec/grammar.pxi":11 +/* "grammar.pxi":11 * cdef public bytes cat * cdef public unsigned ref * def __init__(self, bytes cat, unsigned ref=0): # <<<<<<<<<<<<<< @@ -5826,7 +5851,7 @@ static int __pyx_pf_4cdec_5_cdec_2NT___init__(struct __pyx_obj_4cdec_5_cdec_NT * __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__init__", 0); - /* "cdec/grammar.pxi":13 + /* "grammar.pxi":13 * def __init__(self, bytes cat, unsigned ref=0): * """NT(bytes cat, int ref=0) -> Non-terminal from category `cat`.""" * self.cat = cat # <<<<<<<<<<<<<< @@ -5839,7 +5864,7 @@ static int __pyx_pf_4cdec_5_cdec_2NT___init__(struct __pyx_obj_4cdec_5_cdec_NT * __Pyx_DECREF(__pyx_v_self->cat); __pyx_v_self->cat = __pyx_v_cat; - /* "cdec/grammar.pxi":14 + /* "grammar.pxi":14 * """NT(bytes cat, int ref=0) -> Non-terminal from category `cat`.""" * self.cat = cat * self.ref = ref # <<<<<<<<<<<<<< @@ -5848,7 +5873,7 @@ static int __pyx_pf_4cdec_5_cdec_2NT___init__(struct __pyx_obj_4cdec_5_cdec_NT * */ __pyx_v_self->ref = __pyx_v_ref; - /* "cdec/grammar.pxi":11 + /* "grammar.pxi":11 * cdef public bytes cat * cdef public unsigned ref * def __init__(self, bytes cat, unsigned ref=0): # <<<<<<<<<<<<<< @@ -5862,7 +5887,7 @@ static int __pyx_pf_4cdec_5_cdec_2NT___init__(struct __pyx_obj_4cdec_5_cdec_NT * return __pyx_r; } -/* "cdec/grammar.pxi":16 +/* "grammar.pxi":16 * self.ref = ref * * def __str__(self): # <<<<<<<<<<<<<< @@ -5894,7 +5919,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_2NT_2__str__(struct __pyx_obj_4cdec_5_cde int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__str__", 0); - /* "cdec/grammar.pxi":17 + /* "grammar.pxi":17 * * def __str__(self): * if self.ref > 0: # <<<<<<<<<<<<<< @@ -5904,7 +5929,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_2NT_2__str__(struct __pyx_obj_4cdec_5_cde __pyx_t_1 = ((__pyx_v_self->ref > 0) != 0); if (__pyx_t_1) { - /* "cdec/grammar.pxi":18 + /* "grammar.pxi":18 * def __str__(self): * if self.ref > 0: * return '[%s,%d]' % (self.cat, self.ref) # <<<<<<<<<<<<<< @@ -5930,7 +5955,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_2NT_2__str__(struct __pyx_obj_4cdec_5_cde goto __pyx_L0; } - /* "cdec/grammar.pxi":19 + /* "grammar.pxi":19 * if self.ref > 0: * return '[%s,%d]' % (self.cat, self.ref) * return '[%s]' % self.cat # <<<<<<<<<<<<<< @@ -5944,7 +5969,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_2NT_2__str__(struct __pyx_obj_4cdec_5_cde __pyx_t_2 = 0; goto __pyx_L0; - /* "cdec/grammar.pxi":16 + /* "grammar.pxi":16 * self.ref = ref * * def __str__(self): # <<<<<<<<<<<<<< @@ -5964,7 +5989,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_2NT_2__str__(struct __pyx_obj_4cdec_5_cde return __pyx_r; } -/* "cdec/grammar.pxi":9 +/* "grammar.pxi":9 * * cdef class NT: * cdef public bytes cat # <<<<<<<<<<<<<< @@ -6072,7 +6097,7 @@ static int __pyx_pf_4cdec_5_cdec_2NT_3cat_4__del__(struct __pyx_obj_4cdec_5_cdec return __pyx_r; } -/* "cdec/grammar.pxi":10 +/* "grammar.pxi":10 * cdef class NT: * cdef public bytes cat * cdef public unsigned ref # <<<<<<<<<<<<<< @@ -6154,7 +6179,7 @@ static int __pyx_pf_4cdec_5_cdec_2NT_3ref_2__set__(struct __pyx_obj_4cdec_5_cdec return __pyx_r; } -/* "cdec/grammar.pxi":23 +/* "grammar.pxi":23 * cdef class NTRef: * cdef public unsigned ref * def __init__(self, unsigned ref): # <<<<<<<<<<<<<< @@ -6223,7 +6248,7 @@ static int __pyx_pf_4cdec_5_cdec_5NTRef___init__(struct __pyx_obj_4cdec_5_cdec_N __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__init__", 0); - /* "cdec/grammar.pxi":25 + /* "grammar.pxi":25 * def __init__(self, unsigned ref): * """NTRef(int ref) -> Non-terminal reference.""" * self.ref = ref # <<<<<<<<<<<<<< @@ -6232,7 +6257,7 @@ static int __pyx_pf_4cdec_5_cdec_5NTRef___init__(struct __pyx_obj_4cdec_5_cdec_N */ __pyx_v_self->ref = __pyx_v_ref; - /* "cdec/grammar.pxi":23 + /* "grammar.pxi":23 * cdef class NTRef: * cdef public unsigned ref * def __init__(self, unsigned ref): # <<<<<<<<<<<<<< @@ -6246,7 +6271,7 @@ static int __pyx_pf_4cdec_5_cdec_5NTRef___init__(struct __pyx_obj_4cdec_5_cdec_N return __pyx_r; } -/* "cdec/grammar.pxi":27 +/* "grammar.pxi":27 * self.ref = ref * * def __str__(self): # <<<<<<<<<<<<<< @@ -6277,7 +6302,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5NTRef_2__str__(struct __pyx_obj_4cdec_5_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__str__", 0); - /* "cdec/grammar.pxi":28 + /* "grammar.pxi":28 * * def __str__(self): * return '[%d]' % self.ref # <<<<<<<<<<<<<< @@ -6294,7 +6319,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5NTRef_2__str__(struct __pyx_obj_4cdec_5_ __pyx_t_2 = 0; goto __pyx_L0; - /* "cdec/grammar.pxi":27 + /* "grammar.pxi":27 * self.ref = ref * * def __str__(self): # <<<<<<<<<<<<<< @@ -6314,7 +6339,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5NTRef_2__str__(struct __pyx_obj_4cdec_5_ return __pyx_r; } -/* "cdec/grammar.pxi":22 +/* "grammar.pxi":22 * * cdef class NTRef: * cdef public unsigned ref # <<<<<<<<<<<<<< @@ -6396,7 +6421,7 @@ static int __pyx_pf_4cdec_5_cdec_5NTRef_3ref_2__set__(struct __pyx_obj_4cdec_5_c return __pyx_r; } -/* "cdec/grammar.pxi":30 +/* "grammar.pxi":30 * return '[%d]' % self.ref * * cdef TRule convert_rule(_sa.Rule rule): # <<<<<<<<<<<<<< @@ -6428,7 +6453,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st int __pyx_clineno = 0; __Pyx_RefNannySetupContext("convert_rule", 0); - /* "cdec/grammar.pxi":31 + /* "grammar.pxi":31 * * cdef TRule convert_rule(_sa.Rule rule): * lhs = _sa.sym_tocat(rule.lhs) # <<<<<<<<<<<<<< @@ -6437,7 +6462,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st */ __pyx_v_lhs = __pyx_f_4cdec_2sa_3_sa_sym_tocat(__pyx_v_rule->lhs); - /* "cdec/grammar.pxi":32 + /* "grammar.pxi":32 * cdef TRule convert_rule(_sa.Rule rule): * lhs = _sa.sym_tocat(rule.lhs) * scores = dict(rule.scores) # <<<<<<<<<<<<<< @@ -6455,7 +6480,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st __pyx_v_scores = ((PyObject*)__pyx_t_2); __pyx_t_2 = 0; - /* "cdec/grammar.pxi":33 + /* "grammar.pxi":33 * lhs = _sa.sym_tocat(rule.lhs) * scores = dict(rule.scores) * f, e = [], [] # <<<<<<<<<<<<<< @@ -6471,7 +6496,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st __pyx_v_e = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/grammar.pxi":34 + /* "grammar.pxi":34 * scores = dict(rule.scores) * f, e = [], [] * cdef int* fsyms = rule.f.syms # <<<<<<<<<<<<<< @@ -6481,7 +6506,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st __pyx_t_3 = __pyx_v_rule->f->syms; __pyx_v_fsyms = __pyx_t_3; - /* "cdec/grammar.pxi":35 + /* "grammar.pxi":35 * f, e = [], [] * cdef int* fsyms = rule.f.syms * for i in range(rule.f.n): # <<<<<<<<<<<<<< @@ -6492,7 +6517,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_i = __pyx_t_5; - /* "cdec/grammar.pxi":36 + /* "grammar.pxi":36 * cdef int* fsyms = rule.f.syms * for i in range(rule.f.n): * if _sa.sym_isvar(fsyms[i]): # <<<<<<<<<<<<<< @@ -6502,7 +6527,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st __pyx_t_6 = (__pyx_f_4cdec_2sa_3_sa_sym_isvar((__pyx_v_fsyms[__pyx_v_i])) != 0); if (__pyx_t_6) { - /* "cdec/grammar.pxi":37 + /* "grammar.pxi":37 * for i in range(rule.f.n): * if _sa.sym_isvar(fsyms[i]): * f.append(NT(_sa.sym_tocat(fsyms[i]))) # <<<<<<<<<<<<<< @@ -6525,7 +6550,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st } /*else*/ { - /* "cdec/grammar.pxi":39 + /* "grammar.pxi":39 * f.append(NT(_sa.sym_tocat(fsyms[i]))) * else: * f.append(_sa.sym_tostring(fsyms[i])) # <<<<<<<<<<<<<< @@ -6540,7 +6565,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st __pyx_L5:; } - /* "cdec/grammar.pxi":40 + /* "grammar.pxi":40 * else: * f.append(_sa.sym_tostring(fsyms[i])) * cdef int* esyms = rule.e.syms # <<<<<<<<<<<<<< @@ -6550,7 +6575,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st __pyx_t_3 = __pyx_v_rule->e->syms; __pyx_v_esyms = __pyx_t_3; - /* "cdec/grammar.pxi":41 + /* "grammar.pxi":41 * f.append(_sa.sym_tostring(fsyms[i])) * cdef int* esyms = rule.e.syms * for i in range(rule.e.n): # <<<<<<<<<<<<<< @@ -6561,7 +6586,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_i = __pyx_t_5; - /* "cdec/grammar.pxi":42 + /* "grammar.pxi":42 * cdef int* esyms = rule.e.syms * for i in range(rule.e.n): * if _sa.sym_isvar(esyms[i]): # <<<<<<<<<<<<<< @@ -6571,7 +6596,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st __pyx_t_6 = (__pyx_f_4cdec_2sa_3_sa_sym_isvar((__pyx_v_esyms[__pyx_v_i])) != 0); if (__pyx_t_6) { - /* "cdec/grammar.pxi":43 + /* "grammar.pxi":43 * for i in range(rule.e.n): * if _sa.sym_isvar(esyms[i]): * e.append(NTRef(_sa.sym_getindex(esyms[i]))) # <<<<<<<<<<<<<< @@ -6594,7 +6619,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st } /*else*/ { - /* "cdec/grammar.pxi":45 + /* "grammar.pxi":45 * e.append(NTRef(_sa.sym_getindex(esyms[i]))) * else: * e.append(_sa.sym_tostring(esyms[i])) # <<<<<<<<<<<<<< @@ -6609,7 +6634,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st __pyx_L8:; } - /* "cdec/grammar.pxi":46 + /* "grammar.pxi":46 * else: * e.append(_sa.sym_tostring(esyms[i])) * a = list(rule.alignments()) # <<<<<<<<<<<<<< @@ -6636,18 +6661,13 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st } __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PySequence_List(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)(&PyList_Type))), __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_v_a = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_a = ((PyObject*)__pyx_t_2); + __pyx_t_2 = 0; - /* "cdec/grammar.pxi":47 + /* "grammar.pxi":47 * e.append(_sa.sym_tostring(esyms[i])) * a = list(rule.alignments()) * return TRule(lhs, f, e, scores, a) # <<<<<<<<<<<<<< @@ -6655,33 +6675,33 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st * cdef class TRule: */ __Pyx_XDECREF(((PyObject *)__pyx_r)); - __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_lhs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(5); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyBytes_FromString(__pyx_v_lhs); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); + __pyx_t_1 = PyTuple_New(5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_2); + __Pyx_GIVEREF(__pyx_t_2); __Pyx_INCREF(__pyx_v_f); - PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_v_f); + PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_v_f); __Pyx_GIVEREF(__pyx_v_f); __Pyx_INCREF(__pyx_v_e); - PyTuple_SET_ITEM(__pyx_t_2, 2, __pyx_v_e); + PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_v_e); __Pyx_GIVEREF(__pyx_v_e); __Pyx_INCREF(__pyx_v_scores); - PyTuple_SET_ITEM(__pyx_t_2, 3, __pyx_v_scores); + PyTuple_SET_ITEM(__pyx_t_1, 3, __pyx_v_scores); __Pyx_GIVEREF(__pyx_v_scores); __Pyx_INCREF(__pyx_v_a); - PyTuple_SET_ITEM(__pyx_t_2, 4, __pyx_v_a); + PyTuple_SET_ITEM(__pyx_t_1, 4, __pyx_v_a); __Pyx_GIVEREF(__pyx_v_a); - __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_4cdec_5_cdec_TRule)), __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = ((struct __pyx_obj_4cdec_5_cdec_TRule *)__pyx_t_1); - __pyx_t_1 = 0; + __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_4cdec_5_cdec_TRule)), __pyx_t_1, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_r = ((struct __pyx_obj_4cdec_5_cdec_TRule *)__pyx_t_2); + __pyx_t_2 = 0; goto __pyx_L0; - /* "cdec/grammar.pxi":30 + /* "grammar.pxi":30 * return '[%d]' % self.ref * * cdef TRule convert_rule(_sa.Rule rule): # <<<<<<<<<<<<<< @@ -6706,7 +6726,7 @@ static struct __pyx_obj_4cdec_5_cdec_TRule *__pyx_f_4cdec_5_cdec_convert_rule(st return __pyx_r; } -/* "cdec/grammar.pxi":52 +/* "grammar.pxi":52 * cdef shared_ptr[grammar.TRule]* rule * * def __init__(self, lhs, f, e, scores, a=None, text=None): # <<<<<<<<<<<<<< @@ -6830,7 +6850,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule___init__(struct __pyx_obj_4cdec_5_cdec_T int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__init__", 0); - /* "cdec/grammar.pxi":59 + /* "grammar.pxi":59 * scores: dictionary of feature scores * a: optional list of alignment points""" * self.rule = new shared_ptr[grammar.TRule](new grammar.TRule()) # <<<<<<<<<<<<<< @@ -6845,7 +6865,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule___init__(struct __pyx_obj_4cdec_5_cdec_T } __pyx_v_self->rule = new boost::shared_ptr<TRule> (__pyx_t_1); - /* "cdec/grammar.pxi":60 + /* "grammar.pxi":60 * a: optional list of alignment points""" * self.rule = new shared_ptr[grammar.TRule](new grammar.TRule()) * if lhs: # <<<<<<<<<<<<<< @@ -6855,7 +6875,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule___init__(struct __pyx_obj_4cdec_5_cdec_T __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_lhs); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "cdec/grammar.pxi":61 + /* "grammar.pxi":61 * self.rule = new shared_ptr[grammar.TRule](new grammar.TRule()) * if lhs: * self.lhs = lhs # <<<<<<<<<<<<<< @@ -6867,7 +6887,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule___init__(struct __pyx_obj_4cdec_5_cdec_T } __pyx_L3:; - /* "cdec/grammar.pxi":62 + /* "grammar.pxi":62 * if lhs: * self.lhs = lhs * if e: # <<<<<<<<<<<<<< @@ -6877,7 +6897,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule___init__(struct __pyx_obj_4cdec_5_cdec_T __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_e); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "cdec/grammar.pxi":63 + /* "grammar.pxi":63 * self.lhs = lhs * if e: * self.e = e # <<<<<<<<<<<<<< @@ -6889,7 +6909,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule___init__(struct __pyx_obj_4cdec_5_cdec_T } __pyx_L4:; - /* "cdec/grammar.pxi":64 + /* "grammar.pxi":64 * if e: * self.e = e * if f: # <<<<<<<<<<<<<< @@ -6899,7 +6919,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule___init__(struct __pyx_obj_4cdec_5_cdec_T __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_f); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "cdec/grammar.pxi":65 + /* "grammar.pxi":65 * self.e = e * if f: * self.f = f # <<<<<<<<<<<<<< @@ -6911,7 +6931,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule___init__(struct __pyx_obj_4cdec_5_cdec_T } __pyx_L5:; - /* "cdec/grammar.pxi":66 + /* "grammar.pxi":66 * if f: * self.f = f * if scores: # <<<<<<<<<<<<<< @@ -6921,7 +6941,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule___init__(struct __pyx_obj_4cdec_5_cdec_T __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_scores); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "cdec/grammar.pxi":67 + /* "grammar.pxi":67 * self.f = f * if scores: * self.scores = scores # <<<<<<<<<<<<<< @@ -6933,7 +6953,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule___init__(struct __pyx_obj_4cdec_5_cdec_T } __pyx_L6:; - /* "cdec/grammar.pxi":68 + /* "grammar.pxi":68 * if scores: * self.scores = scores * if a: # <<<<<<<<<<<<<< @@ -6943,7 +6963,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule___init__(struct __pyx_obj_4cdec_5_cdec_T __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_a); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "cdec/grammar.pxi":69 + /* "grammar.pxi":69 * self.scores = scores * if a: * self.a = a # <<<<<<<<<<<<<< @@ -6955,7 +6975,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule___init__(struct __pyx_obj_4cdec_5_cdec_T } __pyx_L7:; - /* "cdec/grammar.pxi":70 + /* "grammar.pxi":70 * if a: * self.a = a * if text: # <<<<<<<<<<<<<< @@ -6965,20 +6985,20 @@ static int __pyx_pf_4cdec_5_cdec_5TRule___init__(struct __pyx_obj_4cdec_5_cdec_T __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_text); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "cdec/grammar.pxi":71 + /* "grammar.pxi":71 * self.a = a * if text: * self.rule.get().ReadFromString(text, 0) # <<<<<<<<<<<<<< * self.rule.get().ComputeArity() * */ - __pyx_t_3 = __pyx_convert_string_from_py_(__pyx_v_text); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_convert_string_from_py_std__in_string(__pyx_v_text); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_self->rule->get()->ReadFromString(__pyx_t_3, 0); goto __pyx_L8; } __pyx_L8:; - /* "cdec/grammar.pxi":72 + /* "grammar.pxi":72 * if text: * self.rule.get().ReadFromString(text, 0) * self.rule.get().ComputeArity() # <<<<<<<<<<<<<< @@ -6987,7 +7007,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule___init__(struct __pyx_obj_4cdec_5_cdec_T */ __pyx_v_self->rule->get()->ComputeArity(); - /* "cdec/grammar.pxi":52 + /* "grammar.pxi":52 * cdef shared_ptr[grammar.TRule]* rule * * def __init__(self, lhs, f, e, scores, a=None, text=None): # <<<<<<<<<<<<<< @@ -7006,7 +7026,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule___init__(struct __pyx_obj_4cdec_5_cdec_T return __pyx_r; } -/* "cdec/grammar.pxi":74 +/* "grammar.pxi":74 * self.rule.get().ComputeArity() * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -7029,7 +7049,7 @@ static void __pyx_pf_4cdec_5_cdec_5TRule_2__dealloc__(struct __pyx_obj_4cdec_5_c __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "cdec/grammar.pxi":75 + /* "grammar.pxi":75 * * def __dealloc__(self): * del self.rule # <<<<<<<<<<<<<< @@ -7038,7 +7058,7 @@ static void __pyx_pf_4cdec_5_cdec_5TRule_2__dealloc__(struct __pyx_obj_4cdec_5_c */ delete __pyx_v_self->rule; - /* "cdec/grammar.pxi":74 + /* "grammar.pxi":74 * self.rule.get().ComputeArity() * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -7050,7 +7070,7 @@ static void __pyx_pf_4cdec_5_cdec_5TRule_2__dealloc__(struct __pyx_obj_4cdec_5_c __Pyx_RefNannyFinishContext(); } -/* "cdec/grammar.pxi":78 +/* "grammar.pxi":78 * * property arity: * def __get__(self): # <<<<<<<<<<<<<< @@ -7080,7 +7100,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_5arity___get__(struct __pyx_obj_4c int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/grammar.pxi":79 + /* "grammar.pxi":79 * property arity: * def __get__(self): * return self.rule.get().arity_ # <<<<<<<<<<<<<< @@ -7094,7 +7114,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_5arity___get__(struct __pyx_obj_4c __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/grammar.pxi":78 + /* "grammar.pxi":78 * * property arity: * def __get__(self): # <<<<<<<<<<<<<< @@ -7113,7 +7133,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_5arity___get__(struct __pyx_obj_4c return __pyx_r; } -/* "cdec/grammar.pxi":82 +/* "grammar.pxi":82 * * property f: * def __get__(self): # <<<<<<<<<<<<<< @@ -7154,7 +7174,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1f___get__(struct __pyx_obj_4cdec_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/grammar.pxi":83 + /* "grammar.pxi":83 * property f: * def __get__(self): * cdef vector[WordID]* f_ = &self.rule.get().f_ # <<<<<<<<<<<<<< @@ -7163,7 +7183,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1f___get__(struct __pyx_obj_4cdec_ */ __pyx_v_f_ = (&__pyx_v_self->rule->get()->f_); - /* "cdec/grammar.pxi":85 + /* "grammar.pxi":85 * cdef vector[WordID]* f_ = &self.rule.get().f_ * cdef WordID w * cdef f = [] # <<<<<<<<<<<<<< @@ -7175,7 +7195,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1f___get__(struct __pyx_obj_4cdec_ __pyx_v_f = __pyx_t_1; __pyx_t_1 = 0; - /* "cdec/grammar.pxi":87 + /* "grammar.pxi":87 * cdef f = [] * cdef unsigned i * cdef int idx = 0 # <<<<<<<<<<<<<< @@ -7184,7 +7204,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1f___get__(struct __pyx_obj_4cdec_ */ __pyx_v_idx = 0; - /* "cdec/grammar.pxi":88 + /* "grammar.pxi":88 * cdef unsigned i * cdef int idx = 0 * for i in range(f_.size()): # <<<<<<<<<<<<<< @@ -7195,7 +7215,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1f___get__(struct __pyx_obj_4cdec_ for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "cdec/grammar.pxi":89 + /* "grammar.pxi":89 * cdef int idx = 0 * for i in range(f_.size()): * w = f_[0][i] # <<<<<<<<<<<<<< @@ -7204,7 +7224,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1f___get__(struct __pyx_obj_4cdec_ */ __pyx_v_w = ((__pyx_v_f_[0])[__pyx_v_i]); - /* "cdec/grammar.pxi":90 + /* "grammar.pxi":90 * for i in range(f_.size()): * w = f_[0][i] * if w < 0: # <<<<<<<<<<<<<< @@ -7214,7 +7234,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1f___get__(struct __pyx_obj_4cdec_ __pyx_t_4 = ((__pyx_v_w < 0) != 0); if (__pyx_t_4) { - /* "cdec/grammar.pxi":91 + /* "grammar.pxi":91 * w = f_[0][i] * if w < 0: * idx += 1 # <<<<<<<<<<<<<< @@ -7223,7 +7243,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1f___get__(struct __pyx_obj_4cdec_ */ __pyx_v_idx = (__pyx_v_idx + 1); - /* "cdec/grammar.pxi":92 + /* "grammar.pxi":92 * if w < 0: * idx += 1 * f.append(NT(TDConvert(-w).c_str(), idx)) # <<<<<<<<<<<<<< @@ -7251,7 +7271,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1f___get__(struct __pyx_obj_4cdec_ } /*else*/ { - /* "cdec/grammar.pxi":94 + /* "grammar.pxi":94 * f.append(NT(TDConvert(-w).c_str(), idx)) * else: * f.append(unicode(TDConvert(w).c_str(), encoding='utf8')) # <<<<<<<<<<<<<< @@ -7278,7 +7298,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1f___get__(struct __pyx_obj_4cdec_ __pyx_L5:; } - /* "cdec/grammar.pxi":95 + /* "grammar.pxi":95 * else: * f.append(unicode(TDConvert(w).c_str(), encoding='utf8')) * return f # <<<<<<<<<<<<<< @@ -7290,7 +7310,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1f___get__(struct __pyx_obj_4cdec_ __pyx_r = __pyx_v_f; goto __pyx_L0; - /* "cdec/grammar.pxi":82 + /* "grammar.pxi":82 * * property f: * def __get__(self): # <<<<<<<<<<<<<< @@ -7312,7 +7332,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1f___get__(struct __pyx_obj_4cdec_ return __pyx_r; } -/* "cdec/grammar.pxi":97 +/* "grammar.pxi":97 * return f * * def __set__(self, f): # <<<<<<<<<<<<<< @@ -7352,7 +7372,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_4cdec_5_cde int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__set__", 0); - /* "cdec/grammar.pxi":98 + /* "grammar.pxi":98 * * def __set__(self, f): * cdef vector[WordID]* f_ = &self.rule.get().f_ # <<<<<<<<<<<<<< @@ -7361,7 +7381,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_4cdec_5_cde */ __pyx_v_f_ = (&__pyx_v_self->rule->get()->f_); - /* "cdec/grammar.pxi":99 + /* "grammar.pxi":99 * def __set__(self, f): * cdef vector[WordID]* f_ = &self.rule.get().f_ * f_.resize(len(f)) # <<<<<<<<<<<<<< @@ -7369,9 +7389,14 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_4cdec_5_cde * cdef int idx = 0 */ __pyx_t_1 = PyObject_Length(__pyx_v_f); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_f_->resize(__pyx_t_1); + try { + __pyx_v_f_->resize(__pyx_t_1); + } catch(...) { + __Pyx_CppExn2PyErr(); + {__pyx_filename = __pyx_f[2]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } - /* "cdec/grammar.pxi":101 + /* "grammar.pxi":101 * f_.resize(len(f)) * cdef unsigned i * cdef int idx = 0 # <<<<<<<<<<<<<< @@ -7380,7 +7405,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_4cdec_5_cde */ __pyx_v_idx = 0; - /* "cdec/grammar.pxi":102 + /* "grammar.pxi":102 * cdef unsigned i * cdef int idx = 0 * for i in range(len(f)): # <<<<<<<<<<<<<< @@ -7391,7 +7416,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_4cdec_5_cde for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_i = __pyx_t_2; - /* "cdec/grammar.pxi":103 + /* "grammar.pxi":103 * cdef int idx = 0 * for i in range(len(f)): * if isinstance(f[i], NT): # <<<<<<<<<<<<<< @@ -7405,7 +7430,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_4cdec_5_cde __pyx_t_5 = (__pyx_t_4 != 0); if (__pyx_t_5) { - /* "cdec/grammar.pxi":104 + /* "grammar.pxi":104 * for i in range(len(f)): * if isinstance(f[i], NT): * f_[0][i] = -TDConvert((<NT> f[i]).cat) # <<<<<<<<<<<<<< @@ -7421,7 +7446,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_4cdec_5_cde } /*else*/ { - /* "cdec/grammar.pxi":106 + /* "grammar.pxi":106 * f_[0][i] = -TDConvert((<NT> f[i]).cat) * else: * fi = as_str(f[i]) # <<<<<<<<<<<<<< @@ -7436,7 +7461,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_4cdec_5_cde __Pyx_XDECREF_SET(__pyx_v_fi, ((PyObject*)__pyx_t_7)); __pyx_t_7 = 0; - /* "cdec/grammar.pxi":107 + /* "grammar.pxi":107 * else: * fi = as_str(f[i]) * f_[0][i] = TDConvert(fi) # <<<<<<<<<<<<<< @@ -7449,7 +7474,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_4cdec_5_cde __pyx_L5:; } - /* "cdec/grammar.pxi":97 + /* "grammar.pxi":97 * return f * * def __set__(self, f): # <<<<<<<<<<<<<< @@ -7471,7 +7496,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1f_2__set__(struct __pyx_obj_4cdec_5_cde return __pyx_r; } -/* "cdec/grammar.pxi":110 +/* "grammar.pxi":110 * * property e: * def __get__(self): # <<<<<<<<<<<<<< @@ -7512,7 +7537,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1e___get__(struct __pyx_obj_4cdec_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/grammar.pxi":111 + /* "grammar.pxi":111 * property e: * def __get__(self): * cdef vector[WordID]* e_ = &self.rule.get().e_ # <<<<<<<<<<<<<< @@ -7521,7 +7546,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1e___get__(struct __pyx_obj_4cdec_ */ __pyx_v_e_ = (&__pyx_v_self->rule->get()->e_); - /* "cdec/grammar.pxi":113 + /* "grammar.pxi":113 * cdef vector[WordID]* e_ = &self.rule.get().e_ * cdef WordID w * cdef e = [] # <<<<<<<<<<<<<< @@ -7533,7 +7558,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1e___get__(struct __pyx_obj_4cdec_ __pyx_v_e = __pyx_t_1; __pyx_t_1 = 0; - /* "cdec/grammar.pxi":115 + /* "grammar.pxi":115 * cdef e = [] * cdef unsigned i * cdef int idx = 0 # <<<<<<<<<<<<<< @@ -7542,7 +7567,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1e___get__(struct __pyx_obj_4cdec_ */ __pyx_v_idx = 0; - /* "cdec/grammar.pxi":116 + /* "grammar.pxi":116 * cdef unsigned i * cdef int idx = 0 * for i in range(e_.size()): # <<<<<<<<<<<<<< @@ -7553,7 +7578,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1e___get__(struct __pyx_obj_4cdec_ for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "cdec/grammar.pxi":117 + /* "grammar.pxi":117 * cdef int idx = 0 * for i in range(e_.size()): * w = e_[0][i] # <<<<<<<<<<<<<< @@ -7562,7 +7587,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1e___get__(struct __pyx_obj_4cdec_ */ __pyx_v_w = ((__pyx_v_e_[0])[__pyx_v_i]); - /* "cdec/grammar.pxi":118 + /* "grammar.pxi":118 * for i in range(e_.size()): * w = e_[0][i] * if w < 1: # <<<<<<<<<<<<<< @@ -7572,7 +7597,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1e___get__(struct __pyx_obj_4cdec_ __pyx_t_4 = ((__pyx_v_w < 1) != 0); if (__pyx_t_4) { - /* "cdec/grammar.pxi":119 + /* "grammar.pxi":119 * w = e_[0][i] * if w < 1: * idx += 1 # <<<<<<<<<<<<<< @@ -7581,7 +7606,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1e___get__(struct __pyx_obj_4cdec_ */ __pyx_v_idx = (__pyx_v_idx + 1); - /* "cdec/grammar.pxi":120 + /* "grammar.pxi":120 * if w < 1: * idx += 1 * e.append(NTRef(1-w)) # <<<<<<<<<<<<<< @@ -7604,7 +7629,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1e___get__(struct __pyx_obj_4cdec_ } /*else*/ { - /* "cdec/grammar.pxi":122 + /* "grammar.pxi":122 * e.append(NTRef(1-w)) * else: * e.append(unicode(TDConvert(w).c_str(), encoding='utf8')) # <<<<<<<<<<<<<< @@ -7631,7 +7656,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1e___get__(struct __pyx_obj_4cdec_ __pyx_L5:; } - /* "cdec/grammar.pxi":123 + /* "grammar.pxi":123 * else: * e.append(unicode(TDConvert(w).c_str(), encoding='utf8')) * return e # <<<<<<<<<<<<<< @@ -7643,7 +7668,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1e___get__(struct __pyx_obj_4cdec_ __pyx_r = __pyx_v_e; goto __pyx_L0; - /* "cdec/grammar.pxi":110 + /* "grammar.pxi":110 * * property e: * def __get__(self): # <<<<<<<<<<<<<< @@ -7665,7 +7690,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_1e___get__(struct __pyx_obj_4cdec_ return __pyx_r; } -/* "cdec/grammar.pxi":125 +/* "grammar.pxi":125 * return e * * def __set__(self, e): # <<<<<<<<<<<<<< @@ -7705,7 +7730,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_4cdec_5_cde int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__set__", 0); - /* "cdec/grammar.pxi":126 + /* "grammar.pxi":126 * * def __set__(self, e): * cdef vector[WordID]* e_ = &self.rule.get().e_ # <<<<<<<<<<<<<< @@ -7714,7 +7739,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_4cdec_5_cde */ __pyx_v_e_ = (&__pyx_v_self->rule->get()->e_); - /* "cdec/grammar.pxi":127 + /* "grammar.pxi":127 * def __set__(self, e): * cdef vector[WordID]* e_ = &self.rule.get().e_ * e_.resize(len(e)) # <<<<<<<<<<<<<< @@ -7722,9 +7747,14 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_4cdec_5_cde * for i in range(len(e)): */ __pyx_t_1 = PyObject_Length(__pyx_v_e); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_e_->resize(__pyx_t_1); + try { + __pyx_v_e_->resize(__pyx_t_1); + } catch(...) { + __Pyx_CppExn2PyErr(); + {__pyx_filename = __pyx_f[2]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } - /* "cdec/grammar.pxi":129 + /* "grammar.pxi":129 * e_.resize(len(e)) * cdef unsigned i * for i in range(len(e)): # <<<<<<<<<<<<<< @@ -7735,7 +7765,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_4cdec_5_cde for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_i = __pyx_t_2; - /* "cdec/grammar.pxi":130 + /* "grammar.pxi":130 * cdef unsigned i * for i in range(len(e)): * if isinstance(e[i], NTRef): # <<<<<<<<<<<<<< @@ -7749,7 +7779,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_4cdec_5_cde __pyx_t_5 = (__pyx_t_4 != 0); if (__pyx_t_5) { - /* "cdec/grammar.pxi":131 + /* "grammar.pxi":131 * for i in range(len(e)): * if isinstance(e[i], NTRef): * e_[0][i] = 1-e[i].ref # <<<<<<<<<<<<<< @@ -7771,7 +7801,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_4cdec_5_cde } /*else*/ { - /* "cdec/grammar.pxi":133 + /* "grammar.pxi":133 * e_[0][i] = 1-e[i].ref * else: * ei = as_str(e[i]) # <<<<<<<<<<<<<< @@ -7786,7 +7816,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_4cdec_5_cde __Pyx_XDECREF_SET(__pyx_v_ei, ((PyObject*)__pyx_t_6)); __pyx_t_6 = 0; - /* "cdec/grammar.pxi":134 + /* "grammar.pxi":134 * else: * ei = as_str(e[i]) * e_[0][i] = TDConvert(ei) # <<<<<<<<<<<<<< @@ -7799,7 +7829,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_4cdec_5_cde __pyx_L5:; } - /* "cdec/grammar.pxi":125 + /* "grammar.pxi":125 * return e * * def __set__(self, e): # <<<<<<<<<<<<<< @@ -7822,7 +7852,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1e_2__set__(struct __pyx_obj_4cdec_5_cde } static PyObject *__pyx_gb_4cdec_5_cdec_5TRule_1a_2generator2(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/grammar.pxi":137 +/* "grammar.pxi":137 * * property a: * def __get__(self): # <<<<<<<<<<<<<< @@ -7901,7 +7931,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_5TRule_1a_2generator2(__pyx_GeneratorObje __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/grammar.pxi":139 + /* "grammar.pxi":139 * def __get__(self): * cdef unsigned i * cdef vector[grammar.AlignmentPoint]* a = &self.rule.get().a_ # <<<<<<<<<<<<<< @@ -7910,7 +7940,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_5TRule_1a_2generator2(__pyx_GeneratorObje */ __pyx_cur_scope->__pyx_v_a = (&__pyx_cur_scope->__pyx_v_self->rule->get()->a_); - /* "cdec/grammar.pxi":140 + /* "grammar.pxi":140 * cdef unsigned i * cdef vector[grammar.AlignmentPoint]* a = &self.rule.get().a_ * for i in range(a.size()): # <<<<<<<<<<<<<< @@ -7921,7 +7951,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_5TRule_1a_2generator2(__pyx_GeneratorObje for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "cdec/grammar.pxi":141 + /* "grammar.pxi":141 * cdef vector[grammar.AlignmentPoint]* a = &self.rule.get().a_ * for i in range(a.size()): * yield (a[0][i].s_, a[0][i].t_) # <<<<<<<<<<<<<< @@ -7955,7 +7985,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_5TRule_1a_2generator2(__pyx_GeneratorObje if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/grammar.pxi":137 + /* "grammar.pxi":137 * * property a: * def __get__(self): # <<<<<<<<<<<<<< @@ -7979,7 +8009,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_5TRule_1a_2generator2(__pyx_GeneratorObje return NULL; } -/* "cdec/grammar.pxi":143 +/* "grammar.pxi":143 * yield (a[0][i].s_, a[0][i].t_) * * def __set__(self, a): # <<<<<<<<<<<<<< @@ -8021,7 +8051,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1a_3__set__(struct __pyx_obj_4cdec_5_cde int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__set__", 0); - /* "cdec/grammar.pxi":144 + /* "grammar.pxi":144 * * def __set__(self, a): * cdef vector[grammar.AlignmentPoint]* a_ = &self.rule.get().a_ # <<<<<<<<<<<<<< @@ -8030,7 +8060,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1a_3__set__(struct __pyx_obj_4cdec_5_cde */ __pyx_v_a_ = (&__pyx_v_self->rule->get()->a_); - /* "cdec/grammar.pxi":145 + /* "grammar.pxi":145 * def __set__(self, a): * cdef vector[grammar.AlignmentPoint]* a_ = &self.rule.get().a_ * a_.resize(len(a)) # <<<<<<<<<<<<<< @@ -8038,9 +8068,14 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1a_3__set__(struct __pyx_obj_4cdec_5_cde * cdef int s, t */ __pyx_t_1 = PyObject_Length(__pyx_v_a); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_a_->resize(__pyx_t_1); + try { + __pyx_v_a_->resize(__pyx_t_1); + } catch(...) { + __Pyx_CppExn2PyErr(); + {__pyx_filename = __pyx_f[2]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } - /* "cdec/grammar.pxi":148 + /* "grammar.pxi":148 * cdef unsigned i * cdef int s, t * for i in range(len(a)): # <<<<<<<<<<<<<< @@ -8051,7 +8086,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1a_3__set__(struct __pyx_obj_4cdec_5_cde for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_i = __pyx_t_2; - /* "cdec/grammar.pxi":149 + /* "grammar.pxi":149 * cdef int s, t * for i in range(len(a)): * s, t = a[i] # <<<<<<<<<<<<<< @@ -8117,7 +8152,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1a_3__set__(struct __pyx_obj_4cdec_5_cde __pyx_v_s = __pyx_t_8; __pyx_v_t = __pyx_t_9; - /* "cdec/grammar.pxi":150 + /* "grammar.pxi":150 * for i in range(len(a)): * s, t = a[i] * a_[0][i] = grammar.AlignmentPoint(s, t) # <<<<<<<<<<<<<< @@ -8127,7 +8162,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1a_3__set__(struct __pyx_obj_4cdec_5_cde ((__pyx_v_a_[0])[__pyx_v_i]) = AlignmentPoint(__pyx_v_s, __pyx_v_t); } - /* "cdec/grammar.pxi":143 + /* "grammar.pxi":143 * yield (a[0][i].s_, a[0][i].t_) * * def __set__(self, a): # <<<<<<<<<<<<<< @@ -8150,7 +8185,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_1a_3__set__(struct __pyx_obj_4cdec_5_cde return __pyx_r; } -/* "cdec/grammar.pxi":153 +/* "grammar.pxi":153 * * property scores: * def __get__(self): # <<<<<<<<<<<<<< @@ -8181,7 +8216,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_6scores___get__(struct __pyx_obj_4 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/grammar.pxi":154 + /* "grammar.pxi":154 * property scores: * def __get__(self): * cdef SparseVector scores = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< @@ -8194,7 +8229,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_6scores___get__(struct __pyx_obj_4 __pyx_v_scores = ((struct __pyx_obj_4cdec_5_cdec_SparseVector *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/grammar.pxi":155 + /* "grammar.pxi":155 * def __get__(self): * cdef SparseVector scores = SparseVector.__new__(SparseVector) * scores.vector = new FastSparseVector[double](self.rule.get().scores_) # <<<<<<<<<<<<<< @@ -8203,7 +8238,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_6scores___get__(struct __pyx_obj_4 */ __pyx_v_scores->vector = new FastSparseVector<double> (__pyx_v_self->rule->get()->scores_); - /* "cdec/grammar.pxi":156 + /* "grammar.pxi":156 * cdef SparseVector scores = SparseVector.__new__(SparseVector) * scores.vector = new FastSparseVector[double](self.rule.get().scores_) * return scores # <<<<<<<<<<<<<< @@ -8215,7 +8250,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_6scores___get__(struct __pyx_obj_4 __pyx_r = ((PyObject *)__pyx_v_scores); goto __pyx_L0; - /* "cdec/grammar.pxi":153 + /* "grammar.pxi":153 * * property scores: * def __get__(self): # <<<<<<<<<<<<<< @@ -8235,7 +8270,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_6scores___get__(struct __pyx_obj_4 return __pyx_r; } -/* "cdec/grammar.pxi":158 +/* "grammar.pxi":158 * return scores * * def __set__(self, scores): # <<<<<<<<<<<<<< @@ -8280,7 +8315,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_4cdec_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__set__", 0); - /* "cdec/grammar.pxi":159 + /* "grammar.pxi":159 * * def __set__(self, scores): * cdef FastSparseVector[double]* scores_ = &self.rule.get().scores_ # <<<<<<<<<<<<<< @@ -8289,7 +8324,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_4cdec_ */ __pyx_v_scores_ = (&__pyx_v_self->rule->get()->scores_); - /* "cdec/grammar.pxi":160 + /* "grammar.pxi":160 * def __set__(self, scores): * cdef FastSparseVector[double]* scores_ = &self.rule.get().scores_ * scores_.clear() # <<<<<<<<<<<<<< @@ -8298,7 +8333,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_4cdec_ */ __pyx_v_scores_->clear(); - /* "cdec/grammar.pxi":163 + /* "grammar.pxi":163 * cdef int fid * cdef float fval * for fname, fval in scores.items(): # <<<<<<<<<<<<<< @@ -8419,7 +8454,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_4cdec_ __pyx_t_3 = 0; __pyx_v_fval = __pyx_t_9; - /* "cdec/grammar.pxi":164 + /* "grammar.pxi":164 * cdef float fval * for fname, fval in scores.items(): * fn = as_str(fname) # <<<<<<<<<<<<<< @@ -8431,7 +8466,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_4cdec_ __Pyx_XDECREF_SET(__pyx_v_fn, ((PyObject*)__pyx_t_1)); __pyx_t_1 = 0; - /* "cdec/grammar.pxi":165 + /* "grammar.pxi":165 * for fname, fval in scores.items(): * fn = as_str(fname) * fid = FDConvert(fn) # <<<<<<<<<<<<<< @@ -8441,7 +8476,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_4cdec_ __pyx_t_10 = __Pyx_PyObject_AsString(__pyx_v_fn); if (unlikely((!__pyx_t_10) && PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_fid = FD::Convert(__pyx_t_10); - /* "cdec/grammar.pxi":166 + /* "grammar.pxi":166 * fn = as_str(fname) * fid = FDConvert(fn) * if fid < 0: raise KeyError(fname) # <<<<<<<<<<<<<< @@ -8463,7 +8498,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_4cdec_ {__pyx_filename = __pyx_f[2]; __pyx_lineno = 166; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/grammar.pxi":167 + /* "grammar.pxi":167 * fid = FDConvert(fn) * if fid < 0: raise KeyError(fname) * scores_.set_value(fid, fval) # <<<<<<<<<<<<<< @@ -8472,7 +8507,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_4cdec_ */ __pyx_v_scores_->set_value(__pyx_v_fid, __pyx_v_fval); - /* "cdec/grammar.pxi":163 + /* "grammar.pxi":163 * cdef int fid * cdef float fval * for fname, fval in scores.items(): # <<<<<<<<<<<<<< @@ -8482,7 +8517,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_4cdec_ } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "cdec/grammar.pxi":158 + /* "grammar.pxi":158 * return scores * * def __set__(self, scores): # <<<<<<<<<<<<<< @@ -8508,7 +8543,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_6scores_2__set__(struct __pyx_obj_4cdec_ return __pyx_r; } -/* "cdec/grammar.pxi":170 +/* "grammar.pxi":170 * * property lhs: * def __get__(self): # <<<<<<<<<<<<<< @@ -8539,7 +8574,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_3lhs___get__(struct __pyx_obj_4cde int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/grammar.pxi":171 + /* "grammar.pxi":171 * property lhs: * def __get__(self): * return NT(TDConvert(-self.rule.get().lhs_).c_str()) # <<<<<<<<<<<<<< @@ -8561,7 +8596,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_3lhs___get__(struct __pyx_obj_4cde __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/grammar.pxi":170 + /* "grammar.pxi":170 * * property lhs: * def __get__(self): # <<<<<<<<<<<<<< @@ -8581,7 +8616,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_3lhs___get__(struct __pyx_obj_4cde return __pyx_r; } -/* "cdec/grammar.pxi":173 +/* "grammar.pxi":173 * return NT(TDConvert(-self.rule.get().lhs_).c_str()) * * def __set__(self, lhs): # <<<<<<<<<<<<<< @@ -8616,7 +8651,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_3lhs_2__set__(struct __pyx_obj_4cdec_5_c __Pyx_RefNannySetupContext("__set__", 0); __Pyx_INCREF(__pyx_v_lhs); - /* "cdec/grammar.pxi":174 + /* "grammar.pxi":174 * * def __set__(self, lhs): * if not isinstance(lhs, NT): # <<<<<<<<<<<<<< @@ -8627,7 +8662,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_3lhs_2__set__(struct __pyx_obj_4cdec_5_c __pyx_t_2 = ((!(__pyx_t_1 != 0)) != 0); if (__pyx_t_2) { - /* "cdec/grammar.pxi":175 + /* "grammar.pxi":175 * def __set__(self, lhs): * if not isinstance(lhs, NT): * lhs = NT(lhs) # <<<<<<<<<<<<<< @@ -8648,7 +8683,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_3lhs_2__set__(struct __pyx_obj_4cdec_5_c } __pyx_L3:; - /* "cdec/grammar.pxi":176 + /* "grammar.pxi":176 * if not isinstance(lhs, NT): * lhs = NT(lhs) * self.rule.get().lhs_ = -TDConvert((<NT> lhs).cat) # <<<<<<<<<<<<<< @@ -8658,7 +8693,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_3lhs_2__set__(struct __pyx_obj_4cdec_5_c __pyx_t_5 = __Pyx_PyObject_AsString(((struct __pyx_obj_4cdec_5_cdec_NT *)__pyx_v_lhs)->cat); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 176; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_self->rule->get()->lhs_ = (-TD::Convert(__pyx_t_5)); - /* "cdec/grammar.pxi":173 + /* "grammar.pxi":173 * return NT(TDConvert(-self.rule.get().lhs_).c_str()) * * def __set__(self, lhs): # <<<<<<<<<<<<<< @@ -8680,7 +8715,7 @@ static int __pyx_pf_4cdec_5_cdec_5TRule_3lhs_2__set__(struct __pyx_obj_4cdec_5_c return __pyx_r; } -/* "cdec/grammar.pxi":178 +/* "grammar.pxi":178 * self.rule.get().lhs_ = -TDConvert((<NT> lhs).cat) * * def __str__(self): # <<<<<<<<<<<<<< @@ -8702,7 +8737,7 @@ static PyObject *__pyx_pw_4cdec_5_cdec_5TRule_5__str__(PyObject *__pyx_v_self) { } static PyObject *__pyx_gb_4cdec_5_cdec_5TRule_7__str___2generator23(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/grammar.pxi":179 +/* "grammar.pxi":179 * * def __str__(self): * scores = ' '.join('%s=%s' % feat for feat in self.scores) # <<<<<<<<<<<<<< @@ -8849,7 +8884,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_5TRule_7__str___2generator23(__pyx_Genera return NULL; } -/* "cdec/grammar.pxi":178 +/* "grammar.pxi":178 * self.rule.get().lhs_ = -TDConvert((<NT> lhs).cat) * * def __str__(self): # <<<<<<<<<<<<<< @@ -8883,7 +8918,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_4__str__(struct __pyx_obj_4cdec_5_ __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); - /* "cdec/grammar.pxi":179 + /* "grammar.pxi":179 * * def __str__(self): * scores = ' '.join('%s=%s' % feat for feat in self.scores) # <<<<<<<<<<<<<< @@ -8898,7 +8933,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_4__str__(struct __pyx_obj_4cdec_5_ __pyx_v_scores = ((PyObject*)__pyx_t_2); __pyx_t_2 = 0; - /* "cdec/grammar.pxi":180 + /* "grammar.pxi":180 * def __str__(self): * scores = ' '.join('%s=%s' % feat for feat in self.scores) * return '%s ||| %s ||| %s ||| %s' % (self.lhs, # <<<<<<<<<<<<<< @@ -8909,7 +8944,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_4__str__(struct __pyx_obj_4cdec_5_ __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_cur_scope->__pyx_v_self), __pyx_n_s_lhs); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 180; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - /* "cdec/grammar.pxi":181 + /* "grammar.pxi":181 * scores = ' '.join('%s=%s' % feat for feat in self.scores) * return '%s ||| %s ||| %s ||| %s' % (self.lhs, * _phrase(self.f), _phrase(self.e), scores) # <<<<<<<<<<<<<< @@ -8977,7 +9012,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_4__str__(struct __pyx_obj_4cdec_5_ } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "cdec/grammar.pxi":180 + /* "grammar.pxi":180 * def __str__(self): * scores = ' '.join('%s=%s' % feat for feat in self.scores) * return '%s ||| %s ||| %s ||| %s' % (self.lhs, # <<<<<<<<<<<<<< @@ -9005,7 +9040,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_4__str__(struct __pyx_obj_4cdec_5_ __pyx_t_3 = 0; goto __pyx_L0; - /* "cdec/grammar.pxi":178 + /* "grammar.pxi":178 * self.rule.get().lhs_ = -TDConvert((<NT> lhs).cat) * * def __str__(self): # <<<<<<<<<<<<<< @@ -9032,7 +9067,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_5TRule_4__str__(struct __pyx_obj_4cdec_5_ return __pyx_r; } -/* "cdec/grammar.pxi":184 +/* "grammar.pxi":184 * * cdef class MRule(TRule): * def __init__(self, lhs, rhs, scores): # <<<<<<<<<<<<<< @@ -9134,7 +9169,7 @@ static int __pyx_pf_4cdec_5_cdec_5MRule___init__(struct __pyx_obj_4cdec_5_cdec_M int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__init__", 0); - /* "cdec/grammar.pxi":189 + /* "grammar.pxi":189 * rhs: right hand side phrase (list of words/NT) * scores: dictionary of feature scores""" * cdef unsigned i = 1 # <<<<<<<<<<<<<< @@ -9143,7 +9178,7 @@ static int __pyx_pf_4cdec_5_cdec_5MRule___init__(struct __pyx_obj_4cdec_5_cdec_M */ __pyx_v_i = 1; - /* "cdec/grammar.pxi":190 + /* "grammar.pxi":190 * scores: dictionary of feature scores""" * cdef unsigned i = 1 * e = [] # <<<<<<<<<<<<<< @@ -9155,7 +9190,7 @@ static int __pyx_pf_4cdec_5_cdec_5MRule___init__(struct __pyx_obj_4cdec_5_cdec_M __pyx_v_e = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/grammar.pxi":191 + /* "grammar.pxi":191 * cdef unsigned i = 1 * e = [] * for s in rhs: # <<<<<<<<<<<<<< @@ -9202,7 +9237,7 @@ static int __pyx_pf_4cdec_5_cdec_5MRule___init__(struct __pyx_obj_4cdec_5_cdec_M __Pyx_XDECREF_SET(__pyx_v_s, __pyx_t_4); __pyx_t_4 = 0; - /* "cdec/grammar.pxi":192 + /* "grammar.pxi":192 * e = [] * for s in rhs: * if isinstance(s, NT): # <<<<<<<<<<<<<< @@ -9213,7 +9248,7 @@ static int __pyx_pf_4cdec_5_cdec_5MRule___init__(struct __pyx_obj_4cdec_5_cdec_M __pyx_t_6 = (__pyx_t_5 != 0); if (__pyx_t_6) { - /* "cdec/grammar.pxi":193 + /* "grammar.pxi":193 * for s in rhs: * if isinstance(s, NT): * e.append(NTRef(i)) # <<<<<<<<<<<<<< @@ -9233,7 +9268,7 @@ static int __pyx_pf_4cdec_5_cdec_5MRule___init__(struct __pyx_obj_4cdec_5_cdec_M __pyx_t_8 = __Pyx_PyList_Append(__pyx_v_e, __pyx_t_4); if (unlikely(__pyx_t_8 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 193; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "cdec/grammar.pxi":194 + /* "grammar.pxi":194 * if isinstance(s, NT): * e.append(NTRef(i)) * i += 1 # <<<<<<<<<<<<<< @@ -9245,7 +9280,7 @@ static int __pyx_pf_4cdec_5_cdec_5MRule___init__(struct __pyx_obj_4cdec_5_cdec_M } /*else*/ { - /* "cdec/grammar.pxi":196 + /* "grammar.pxi":196 * i += 1 * else: * e.append(s) # <<<<<<<<<<<<<< @@ -9256,7 +9291,7 @@ static int __pyx_pf_4cdec_5_cdec_5MRule___init__(struct __pyx_obj_4cdec_5_cdec_M } __pyx_L5:; - /* "cdec/grammar.pxi":191 + /* "grammar.pxi":191 * cdef unsigned i = 1 * e = [] * for s in rhs: # <<<<<<<<<<<<<< @@ -9266,7 +9301,7 @@ static int __pyx_pf_4cdec_5_cdec_5MRule___init__(struct __pyx_obj_4cdec_5_cdec_M } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/grammar.pxi":197 + /* "grammar.pxi":197 * else: * e.append(s) * super(MRule, self).__init__(lhs, rhs, e, scores, None) # <<<<<<<<<<<<<< @@ -9325,7 +9360,7 @@ static int __pyx_pf_4cdec_5_cdec_5MRule___init__(struct __pyx_obj_4cdec_5_cdec_M __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/grammar.pxi":184 + /* "grammar.pxi":184 * * cdef class MRule(TRule): * def __init__(self, lhs, rhs, scores): # <<<<<<<<<<<<<< @@ -9350,7 +9385,7 @@ static int __pyx_pf_4cdec_5_cdec_5MRule___init__(struct __pyx_obj_4cdec_5_cdec_M return __pyx_r; } -/* "cdec/grammar.pxi":202 +/* "grammar.pxi":202 * cdef shared_ptr[grammar.Grammar]* grammar * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -9373,7 +9408,7 @@ static void __pyx_pf_4cdec_5_cdec_7Grammar___dealloc__(struct __pyx_obj_4cdec_5_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "cdec/grammar.pxi":203 + /* "grammar.pxi":203 * * def __dealloc__(self): * del self.grammar # <<<<<<<<<<<<<< @@ -9382,7 +9417,7 @@ static void __pyx_pf_4cdec_5_cdec_7Grammar___dealloc__(struct __pyx_obj_4cdec_5_ */ delete __pyx_v_self->grammar; - /* "cdec/grammar.pxi":202 + /* "grammar.pxi":202 * cdef shared_ptr[grammar.Grammar]* grammar * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -9395,7 +9430,7 @@ static void __pyx_pf_4cdec_5_cdec_7Grammar___dealloc__(struct __pyx_obj_4cdec_5_ } static PyObject *__pyx_gb_4cdec_5_cdec_7Grammar_4generator3(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/grammar.pxi":205 +/* "grammar.pxi":205 * del self.grammar * * def __iter__(self): # <<<<<<<<<<<<<< @@ -9472,7 +9507,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Grammar_4generator3(__pyx_GeneratorObjec __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 205; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/grammar.pxi":206 + /* "grammar.pxi":206 * * def __iter__(self): * cdef grammar.const_GrammarIter* root = self.grammar.get().GetRoot() # <<<<<<<<<<<<<< @@ -9481,7 +9516,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Grammar_4generator3(__pyx_GeneratorObjec */ __pyx_cur_scope->__pyx_v_root = __pyx_cur_scope->__pyx_v_self->grammar->get()->GetRoot(); - /* "cdec/grammar.pxi":207 + /* "grammar.pxi":207 * def __iter__(self): * cdef grammar.const_GrammarIter* root = self.grammar.get().GetRoot() * cdef grammar.const_RuleBin* rbin = root.GetRules() # <<<<<<<<<<<<<< @@ -9490,7 +9525,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Grammar_4generator3(__pyx_GeneratorObjec */ __pyx_cur_scope->__pyx_v_rbin = __pyx_cur_scope->__pyx_v_root->GetRules(); - /* "cdec/grammar.pxi":210 + /* "grammar.pxi":210 * cdef TRule trule * cdef unsigned i * for i in range(rbin.GetNumRules()): # <<<<<<<<<<<<<< @@ -9501,7 +9536,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Grammar_4generator3(__pyx_GeneratorObjec for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "cdec/grammar.pxi":211 + /* "grammar.pxi":211 * cdef unsigned i * for i in range(rbin.GetNumRules()): * trule = TRule.__new__(TRule) # <<<<<<<<<<<<<< @@ -9516,7 +9551,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Grammar_4generator3(__pyx_GeneratorObjec __Pyx_GIVEREF(__pyx_t_3); __pyx_t_3 = 0; - /* "cdec/grammar.pxi":212 + /* "grammar.pxi":212 * for i in range(rbin.GetNumRules()): * trule = TRule.__new__(TRule) * trule.rule = new shared_ptr[grammar.TRule](rbin.GetIthRule(i)) # <<<<<<<<<<<<<< @@ -9525,7 +9560,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Grammar_4generator3(__pyx_GeneratorObjec */ __pyx_cur_scope->__pyx_v_trule->rule = new boost::shared_ptr<TRule> (__pyx_cur_scope->__pyx_v_rbin->GetIthRule(__pyx_cur_scope->__pyx_v_i)); - /* "cdec/grammar.pxi":213 + /* "grammar.pxi":213 * trule = TRule.__new__(TRule) * trule.rule = new shared_ptr[grammar.TRule](rbin.GetIthRule(i)) * yield trule # <<<<<<<<<<<<<< @@ -9547,7 +9582,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Grammar_4generator3(__pyx_GeneratorObjec if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 213; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/grammar.pxi":205 + /* "grammar.pxi":205 * del self.grammar * * def __iter__(self): # <<<<<<<<<<<<<< @@ -9569,7 +9604,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Grammar_4generator3(__pyx_GeneratorObjec return NULL; } -/* "cdec/grammar.pxi":216 +/* "grammar.pxi":216 * * property name: * def __get__(self): # <<<<<<<<<<<<<< @@ -9600,7 +9635,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Grammar_4name___get__(struct __pyx_obj_4 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/grammar.pxi":217 + /* "grammar.pxi":217 * property name: * def __get__(self): * str(self.grammar.get().GetGrammarName().c_str()) # <<<<<<<<<<<<<< @@ -9619,7 +9654,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Grammar_4name___get__(struct __pyx_obj_4 __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/grammar.pxi":216 + /* "grammar.pxi":216 * * property name: * def __get__(self): # <<<<<<<<<<<<<< @@ -9641,7 +9676,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Grammar_4name___get__(struct __pyx_obj_4 return __pyx_r; } -/* "cdec/grammar.pxi":219 +/* "grammar.pxi":219 * str(self.grammar.get().GetGrammarName().c_str()) * * def __set__(self, name): # <<<<<<<<<<<<<< @@ -9673,7 +9708,7 @@ static int __pyx_pf_4cdec_5_cdec_7Grammar_4name_2__set__(struct __pyx_obj_4cdec_ __Pyx_RefNannySetupContext("__set__", 0); __Pyx_INCREF(__pyx_v_name); - /* "cdec/grammar.pxi":220 + /* "grammar.pxi":220 * * def __set__(self, name): * name = as_str(name) # <<<<<<<<<<<<<< @@ -9685,17 +9720,17 @@ static int __pyx_pf_4cdec_5_cdec_7Grammar_4name_2__set__(struct __pyx_obj_4cdec_ __Pyx_DECREF_SET(__pyx_v_name, __pyx_t_1); __pyx_t_1 = 0; - /* "cdec/grammar.pxi":221 + /* "grammar.pxi":221 * def __set__(self, name): * name = as_str(name) * self.grammar.get().SetGrammarName(name) # <<<<<<<<<<<<<< * * cdef class TextGrammar(Grammar): */ - __pyx_t_2 = __pyx_convert_string_from_py_(__pyx_v_name); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 221; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __pyx_convert_string_from_py_std__in_string(__pyx_v_name); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 221; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_self->grammar->get()->SetGrammarName(__pyx_t_2); - /* "cdec/grammar.pxi":219 + /* "grammar.pxi":219 * str(self.grammar.get().GetGrammarName().c_str()) * * def __set__(self, name): # <<<<<<<<<<<<<< @@ -9716,7 +9751,7 @@ static int __pyx_pf_4cdec_5_cdec_7Grammar_4name_2__set__(struct __pyx_obj_4cdec_ return __pyx_r; } -/* "cdec/grammar.pxi":224 +/* "grammar.pxi":224 * * cdef class TextGrammar(Grammar): * def __init__(self, rules): # <<<<<<<<<<<<<< @@ -9796,7 +9831,7 @@ static int __pyx_pf_4cdec_5_cdec_11TextGrammar___init__(struct __pyx_obj_4cdec_5 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__init__", 0); - /* "cdec/grammar.pxi":226 + /* "grammar.pxi":226 * def __init__(self, rules): * """TextGrammar(rules) -> SCFG Grammar containing the rules.""" * self.grammar = new shared_ptr[grammar.Grammar](new grammar.TextGrammar()) # <<<<<<<<<<<<<< @@ -9805,7 +9840,7 @@ static int __pyx_pf_4cdec_5_cdec_11TextGrammar___init__(struct __pyx_obj_4cdec_5 */ __pyx_v_self->__pyx_base.grammar = new boost::shared_ptr<Grammar> (new TextGrammar()); - /* "cdec/grammar.pxi":227 + /* "grammar.pxi":227 * """TextGrammar(rules) -> SCFG Grammar containing the rules.""" * self.grammar = new shared_ptr[grammar.Grammar](new grammar.TextGrammar()) * cdef grammar.TextGrammar* _g = <grammar.TextGrammar*> self.grammar.get() # <<<<<<<<<<<<<< @@ -9814,7 +9849,7 @@ static int __pyx_pf_4cdec_5_cdec_11TextGrammar___init__(struct __pyx_obj_4cdec_5 */ __pyx_v__g = ((TextGrammar *)__pyx_v_self->__pyx_base.grammar->get()); - /* "cdec/grammar.pxi":228 + /* "grammar.pxi":228 * self.grammar = new shared_ptr[grammar.Grammar](new grammar.TextGrammar()) * cdef grammar.TextGrammar* _g = <grammar.TextGrammar*> self.grammar.get() * for trule in rules: # <<<<<<<<<<<<<< @@ -9861,7 +9896,7 @@ static int __pyx_pf_4cdec_5_cdec_11TextGrammar___init__(struct __pyx_obj_4cdec_5 __Pyx_XDECREF_SET(__pyx_v_trule, __pyx_t_4); __pyx_t_4 = 0; - /* "cdec/grammar.pxi":229 + /* "grammar.pxi":229 * cdef grammar.TextGrammar* _g = <grammar.TextGrammar*> self.grammar.get() * for trule in rules: * if isinstance(trule, _sa.Rule): # <<<<<<<<<<<<<< @@ -9872,7 +9907,7 @@ static int __pyx_pf_4cdec_5_cdec_11TextGrammar___init__(struct __pyx_obj_4cdec_5 __pyx_t_6 = (__pyx_t_5 != 0); if (__pyx_t_6) { - /* "cdec/grammar.pxi":230 + /* "grammar.pxi":230 * for trule in rules: * if isinstance(trule, _sa.Rule): * trule = convert_rule(trule) # <<<<<<<<<<<<<< @@ -9887,7 +9922,7 @@ static int __pyx_pf_4cdec_5_cdec_11TextGrammar___init__(struct __pyx_obj_4cdec_5 goto __pyx_L5; } - /* "cdec/grammar.pxi":231 + /* "grammar.pxi":231 * if isinstance(trule, _sa.Rule): * trule = convert_rule(trule) * elif not isinstance(trule, TRule): # <<<<<<<<<<<<<< @@ -9898,7 +9933,7 @@ static int __pyx_pf_4cdec_5_cdec_11TextGrammar___init__(struct __pyx_obj_4cdec_5 __pyx_t_5 = ((!(__pyx_t_6 != 0)) != 0); if (__pyx_t_5) { - /* "cdec/grammar.pxi":232 + /* "grammar.pxi":232 * trule = convert_rule(trule) * elif not isinstance(trule, TRule): * raise ValueError('the grammar should contain TRule objects') # <<<<<<<<<<<<<< @@ -9912,14 +9947,14 @@ static int __pyx_pf_4cdec_5_cdec_11TextGrammar___init__(struct __pyx_obj_4cdec_5 } __pyx_L5:; - /* "cdec/grammar.pxi":233 + /* "grammar.pxi":233 * elif not isinstance(trule, TRule): * raise ValueError('the grammar should contain TRule objects') * _g.AddRule((<TRule> trule).rule[0]) # <<<<<<<<<<<<<< */ __pyx_v__g->AddRule((((struct __pyx_obj_4cdec_5_cdec_TRule *)__pyx_v_trule)->rule[0])); - /* "cdec/grammar.pxi":228 + /* "grammar.pxi":228 * self.grammar = new shared_ptr[grammar.Grammar](new grammar.TextGrammar()) * cdef grammar.TextGrammar* _g = <grammar.TextGrammar*> self.grammar.get() * for trule in rules: # <<<<<<<<<<<<<< @@ -9929,7 +9964,7 @@ static int __pyx_pf_4cdec_5_cdec_11TextGrammar___init__(struct __pyx_obj_4cdec_5 } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/grammar.pxi":224 + /* "grammar.pxi":224 * * cdef class TextGrammar(Grammar): * def __init__(self, rules): # <<<<<<<<<<<<<< @@ -9951,7 +9986,7 @@ static int __pyx_pf_4cdec_5_cdec_11TextGrammar___init__(struct __pyx_obj_4cdec_5 return __pyx_r; } -/* "cdec/hypergraph.pxi":8 +/* "hypergraph.pxi":8 * cdef MT19937* rng * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -9975,7 +10010,7 @@ static void __pyx_pf_4cdec_5_cdec_10Hypergraph___dealloc__(struct __pyx_obj_4cde int __pyx_t_1; __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "cdec/hypergraph.pxi":9 + /* "hypergraph.pxi":9 * * def __dealloc__(self): * del self.hg # <<<<<<<<<<<<<< @@ -9984,7 +10019,7 @@ static void __pyx_pf_4cdec_5_cdec_10Hypergraph___dealloc__(struct __pyx_obj_4cde */ delete __pyx_v_self->hg; - /* "cdec/hypergraph.pxi":10 + /* "hypergraph.pxi":10 * def __dealloc__(self): * del self.hg * if self.rng != NULL: # <<<<<<<<<<<<<< @@ -9994,7 +10029,7 @@ static void __pyx_pf_4cdec_5_cdec_10Hypergraph___dealloc__(struct __pyx_obj_4cde __pyx_t_1 = ((__pyx_v_self->rng != NULL) != 0); if (__pyx_t_1) { - /* "cdec/hypergraph.pxi":11 + /* "hypergraph.pxi":11 * del self.hg * if self.rng != NULL: * del self.rng # <<<<<<<<<<<<<< @@ -10006,7 +10041,7 @@ static void __pyx_pf_4cdec_5_cdec_10Hypergraph___dealloc__(struct __pyx_obj_4cde } __pyx_L3:; - /* "cdec/hypergraph.pxi":8 + /* "hypergraph.pxi":8 * cdef MT19937* rng * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -10018,7 +10053,7 @@ static void __pyx_pf_4cdec_5_cdec_10Hypergraph___dealloc__(struct __pyx_obj_4cde __Pyx_RefNannyFinishContext(); } -/* "cdec/hypergraph.pxi":13 +/* "hypergraph.pxi":13 * del self.rng * * cdef MT19937* _rng(self): # <<<<<<<<<<<<<< @@ -10036,7 +10071,7 @@ static MT19937 *__pyx_f_4cdec_5_cdec_10Hypergraph__rng(struct __pyx_obj_4cdec_5_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_rng", 0); - /* "cdec/hypergraph.pxi":14 + /* "hypergraph.pxi":14 * * cdef MT19937* _rng(self): * if self.rng == NULL: # <<<<<<<<<<<<<< @@ -10046,7 +10081,7 @@ static MT19937 *__pyx_f_4cdec_5_cdec_10Hypergraph__rng(struct __pyx_obj_4cdec_5_ __pyx_t_1 = ((__pyx_v_self->rng == NULL) != 0); if (__pyx_t_1) { - /* "cdec/hypergraph.pxi":15 + /* "hypergraph.pxi":15 * cdef MT19937* _rng(self): * if self.rng == NULL: * self.rng = new MT19937() # <<<<<<<<<<<<<< @@ -10064,7 +10099,7 @@ static MT19937 *__pyx_f_4cdec_5_cdec_10Hypergraph__rng(struct __pyx_obj_4cdec_5_ } __pyx_L3:; - /* "cdec/hypergraph.pxi":16 + /* "hypergraph.pxi":16 * if self.rng == NULL: * self.rng = new MT19937() * return self.rng # <<<<<<<<<<<<<< @@ -10074,7 +10109,7 @@ static MT19937 *__pyx_f_4cdec_5_cdec_10Hypergraph__rng(struct __pyx_obj_4cdec_5_ __pyx_r = __pyx_v_self->rng; goto __pyx_L0; - /* "cdec/hypergraph.pxi":13 + /* "hypergraph.pxi":13 * del self.rng * * cdef MT19937* _rng(self): # <<<<<<<<<<<<<< @@ -10091,7 +10126,7 @@ static MT19937 *__pyx_f_4cdec_5_cdec_10Hypergraph__rng(struct __pyx_obj_4cdec_5_ return __pyx_r; } -/* "cdec/hypergraph.pxi":18 +/* "hypergraph.pxi":18 * return self.rng * * def viterbi(self): # <<<<<<<<<<<<<< @@ -10124,7 +10159,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_2viterbi(struct __pyx_obj_4c int __pyx_clineno = 0; __Pyx_RefNannySetupContext("viterbi", 0); - /* "cdec/hypergraph.pxi":21 + /* "hypergraph.pxi":21 * """hg.viterbi() -> String for the best hypothesis in the hypergraph.""" * cdef vector[WordID] trans * hypergraph.ViterbiESentence(self.hg[0], &trans) # <<<<<<<<<<<<<< @@ -10133,7 +10168,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_2viterbi(struct __pyx_obj_4c */ ViterbiESentence((__pyx_v_self->hg[0]), (&__pyx_v_trans)); - /* "cdec/hypergraph.pxi":22 + /* "hypergraph.pxi":22 * cdef vector[WordID] trans * hypergraph.ViterbiESentence(self.hg[0], &trans) * return unicode(GetString(trans).c_str(), 'utf8') # <<<<<<<<<<<<<< @@ -10158,7 +10193,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_2viterbi(struct __pyx_obj_4c __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/hypergraph.pxi":18 + /* "hypergraph.pxi":18 * return self.rng * * def viterbi(self): # <<<<<<<<<<<<<< @@ -10178,7 +10213,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_2viterbi(struct __pyx_obj_4c return __pyx_r; } -/* "cdec/hypergraph.pxi":24 +/* "hypergraph.pxi":24 * return unicode(GetString(trans).c_str(), 'utf8') * * def viterbi_trees(self): # <<<<<<<<<<<<<< @@ -10212,7 +10247,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_4viterbi_trees(struct __pyx_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("viterbi_trees", 0); - /* "cdec/hypergraph.pxi":29 + /* "hypergraph.pxi":29 * e_tree: Target tree for the best hypothesis in the hypergraph. * """ * f_tree = unicode(hypergraph.ViterbiFTree(self.hg[0]).c_str(), 'utf8') # <<<<<<<<<<<<<< @@ -10235,7 +10270,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_4viterbi_trees(struct __pyx_ __pyx_v_f_tree = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/hypergraph.pxi":30 + /* "hypergraph.pxi":30 * """ * f_tree = unicode(hypergraph.ViterbiFTree(self.hg[0]).c_str(), 'utf8') * e_tree = unicode(hypergraph.ViterbiETree(self.hg[0]).c_str(), 'utf8') # <<<<<<<<<<<<<< @@ -10258,7 +10293,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_4viterbi_trees(struct __pyx_ __pyx_v_e_tree = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/hypergraph.pxi":31 + /* "hypergraph.pxi":31 * f_tree = unicode(hypergraph.ViterbiFTree(self.hg[0]).c_str(), 'utf8') * e_tree = unicode(hypergraph.ViterbiETree(self.hg[0]).c_str(), 'utf8') * return (f_tree, e_tree) # <<<<<<<<<<<<<< @@ -10278,7 +10313,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_4viterbi_trees(struct __pyx_ __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/hypergraph.pxi":24 + /* "hypergraph.pxi":24 * return unicode(GetString(trans).c_str(), 'utf8') * * def viterbi_trees(self): # <<<<<<<<<<<<<< @@ -10300,7 +10335,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_4viterbi_trees(struct __pyx_ return __pyx_r; } -/* "cdec/hypergraph.pxi":33 +/* "hypergraph.pxi":33 * return (f_tree, e_tree) * * def viterbi_features(self): # <<<<<<<<<<<<<< @@ -10332,7 +10367,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_6viterbi_features(struct __p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("viterbi_features", 0); - /* "cdec/hypergraph.pxi":36 + /* "hypergraph.pxi":36 * """hg.viterbi_features() -> SparseVector with the features corresponding * to the best derivation in the hypergraph.""" * cdef SparseVector fmap = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< @@ -10345,7 +10380,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_6viterbi_features(struct __p __pyx_v_fmap = ((struct __pyx_obj_4cdec_5_cdec_SparseVector *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/hypergraph.pxi":37 + /* "hypergraph.pxi":37 * to the best derivation in the hypergraph.""" * cdef SparseVector fmap = SparseVector.__new__(SparseVector) * fmap.vector = new FastSparseVector[weight_t](hypergraph.ViterbiFeatures(self.hg[0])) # <<<<<<<<<<<<<< @@ -10354,7 +10389,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_6viterbi_features(struct __p */ __pyx_v_fmap->vector = new FastSparseVector<weight_t> (ViterbiFeatures((__pyx_v_self->hg[0]))); - /* "cdec/hypergraph.pxi":38 + /* "hypergraph.pxi":38 * cdef SparseVector fmap = SparseVector.__new__(SparseVector) * fmap.vector = new FastSparseVector[weight_t](hypergraph.ViterbiFeatures(self.hg[0])) * return fmap # <<<<<<<<<<<<<< @@ -10366,7 +10401,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_6viterbi_features(struct __p __pyx_r = ((PyObject *)__pyx_v_fmap); goto __pyx_L0; - /* "cdec/hypergraph.pxi":33 + /* "hypergraph.pxi":33 * return (f_tree, e_tree) * * def viterbi_features(self): # <<<<<<<<<<<<<< @@ -10386,7 +10421,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_6viterbi_features(struct __p return __pyx_r; } -/* "cdec/hypergraph.pxi":40 +/* "hypergraph.pxi":40 * return fmap * * def viterbi_forest(self): # <<<<<<<<<<<<<< @@ -10417,7 +10452,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_8viterbi_forest(struct __pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("viterbi_forest", 0); - /* "cdec/hypergraph.pxi":41 + /* "hypergraph.pxi":41 * * def viterbi_forest(self): * cdef Hypergraph hg = Hypergraph() # <<<<<<<<<<<<<< @@ -10429,7 +10464,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_8viterbi_forest(struct __pyx __pyx_v_hg = ((struct __pyx_obj_4cdec_5_cdec_Hypergraph *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/hypergraph.pxi":42 + /* "hypergraph.pxi":42 * def viterbi_forest(self): * cdef Hypergraph hg = Hypergraph() * hg.hg = new hypergraph.Hypergraph(self.hg[0].CreateViterbiHypergraph(NULL).get()[0]) # <<<<<<<<<<<<<< @@ -10438,7 +10473,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_8viterbi_forest(struct __pyx */ __pyx_v_hg->hg = new Hypergraph(((__pyx_v_self->hg[0]).CreateViterbiHypergraph(NULL).get()[0])); - /* "cdec/hypergraph.pxi":43 + /* "hypergraph.pxi":43 * cdef Hypergraph hg = Hypergraph() * hg.hg = new hypergraph.Hypergraph(self.hg[0].CreateViterbiHypergraph(NULL).get()[0]) * return hg # <<<<<<<<<<<<<< @@ -10450,7 +10485,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_8viterbi_forest(struct __pyx __pyx_r = ((PyObject *)__pyx_v_hg); goto __pyx_L0; - /* "cdec/hypergraph.pxi":40 + /* "hypergraph.pxi":40 * return fmap * * def viterbi_forest(self): # <<<<<<<<<<<<<< @@ -10470,7 +10505,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_8viterbi_forest(struct __pyx return __pyx_r; } -/* "cdec/hypergraph.pxi":45 +/* "hypergraph.pxi":45 * return hg * * def viterbi_joshua(self): # <<<<<<<<<<<<<< @@ -10502,7 +10537,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_10viterbi_joshua(struct __py int __pyx_clineno = 0; __Pyx_RefNannySetupContext("viterbi_joshua", 0); - /* "cdec/hypergraph.pxi":47 + /* "hypergraph.pxi":47 * def viterbi_joshua(self): * """hg.viterbi_joshua() -> Joshua representation of the best derivation.""" * return unicode(hypergraph.JoshuaVisualizationString(self.hg[0]).c_str(), 'utf8') # <<<<<<<<<<<<<< @@ -10527,7 +10562,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_10viterbi_joshua(struct __py __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/hypergraph.pxi":45 + /* "hypergraph.pxi":45 * return hg * * def viterbi_joshua(self): # <<<<<<<<<<<<<< @@ -10548,7 +10583,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_10viterbi_joshua(struct __py } static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_14generator4(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/hypergraph.pxi":49 +/* "hypergraph.pxi":49 * return unicode(hypergraph.JoshuaVisualizationString(self.hg[0]).c_str(), 'utf8') * * def kbest(self, size): # <<<<<<<<<<<<<< @@ -10640,7 +10675,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_14generator4(__pyx_Generator __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":51 + /* "hypergraph.pxi":51 * def kbest(self, size): * """hg.kbest(size) -> List of k-best hypotheses in the hypergraph.""" * cdef kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal, kbest.NoFilter[vector[int]]]* derivations = new kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal, kbest.NoFilter[vector[int]]](self.hg[0], size) # <<<<<<<<<<<<<< @@ -10650,7 +10685,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_14generator4(__pyx_Generator __pyx_t_1 = __Pyx_PyInt_As_unsigned_int(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_cur_scope->__pyx_v_derivations = new KBest::KBestDerivations<std::vector<WordID> ,ESentenceTraversal,KBest::NoFilter<std::vector<int> > > ((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1); - /* "cdec/hypergraph.pxi":54 + /* "hypergraph.pxi":54 * cdef kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal, kbest.NoFilter[vector[int]]].Derivation* derivation * cdef unsigned k * try: # <<<<<<<<<<<<<< @@ -10659,7 +10694,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_14generator4(__pyx_Generator */ /*try:*/ { - /* "cdec/hypergraph.pxi":55 + /* "hypergraph.pxi":55 * cdef unsigned k * try: * for k in range(size): # <<<<<<<<<<<<<< @@ -10670,7 +10705,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_14generator4(__pyx_Generator for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_2; __pyx_t_1+=1) { __pyx_cur_scope->__pyx_v_k = __pyx_t_1; - /* "cdec/hypergraph.pxi":56 + /* "hypergraph.pxi":56 * try: * for k in range(size): * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) # <<<<<<<<<<<<<< @@ -10679,7 +10714,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_14generator4(__pyx_Generator */ __pyx_cur_scope->__pyx_v_derivation = __pyx_cur_scope->__pyx_v_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k); - /* "cdec/hypergraph.pxi":57 + /* "hypergraph.pxi":57 * for k in range(size): * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not derivation: break # <<<<<<<<<<<<<< @@ -10691,7 +10726,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_14generator4(__pyx_Generator goto __pyx_L8_break; } - /* "cdec/hypergraph.pxi":58 + /* "hypergraph.pxi":58 * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not derivation: break * yield unicode(GetString(derivation._yield).c_str(), 'utf8') # <<<<<<<<<<<<<< @@ -10728,7 +10763,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_14generator4(__pyx_Generator __pyx_L8_break:; } - /* "cdec/hypergraph.pxi":60 + /* "hypergraph.pxi":60 * yield unicode(GetString(derivation._yield).c_str(), 'utf8') * finally: * del derivations # <<<<<<<<<<<<<< @@ -10774,7 +10809,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_14generator4(__pyx_Generator __pyx_L6:; } - /* "cdec/hypergraph.pxi":49 + /* "hypergraph.pxi":49 * return unicode(hypergraph.JoshuaVisualizationString(self.hg[0]).c_str(), 'utf8') * * def kbest(self, size): # <<<<<<<<<<<<<< @@ -10798,7 +10833,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_14generator4(__pyx_Generator } static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/hypergraph.pxi":62 +/* "hypergraph.pxi":62 * del derivations * * def kbest_trees(self, size): # <<<<<<<<<<<<<< @@ -10891,7 +10926,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":64 + /* "hypergraph.pxi":64 * def kbest_trees(self, size): * """hg.kbest_trees(size) -> List of k-best trees in the hypergrapt.NoFilter.""" * cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal, kbest.NoFilter[vector[int]]]* f_derivations = new kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal, kbest.NoFilter[vector[int]]](self.hg[0], size) # <<<<<<<<<<<<<< @@ -10901,7 +10936,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator __pyx_t_1 = __Pyx_PyInt_As_unsigned_int(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_cur_scope->__pyx_v_f_derivations = new KBest::KBestDerivations<std::vector<WordID> ,FTreeTraversal,KBest::NoFilter<std::vector<int> > > ((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1); - /* "cdec/hypergraph.pxi":66 + /* "hypergraph.pxi":66 * cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal, kbest.NoFilter[vector[int]]]* f_derivations = new kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal, kbest.NoFilter[vector[int]]](self.hg[0], size) * cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal, kbest.NoFilter[vector[int]]].Derivation* f_derivation * cdef kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal, kbest.NoFilter[vector[int]]]* e_derivations = new kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal, kbest.NoFilter[vector[int]]](self.hg[0], size) # <<<<<<<<<<<<<< @@ -10911,7 +10946,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator __pyx_t_1 = __Pyx_PyInt_As_unsigned_int(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_cur_scope->__pyx_v_e_derivations = new KBest::KBestDerivations<std::vector<WordID> ,ETreeTraversal,KBest::NoFilter<std::vector<int> > > ((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1); - /* "cdec/hypergraph.pxi":69 + /* "hypergraph.pxi":69 * cdef kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal, kbest.NoFilter[vector[int]]].Derivation* e_derivation * cdef unsigned k * try: # <<<<<<<<<<<<<< @@ -10920,7 +10955,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator */ /*try:*/ { - /* "cdec/hypergraph.pxi":70 + /* "hypergraph.pxi":70 * cdef unsigned k * try: * for k in range(size): # <<<<<<<<<<<<<< @@ -10931,7 +10966,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_2; __pyx_t_1+=1) { __pyx_cur_scope->__pyx_v_k = __pyx_t_1; - /* "cdec/hypergraph.pxi":71 + /* "hypergraph.pxi":71 * try: * for k in range(size): * f_derivation = f_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) # <<<<<<<<<<<<<< @@ -10940,7 +10975,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator */ __pyx_cur_scope->__pyx_v_f_derivation = __pyx_cur_scope->__pyx_v_f_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k); - /* "cdec/hypergraph.pxi":72 + /* "hypergraph.pxi":72 * for k in range(size): * f_derivation = f_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * e_derivation = e_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) # <<<<<<<<<<<<<< @@ -10949,7 +10984,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator */ __pyx_cur_scope->__pyx_v_e_derivation = __pyx_cur_scope->__pyx_v_e_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k); - /* "cdec/hypergraph.pxi":73 + /* "hypergraph.pxi":73 * f_derivation = f_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * e_derivation = e_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not f_derivation or not e_derivation: break # <<<<<<<<<<<<<< @@ -10958,12 +10993,10 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator */ __pyx_t_4 = ((!(__pyx_cur_scope->__pyx_v_f_derivation != 0)) != 0); if (!__pyx_t_4) { - goto __pyx_L11_next_or; } else { __pyx_t_3 = __pyx_t_4; goto __pyx_L10_bool_binop_done; } - __pyx_L11_next_or:; __pyx_t_4 = ((!(__pyx_cur_scope->__pyx_v_e_derivation != 0)) != 0); __pyx_t_3 = __pyx_t_4; __pyx_L10_bool_binop_done:; @@ -10971,7 +11004,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator goto __pyx_L8_break; } - /* "cdec/hypergraph.pxi":74 + /* "hypergraph.pxi":74 * e_derivation = e_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not f_derivation or not e_derivation: break * f_tree = unicode(GetString(f_derivation._yield).c_str(), 'utf8') # <<<<<<<<<<<<<< @@ -10996,7 +11029,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator __Pyx_GIVEREF(__pyx_t_5); __pyx_t_5 = 0; - /* "cdec/hypergraph.pxi":75 + /* "hypergraph.pxi":75 * if not f_derivation or not e_derivation: break * f_tree = unicode(GetString(f_derivation._yield).c_str(), 'utf8') * e_tree = unicode(GetString(e_derivation._yield).c_str(), 'utf8') # <<<<<<<<<<<<<< @@ -11021,7 +11054,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator __Pyx_GIVEREF(__pyx_t_5); __pyx_t_5 = 0; - /* "cdec/hypergraph.pxi":76 + /* "hypergraph.pxi":76 * f_tree = unicode(GetString(f_derivation._yield).c_str(), 'utf8') * e_tree = unicode(GetString(e_derivation._yield).c_str(), 'utf8') * yield (f_tree, e_tree) # <<<<<<<<<<<<<< @@ -11053,7 +11086,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator __pyx_L8_break:; } - /* "cdec/hypergraph.pxi":78 + /* "hypergraph.pxi":78 * yield (f_tree, e_tree) * finally: * del f_derivations # <<<<<<<<<<<<<< @@ -11064,7 +11097,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator /*normal exit:*/{ delete __pyx_cur_scope->__pyx_v_f_derivations; - /* "cdec/hypergraph.pxi":79 + /* "hypergraph.pxi":79 * finally: * del f_derivations * del e_derivations # <<<<<<<<<<<<<< @@ -11090,7 +11123,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator __pyx_t_7 = __pyx_lineno; __pyx_t_8 = __pyx_clineno; __pyx_t_9 = __pyx_filename; { - /* "cdec/hypergraph.pxi":78 + /* "hypergraph.pxi":78 * yield (f_tree, e_tree) * finally: * del f_derivations # <<<<<<<<<<<<<< @@ -11099,7 +11132,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator */ delete __pyx_cur_scope->__pyx_v_f_derivations; - /* "cdec/hypergraph.pxi":79 + /* "hypergraph.pxi":79 * finally: * del f_derivations * del e_derivations # <<<<<<<<<<<<<< @@ -11125,7 +11158,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator __pyx_L6:; } - /* "cdec/hypergraph.pxi":62 + /* "hypergraph.pxi":62 * del derivations * * def kbest_trees(self, size): # <<<<<<<<<<<<<< @@ -11149,7 +11182,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_17generator5(__pyx_Generator } static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_20generator6(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/hypergraph.pxi":81 +/* "hypergraph.pxi":81 * del e_derivations * * def kbest_features(self, size): # <<<<<<<<<<<<<< @@ -11240,7 +11273,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_20generator6(__pyx_Generator __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":83 + /* "hypergraph.pxi":83 * def kbest_features(self, size): * """hg.kbest_trees(size) -> List of k-best feature vectors in the hypergraph.""" * cdef kbest.KBestDerivations[FastSparseVector[weight_t], kbest.FeatureVectorTraversal, kbest.NoFilter[FastSparseVector[double]]]* derivations = new kbest.KBestDerivations[FastSparseVector[weight_t], kbest.FeatureVectorTraversal, kbest.NoFilter[FastSparseVector[double]]](self.hg[0], size) # <<<<<<<<<<<<<< @@ -11250,7 +11283,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_20generator6(__pyx_Generator __pyx_t_1 = __Pyx_PyInt_As_unsigned_int(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_cur_scope->__pyx_v_derivations = new KBest::KBestDerivations<FastSparseVector<weight_t> ,FeatureVectorTraversal,KBest::NoFilter<FastSparseVector<double> > > ((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1); - /* "cdec/hypergraph.pxi":87 + /* "hypergraph.pxi":87 * cdef SparseVector fmap * cdef unsigned k * try: # <<<<<<<<<<<<<< @@ -11259,7 +11292,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_20generator6(__pyx_Generator */ /*try:*/ { - /* "cdec/hypergraph.pxi":88 + /* "hypergraph.pxi":88 * cdef unsigned k * try: * for k in range(size): # <<<<<<<<<<<<<< @@ -11270,7 +11303,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_20generator6(__pyx_Generator for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_2; __pyx_t_1+=1) { __pyx_cur_scope->__pyx_v_k = __pyx_t_1; - /* "cdec/hypergraph.pxi":89 + /* "hypergraph.pxi":89 * try: * for k in range(size): * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) # <<<<<<<<<<<<<< @@ -11279,7 +11312,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_20generator6(__pyx_Generator */ __pyx_cur_scope->__pyx_v_derivation = __pyx_cur_scope->__pyx_v_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k); - /* "cdec/hypergraph.pxi":90 + /* "hypergraph.pxi":90 * for k in range(size): * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not derivation: break # <<<<<<<<<<<<<< @@ -11291,7 +11324,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_20generator6(__pyx_Generator goto __pyx_L8_break; } - /* "cdec/hypergraph.pxi":91 + /* "hypergraph.pxi":91 * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not derivation: break * fmap = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< @@ -11306,7 +11339,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_20generator6(__pyx_Generator __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - /* "cdec/hypergraph.pxi":92 + /* "hypergraph.pxi":92 * if not derivation: break * fmap = SparseVector.__new__(SparseVector) * fmap.vector = new FastSparseVector[weight_t](derivation._yield) # <<<<<<<<<<<<<< @@ -11315,7 +11348,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_20generator6(__pyx_Generator */ __pyx_cur_scope->__pyx_v_fmap->vector = new FastSparseVector<weight_t> (__pyx_cur_scope->__pyx_v_derivation->yield); - /* "cdec/hypergraph.pxi":93 + /* "hypergraph.pxi":93 * fmap = SparseVector.__new__(SparseVector) * fmap.vector = new FastSparseVector[weight_t](derivation._yield) * yield fmap # <<<<<<<<<<<<<< @@ -11339,7 +11372,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_20generator6(__pyx_Generator __pyx_L8_break:; } - /* "cdec/hypergraph.pxi":95 + /* "hypergraph.pxi":95 * yield fmap * finally: * del derivations # <<<<<<<<<<<<<< @@ -11384,7 +11417,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_20generator6(__pyx_Generator __pyx_L6:; } - /* "cdec/hypergraph.pxi":81 + /* "hypergraph.pxi":81 * del e_derivations * * def kbest_features(self, size): # <<<<<<<<<<<<<< @@ -11407,7 +11440,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_20generator6(__pyx_Generator } static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_23generator7(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/hypergraph.pxi":97 +/* "hypergraph.pxi":97 * del derivations * * def unique_kbest(self, size): # <<<<<<<<<<<<<< @@ -11499,7 +11532,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_23generator7(__pyx_Generator __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":99 + /* "hypergraph.pxi":99 * def unique_kbest(self, size): * """hg.kbest(size) -> List of unique k-best hypotheses in the hypergraph.""" * cdef kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal, kbest.FilterUnique]* derivations = new kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal, kbest.FilterUnique](self.hg[0], size) # <<<<<<<<<<<<<< @@ -11509,7 +11542,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_23generator7(__pyx_Generator __pyx_t_1 = __Pyx_PyInt_As_unsigned_int(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_cur_scope->__pyx_v_derivations = new KBest::KBestDerivations<std::vector<WordID> ,ESentenceTraversal,KBest::FilterUnique> ((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1); - /* "cdec/hypergraph.pxi":102 + /* "hypergraph.pxi":102 * cdef kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal, kbest.FilterUnique].Derivation* derivation * cdef unsigned k * try: # <<<<<<<<<<<<<< @@ -11518,7 +11551,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_23generator7(__pyx_Generator */ /*try:*/ { - /* "cdec/hypergraph.pxi":103 + /* "hypergraph.pxi":103 * cdef unsigned k * try: * for k in range(size): # <<<<<<<<<<<<<< @@ -11529,7 +11562,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_23generator7(__pyx_Generator for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_2; __pyx_t_1+=1) { __pyx_cur_scope->__pyx_v_k = __pyx_t_1; - /* "cdec/hypergraph.pxi":104 + /* "hypergraph.pxi":104 * try: * for k in range(size): * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) # <<<<<<<<<<<<<< @@ -11538,7 +11571,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_23generator7(__pyx_Generator */ __pyx_cur_scope->__pyx_v_derivation = __pyx_cur_scope->__pyx_v_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k); - /* "cdec/hypergraph.pxi":105 + /* "hypergraph.pxi":105 * for k in range(size): * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not derivation: break # <<<<<<<<<<<<<< @@ -11550,7 +11583,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_23generator7(__pyx_Generator goto __pyx_L8_break; } - /* "cdec/hypergraph.pxi":106 + /* "hypergraph.pxi":106 * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not derivation: break * yield unicode(GetString(derivation._yield).c_str(), 'utf8') # <<<<<<<<<<<<<< @@ -11587,7 +11620,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_23generator7(__pyx_Generator __pyx_L8_break:; } - /* "cdec/hypergraph.pxi":108 + /* "hypergraph.pxi":108 * yield unicode(GetString(derivation._yield).c_str(), 'utf8') * finally: * del derivations # <<<<<<<<<<<<<< @@ -11633,7 +11666,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_23generator7(__pyx_Generator __pyx_L6:; } - /* "cdec/hypergraph.pxi":97 + /* "hypergraph.pxi":97 * del derivations * * def unique_kbest(self, size): # <<<<<<<<<<<<<< @@ -11657,7 +11690,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_23generator7(__pyx_Generator } static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/hypergraph.pxi":110 +/* "hypergraph.pxi":110 * del derivations * * def unique_kbest_trees(self, size): # <<<<<<<<<<<<<< @@ -11750,7 +11783,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 110; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":112 + /* "hypergraph.pxi":112 * def unique_kbest_trees(self, size): * """hg.kbest_trees(size) -> List of unique k-best trees in the hypergraph.""" * cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal, kbest.FilterUnique]* f_derivations = new kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal, kbest.FilterUnique](self.hg[0], size) # <<<<<<<<<<<<<< @@ -11760,7 +11793,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator __pyx_t_1 = __Pyx_PyInt_As_unsigned_int(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 112; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_cur_scope->__pyx_v_f_derivations = new KBest::KBestDerivations<std::vector<WordID> ,FTreeTraversal,KBest::FilterUnique> ((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1); - /* "cdec/hypergraph.pxi":114 + /* "hypergraph.pxi":114 * cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal, kbest.FilterUnique]* f_derivations = new kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal, kbest.FilterUnique](self.hg[0], size) * cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal, kbest.FilterUnique].Derivation* f_derivation * cdef kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal, kbest.FilterUnique]* e_derivations = new kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal, kbest.FilterUnique](self.hg[0], size) # <<<<<<<<<<<<<< @@ -11770,7 +11803,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator __pyx_t_1 = __Pyx_PyInt_As_unsigned_int(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 114; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_cur_scope->__pyx_v_e_derivations = new KBest::KBestDerivations<std::vector<WordID> ,ETreeTraversal,KBest::FilterUnique> ((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1); - /* "cdec/hypergraph.pxi":117 + /* "hypergraph.pxi":117 * cdef kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal, kbest.FilterUnique].Derivation* e_derivation * cdef unsigned k * try: # <<<<<<<<<<<<<< @@ -11779,7 +11812,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator */ /*try:*/ { - /* "cdec/hypergraph.pxi":118 + /* "hypergraph.pxi":118 * cdef unsigned k * try: * for k in range(size): # <<<<<<<<<<<<<< @@ -11790,7 +11823,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_2; __pyx_t_1+=1) { __pyx_cur_scope->__pyx_v_k = __pyx_t_1; - /* "cdec/hypergraph.pxi":119 + /* "hypergraph.pxi":119 * try: * for k in range(size): * f_derivation = f_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) # <<<<<<<<<<<<<< @@ -11799,7 +11832,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator */ __pyx_cur_scope->__pyx_v_f_derivation = __pyx_cur_scope->__pyx_v_f_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k); - /* "cdec/hypergraph.pxi":120 + /* "hypergraph.pxi":120 * for k in range(size): * f_derivation = f_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * e_derivation = e_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) # <<<<<<<<<<<<<< @@ -11808,7 +11841,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator */ __pyx_cur_scope->__pyx_v_e_derivation = __pyx_cur_scope->__pyx_v_e_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k); - /* "cdec/hypergraph.pxi":121 + /* "hypergraph.pxi":121 * f_derivation = f_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * e_derivation = e_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not f_derivation or not e_derivation: break # <<<<<<<<<<<<<< @@ -11817,12 +11850,10 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator */ __pyx_t_4 = ((!(__pyx_cur_scope->__pyx_v_f_derivation != 0)) != 0); if (!__pyx_t_4) { - goto __pyx_L11_next_or; } else { __pyx_t_3 = __pyx_t_4; goto __pyx_L10_bool_binop_done; } - __pyx_L11_next_or:; __pyx_t_4 = ((!(__pyx_cur_scope->__pyx_v_e_derivation != 0)) != 0); __pyx_t_3 = __pyx_t_4; __pyx_L10_bool_binop_done:; @@ -11830,7 +11861,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator goto __pyx_L8_break; } - /* "cdec/hypergraph.pxi":122 + /* "hypergraph.pxi":122 * e_derivation = e_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not f_derivation or not e_derivation: break * f_tree = unicode(GetString(f_derivation._yield).c_str(), 'utf8') # <<<<<<<<<<<<<< @@ -11855,7 +11886,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator __Pyx_GIVEREF(__pyx_t_5); __pyx_t_5 = 0; - /* "cdec/hypergraph.pxi":123 + /* "hypergraph.pxi":123 * if not f_derivation or not e_derivation: break * f_tree = unicode(GetString(f_derivation._yield).c_str(), 'utf8') * e_tree = unicode(GetString(e_derivation._yield).c_str(), 'utf8') # <<<<<<<<<<<<<< @@ -11880,7 +11911,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator __Pyx_GIVEREF(__pyx_t_5); __pyx_t_5 = 0; - /* "cdec/hypergraph.pxi":124 + /* "hypergraph.pxi":124 * f_tree = unicode(GetString(f_derivation._yield).c_str(), 'utf8') * e_tree = unicode(GetString(e_derivation._yield).c_str(), 'utf8') * yield (f_tree, e_tree) # <<<<<<<<<<<<<< @@ -11912,7 +11943,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator __pyx_L8_break:; } - /* "cdec/hypergraph.pxi":126 + /* "hypergraph.pxi":126 * yield (f_tree, e_tree) * finally: * del f_derivations # <<<<<<<<<<<<<< @@ -11923,7 +11954,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator /*normal exit:*/{ delete __pyx_cur_scope->__pyx_v_f_derivations; - /* "cdec/hypergraph.pxi":127 + /* "hypergraph.pxi":127 * finally: * del f_derivations * del e_derivations # <<<<<<<<<<<<<< @@ -11949,7 +11980,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator __pyx_t_7 = __pyx_lineno; __pyx_t_8 = __pyx_clineno; __pyx_t_9 = __pyx_filename; { - /* "cdec/hypergraph.pxi":126 + /* "hypergraph.pxi":126 * yield (f_tree, e_tree) * finally: * del f_derivations # <<<<<<<<<<<<<< @@ -11958,7 +11989,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator */ delete __pyx_cur_scope->__pyx_v_f_derivations; - /* "cdec/hypergraph.pxi":127 + /* "hypergraph.pxi":127 * finally: * del f_derivations * del e_derivations # <<<<<<<<<<<<<< @@ -11984,7 +12015,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator __pyx_L6:; } - /* "cdec/hypergraph.pxi":110 + /* "hypergraph.pxi":110 * del derivations * * def unique_kbest_trees(self, size): # <<<<<<<<<<<<<< @@ -12008,7 +12039,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_26generator8(__pyx_Generator } static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_29generator9(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/hypergraph.pxi":129 +/* "hypergraph.pxi":129 * del e_derivations * * def unique_kbest_features(self, size): # <<<<<<<<<<<<<< @@ -12099,7 +12130,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_29generator9(__pyx_Generator __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 129; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":131 + /* "hypergraph.pxi":131 * def unique_kbest_features(self, size): * """hg.kbest_trees(size) -> List of unique k-best feature vectors in the hypergraph.""" * cdef kbest.KBestDerivations[FastSparseVector[weight_t], kbest.FeatureVectorTraversal, kbest.NoFilter[FastSparseVector[double]]]* derivations = new kbest.KBestDerivations[FastSparseVector[weight_t], kbest.FeatureVectorTraversal, kbest.NoFilter[FastSparseVector[double]]](self.hg[0], size) # <<<<<<<<<<<<<< @@ -12109,7 +12140,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_29generator9(__pyx_Generator __pyx_t_1 = __Pyx_PyInt_As_unsigned_int(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_cur_scope->__pyx_v_derivations = new KBest::KBestDerivations<FastSparseVector<weight_t> ,FeatureVectorTraversal,KBest::NoFilter<FastSparseVector<double> > > ((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1); - /* "cdec/hypergraph.pxi":135 + /* "hypergraph.pxi":135 * cdef SparseVector fmap * cdef unsigned k * try: # <<<<<<<<<<<<<< @@ -12118,7 +12149,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_29generator9(__pyx_Generator */ /*try:*/ { - /* "cdec/hypergraph.pxi":136 + /* "hypergraph.pxi":136 * cdef unsigned k * try: * for k in range(size): # <<<<<<<<<<<<<< @@ -12129,7 +12160,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_29generator9(__pyx_Generator for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_2; __pyx_t_1+=1) { __pyx_cur_scope->__pyx_v_k = __pyx_t_1; - /* "cdec/hypergraph.pxi":137 + /* "hypergraph.pxi":137 * try: * for k in range(size): * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) # <<<<<<<<<<<<<< @@ -12138,7 +12169,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_29generator9(__pyx_Generator */ __pyx_cur_scope->__pyx_v_derivation = __pyx_cur_scope->__pyx_v_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k); - /* "cdec/hypergraph.pxi":138 + /* "hypergraph.pxi":138 * for k in range(size): * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not derivation: break # <<<<<<<<<<<<<< @@ -12150,7 +12181,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_29generator9(__pyx_Generator goto __pyx_L8_break; } - /* "cdec/hypergraph.pxi":139 + /* "hypergraph.pxi":139 * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not derivation: break * fmap = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< @@ -12165,7 +12196,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_29generator9(__pyx_Generator __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - /* "cdec/hypergraph.pxi":140 + /* "hypergraph.pxi":140 * if not derivation: break * fmap = SparseVector.__new__(SparseVector) * fmap.vector = new FastSparseVector[weight_t](derivation._yield) # <<<<<<<<<<<<<< @@ -12174,7 +12205,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_29generator9(__pyx_Generator */ __pyx_cur_scope->__pyx_v_fmap->vector = new FastSparseVector<weight_t> (__pyx_cur_scope->__pyx_v_derivation->yield); - /* "cdec/hypergraph.pxi":141 + /* "hypergraph.pxi":141 * fmap = SparseVector.__new__(SparseVector) * fmap.vector = new FastSparseVector[weight_t](derivation._yield) * yield fmap # <<<<<<<<<<<<<< @@ -12198,7 +12229,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_29generator9(__pyx_Generator __pyx_L8_break:; } - /* "cdec/hypergraph.pxi":143 + /* "hypergraph.pxi":143 * yield fmap * finally: * del derivations # <<<<<<<<<<<<<< @@ -12243,7 +12274,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_29generator9(__pyx_Generator __pyx_L6:; } - /* "cdec/hypergraph.pxi":129 + /* "hypergraph.pxi":129 * del e_derivations * * def unique_kbest_features(self, size): # <<<<<<<<<<<<<< @@ -12266,7 +12297,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_29generator9(__pyx_Generator } static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_32generator10(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/hypergraph.pxi":145 +/* "hypergraph.pxi":145 * del derivations * * def sample(self, unsigned n): # <<<<<<<<<<<<<< @@ -12369,7 +12400,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_32generator10(__pyx_Generato __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":147 + /* "hypergraph.pxi":147 * def sample(self, unsigned n): * """hg.sample(n) -> Sample of n hypotheses from the hypergraph.""" * cdef vector[hypergraph.Hypothesis]* hypos = new vector[hypergraph.Hypothesis]() # <<<<<<<<<<<<<< @@ -12384,7 +12415,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_32generator10(__pyx_Generato } __pyx_cur_scope->__pyx_v_hypos = __pyx_t_1; - /* "cdec/hypergraph.pxi":148 + /* "hypergraph.pxi":148 * """hg.sample(n) -> Sample of n hypotheses from the hypergraph.""" * cdef vector[hypergraph.Hypothesis]* hypos = new vector[hypergraph.Hypothesis]() * hypergraph.sample_hypotheses(self.hg[0], n, self._rng(), hypos) # <<<<<<<<<<<<<< @@ -12393,7 +12424,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_32generator10(__pyx_Generato */ HypergraphSampler::sample_hypotheses((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_cur_scope->__pyx_v_n, ((struct __pyx_vtabstruct_4cdec_5_cdec_Hypergraph *)__pyx_cur_scope->__pyx_v_self->__pyx_vtab)->_rng(__pyx_cur_scope->__pyx_v_self), __pyx_cur_scope->__pyx_v_hypos); - /* "cdec/hypergraph.pxi":150 + /* "hypergraph.pxi":150 * hypergraph.sample_hypotheses(self.hg[0], n, self._rng(), hypos) * cdef unsigned k * try: # <<<<<<<<<<<<<< @@ -12402,7 +12433,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_32generator10(__pyx_Generato */ /*try:*/ { - /* "cdec/hypergraph.pxi":151 + /* "hypergraph.pxi":151 * cdef unsigned k * try: * for k in range(hypos.size()): # <<<<<<<<<<<<<< @@ -12413,7 +12444,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_32generator10(__pyx_Generato for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_cur_scope->__pyx_v_k = __pyx_t_3; - /* "cdec/hypergraph.pxi":152 + /* "hypergraph.pxi":152 * try: * for k in range(hypos.size()): * yield unicode(GetString(hypos[0][k].words).c_str(), 'utf8') # <<<<<<<<<<<<<< @@ -12449,7 +12480,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_32generator10(__pyx_Generato } } - /* "cdec/hypergraph.pxi":154 + /* "hypergraph.pxi":154 * yield unicode(GetString(hypos[0][k].words).c_str(), 'utf8') * finally: * del hypos # <<<<<<<<<<<<<< @@ -12495,7 +12526,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_32generator10(__pyx_Generato __pyx_L6:; } - /* "cdec/hypergraph.pxi":145 + /* "hypergraph.pxi":145 * del derivations * * def sample(self, unsigned n): # <<<<<<<<<<<<<< @@ -12519,7 +12550,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_32generator10(__pyx_Generato } static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_35generator11(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/hypergraph.pxi":156 +/* "hypergraph.pxi":156 * del hypos * * def sample_hypotheses(self, unsigned n): # <<<<<<<<<<<<<< @@ -12623,7 +12654,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_35generator11(__pyx_Generato __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 156; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":159 + /* "hypergraph.pxi":159 * """hg.sample_string(n) -> Sample of n hypotheses from the hypergraph. * Generates (sample_string, dot, fmap)""" * cdef vector[hypergraph.Hypothesis]* hypos = new vector[hypergraph.Hypothesis]() # <<<<<<<<<<<<<< @@ -12638,7 +12669,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_35generator11(__pyx_Generato } __pyx_cur_scope->__pyx_v_hypos = __pyx_t_1; - /* "cdec/hypergraph.pxi":160 + /* "hypergraph.pxi":160 * Generates (sample_string, dot, fmap)""" * cdef vector[hypergraph.Hypothesis]* hypos = new vector[hypergraph.Hypothesis]() * cdef SparseVector fmap = None # <<<<<<<<<<<<<< @@ -12649,7 +12680,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_35generator11(__pyx_Generato __Pyx_GIVEREF(Py_None); __pyx_cur_scope->__pyx_v_fmap = ((struct __pyx_obj_4cdec_5_cdec_SparseVector *)Py_None); - /* "cdec/hypergraph.pxi":161 + /* "hypergraph.pxi":161 * cdef vector[hypergraph.Hypothesis]* hypos = new vector[hypergraph.Hypothesis]() * cdef SparseVector fmap = None * hypergraph.sample_hypotheses(self.hg[0], n, self._rng(), hypos) # <<<<<<<<<<<<<< @@ -12658,7 +12689,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_35generator11(__pyx_Generato */ HypergraphSampler::sample_hypotheses((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_cur_scope->__pyx_v_n, ((struct __pyx_vtabstruct_4cdec_5_cdec_Hypergraph *)__pyx_cur_scope->__pyx_v_self->__pyx_vtab)->_rng(__pyx_cur_scope->__pyx_v_self), __pyx_cur_scope->__pyx_v_hypos); - /* "cdec/hypergraph.pxi":163 + /* "hypergraph.pxi":163 * hypergraph.sample_hypotheses(self.hg[0], n, self._rng(), hypos) * cdef unsigned k * try: # <<<<<<<<<<<<<< @@ -12667,7 +12698,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_35generator11(__pyx_Generato */ /*try:*/ { - /* "cdec/hypergraph.pxi":164 + /* "hypergraph.pxi":164 * cdef unsigned k * try: * for k in range(hypos.size()): # <<<<<<<<<<<<<< @@ -12678,7 +12709,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_35generator11(__pyx_Generato for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_cur_scope->__pyx_v_k = __pyx_t_3; - /* "cdec/hypergraph.pxi":165 + /* "hypergraph.pxi":165 * try: * for k in range(hypos.size()): * fmap = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< @@ -12693,7 +12724,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_35generator11(__pyx_Generato __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - /* "cdec/hypergraph.pxi":166 + /* "hypergraph.pxi":166 * for k in range(hypos.size()): * fmap = SparseVector.__new__(SparseVector) * fmap.vector = new FastSparseVector[weight_t](hypos[0][k].fmap) # <<<<<<<<<<<<<< @@ -12702,7 +12733,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_35generator11(__pyx_Generato */ __pyx_cur_scope->__pyx_v_fmap->vector = new FastSparseVector<weight_t> (((__pyx_cur_scope->__pyx_v_hypos[0])[__pyx_cur_scope->__pyx_v_k]).fmap); - /* "cdec/hypergraph.pxi":167 + /* "hypergraph.pxi":167 * fmap = SparseVector.__new__(SparseVector) * fmap.vector = new FastSparseVector[weight_t](hypos[0][k].fmap) * yield unicode(GetString(hypos[0][k].words).c_str(), 'utf8'), hypos[0][k].model_score.as_float(), fmap # <<<<<<<<<<<<<< @@ -12751,7 +12782,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_35generator11(__pyx_Generato } } - /* "cdec/hypergraph.pxi":169 + /* "hypergraph.pxi":169 * yield unicode(GetString(hypos[0][k].words).c_str(), 'utf8'), hypos[0][k].model_score.as_float(), fmap * finally: * del hypos # <<<<<<<<<<<<<< @@ -12798,7 +12829,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_35generator11(__pyx_Generato __pyx_L6:; } - /* "cdec/hypergraph.pxi":156 + /* "hypergraph.pxi":156 * del hypos * * def sample_hypotheses(self, unsigned n): # <<<<<<<<<<<<<< @@ -12823,7 +12854,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_35generator11(__pyx_Generato } static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_38generator12(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/hypergraph.pxi":171 +/* "hypergraph.pxi":171 * del hypos * * def sample_trees(self, unsigned n): # <<<<<<<<<<<<<< @@ -12926,7 +12957,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_38generator12(__pyx_Generato __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 171; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":173 + /* "hypergraph.pxi":173 * def sample_trees(self, unsigned n): * """hg.sample_trees(n) -> Sample of n trees from the hypergraph.""" * cdef vector[string]* trees = new vector[string]() # <<<<<<<<<<<<<< @@ -12941,7 +12972,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_38generator12(__pyx_Generato } __pyx_cur_scope->__pyx_v_trees = __pyx_t_1; - /* "cdec/hypergraph.pxi":174 + /* "hypergraph.pxi":174 * """hg.sample_trees(n) -> Sample of n trees from the hypergraph.""" * cdef vector[string]* trees = new vector[string]() * hypergraph.sample_trees(self.hg[0], n, self._rng(), trees) # <<<<<<<<<<<<<< @@ -12950,7 +12981,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_38generator12(__pyx_Generato */ HypergraphSampler::sample_trees((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_cur_scope->__pyx_v_n, ((struct __pyx_vtabstruct_4cdec_5_cdec_Hypergraph *)__pyx_cur_scope->__pyx_v_self->__pyx_vtab)->_rng(__pyx_cur_scope->__pyx_v_self), __pyx_cur_scope->__pyx_v_trees); - /* "cdec/hypergraph.pxi":176 + /* "hypergraph.pxi":176 * hypergraph.sample_trees(self.hg[0], n, self._rng(), trees) * cdef unsigned k * try: # <<<<<<<<<<<<<< @@ -12959,7 +12990,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_38generator12(__pyx_Generato */ /*try:*/ { - /* "cdec/hypergraph.pxi":177 + /* "hypergraph.pxi":177 * cdef unsigned k * try: * for k in range(trees.size()): # <<<<<<<<<<<<<< @@ -12970,7 +13001,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_38generator12(__pyx_Generato for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_cur_scope->__pyx_v_k = __pyx_t_3; - /* "cdec/hypergraph.pxi":178 + /* "hypergraph.pxi":178 * try: * for k in range(trees.size()): * yield unicode(trees[0][k].c_str(), 'utf8') # <<<<<<<<<<<<<< @@ -13006,7 +13037,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_38generator12(__pyx_Generato } } - /* "cdec/hypergraph.pxi":180 + /* "hypergraph.pxi":180 * yield unicode(trees[0][k].c_str(), 'utf8') * finally: * del trees # <<<<<<<<<<<<<< @@ -13052,7 +13083,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_38generator12(__pyx_Generato __pyx_L6:; } - /* "cdec/hypergraph.pxi":171 + /* "hypergraph.pxi":171 * del hypos * * def sample_trees(self, unsigned n): # <<<<<<<<<<<<<< @@ -13075,7 +13106,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_38generator12(__pyx_Generato return NULL; } -/* "cdec/hypergraph.pxi":182 +/* "hypergraph.pxi":182 * del trees * * def intersect(self, inp): # <<<<<<<<<<<<<< @@ -13110,7 +13141,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_39intersect(struct __pyx_obj int __pyx_clineno = 0; __Pyx_RefNannySetupContext("intersect", 0); - /* "cdec/hypergraph.pxi":185 + /* "hypergraph.pxi":185 * """hg.intersect(Lattice/string): Intersect the hypergraph with the provided reference.""" * cdef Lattice lat * if isinstance(inp, Lattice): # <<<<<<<<<<<<<< @@ -13121,7 +13152,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_39intersect(struct __pyx_obj __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "cdec/hypergraph.pxi":186 + /* "hypergraph.pxi":186 * cdef Lattice lat * if isinstance(inp, Lattice): * lat = <Lattice> inp # <<<<<<<<<<<<<< @@ -13135,7 +13166,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_39intersect(struct __pyx_obj goto __pyx_L3; } - /* "cdec/hypergraph.pxi":187 + /* "hypergraph.pxi":187 * if isinstance(inp, Lattice): * lat = <Lattice> inp * elif isinstance(inp, basestring): # <<<<<<<<<<<<<< @@ -13146,7 +13177,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_39intersect(struct __pyx_obj __pyx_t_1 = (__pyx_t_2 != 0); if (__pyx_t_1) { - /* "cdec/hypergraph.pxi":188 + /* "hypergraph.pxi":188 * lat = <Lattice> inp * elif isinstance(inp, basestring): * lat = Lattice(inp) # <<<<<<<<<<<<<< @@ -13167,7 +13198,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_39intersect(struct __pyx_obj } /*else*/ { - /* "cdec/hypergraph.pxi":190 + /* "hypergraph.pxi":190 * lat = Lattice(inp) * else: * raise TypeError('cannot intersect hypergraph with %s' % type(inp)) # <<<<<<<<<<<<<< @@ -13190,7 +13221,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_39intersect(struct __pyx_obj } __pyx_L3:; - /* "cdec/hypergraph.pxi":191 + /* "hypergraph.pxi":191 * else: * raise TypeError('cannot intersect hypergraph with %s' % type(inp)) * return hypergraph.Intersect(lat.lattice[0], self.hg) # <<<<<<<<<<<<<< @@ -13204,7 +13235,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_39intersect(struct __pyx_obj __pyx_t_4 = 0; goto __pyx_L0; - /* "cdec/hypergraph.pxi":182 + /* "hypergraph.pxi":182 * del trees * * def intersect(self, inp): # <<<<<<<<<<<<<< @@ -13225,7 +13256,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_39intersect(struct __pyx_obj return __pyx_r; } -/* "cdec/hypergraph.pxi":193 +/* "hypergraph.pxi":193 * return hypergraph.Intersect(lat.lattice[0], self.hg) * * def prune(self, beam_alpha=0, density=0, **kwargs): # <<<<<<<<<<<<<< @@ -13319,7 +13350,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_41prune(struct __pyx_obj_4cd int __pyx_clineno = 0; __Pyx_RefNannySetupContext("prune", 0); - /* "cdec/hypergraph.pxi":197 + /* "hypergraph.pxi":197 * beam_alpha: use beam pruning * density: use density pruning""" * cdef hypergraph.EdgeMask* preserve_mask = NULL # <<<<<<<<<<<<<< @@ -13328,7 +13359,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_41prune(struct __pyx_obj_4cd */ __pyx_v_preserve_mask = NULL; - /* "cdec/hypergraph.pxi":198 + /* "hypergraph.pxi":198 * density: use density pruning""" * cdef hypergraph.EdgeMask* preserve_mask = NULL * if 'csplit_preserve_full_word' in kwargs: # <<<<<<<<<<<<<< @@ -13339,7 +13370,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_41prune(struct __pyx_obj_4cd __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "cdec/hypergraph.pxi":199 + /* "hypergraph.pxi":199 * cdef hypergraph.EdgeMask* preserve_mask = NULL * if 'csplit_preserve_full_word' in kwargs: * preserve_mask = new hypergraph.EdgeMask(self.hg.edges_.size()) # <<<<<<<<<<<<<< @@ -13348,7 +13379,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_41prune(struct __pyx_obj_4cd */ __pyx_v_preserve_mask = new std::vector<bool>(__pyx_v_self->hg->edges_.size()); - /* "cdec/hypergraph.pxi":200 + /* "hypergraph.pxi":200 * if 'csplit_preserve_full_word' in kwargs: * preserve_mask = new hypergraph.EdgeMask(self.hg.edges_.size()) * preserve_mask[0][hypergraph.GetFullWordEdgeIndex(self.hg[0])] = True # <<<<<<<<<<<<<< @@ -13360,7 +13391,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_41prune(struct __pyx_obj_4cd } __pyx_L3:; - /* "cdec/hypergraph.pxi":201 + /* "hypergraph.pxi":201 * preserve_mask = new hypergraph.EdgeMask(self.hg.edges_.size()) * preserve_mask[0][hypergraph.GetFullWordEdgeIndex(self.hg[0])] = True * self.hg.PruneInsideOutside(beam_alpha, density, preserve_mask, False, 1, False) # <<<<<<<<<<<<<< @@ -13371,7 +13402,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_41prune(struct __pyx_obj_4cd __pyx_t_4 = __pyx_PyFloat_AsDouble(__pyx_v_density); if (unlikely((__pyx_t_4 == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_self->hg->PruneInsideOutside(__pyx_t_3, __pyx_t_4, __pyx_v_preserve_mask, 0, 1.0, 0); - /* "cdec/hypergraph.pxi":202 + /* "hypergraph.pxi":202 * preserve_mask[0][hypergraph.GetFullWordEdgeIndex(self.hg[0])] = True * self.hg.PruneInsideOutside(beam_alpha, density, preserve_mask, False, 1, False) * if preserve_mask: # <<<<<<<<<<<<<< @@ -13381,7 +13412,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_41prune(struct __pyx_obj_4cd __pyx_t_2 = (__pyx_v_preserve_mask != 0); if (__pyx_t_2) { - /* "cdec/hypergraph.pxi":203 + /* "hypergraph.pxi":203 * self.hg.PruneInsideOutside(beam_alpha, density, preserve_mask, False, 1, False) * if preserve_mask: * del preserve_mask # <<<<<<<<<<<<<< @@ -13393,7 +13424,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_41prune(struct __pyx_obj_4cd } __pyx_L4:; - /* "cdec/hypergraph.pxi":193 + /* "hypergraph.pxi":193 * return hypergraph.Intersect(lat.lattice[0], self.hg) * * def prune(self, beam_alpha=0, density=0, **kwargs): # <<<<<<<<<<<<<< @@ -13413,7 +13444,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_41prune(struct __pyx_obj_4cd return __pyx_r; } -/* "cdec/hypergraph.pxi":205 +/* "hypergraph.pxi":205 * del preserve_mask * * def lattice(self): # TODO direct hg -> lattice conversion in cdec # <<<<<<<<<<<<<< @@ -13447,7 +13478,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_43lattice(struct __pyx_obj_4 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("lattice", 0); - /* "cdec/hypergraph.pxi":207 + /* "hypergraph.pxi":207 * def lattice(self): # TODO direct hg -> lattice conversion in cdec * """hg.lattice() -> Lattice corresponding to the hypergraph.""" * cdef bytes plf = hypergraph.AsPLF(self.hg[0], True).c_str() # <<<<<<<<<<<<<< @@ -13459,7 +13490,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_43lattice(struct __pyx_obj_4 __pyx_v_plf = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/hypergraph.pxi":208 + /* "hypergraph.pxi":208 * """hg.lattice() -> Lattice corresponding to the hypergraph.""" * cdef bytes plf = hypergraph.AsPLF(self.hg[0], True).c_str() * return Lattice(eval(plf)) # <<<<<<<<<<<<<< @@ -13503,7 +13534,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_43lattice(struct __pyx_obj_4 __pyx_t_2 = 0; goto __pyx_L0; - /* "cdec/hypergraph.pxi":205 + /* "hypergraph.pxi":205 * del preserve_mask * * def lattice(self): # TODO direct hg -> lattice conversion in cdec # <<<<<<<<<<<<<< @@ -13525,7 +13556,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_43lattice(struct __pyx_obj_4 return __pyx_r; } -/* "cdec/hypergraph.pxi":210 +/* "hypergraph.pxi":210 * return Lattice(eval(plf)) * * def plf(self): # <<<<<<<<<<<<<< @@ -13557,7 +13588,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_45plf(struct __pyx_obj_4cdec int __pyx_clineno = 0; __Pyx_RefNannySetupContext("plf", 0); - /* "cdec/hypergraph.pxi":212 + /* "hypergraph.pxi":212 * def plf(self): * """hg.plf() -> Lattice PLF representation corresponding to the hypergraph.""" * return bytes(hypergraph.AsPLF(self.hg[0], True).c_str()) # <<<<<<<<<<<<<< @@ -13579,7 +13610,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_45plf(struct __pyx_obj_4cdec __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/hypergraph.pxi":210 + /* "hypergraph.pxi":210 * return Lattice(eval(plf)) * * def plf(self): # <<<<<<<<<<<<<< @@ -13599,7 +13630,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_45plf(struct __pyx_obj_4cdec return __pyx_r; } -/* "cdec/hypergraph.pxi":214 +/* "hypergraph.pxi":214 * return bytes(hypergraph.AsPLF(self.hg[0], True).c_str()) * * def reweight(self, weights): # <<<<<<<<<<<<<< @@ -13633,7 +13664,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_47reweight(struct __pyx_obj_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("reweight", 0); - /* "cdec/hypergraph.pxi":216 + /* "hypergraph.pxi":216 * def reweight(self, weights): * """hg.reweight(SparseVector/DenseVector): Reweight the hypergraph with a new vector.""" * if isinstance(weights, SparseVector): # <<<<<<<<<<<<<< @@ -13644,7 +13675,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_47reweight(struct __pyx_obj_ __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "cdec/hypergraph.pxi":217 + /* "hypergraph.pxi":217 * """hg.reweight(SparseVector/DenseVector): Reweight the hypergraph with a new vector.""" * if isinstance(weights, SparseVector): * self.hg.Reweight((<SparseVector> weights).vector[0]) # <<<<<<<<<<<<<< @@ -13655,7 +13686,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_47reweight(struct __pyx_obj_ goto __pyx_L3; } - /* "cdec/hypergraph.pxi":218 + /* "hypergraph.pxi":218 * if isinstance(weights, SparseVector): * self.hg.Reweight((<SparseVector> weights).vector[0]) * elif isinstance(weights, DenseVector): # <<<<<<<<<<<<<< @@ -13666,7 +13697,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_47reweight(struct __pyx_obj_ __pyx_t_1 = (__pyx_t_2 != 0); if (__pyx_t_1) { - /* "cdec/hypergraph.pxi":219 + /* "hypergraph.pxi":219 * self.hg.Reweight((<SparseVector> weights).vector[0]) * elif isinstance(weights, DenseVector): * self.hg.Reweight((<DenseVector> weights).vector[0]) # <<<<<<<<<<<<<< @@ -13678,7 +13709,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_47reweight(struct __pyx_obj_ } /*else*/ { - /* "cdec/hypergraph.pxi":221 + /* "hypergraph.pxi":221 * self.hg.Reweight((<DenseVector> weights).vector[0]) * else: * raise TypeError('cannot reweight hypergraph with %s' % type(weights)) # <<<<<<<<<<<<<< @@ -13701,7 +13732,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_47reweight(struct __pyx_obj_ } __pyx_L3:; - /* "cdec/hypergraph.pxi":214 + /* "hypergraph.pxi":214 * return bytes(hypergraph.AsPLF(self.hg[0], True).c_str()) * * def reweight(self, weights): # <<<<<<<<<<<<<< @@ -13724,7 +13755,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_47reweight(struct __pyx_obj_ } static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_5edges_2generator13(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/hypergraph.pxi":224 +/* "hypergraph.pxi":224 * * property edges: * def __get__(self): # <<<<<<<<<<<<<< @@ -13802,7 +13833,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_5edges_2generator13(__pyx_Ge __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 224; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":226 + /* "hypergraph.pxi":226 * def __get__(self): * cdef unsigned i * for i in range(self.hg.edges_.size()): # <<<<<<<<<<<<<< @@ -13813,7 +13844,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_5edges_2generator13(__pyx_Ge for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "cdec/hypergraph.pxi":227 + /* "hypergraph.pxi":227 * cdef unsigned i * for i in range(self.hg.edges_.size()): * yield HypergraphEdge().init(self.hg, i) # <<<<<<<<<<<<<< @@ -13840,7 +13871,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_5edges_2generator13(__pyx_Ge if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/hypergraph.pxi":224 + /* "hypergraph.pxi":224 * * property edges: * def __get__(self): # <<<<<<<<<<<<<< @@ -13864,7 +13895,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_5edges_2generator13(__pyx_Ge } static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_5nodes_2generator14(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/hypergraph.pxi":230 +/* "hypergraph.pxi":230 * * property nodes: * def __get__(self): # <<<<<<<<<<<<<< @@ -13942,7 +13973,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_5nodes_2generator14(__pyx_Ge __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 230; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":232 + /* "hypergraph.pxi":232 * def __get__(self): * cdef unsigned i * for i in range(self.hg.nodes_.size()): # <<<<<<<<<<<<<< @@ -13953,7 +13984,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_5nodes_2generator14(__pyx_Ge for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "cdec/hypergraph.pxi":233 + /* "hypergraph.pxi":233 * cdef unsigned i * for i in range(self.hg.nodes_.size()): * yield HypergraphNode().init(self.hg, i) # <<<<<<<<<<<<<< @@ -13980,7 +14011,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_5nodes_2generator14(__pyx_Ge if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 233; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/hypergraph.pxi":230 + /* "hypergraph.pxi":230 * * property nodes: * def __get__(self): # <<<<<<<<<<<<<< @@ -14003,7 +14034,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_10Hypergraph_5nodes_2generator14(__pyx_Ge return NULL; } -/* "cdec/hypergraph.pxi":236 +/* "hypergraph.pxi":236 * * property goal: * def __get__(self): # <<<<<<<<<<<<<< @@ -14034,7 +14065,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_4goal___get__(struct __pyx_o int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/hypergraph.pxi":237 + /* "hypergraph.pxi":237 * property goal: * def __get__(self): * return HypergraphNode().init(self.hg, self.hg.GoalNode()) # <<<<<<<<<<<<<< @@ -14051,7 +14082,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_4goal___get__(struct __pyx_o __pyx_t_2 = 0; goto __pyx_L0; - /* "cdec/hypergraph.pxi":236 + /* "hypergraph.pxi":236 * * property goal: * def __get__(self): # <<<<<<<<<<<<<< @@ -14071,7 +14102,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_4goal___get__(struct __pyx_o return __pyx_r; } -/* "cdec/hypergraph.pxi":240 +/* "hypergraph.pxi":240 * * property npaths: * def __get__(self): # <<<<<<<<<<<<<< @@ -14101,7 +14132,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_6npaths___get__(struct __pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/hypergraph.pxi":241 + /* "hypergraph.pxi":241 * property npaths: * def __get__(self): * return self.hg.NumberOfPaths() # <<<<<<<<<<<<<< @@ -14115,7 +14146,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_6npaths___get__(struct __pyx __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/hypergraph.pxi":240 + /* "hypergraph.pxi":240 * * property npaths: * def __get__(self): # <<<<<<<<<<<<<< @@ -14134,7 +14165,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_6npaths___get__(struct __pyx return __pyx_r; } -/* "cdec/hypergraph.pxi":243 +/* "hypergraph.pxi":243 * return self.hg.NumberOfPaths() * * def inside_outside(self): # <<<<<<<<<<<<<< @@ -14172,7 +14203,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_49inside_outside(struct __py int __pyx_clineno = 0; __Pyx_RefNannySetupContext("inside_outside", 0); - /* "cdec/hypergraph.pxi":245 + /* "hypergraph.pxi":245 * def inside_outside(self): * """hg.inside_outside() -> SparseVector with inside-outside scores for each feature.""" * cdef FastSparseVector[prob_t]* result = new FastSparseVector[prob_t]() # <<<<<<<<<<<<<< @@ -14181,7 +14212,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_49inside_outside(struct __py */ __pyx_v_result = new FastSparseVector<prob_t> (); - /* "cdec/hypergraph.pxi":246 + /* "hypergraph.pxi":246 * """hg.inside_outside() -> SparseVector with inside-outside scores for each feature.""" * cdef FastSparseVector[prob_t]* result = new FastSparseVector[prob_t]() * cdef prob_t z = hypergraph.InsideOutside(self.hg[0], result) # <<<<<<<<<<<<<< @@ -14190,7 +14221,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_49inside_outside(struct __py */ __pyx_v_z = InsideOutside<prob_t, EdgeProb, SparseVector<prob_t>, EdgeFeaturesAndProbWeightFunction>((__pyx_v_self->hg[0]), __pyx_v_result); - /* "cdec/hypergraph.pxi":247 + /* "hypergraph.pxi":247 * cdef FastSparseVector[prob_t]* result = new FastSparseVector[prob_t]() * cdef prob_t z = hypergraph.InsideOutside(self.hg[0], result) * result[0] /= z # <<<<<<<<<<<<<< @@ -14199,7 +14230,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_49inside_outside(struct __py */ (__pyx_v_result[0]) /= __pyx_v_z; - /* "cdec/hypergraph.pxi":248 + /* "hypergraph.pxi":248 * cdef prob_t z = hypergraph.InsideOutside(self.hg[0], result) * result[0] /= z * cdef SparseVector vector = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< @@ -14212,7 +14243,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_49inside_outside(struct __py __pyx_v_vector = ((struct __pyx_obj_4cdec_5_cdec_SparseVector *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/hypergraph.pxi":249 + /* "hypergraph.pxi":249 * result[0] /= z * cdef SparseVector vector = SparseVector.__new__(SparseVector) * vector.vector = new FastSparseVector[double]() # <<<<<<<<<<<<<< @@ -14221,7 +14252,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_49inside_outside(struct __py */ __pyx_v_vector->vector = new FastSparseVector<double> (); - /* "cdec/hypergraph.pxi":250 + /* "hypergraph.pxi":250 * cdef SparseVector vector = SparseVector.__new__(SparseVector) * vector.vector = new FastSparseVector[double]() * cdef FastSparseVector[prob_t].const_iterator* it = new FastSparseVector[prob_t].const_iterator(result[0], False) # <<<<<<<<<<<<<< @@ -14230,7 +14261,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_49inside_outside(struct __py */ __pyx_v_it = new FastSparseVector<prob_t> ::const_iterator((__pyx_v_result[0]), 0); - /* "cdec/hypergraph.pxi":252 + /* "hypergraph.pxi":252 * cdef FastSparseVector[prob_t].const_iterator* it = new FastSparseVector[prob_t].const_iterator(result[0], False) * cdef unsigned i * for i in range(result.size()): # <<<<<<<<<<<<<< @@ -14241,7 +14272,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_49inside_outside(struct __py for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "cdec/hypergraph.pxi":253 + /* "hypergraph.pxi":253 * cdef unsigned i * for i in range(result.size()): * vector.vector.set_value(it[0].ptr().first, log(it[0].ptr().second)) # <<<<<<<<<<<<<< @@ -14250,7 +14281,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_49inside_outside(struct __py */ __pyx_v_vector->vector->set_value((__pyx_v_it[0]).operator->()->first, log((__pyx_v_it[0]).operator->()->second)); - /* "cdec/hypergraph.pxi":254 + /* "hypergraph.pxi":254 * for i in range(result.size()): * vector.vector.set_value(it[0].ptr().first, log(it[0].ptr().second)) * pinc(it[0]) # ++it # <<<<<<<<<<<<<< @@ -14260,7 +14291,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_49inside_outside(struct __py (++(__pyx_v_it[0])); } - /* "cdec/hypergraph.pxi":255 + /* "hypergraph.pxi":255 * vector.vector.set_value(it[0].ptr().first, log(it[0].ptr().second)) * pinc(it[0]) # ++it * del it # <<<<<<<<<<<<<< @@ -14269,7 +14300,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_49inside_outside(struct __py */ delete __pyx_v_it; - /* "cdec/hypergraph.pxi":256 + /* "hypergraph.pxi":256 * pinc(it[0]) # ++it * del it * del result # <<<<<<<<<<<<<< @@ -14278,7 +14309,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_49inside_outside(struct __py */ delete __pyx_v_result; - /* "cdec/hypergraph.pxi":257 + /* "hypergraph.pxi":257 * del it * del result * return vector # <<<<<<<<<<<<<< @@ -14290,7 +14321,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_49inside_outside(struct __py __pyx_r = ((PyObject *)__pyx_v_vector); goto __pyx_L0; - /* "cdec/hypergraph.pxi":243 + /* "hypergraph.pxi":243 * return self.hg.NumberOfPaths() * * def inside_outside(self): # <<<<<<<<<<<<<< @@ -14310,7 +14341,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_10Hypergraph_49inside_outside(struct __py return __pyx_r; } -/* "cdec/hypergraph.pxi":264 +/* "hypergraph.pxi":264 * cdef public TRule trule * * cdef init(self, hypergraph.Hypergraph* hg, unsigned i): # <<<<<<<<<<<<<< @@ -14327,7 +14358,7 @@ static PyObject *__pyx_f_4cdec_5_cdec_14HypergraphEdge_init(struct __pyx_obj_4cd int __pyx_clineno = 0; __Pyx_RefNannySetupContext("init", 0); - /* "cdec/hypergraph.pxi":265 + /* "hypergraph.pxi":265 * * cdef init(self, hypergraph.Hypergraph* hg, unsigned i): * self.hg = hg # <<<<<<<<<<<<<< @@ -14336,7 +14367,7 @@ static PyObject *__pyx_f_4cdec_5_cdec_14HypergraphEdge_init(struct __pyx_obj_4cd */ __pyx_v_self->hg = __pyx_v_hg; - /* "cdec/hypergraph.pxi":266 + /* "hypergraph.pxi":266 * cdef init(self, hypergraph.Hypergraph* hg, unsigned i): * self.hg = hg * self.edge = &hg.edges_[i] # <<<<<<<<<<<<<< @@ -14345,7 +14376,7 @@ static PyObject *__pyx_f_4cdec_5_cdec_14HypergraphEdge_init(struct __pyx_obj_4cd */ __pyx_v_self->edge = (&(__pyx_v_hg->edges_[__pyx_v_i])); - /* "cdec/hypergraph.pxi":267 + /* "hypergraph.pxi":267 * self.hg = hg * self.edge = &hg.edges_[i] * self.trule = TRule.__new__(TRule) # <<<<<<<<<<<<<< @@ -14361,7 +14392,7 @@ static PyObject *__pyx_f_4cdec_5_cdec_14HypergraphEdge_init(struct __pyx_obj_4cd __pyx_v_self->trule = ((struct __pyx_obj_4cdec_5_cdec_TRule *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/hypergraph.pxi":268 + /* "hypergraph.pxi":268 * self.edge = &hg.edges_[i] * self.trule = TRule.__new__(TRule) * self.trule.rule = new shared_ptr[grammar.TRule](self.edge.rule_) # <<<<<<<<<<<<<< @@ -14370,7 +14401,7 @@ static PyObject *__pyx_f_4cdec_5_cdec_14HypergraphEdge_init(struct __pyx_obj_4cd */ __pyx_v_self->trule->rule = new boost::shared_ptr<TRule> (__pyx_v_self->edge->rule_); - /* "cdec/hypergraph.pxi":269 + /* "hypergraph.pxi":269 * self.trule = TRule.__new__(TRule) * self.trule.rule = new shared_ptr[grammar.TRule](self.edge.rule_) * return self # <<<<<<<<<<<<<< @@ -14382,7 +14413,7 @@ static PyObject *__pyx_f_4cdec_5_cdec_14HypergraphEdge_init(struct __pyx_obj_4cd __pyx_r = ((PyObject *)__pyx_v_self); goto __pyx_L0; - /* "cdec/hypergraph.pxi":264 + /* "hypergraph.pxi":264 * cdef public TRule trule * * cdef init(self, hypergraph.Hypergraph* hg, unsigned i): # <<<<<<<<<<<<<< @@ -14401,7 +14432,7 @@ static PyObject *__pyx_f_4cdec_5_cdec_14HypergraphEdge_init(struct __pyx_obj_4cd return __pyx_r; } -/* "cdec/hypergraph.pxi":271 +/* "hypergraph.pxi":271 * return self * * def __len__(self): # <<<<<<<<<<<<<< @@ -14427,7 +14458,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_14HypergraphEdge___len__(struct __pyx_ob __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__len__", 0); - /* "cdec/hypergraph.pxi":272 + /* "hypergraph.pxi":272 * * def __len__(self): * return self.edge.tail_nodes_.size() # <<<<<<<<<<<<<< @@ -14437,7 +14468,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_14HypergraphEdge___len__(struct __pyx_ob __pyx_r = __pyx_v_self->edge->tail_nodes_.size(); goto __pyx_L0; - /* "cdec/hypergraph.pxi":271 + /* "hypergraph.pxi":271 * return self * * def __len__(self): # <<<<<<<<<<<<<< @@ -14451,7 +14482,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_14HypergraphEdge___len__(struct __pyx_ob return __pyx_r; } -/* "cdec/hypergraph.pxi":275 +/* "hypergraph.pxi":275 * * property head_node: * def __get__(self): # <<<<<<<<<<<<<< @@ -14482,7 +14513,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_9head_node___get__(struc int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/hypergraph.pxi":276 + /* "hypergraph.pxi":276 * property head_node: * def __get__(self): * return HypergraphNode().init(self.hg, self.edge.head_node_) # <<<<<<<<<<<<<< @@ -14499,7 +14530,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_9head_node___get__(struc __pyx_t_2 = 0; goto __pyx_L0; - /* "cdec/hypergraph.pxi":275 + /* "hypergraph.pxi":275 * * property head_node: * def __get__(self): # <<<<<<<<<<<<<< @@ -14520,7 +14551,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_9head_node___get__(struc } static PyObject *__pyx_gb_4cdec_5_cdec_14HypergraphEdge_10tail_nodes_2generator15(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/hypergraph.pxi":279 +/* "hypergraph.pxi":279 * * property tail_nodes: * def __get__(self): # <<<<<<<<<<<<<< @@ -14598,7 +14629,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_14HypergraphEdge_10tail_nodes_2generator1 __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":281 + /* "hypergraph.pxi":281 * def __get__(self): * cdef unsigned i * for i in range(self.edge.tail_nodes_.size()): # <<<<<<<<<<<<<< @@ -14609,7 +14640,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_14HypergraphEdge_10tail_nodes_2generator1 for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "cdec/hypergraph.pxi":282 + /* "hypergraph.pxi":282 * cdef unsigned i * for i in range(self.edge.tail_nodes_.size()): * yield HypergraphNode().init(self.hg, self.edge.tail_nodes_[i]) # <<<<<<<<<<<<<< @@ -14636,7 +14667,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_14HypergraphEdge_10tail_nodes_2generator1 if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 282; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/hypergraph.pxi":279 + /* "hypergraph.pxi":279 * * property tail_nodes: * def __get__(self): # <<<<<<<<<<<<<< @@ -14659,7 +14690,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_14HypergraphEdge_10tail_nodes_2generator1 return NULL; } -/* "cdec/hypergraph.pxi":285 +/* "hypergraph.pxi":285 * * property span: * def __get__(self): # <<<<<<<<<<<<<< @@ -14691,7 +14722,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_4span___get__(struct __p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/hypergraph.pxi":286 + /* "hypergraph.pxi":286 * property span: * def __get__(self): * return (self.edge.i_, self.edge.j_) # <<<<<<<<<<<<<< @@ -14715,7 +14746,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_4span___get__(struct __p __pyx_t_3 = 0; goto __pyx_L0; - /* "cdec/hypergraph.pxi":285 + /* "hypergraph.pxi":285 * * property span: * def __get__(self): # <<<<<<<<<<<<<< @@ -14736,7 +14767,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_4span___get__(struct __p return __pyx_r; } -/* "cdec/hypergraph.pxi":289 +/* "hypergraph.pxi":289 * * property src_span: * def __get__(self): # <<<<<<<<<<<<<< @@ -14768,7 +14799,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_8src_span___get__(struct int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/hypergraph.pxi":290 + /* "hypergraph.pxi":290 * property src_span: * def __get__(self): * return (self.edge.prev_i_, self.edge.prev_j_) # <<<<<<<<<<<<<< @@ -14792,7 +14823,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_8src_span___get__(struct __pyx_t_3 = 0; goto __pyx_L0; - /* "cdec/hypergraph.pxi":289 + /* "hypergraph.pxi":289 * * property src_span: * def __get__(self): # <<<<<<<<<<<<<< @@ -14813,7 +14844,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_8src_span___get__(struct return __pyx_r; } -/* "cdec/hypergraph.pxi":293 +/* "hypergraph.pxi":293 * * property feature_values: * def __get__(self): # <<<<<<<<<<<<<< @@ -14844,7 +14875,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_14feature_values___get__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/hypergraph.pxi":294 + /* "hypergraph.pxi":294 * property feature_values: * def __get__(self): * cdef SparseVector vector = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< @@ -14857,7 +14888,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_14feature_values___get__ __pyx_v_vector = ((struct __pyx_obj_4cdec_5_cdec_SparseVector *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/hypergraph.pxi":295 + /* "hypergraph.pxi":295 * def __get__(self): * cdef SparseVector vector = SparseVector.__new__(SparseVector) * vector.vector = new FastSparseVector[double](self.edge.feature_values_) # <<<<<<<<<<<<<< @@ -14866,7 +14897,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_14feature_values___get__ */ __pyx_v_vector->vector = new FastSparseVector<double> (__pyx_v_self->edge->feature_values_); - /* "cdec/hypergraph.pxi":296 + /* "hypergraph.pxi":296 * cdef SparseVector vector = SparseVector.__new__(SparseVector) * vector.vector = new FastSparseVector[double](self.edge.feature_values_) * return vector # <<<<<<<<<<<<<< @@ -14878,7 +14909,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_14feature_values___get__ __pyx_r = ((PyObject *)__pyx_v_vector); goto __pyx_L0; - /* "cdec/hypergraph.pxi":293 + /* "hypergraph.pxi":293 * * property feature_values: * def __get__(self): # <<<<<<<<<<<<<< @@ -14898,7 +14929,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_14feature_values___get__ return __pyx_r; } -/* "cdec/hypergraph.pxi":299 +/* "hypergraph.pxi":299 * * property prob: * def __get__(self): # <<<<<<<<<<<<<< @@ -14928,7 +14959,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_4prob___get__(struct __p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/hypergraph.pxi":300 + /* "hypergraph.pxi":300 * property prob: * def __get__(self): * return self.edge.edge_prob_.as_float() # <<<<<<<<<<<<<< @@ -14942,7 +14973,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_4prob___get__(struct __p __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/hypergraph.pxi":299 + /* "hypergraph.pxi":299 * * property prob: * def __get__(self): # <<<<<<<<<<<<<< @@ -14961,7 +14992,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_4prob___get__(struct __p return __pyx_r; } -/* "cdec/hypergraph.pxi":302 +/* "hypergraph.pxi":302 * return self.edge.edge_prob_.as_float() * * def __richcmp__(HypergraphEdge x, HypergraphEdge y, int op): # <<<<<<<<<<<<<< @@ -15001,7 +15032,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_2__richcmp__(struct __py int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__richcmp__", 0); - /* "cdec/hypergraph.pxi":305 + /* "hypergraph.pxi":305 * if op == 2: # == * return x.edge == y.edge * elif op == 3: # != # <<<<<<<<<<<<<< @@ -15010,7 +15041,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_2__richcmp__(struct __py */ switch (__pyx_v_op) { - /* "cdec/hypergraph.pxi":303 + /* "hypergraph.pxi":303 * * def __richcmp__(HypergraphEdge x, HypergraphEdge y, int op): * if op == 2: # == # <<<<<<<<<<<<<< @@ -15019,7 +15050,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_2__richcmp__(struct __py */ case 2: - /* "cdec/hypergraph.pxi":304 + /* "hypergraph.pxi":304 * def __richcmp__(HypergraphEdge x, HypergraphEdge y, int op): * if op == 2: # == * return x.edge == y.edge # <<<<<<<<<<<<<< @@ -15034,7 +15065,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_2__richcmp__(struct __py goto __pyx_L0; break; - /* "cdec/hypergraph.pxi":305 + /* "hypergraph.pxi":305 * if op == 2: # == * return x.edge == y.edge * elif op == 3: # != # <<<<<<<<<<<<<< @@ -15043,7 +15074,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_2__richcmp__(struct __py */ case 3: - /* "cdec/hypergraph.pxi":306 + /* "hypergraph.pxi":306 * return x.edge == y.edge * elif op == 3: # != * return not (x == y) # <<<<<<<<<<<<<< @@ -15063,7 +15094,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_2__richcmp__(struct __py default: break; } - /* "cdec/hypergraph.pxi":307 + /* "hypergraph.pxi":307 * elif op == 3: # != * return not (x == y) * raise NotImplemented('comparison not implemented for HypergraphEdge') # <<<<<<<<<<<<<< @@ -15076,7 +15107,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_2__richcmp__(struct __py __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; {__pyx_filename = __pyx_f[3]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":302 + /* "hypergraph.pxi":302 * return self.edge.edge_prob_.as_float() * * def __richcmp__(HypergraphEdge x, HypergraphEdge y, int op): # <<<<<<<<<<<<<< @@ -15095,7 +15126,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphEdge_2__richcmp__(struct __py return __pyx_r; } -/* "cdec/hypergraph.pxi":262 +/* "hypergraph.pxi":262 * cdef hypergraph.Hypergraph* hg * cdef hypergraph.HypergraphEdge* edge * cdef public TRule trule # <<<<<<<<<<<<<< @@ -15203,7 +15234,7 @@ static int __pyx_pf_4cdec_5_cdec_14HypergraphEdge_5trule_4__del__(struct __pyx_o return __pyx_r; } -/* "cdec/hypergraph.pxi":313 +/* "hypergraph.pxi":313 * cdef hypergraph.HypergraphNode* node * * cdef init(self, hypergraph.Hypergraph* hg, unsigned i): # <<<<<<<<<<<<<< @@ -15216,7 +15247,7 @@ static PyObject *__pyx_f_4cdec_5_cdec_14HypergraphNode_init(struct __pyx_obj_4cd __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("init", 0); - /* "cdec/hypergraph.pxi":314 + /* "hypergraph.pxi":314 * * cdef init(self, hypergraph.Hypergraph* hg, unsigned i): * self.hg = hg # <<<<<<<<<<<<<< @@ -15225,7 +15256,7 @@ static PyObject *__pyx_f_4cdec_5_cdec_14HypergraphNode_init(struct __pyx_obj_4cd */ __pyx_v_self->hg = __pyx_v_hg; - /* "cdec/hypergraph.pxi":315 + /* "hypergraph.pxi":315 * cdef init(self, hypergraph.Hypergraph* hg, unsigned i): * self.hg = hg * self.node = &hg.nodes_[i] # <<<<<<<<<<<<<< @@ -15234,7 +15265,7 @@ static PyObject *__pyx_f_4cdec_5_cdec_14HypergraphNode_init(struct __pyx_obj_4cd */ __pyx_v_self->node = (&(__pyx_v_hg->nodes_[__pyx_v_i])); - /* "cdec/hypergraph.pxi":316 + /* "hypergraph.pxi":316 * self.hg = hg * self.node = &hg.nodes_[i] * return self # <<<<<<<<<<<<<< @@ -15246,7 +15277,7 @@ static PyObject *__pyx_f_4cdec_5_cdec_14HypergraphNode_init(struct __pyx_obj_4cd __pyx_r = ((PyObject *)__pyx_v_self); goto __pyx_L0; - /* "cdec/hypergraph.pxi":313 + /* "hypergraph.pxi":313 * cdef hypergraph.HypergraphNode* node * * cdef init(self, hypergraph.Hypergraph* hg, unsigned i): # <<<<<<<<<<<<<< @@ -15261,7 +15292,7 @@ static PyObject *__pyx_f_4cdec_5_cdec_14HypergraphNode_init(struct __pyx_obj_4cd return __pyx_r; } -/* "cdec/hypergraph.pxi":319 +/* "hypergraph.pxi":319 * * property id: * def __get__(self): # <<<<<<<<<<<<<< @@ -15291,7 +15322,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode_2id___get__(struct __pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/hypergraph.pxi":320 + /* "hypergraph.pxi":320 * property id: * def __get__(self): * return self.node.id_ # <<<<<<<<<<<<<< @@ -15305,7 +15336,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode_2id___get__(struct __pyx __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/hypergraph.pxi":319 + /* "hypergraph.pxi":319 * * property id: * def __get__(self): # <<<<<<<<<<<<<< @@ -15325,7 +15356,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode_2id___get__(struct __pyx } static PyObject *__pyx_gb_4cdec_5_cdec_14HypergraphNode_8in_edges_2generator16(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/hypergraph.pxi":323 +/* "hypergraph.pxi":323 * * property in_edges: * def __get__(self): # <<<<<<<<<<<<<< @@ -15403,7 +15434,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_14HypergraphNode_8in_edges_2generator16(_ __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":325 + /* "hypergraph.pxi":325 * def __get__(self): * cdef unsigned i * for i in range(self.node.in_edges_.size()): # <<<<<<<<<<<<<< @@ -15414,7 +15445,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_14HypergraphNode_8in_edges_2generator16(_ for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "cdec/hypergraph.pxi":326 + /* "hypergraph.pxi":326 * cdef unsigned i * for i in range(self.node.in_edges_.size()): * yield HypergraphEdge().init(self.hg, self.node.in_edges_[i]) # <<<<<<<<<<<<<< @@ -15441,7 +15472,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_14HypergraphNode_8in_edges_2generator16(_ if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 326; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/hypergraph.pxi":323 + /* "hypergraph.pxi":323 * * property in_edges: * def __get__(self): # <<<<<<<<<<<<<< @@ -15465,7 +15496,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_14HypergraphNode_8in_edges_2generator16(_ } static PyObject *__pyx_gb_4cdec_5_cdec_14HypergraphNode_9out_edges_2generator17(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/hypergraph.pxi":329 +/* "hypergraph.pxi":329 * * property out_edges: * def __get__(self): # <<<<<<<<<<<<<< @@ -15543,7 +15574,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_14HypergraphNode_9out_edges_2generator17( __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":331 + /* "hypergraph.pxi":331 * def __get__(self): * cdef unsigned i * for i in range(self.node.out_edges_.size()): # <<<<<<<<<<<<<< @@ -15554,7 +15585,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_14HypergraphNode_9out_edges_2generator17( for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "cdec/hypergraph.pxi":332 + /* "hypergraph.pxi":332 * cdef unsigned i * for i in range(self.node.out_edges_.size()): * yield HypergraphEdge().init(self.hg, self.node.out_edges_[i]) # <<<<<<<<<<<<<< @@ -15581,7 +15612,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_14HypergraphNode_9out_edges_2generator17( if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/hypergraph.pxi":329 + /* "hypergraph.pxi":329 * * property out_edges: * def __get__(self): # <<<<<<<<<<<<<< @@ -15604,7 +15635,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_14HypergraphNode_9out_edges_2generator17( return NULL; } -/* "cdec/hypergraph.pxi":335 +/* "hypergraph.pxi":335 * * property span: * def __get__(self): # <<<<<<<<<<<<<< @@ -15635,7 +15666,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode_4span___get__(struct __p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/hypergraph.pxi":336 + /* "hypergraph.pxi":336 * property span: * def __get__(self): * return next(self.in_edges).span # <<<<<<<<<<<<<< @@ -15655,7 +15686,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode_4span___get__(struct __p __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/hypergraph.pxi":335 + /* "hypergraph.pxi":335 * * property span: * def __get__(self): # <<<<<<<<<<<<<< @@ -15675,7 +15706,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode_4span___get__(struct __p return __pyx_r; } -/* "cdec/hypergraph.pxi":339 +/* "hypergraph.pxi":339 * * property cat: * def __get__(self): # <<<<<<<<<<<<<< @@ -15707,7 +15738,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode_3cat___get__(struct __py int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/hypergraph.pxi":340 + /* "hypergraph.pxi":340 * property cat: * def __get__(self): * if self.node.cat_: # <<<<<<<<<<<<<< @@ -15717,7 +15748,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode_3cat___get__(struct __py __pyx_t_1 = (__pyx_v_self->node->cat_ != 0); if (__pyx_t_1) { - /* "cdec/hypergraph.pxi":341 + /* "hypergraph.pxi":341 * def __get__(self): * if self.node.cat_: * return str(TDConvert(-self.node.cat_).c_str()) # <<<<<<<<<<<<<< @@ -15740,7 +15771,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode_3cat___get__(struct __py goto __pyx_L0; } - /* "cdec/hypergraph.pxi":339 + /* "hypergraph.pxi":339 * * property cat: * def __get__(self): # <<<<<<<<<<<<<< @@ -15762,7 +15793,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode_3cat___get__(struct __py return __pyx_r; } -/* "cdec/hypergraph.pxi":343 +/* "hypergraph.pxi":343 * return str(TDConvert(-self.node.cat_).c_str()) * * def __richcmp__(HypergraphNode x, HypergraphNode y, int op): # <<<<<<<<<<<<<< @@ -15802,7 +15833,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode___richcmp__(struct __pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__richcmp__", 0); - /* "cdec/hypergraph.pxi":346 + /* "hypergraph.pxi":346 * if op == 2: # == * return x.node == y.node * elif op == 3: # != # <<<<<<<<<<<<<< @@ -15811,7 +15842,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode___richcmp__(struct __pyx */ switch (__pyx_v_op) { - /* "cdec/hypergraph.pxi":344 + /* "hypergraph.pxi":344 * * def __richcmp__(HypergraphNode x, HypergraphNode y, int op): * if op == 2: # == # <<<<<<<<<<<<<< @@ -15820,7 +15851,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode___richcmp__(struct __pyx */ case 2: - /* "cdec/hypergraph.pxi":345 + /* "hypergraph.pxi":345 * def __richcmp__(HypergraphNode x, HypergraphNode y, int op): * if op == 2: # == * return x.node == y.node # <<<<<<<<<<<<<< @@ -15835,7 +15866,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode___richcmp__(struct __pyx goto __pyx_L0; break; - /* "cdec/hypergraph.pxi":346 + /* "hypergraph.pxi":346 * if op == 2: # == * return x.node == y.node * elif op == 3: # != # <<<<<<<<<<<<<< @@ -15844,7 +15875,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode___richcmp__(struct __pyx */ case 3: - /* "cdec/hypergraph.pxi":347 + /* "hypergraph.pxi":347 * return x.node == y.node * elif op == 3: # != * return not (x == y) # <<<<<<<<<<<<<< @@ -15863,7 +15894,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode___richcmp__(struct __pyx default: break; } - /* "cdec/hypergraph.pxi":348 + /* "hypergraph.pxi":348 * elif op == 3: # != * return not (x == y) * raise NotImplemented('comparison not implemented for HypergraphNode') # <<<<<<<<<<<<<< @@ -15874,7 +15905,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode___richcmp__(struct __pyx __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; {__pyx_filename = __pyx_f[3]; __pyx_lineno = 348; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/hypergraph.pxi":343 + /* "hypergraph.pxi":343 * return str(TDConvert(-self.node.cat_).c_str()) * * def __richcmp__(HypergraphNode x, HypergraphNode y, int op): # <<<<<<<<<<<<<< @@ -15893,7 +15924,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_14HypergraphNode___richcmp__(struct __pyx return __pyx_r; } -/* "cdec/lattice.pxi":6 +/* "lattice.pxi":6 * cdef lattice.Lattice* lattice * * def __cinit__(self): # <<<<<<<<<<<<<< @@ -15922,7 +15953,7 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice___cinit__(struct __pyx_obj_4cdec_5_cde __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__cinit__", 0); - /* "cdec/lattice.pxi":7 + /* "lattice.pxi":7 * * def __cinit__(self): * self.lattice = new lattice.Lattice() # <<<<<<<<<<<<<< @@ -15931,7 +15962,7 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice___cinit__(struct __pyx_obj_4cdec_5_cde */ __pyx_v_self->lattice = new Lattice(); - /* "cdec/lattice.pxi":6 + /* "lattice.pxi":6 * cdef lattice.Lattice* lattice * * def __cinit__(self): # <<<<<<<<<<<<<< @@ -15945,7 +15976,7 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice___cinit__(struct __pyx_obj_4cdec_5_cde return __pyx_r; } -/* "cdec/lattice.pxi":9 +/* "lattice.pxi":9 * self.lattice = new lattice.Lattice() * * def __init__(self, inp): # <<<<<<<<<<<<<< @@ -16027,7 +16058,7 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_2__init__(struct __pyx_obj_4cdec_5_cde int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__init__", 0); - /* "cdec/lattice.pxi":12 + /* "lattice.pxi":12 * """Lattice(tuple) -> Lattice from node list. * Lattice(string) -> Lattice from PLF representation.""" * if isinstance(inp, tuple): # <<<<<<<<<<<<<< @@ -16038,7 +16069,7 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_2__init__(struct __pyx_obj_4cdec_5_cde __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "cdec/lattice.pxi":13 + /* "lattice.pxi":13 * Lattice(string) -> Lattice from PLF representation.""" * if isinstance(inp, tuple): * self.lattice.resize(len(inp)) # <<<<<<<<<<<<<< @@ -16048,7 +16079,7 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_2__init__(struct __pyx_obj_4cdec_5_cde __pyx_t_3 = PyObject_Length(__pyx_v_inp); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_self->lattice->resize(__pyx_t_3); - /* "cdec/lattice.pxi":14 + /* "lattice.pxi":14 * if isinstance(inp, tuple): * self.lattice.resize(len(inp)) * for i, arcs in enumerate(inp): # <<<<<<<<<<<<<< @@ -16104,7 +16135,7 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_2__init__(struct __pyx_obj_4cdec_5_cde __pyx_t_4 = __pyx_t_7; __pyx_t_7 = 0; - /* "cdec/lattice.pxi":15 + /* "lattice.pxi":15 * self.lattice.resize(len(inp)) * for i, arcs in enumerate(inp): * self[i] = arcs # <<<<<<<<<<<<<< @@ -16113,7 +16144,7 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_2__init__(struct __pyx_obj_4cdec_5_cde */ if (unlikely(PyObject_SetItem(((PyObject *)__pyx_v_self), __pyx_v_i, __pyx_v_arcs) < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/lattice.pxi":14 + /* "lattice.pxi":14 * if isinstance(inp, tuple): * self.lattice.resize(len(inp)) * for i, arcs in enumerate(inp): # <<<<<<<<<<<<<< @@ -16126,7 +16157,7 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_2__init__(struct __pyx_obj_4cdec_5_cde goto __pyx_L3; } - /* "cdec/lattice.pxi":16 + /* "lattice.pxi":16 * for i, arcs in enumerate(inp): * self[i] = arcs * elif isinstance(inp, basestring): # <<<<<<<<<<<<<< @@ -16137,7 +16168,7 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_2__init__(struct __pyx_obj_4cdec_5_cde __pyx_t_1 = (__pyx_t_2 != 0); if (__pyx_t_1) { - /* "cdec/lattice.pxi":17 + /* "lattice.pxi":17 * self[i] = arcs * elif isinstance(inp, basestring): * lattice.ConvertTextOrPLF(as_str(inp), self.lattice) # <<<<<<<<<<<<<< @@ -16146,14 +16177,14 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_2__init__(struct __pyx_obj_4cdec_5_cde */ __pyx_t_4 = __pyx_f_4cdec_5_cdec_as_str(__pyx_v_inp, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_8 = __pyx_convert_string_from_py_(__pyx_t_4); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __pyx_convert_string_from_py_std__in_string(__pyx_t_4); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; LatticeTools::ConvertTextOrPLF(__pyx_t_8, __pyx_v_self->lattice); goto __pyx_L3; } /*else*/ { - /* "cdec/lattice.pxi":19 + /* "lattice.pxi":19 * lattice.ConvertTextOrPLF(as_str(inp), self.lattice) * else: * raise TypeError('cannot create lattice from %s' % type(inp)) # <<<<<<<<<<<<<< @@ -16176,7 +16207,7 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_2__init__(struct __pyx_obj_4cdec_5_cde } __pyx_L3:; - /* "cdec/lattice.pxi":9 + /* "lattice.pxi":9 * self.lattice = new lattice.Lattice() * * def __init__(self, inp): # <<<<<<<<<<<<<< @@ -16200,7 +16231,7 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_2__init__(struct __pyx_obj_4cdec_5_cde return __pyx_r; } -/* "cdec/lattice.pxi":21 +/* "lattice.pxi":21 * raise TypeError('cannot create lattice from %s' % type(inp)) * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -16223,7 +16254,7 @@ static void __pyx_pf_4cdec_5_cdec_7Lattice_4__dealloc__(struct __pyx_obj_4cdec_5 __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "cdec/lattice.pxi":22 + /* "lattice.pxi":22 * * def __dealloc__(self): * del self.lattice # <<<<<<<<<<<<<< @@ -16232,7 +16263,7 @@ static void __pyx_pf_4cdec_5_cdec_7Lattice_4__dealloc__(struct __pyx_obj_4cdec_5 */ delete __pyx_v_self->lattice; - /* "cdec/lattice.pxi":21 + /* "lattice.pxi":21 * raise TypeError('cannot create lattice from %s' % type(inp)) * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -16244,7 +16275,7 @@ static void __pyx_pf_4cdec_5_cdec_7Lattice_4__dealloc__(struct __pyx_obj_4cdec_5 __Pyx_RefNannyFinishContext(); } -/* "cdec/lattice.pxi":24 +/* "lattice.pxi":24 * del self.lattice * * def __getitem__(self, int index): # <<<<<<<<<<<<<< @@ -16282,6 +16313,8 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_4c PyObject *__pyx_v_arcs = NULL; std::vector<LatticeArc> __pyx_v_arc_vector; LatticeArc *__pyx_v_arc; + FastSparseVector<double> *__pyx_v_vp; + struct __pyx_obj_4cdec_5_cdec_SparseVector *__pyx_v_v = 0; unsigned int __pyx_v_i; PyObject *__pyx_v_label = NULL; PyObject *__pyx_r = NULL; @@ -16293,14 +16326,13 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_4c size_t __pyx_t_5; unsigned int __pyx_t_6; PyObject *__pyx_t_7 = NULL; - PyObject *__pyx_t_8 = NULL; - int __pyx_t_9; + int __pyx_t_8; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__getitem__", 0); - /* "cdec/lattice.pxi":25 + /* "lattice.pxi":25 * * def __getitem__(self, int index): * if not 0 <= index < len(self): # <<<<<<<<<<<<<< @@ -16315,7 +16347,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_4c __pyx_t_3 = ((!(__pyx_t_1 != 0)) != 0); if (__pyx_t_3) { - /* "cdec/lattice.pxi":26 + /* "lattice.pxi":26 * def __getitem__(self, int index): * if not 0 <= index < len(self): * raise IndexError('lattice index out of range') # <<<<<<<<<<<<<< @@ -16329,7 +16361,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_4c {__pyx_filename = __pyx_f[4]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/lattice.pxi":27 + /* "lattice.pxi":27 * if not 0 <= index < len(self): * raise IndexError('lattice index out of range') * arcs = [] # <<<<<<<<<<<<<< @@ -16341,45 +16373,76 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_4c __pyx_v_arcs = ((PyObject*)__pyx_t_4); __pyx_t_4 = 0; - /* "cdec/lattice.pxi":28 + /* "lattice.pxi":28 * raise IndexError('lattice index out of range') * arcs = [] * cdef vector[lattice.LatticeArc] arc_vector = self.lattice[0][index] # <<<<<<<<<<<<<< * cdef lattice.LatticeArc* arc - * cdef unsigned i + * cdef FastSparseVector[double]* vp */ __pyx_v_arc_vector = ((__pyx_v_self->lattice[0])[__pyx_v_index]); - /* "cdec/lattice.pxi":31 + /* "lattice.pxi":31 * cdef lattice.LatticeArc* arc + * cdef FastSparseVector[double]* vp + * cdef SparseVector v = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< + * cdef unsigned i + * for i in range(arc_vector.size()): + */ + __pyx_t_4 = __pyx_tp_new_4cdec_5_cdec_SparseVector(((PyTypeObject *)((PyObject*)__pyx_ptype_4cdec_5_cdec_SparseVector)), __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 31; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + if (!(likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_4cdec_5_cdec_SparseVector)))) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 31; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_v = ((struct __pyx_obj_4cdec_5_cdec_SparseVector *)__pyx_t_4); + __pyx_t_4 = 0; + + /* "lattice.pxi":33 + * cdef SparseVector v = SparseVector.__new__(SparseVector) * cdef unsigned i * for i in range(arc_vector.size()): # <<<<<<<<<<<<<< * arc = &arc_vector[i] - * label = unicode(TDConvert(arc.label).c_str(), 'utf8') + * vp = new FastSparseVector[double](arc.features) */ __pyx_t_5 = __pyx_v_arc_vector.size(); for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) { __pyx_v_i = __pyx_t_6; - /* "cdec/lattice.pxi":32 + /* "lattice.pxi":34 * cdef unsigned i * for i in range(arc_vector.size()): * arc = &arc_vector[i] # <<<<<<<<<<<<<< - * label = unicode(TDConvert(arc.label).c_str(), 'utf8') - * arcs.append((label, arc.cost, arc.dist2next)) + * vp = new FastSparseVector[double](arc.features) + * v.vector = vp */ __pyx_v_arc = (&(__pyx_v_arc_vector[__pyx_v_i])); - /* "cdec/lattice.pxi":33 + /* "lattice.pxi":35 * for i in range(arc_vector.size()): * arc = &arc_vector[i] + * vp = new FastSparseVector[double](arc.features) # <<<<<<<<<<<<<< + * v.vector = vp + * label = unicode(TDConvert(arc.label).c_str(), 'utf8') + */ + __pyx_v_vp = new FastSparseVector<double> (__pyx_v_arc->features); + + /* "lattice.pxi":36 + * arc = &arc_vector[i] + * vp = new FastSparseVector[double](arc.features) + * v.vector = vp # <<<<<<<<<<<<<< + * label = unicode(TDConvert(arc.label).c_str(), 'utf8') + * arcs.append((label, v, arc.dist2next)) + */ + __pyx_v_v->vector = __pyx_v_vp; + + /* "lattice.pxi":37 + * vp = new FastSparseVector[double](arc.features) + * v.vector = vp * label = unicode(TDConvert(arc.label).c_str(), 'utf8') # <<<<<<<<<<<<<< - * arcs.append((label, arc.cost, arc.dist2next)) + * arcs.append((label, v, arc.dist2next)) * return tuple(arcs) */ - __pyx_t_4 = __Pyx_PyBytes_FromString(TD::Convert(__pyx_v_arc->label).c_str()); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyBytes_FromString(TD::Convert(__pyx_v_arc->label).c_str()); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); @@ -16387,53 +16450,51 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_4c PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_n_s_utf8); __Pyx_GIVEREF(__pyx_n_s_utf8); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), __pyx_t_7, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), __pyx_t_7, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_XDECREF_SET(__pyx_v_label, ((PyObject*)__pyx_t_4)); __pyx_t_4 = 0; - /* "cdec/lattice.pxi":34 - * arc = &arc_vector[i] + /* "lattice.pxi":38 + * v.vector = vp * label = unicode(TDConvert(arc.label).c_str(), 'utf8') - * arcs.append((label, arc.cost, arc.dist2next)) # <<<<<<<<<<<<<< + * arcs.append((label, v, arc.dist2next)) # <<<<<<<<<<<<<< * return tuple(arcs) * */ - __pyx_t_4 = PyFloat_FromDouble(__pyx_v_arc->cost); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_arc->dist2next); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_7 = __Pyx_PyInt_From_int(__pyx_v_arc->dist2next); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyTuple_New(3); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_8 = PyTuple_New(3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_8); __Pyx_INCREF(__pyx_v_label); - PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_v_label); + PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_v_label); __Pyx_GIVEREF(__pyx_v_label); - PyTuple_SET_ITEM(__pyx_t_8, 1, __pyx_t_4); + __Pyx_INCREF(((PyObject *)__pyx_v_v)); + PyTuple_SET_ITEM(__pyx_t_7, 1, ((PyObject *)__pyx_v_v)); + __Pyx_GIVEREF(((PyObject *)__pyx_v_v)); + PyTuple_SET_ITEM(__pyx_t_7, 2, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); - PyTuple_SET_ITEM(__pyx_t_8, 2, __pyx_t_7); - __Pyx_GIVEREF(__pyx_t_7); __pyx_t_4 = 0; - __pyx_t_7 = 0; - __pyx_t_9 = __Pyx_PyList_Append(__pyx_v_arcs, __pyx_t_8); if (unlikely(__pyx_t_9 == -1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_8 = __Pyx_PyList_Append(__pyx_v_arcs, __pyx_t_7); if (unlikely(__pyx_t_8 == -1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; } - /* "cdec/lattice.pxi":35 + /* "lattice.pxi":39 * label = unicode(TDConvert(arc.label).c_str(), 'utf8') - * arcs.append((label, arc.cost, arc.dist2next)) + * arcs.append((label, v, arc.dist2next)) * return tuple(arcs) # <<<<<<<<<<<<<< * * def __setitem__(self, int index, tuple arcs): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_8 = PyList_AsTuple(__pyx_v_arcs); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_8); - __pyx_r = __pyx_t_8; - __pyx_t_8 = 0; + __pyx_t_7 = PyList_AsTuple(__pyx_v_arcs); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __pyx_r = __pyx_t_7; + __pyx_t_7 = 0; goto __pyx_L0; - /* "cdec/lattice.pxi":24 + /* "lattice.pxi":24 * del self.lattice * * def __getitem__(self, int index): # <<<<<<<<<<<<<< @@ -16445,18 +16506,18 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_6__getitem__(struct __pyx_obj_4c __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_4); __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_8); __Pyx_AddTraceback("cdec._cdec.Lattice.__getitem__", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XDECREF(__pyx_v_arcs); + __Pyx_XDECREF((PyObject *)__pyx_v_v); __Pyx_XDECREF(__pyx_v_label); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "cdec/lattice.pxi":37 +/* "lattice.pxi":41 * return tuple(arcs) * * def __setitem__(self, int index, tuple arcs): # <<<<<<<<<<<<<< @@ -16475,7 +16536,7 @@ static int __pyx_pw_4cdec_5_cdec_7Lattice_9__setitem__(PyObject *__pyx_v_self, P __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__setitem__ (wrapper)", 0); assert(__pyx_arg_index); { - __pyx_v_index = __Pyx_PyInt_As_int(__pyx_arg_index); if (unlikely((__pyx_v_index == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_index = __Pyx_PyInt_As_int(__pyx_arg_index); if (unlikely((__pyx_v_index == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L3_error:; @@ -16483,7 +16544,7 @@ static int __pyx_pw_4cdec_5_cdec_7Lattice_9__setitem__(PyObject *__pyx_v_self, P __Pyx_RefNannyFinishContext(); return -1; __pyx_L4_argument_unpacking_done:; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_arcs), (&PyTuple_Type), 1, "arcs", 1))) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_arcs), (&PyTuple_Type), 1, "arcs", 1))) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = __pyx_pf_4cdec_5_cdec_7Lattice_8__setitem__(((struct __pyx_obj_4cdec_5_cdec_Lattice *)__pyx_v_self), ((int)__pyx_v_index), ((PyObject*)__pyx_v_arcs)); /* function exit code */ @@ -16497,8 +16558,10 @@ static int __pyx_pw_4cdec_5_cdec_7Lattice_9__setitem__(PyObject *__pyx_v_self, P static int __pyx_pf_4cdec_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_4cdec_5_cdec_Lattice *__pyx_v_self, int __pyx_v_index, PyObject *__pyx_v_arcs) { LatticeArc *__pyx_v_arc; + FastSparseVector<double> *__pyx_v_vp; + struct __pyx_obj_4cdec_5_cdec_SparseVector *__pyx_v_v = 0; PyObject *__pyx_v_label = NULL; - PyObject *__pyx_v_cost = NULL; + PyObject *__pyx_v_features = NULL; PyObject *__pyx_v_dist2next = NULL; PyObject *__pyx_v_label_str = NULL; int __pyx_r; @@ -16513,15 +16576,15 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_4cdec_5_ PyObject *__pyx_t_8 = NULL; PyObject *__pyx_t_9 = NULL; PyObject *(*__pyx_t_10)(PyObject *); - char *__pyx_t_11; - double __pyx_t_12; + FastSparseVector<weight_t> *__pyx_t_11; + char *__pyx_t_12; int __pyx_t_13; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__setitem__", 0); - /* "cdec/lattice.pxi":38 + /* "lattice.pxi":42 * * def __setitem__(self, int index, tuple arcs): * if not 0 <= index < len(self): # <<<<<<<<<<<<<< @@ -16530,44 +16593,57 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_4cdec_5_ */ __pyx_t_1 = (0 <= __pyx_v_index); if (__pyx_t_1) { - __pyx_t_2 = PyObject_Length(((PyObject *)__pyx_v_self)); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Length(((PyObject *)__pyx_v_self)); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_1 = (__pyx_v_index < __pyx_t_2); } __pyx_t_3 = ((!(__pyx_t_1 != 0)) != 0); if (__pyx_t_3) { - /* "cdec/lattice.pxi":39 + /* "lattice.pxi":43 * def __setitem__(self, int index, tuple arcs): * if not 0 <= index < len(self): * raise IndexError('lattice index out of range') # <<<<<<<<<<<<<< * cdef lattice.LatticeArc* arc - * for (label, cost, dist2next) in arcs: + * cdef FastSparseVector[double]* vp */ - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_IndexError, __pyx_tuple__9, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_IndexError, __pyx_tuple__9, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_Raise(__pyx_t_4, 0, 0, 0); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - {__pyx_filename = __pyx_f[4]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[4]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/lattice.pxi":41 - * raise IndexError('lattice index out of range') + /* "lattice.pxi":46 * cdef lattice.LatticeArc* arc - * for (label, cost, dist2next) in arcs: # <<<<<<<<<<<<<< + * cdef FastSparseVector[double]* vp + * cdef SparseVector v = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< + * for (label, features, dist2next) in arcs: * label_str = as_str(label) - * arc = new lattice.LatticeArc(TDConvert(label_str), cost, dist2next) + */ + __pyx_t_4 = __pyx_tp_new_4cdec_5_cdec_SparseVector(((PyTypeObject *)((PyObject*)__pyx_ptype_4cdec_5_cdec_SparseVector)), __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + if (!(likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_4cdec_5_cdec_SparseVector)))) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_v = ((struct __pyx_obj_4cdec_5_cdec_SparseVector *)__pyx_t_4); + __pyx_t_4 = 0; + + /* "lattice.pxi":47 + * cdef FastSparseVector[double]* vp + * cdef SparseVector v = SparseVector.__new__(SparseVector) + * for (label, features, dist2next) in arcs: # <<<<<<<<<<<<<< + * label_str = as_str(label) + * v = features */ if (unlikely(__pyx_v_arcs == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); - {__pyx_filename = __pyx_f[4]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_4 = __pyx_v_arcs; __Pyx_INCREF(__pyx_t_4); __pyx_t_2 = 0; for (;;) { if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_4)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_5); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_5); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif if ((likely(PyTuple_CheckExact(__pyx_t_5))) || (PyList_CheckExact(__pyx_t_5))) { PyObject* sequence = __pyx_t_5; @@ -16579,7 +16655,7 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_4cdec_5_ if (unlikely(size != 3)) { if (size > 3) __Pyx_RaiseTooManyValuesError(3); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - {__pyx_filename = __pyx_f[4]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyTuple_CheckExact(sequence))) { @@ -16595,17 +16671,17 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_4cdec_5_ __Pyx_INCREF(__pyx_t_7); __Pyx_INCREF(__pyx_t_8); #else - __pyx_t_6 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_8 = PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); #endif __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; } else { Py_ssize_t index = -1; - __pyx_t_9 = PyObject_GetIter(__pyx_t_5); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_GetIter(__pyx_t_5); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_10 = Py_TYPE(__pyx_t_9)->tp_iternext; @@ -16615,7 +16691,7 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_4cdec_5_ __Pyx_GOTREF(__pyx_t_7); index = 2; __pyx_t_8 = __pyx_t_10(__pyx_t_9); if (unlikely(!__pyx_t_8)) goto __pyx_L6_unpacking_failed; __Pyx_GOTREF(__pyx_t_8); - if (__Pyx_IternextUnpackEndCheck(__pyx_t_10(__pyx_t_9), 3) < 0) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_IternextUnpackEndCheck(__pyx_t_10(__pyx_t_9), 3) < 0) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_10 = NULL; __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; goto __pyx_L7_unpacking_done; @@ -16623,51 +16699,78 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_4cdec_5_ __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __pyx_t_10 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - {__pyx_filename = __pyx_f[4]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_L7_unpacking_done:; } __Pyx_XDECREF_SET(__pyx_v_label, __pyx_t_6); __pyx_t_6 = 0; - __Pyx_XDECREF_SET(__pyx_v_cost, __pyx_t_7); + __Pyx_XDECREF_SET(__pyx_v_features, __pyx_t_7); __pyx_t_7 = 0; __Pyx_XDECREF_SET(__pyx_v_dist2next, __pyx_t_8); __pyx_t_8 = 0; - /* "cdec/lattice.pxi":42 - * cdef lattice.LatticeArc* arc - * for (label, cost, dist2next) in arcs: + /* "lattice.pxi":48 + * cdef SparseVector v = SparseVector.__new__(SparseVector) + * for (label, features, dist2next) in arcs: * label_str = as_str(label) # <<<<<<<<<<<<<< - * arc = new lattice.LatticeArc(TDConvert(label_str), cost, dist2next) - * self.lattice[0][index].push_back(arc[0]) + * v = features + * vp = v.vector */ - __pyx_t_5 = __pyx_f_4cdec_5_cdec_as_str(__pyx_v_label, NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __pyx_f_4cdec_5_cdec_as_str(__pyx_v_label, NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_XDECREF_SET(__pyx_v_label_str, ((PyObject*)__pyx_t_5)); __pyx_t_5 = 0; - /* "cdec/lattice.pxi":43 - * for (label, cost, dist2next) in arcs: + /* "lattice.pxi":49 + * for (label, features, dist2next) in arcs: + * label_str = as_str(label) + * v = features # <<<<<<<<<<<<<< + * vp = v.vector + * arc = new lattice.LatticeArc(TDConvert(label_str), vp[0], dist2next) + */ + if (!(likely(((__pyx_v_features) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_features, __pyx_ptype_4cdec_5_cdec_SparseVector))))) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __pyx_v_features; + __Pyx_INCREF(__pyx_t_5); + __Pyx_DECREF_SET(__pyx_v_v, ((struct __pyx_obj_4cdec_5_cdec_SparseVector *)__pyx_t_5)); + __pyx_t_5 = 0; + + /* "lattice.pxi":50 * label_str = as_str(label) - * arc = new lattice.LatticeArc(TDConvert(label_str), cost, dist2next) # <<<<<<<<<<<<<< + * v = features + * vp = v.vector # <<<<<<<<<<<<<< + * arc = new lattice.LatticeArc(TDConvert(label_str), vp[0], dist2next) + * self.lattice[0][index].push_back(arc[0]) + */ + __pyx_t_11 = __pyx_v_v->vector; + __pyx_v_vp = __pyx_t_11; + + /* "lattice.pxi":51 + * v = features + * vp = v.vector + * arc = new lattice.LatticeArc(TDConvert(label_str), vp[0], dist2next) # <<<<<<<<<<<<<< * self.lattice[0][index].push_back(arc[0]) * del arc */ - __pyx_t_11 = __Pyx_PyObject_AsString(__pyx_v_label_str); if (unlikely((!__pyx_t_11) && PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_12 = __pyx_PyFloat_AsDouble(__pyx_v_cost); if (unlikely((__pyx_t_12 == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_13 = __Pyx_PyInt_As_int(__pyx_v_dist2next); if (unlikely((__pyx_t_13 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_arc = new LatticeArc(TD::Convert(__pyx_t_11), __pyx_t_12, __pyx_t_13); + __pyx_t_12 = __Pyx_PyObject_AsString(__pyx_v_label_str); if (unlikely((!__pyx_t_12) && PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_int(__pyx_v_dist2next); if (unlikely((__pyx_t_13 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_arc = new LatticeArc(TD::Convert(__pyx_t_12), (__pyx_v_vp[0]), __pyx_t_13); - /* "cdec/lattice.pxi":44 - * label_str = as_str(label) - * arc = new lattice.LatticeArc(TDConvert(label_str), cost, dist2next) + /* "lattice.pxi":52 + * vp = v.vector + * arc = new lattice.LatticeArc(TDConvert(label_str), vp[0], dist2next) * self.lattice[0][index].push_back(arc[0]) # <<<<<<<<<<<<<< * del arc * */ - ((__pyx_v_self->lattice[0])[__pyx_v_index]).push_back((__pyx_v_arc[0])); + try { + ((__pyx_v_self->lattice[0])[__pyx_v_index]).push_back((__pyx_v_arc[0])); + } catch(...) { + __Pyx_CppExn2PyErr(); + {__pyx_filename = __pyx_f[4]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } - /* "cdec/lattice.pxi":45 - * arc = new lattice.LatticeArc(TDConvert(label_str), cost, dist2next) + /* "lattice.pxi":53 + * arc = new lattice.LatticeArc(TDConvert(label_str), vp[0], dist2next) * self.lattice[0][index].push_back(arc[0]) * del arc # <<<<<<<<<<<<<< * @@ -16675,17 +16778,17 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_4cdec_5_ */ delete __pyx_v_arc; - /* "cdec/lattice.pxi":41 - * raise IndexError('lattice index out of range') - * cdef lattice.LatticeArc* arc - * for (label, cost, dist2next) in arcs: # <<<<<<<<<<<<<< + /* "lattice.pxi":47 + * cdef FastSparseVector[double]* vp + * cdef SparseVector v = SparseVector.__new__(SparseVector) + * for (label, features, dist2next) in arcs: # <<<<<<<<<<<<<< * label_str = as_str(label) - * arc = new lattice.LatticeArc(TDConvert(label_str), cost, dist2next) + * v = features */ } __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "cdec/lattice.pxi":37 + /* "lattice.pxi":41 * return tuple(arcs) * * def __setitem__(self, int index, tuple arcs): # <<<<<<<<<<<<<< @@ -16706,15 +16809,16 @@ static int __pyx_pf_4cdec_5_cdec_7Lattice_8__setitem__(struct __pyx_obj_4cdec_5_ __Pyx_AddTraceback("cdec._cdec.Lattice.__setitem__", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = -1; __pyx_L0:; + __Pyx_XDECREF((PyObject *)__pyx_v_v); __Pyx_XDECREF(__pyx_v_label); - __Pyx_XDECREF(__pyx_v_cost); + __Pyx_XDECREF(__pyx_v_features); __Pyx_XDECREF(__pyx_v_dist2next); __Pyx_XDECREF(__pyx_v_label_str); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "cdec/lattice.pxi":47 +/* "lattice.pxi":55 * del arc * * def __len__(self): # <<<<<<<<<<<<<< @@ -16740,7 +16844,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_7Lattice_10__len__(struct __pyx_obj_4cde __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__len__", 0); - /* "cdec/lattice.pxi":48 + /* "lattice.pxi":56 * * def __len__(self): * return self.lattice.size() # <<<<<<<<<<<<<< @@ -16750,7 +16854,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_7Lattice_10__len__(struct __pyx_obj_4cde __pyx_r = __pyx_v_self->lattice->size(); goto __pyx_L0; - /* "cdec/lattice.pxi":47 + /* "lattice.pxi":55 * del arc * * def __len__(self): # <<<<<<<<<<<<<< @@ -16764,7 +16868,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_7Lattice_10__len__(struct __pyx_obj_4cde return __pyx_r; } -/* "cdec/lattice.pxi":50 +/* "lattice.pxi":58 * return self.lattice.size() * * def __str__(self): # <<<<<<<<<<<<<< @@ -16795,7 +16899,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_12__str__(struct __pyx_obj_4cdec int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__str__", 0); - /* "cdec/lattice.pxi":51 + /* "lattice.pxi":59 * * def __str__(self): * return str(hypergraph.AsPLF(self.lattice[0], True).c_str()) # <<<<<<<<<<<<<< @@ -16803,21 +16907,21 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_12__str__(struct __pyx_obj_4cdec * def __unicode__(self): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyBytes_FromString(HypergraphIO::AsPLF((__pyx_v_self->lattice[0]), 1).c_str()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyBytes_FromString(HypergraphIO::AsPLF((__pyx_v_self->lattice[0]), 1).c_str()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/lattice.pxi":50 + /* "lattice.pxi":58 * return self.lattice.size() * * def __str__(self): # <<<<<<<<<<<<<< @@ -16837,7 +16941,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_12__str__(struct __pyx_obj_4cdec return __pyx_r; } -/* "cdec/lattice.pxi":53 +/* "lattice.pxi":61 * return str(hypergraph.AsPLF(self.lattice[0], True).c_str()) * * def __unicode__(self): # <<<<<<<<<<<<<< @@ -16868,7 +16972,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_14__unicode__(struct __pyx_obj_4 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__unicode__", 0); - /* "cdec/lattice.pxi":54 + /* "lattice.pxi":62 * * def __unicode__(self): * return unicode(str(self), 'utf8') # <<<<<<<<<<<<<< @@ -16876,15 +16980,15 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_14__unicode__(struct __pyx_obj_4 * def __iter__(self): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(((PyObject *)__pyx_v_self)); PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_self)); __Pyx_GIVEREF(((PyObject *)__pyx_v_self)); - __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), __pyx_t_1, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), __pyx_t_1, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); @@ -16892,14 +16996,14 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_14__unicode__(struct __pyx_obj_4 PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_n_s_utf8); __Pyx_GIVEREF(__pyx_n_s_utf8); __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), __pyx_t_1, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), __pyx_t_1, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0; - /* "cdec/lattice.pxi":53 + /* "lattice.pxi":61 * return str(hypergraph.AsPLF(self.lattice[0], True).c_str()) * * def __unicode__(self): # <<<<<<<<<<<<<< @@ -16920,7 +17024,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_14__unicode__(struct __pyx_obj_4 } static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_18generator18(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/lattice.pxi":56 +/* "lattice.pxi":64 * return unicode(str(self), 'utf8') * * def __iter__(self): # <<<<<<<<<<<<<< @@ -16959,7 +17063,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_16__iter__(struct __pyx_obj_4cde __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_4cdec_5_cdec_7Lattice_18generator18, (PyObject *) __pyx_cur_scope, __pyx_n_s_iter, __pyx_n_s_Lattice___iter); if (unlikely(!gen)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_4cdec_5_cdec_7Lattice_18generator18, (PyObject *) __pyx_cur_scope, __pyx_n_s_iter, __pyx_n_s_Lattice___iter); if (unlikely(!gen)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_cur_scope); __Pyx_RefNannyFinishContext(); return (PyObject *) gen; @@ -16995,27 +17099,27 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_18generator18(__pyx_GeneratorObj return NULL; } __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/lattice.pxi":58 + /* "lattice.pxi":66 * def __iter__(self): * cdef unsigned i * for i in range(len(self)): # <<<<<<<<<<<<<< * yield self[i] * */ - __pyx_t_1 = PyObject_Length(((PyObject *)__pyx_cur_scope->__pyx_v_self)); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Length(((PyObject *)__pyx_cur_scope->__pyx_v_self)); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "cdec/lattice.pxi":59 + /* "lattice.pxi":67 * cdef unsigned i * for i in range(len(self)): * yield self[i] # <<<<<<<<<<<<<< * * def todot(self): */ - __pyx_t_3 = __Pyx_GetItemInt(((PyObject *)__pyx_cur_scope->__pyx_v_self), __pyx_cur_scope->__pyx_v_i, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 1); if (unlikely(__pyx_t_3 == NULL)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_3 = __Pyx_GetItemInt(((PyObject *)__pyx_cur_scope->__pyx_v_self), __pyx_cur_scope->__pyx_v_i, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 1); if (unlikely(__pyx_t_3 == NULL)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_3); __pyx_r = __pyx_t_3; __pyx_t_3 = 0; @@ -17029,10 +17133,10 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_18generator18(__pyx_GeneratorObj __pyx_L6_resume_from_yield:; __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/lattice.pxi":56 + /* "lattice.pxi":64 * return unicode(str(self), 'utf8') * * def __iter__(self): # <<<<<<<<<<<<<< @@ -17054,7 +17158,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_18generator18(__pyx_GeneratorObj return NULL; } -/* "cdec/lattice.pxi":61 +/* "lattice.pxi":69 * yield self[i] * * def todot(self): # <<<<<<<<<<<<<< @@ -17077,7 +17181,7 @@ static PyObject *__pyx_pw_4cdec_5_cdec_7Lattice_20todot(PyObject *__pyx_v_self, } static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/lattice.pxi":63 +/* "lattice.pxi":71 * def todot(self): * """lattice.todot() -> Representation of the lattice in GraphViz dot format.""" * def lines(): # <<<<<<<<<<<<<< @@ -17117,7 +17221,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_5todot_lines(PyObject *__pyx_sel __Pyx_INCREF(((PyObject *)__pyx_cur_scope->__pyx_outer_scope)); __Pyx_GIVEREF(__pyx_cur_scope->__pyx_outer_scope); { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24, (PyObject *) __pyx_cur_scope, __pyx_n_s_lines, __pyx_n_s_todot_locals_lines); if (unlikely(!gen)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24, (PyObject *) __pyx_cur_scope, __pyx_n_s_lines, __pyx_n_s_todot_locals_lines); if (unlikely(!gen)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_cur_scope); __Pyx_RefNannyFinishContext(); return (PyObject *) gen; @@ -17167,9 +17271,9 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera return NULL; } __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/lattice.pxi":64 + /* "lattice.pxi":72 * """lattice.todot() -> Representation of the lattice in GraphViz dot format.""" * def lines(): * yield 'digraph lattice {' # <<<<<<<<<<<<<< @@ -17184,9 +17288,9 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera __pyx_generator->resume_label = 1; return __pyx_r; __pyx_L4_resume_from_yield:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/lattice.pxi":65 + /* "lattice.pxi":73 * def lines(): * yield 'digraph lattice {' * yield 'rankdir = LR;' # <<<<<<<<<<<<<< @@ -17201,9 +17305,9 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera __pyx_generator->resume_label = 2; return __pyx_r; __pyx_L5_resume_from_yield:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/lattice.pxi":66 + /* "lattice.pxi":74 * yield 'digraph lattice {' * yield 'rankdir = LR;' * yield 'node [shape=circle];' # <<<<<<<<<<<<<< @@ -17218,37 +17322,37 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera __pyx_generator->resume_label = 3; return __pyx_r; __pyx_L6_resume_from_yield:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/lattice.pxi":67 + /* "lattice.pxi":75 * yield 'rankdir = LR;' * yield 'node [shape=circle];' * for i in range(len(self)): # <<<<<<<<<<<<<< * for label, weight, delta in self[i]: * yield '%d -> %d [label="%s"];' % (i, i+delta, label.replace('"', '\\"')) */ - if (unlikely(!__pyx_cur_scope->__pyx_outer_scope->__pyx_v_self)) { __Pyx_RaiseClosureNameError("self"); {__pyx_filename = __pyx_f[4]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } + if (unlikely(!__pyx_cur_scope->__pyx_outer_scope->__pyx_v_self)) { __Pyx_RaiseClosureNameError("self"); {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_1 = ((PyObject *)__pyx_cur_scope->__pyx_outer_scope->__pyx_v_self); __Pyx_INCREF(__pyx_t_1); - __pyx_t_2 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyInt_FromSsize_t(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromSsize_t(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_range, __pyx_t_3, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_range, __pyx_t_3, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { __pyx_t_3 = __pyx_t_1; __Pyx_INCREF(__pyx_t_3); __pyx_t_2 = 0; __pyx_t_4 = NULL; } else { - __pyx_t_2 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (;;) { @@ -17256,16 +17360,16 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { @@ -17274,7 +17378,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[4]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -17285,23 +17389,23 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera __Pyx_GIVEREF(__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/lattice.pxi":68 + /* "lattice.pxi":76 * yield 'node [shape=circle];' * for i in range(len(self)): * for label, weight, delta in self[i]: # <<<<<<<<<<<<<< * yield '%d -> %d [label="%s"];' % (i, i+delta, label.replace('"', '\\"')) * yield '%d [shape=doublecircle]' % len(self) */ - if (unlikely(!__pyx_cur_scope->__pyx_outer_scope->__pyx_v_self)) { __Pyx_RaiseClosureNameError("self"); {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __pyx_t_1 = PyObject_GetItem(((PyObject *)__pyx_cur_scope->__pyx_outer_scope->__pyx_v_self), __pyx_cur_scope->__pyx_v_i); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + if (unlikely(!__pyx_cur_scope->__pyx_outer_scope->__pyx_v_self)) { __Pyx_RaiseClosureNameError("self"); {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } + __pyx_t_1 = PyObject_GetItem(((PyObject *)__pyx_cur_scope->__pyx_outer_scope->__pyx_v_self), __pyx_cur_scope->__pyx_v_i); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_1); if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { __pyx_t_5 = __pyx_t_1; __Pyx_INCREF(__pyx_t_5); __pyx_t_6 = 0; __pyx_t_7 = NULL; } else { - __pyx_t_6 = -1; __pyx_t_5 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = -1; __pyx_t_5 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_7 = Py_TYPE(__pyx_t_5)->tp_iternext; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = Py_TYPE(__pyx_t_5)->tp_iternext; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (;;) { @@ -17309,16 +17413,16 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera if (likely(PyList_CheckExact(__pyx_t_5))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_5)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_5)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { @@ -17327,7 +17431,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -17343,7 +17447,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera if (unlikely(size != 3)) { if (size > 3) __Pyx_RaiseTooManyValuesError(3); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyTuple_CheckExact(sequence))) { @@ -17359,17 +17463,17 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera __Pyx_INCREF(__pyx_t_9); __Pyx_INCREF(__pyx_t_10); #else - __pyx_t_8 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_9 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - __pyx_t_10 = PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); #endif __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } else { Py_ssize_t index = -1; - __pyx_t_11 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_12 = Py_TYPE(__pyx_t_11)->tp_iternext; @@ -17379,7 +17483,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera __Pyx_GOTREF(__pyx_t_9); index = 2; __pyx_t_10 = __pyx_t_12(__pyx_t_11); if (unlikely(!__pyx_t_10)) goto __pyx_L11_unpacking_failed; __Pyx_GOTREF(__pyx_t_10); - if (__Pyx_IternextUnpackEndCheck(__pyx_t_12(__pyx_t_11), 3) < 0) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_IternextUnpackEndCheck(__pyx_t_12(__pyx_t_11), 3) < 0) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_12 = NULL; __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; goto __pyx_L12_unpacking_done; @@ -17387,7 +17491,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __pyx_t_12 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - {__pyx_filename = __pyx_f[4]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_L12_unpacking_done:; } __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_label); @@ -17403,21 +17507,21 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera __Pyx_GIVEREF(__pyx_t_10); __pyx_t_10 = 0; - /* "cdec/lattice.pxi":69 + /* "lattice.pxi":77 * for i in range(len(self)): * for label, weight, delta in self[i]: * yield '%d -> %d [label="%s"];' % (i, i+delta, label.replace('"', '\\"')) # <<<<<<<<<<<<<< * yield '%d [shape=doublecircle]' % len(self) * yield '}' */ - __pyx_t_1 = PyNumber_Add(__pyx_cur_scope->__pyx_v_i, __pyx_cur_scope->__pyx_v_delta); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Add(__pyx_cur_scope->__pyx_v_i, __pyx_cur_scope->__pyx_v_delta); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_cur_scope->__pyx_v_label, __pyx_n_s_replace); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_cur_scope->__pyx_v_label, __pyx_n_s_replace); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_9 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_tuple__12, NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_tuple__12, NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = PyTuple_New(3); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyTuple_New(3); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_INCREF(__pyx_cur_scope->__pyx_v_i); PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_cur_scope->__pyx_v_i); @@ -17428,7 +17532,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera __Pyx_GIVEREF(__pyx_t_9); __pyx_t_1 = 0; __pyx_t_9 = 0; - __pyx_t_9 = __Pyx_PyString_Format(__pyx_kp_s_d_d_label_s, __pyx_t_10); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_PyString_Format(__pyx_kp_s_d_d_label_s, __pyx_t_10); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __pyx_r = __pyx_t_9; @@ -17457,9 +17561,9 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera __Pyx_XGOTREF(__pyx_t_5); __pyx_t_6 = __pyx_cur_scope->__pyx_t_4; __pyx_t_7 = __pyx_cur_scope->__pyx_t_5; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/lattice.pxi":68 + /* "lattice.pxi":76 * yield 'node [shape=circle];' * for i in range(len(self)): * for label, weight, delta in self[i]: # <<<<<<<<<<<<<< @@ -17469,7 +17573,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "cdec/lattice.pxi":67 + /* "lattice.pxi":75 * yield 'rankdir = LR;' * yield 'node [shape=circle];' * for i in range(len(self)): # <<<<<<<<<<<<<< @@ -17479,21 +17583,21 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "cdec/lattice.pxi":70 + /* "lattice.pxi":78 * for label, weight, delta in self[i]: * yield '%d -> %d [label="%s"];' % (i, i+delta, label.replace('"', '\\"')) * yield '%d [shape=doublecircle]' % len(self) # <<<<<<<<<<<<<< * yield '}' * return '\n'.join(lines()).encode('utf8') */ - if (unlikely(!__pyx_cur_scope->__pyx_outer_scope->__pyx_v_self)) { __Pyx_RaiseClosureNameError("self"); {__pyx_filename = __pyx_f[4]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } + if (unlikely(!__pyx_cur_scope->__pyx_outer_scope->__pyx_v_self)) { __Pyx_RaiseClosureNameError("self"); {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_3 = ((PyObject *)__pyx_cur_scope->__pyx_outer_scope->__pyx_v_self); __Pyx_INCREF(__pyx_t_3); - __pyx_t_2 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyInt_FromSsize_t(__pyx_t_2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromSsize_t(__pyx_t_2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = __Pyx_PyString_Format(__pyx_kp_s_d_shape_doublecircle, __pyx_t_3); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_PyString_Format(__pyx_kp_s_d_shape_doublecircle, __pyx_t_3); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_r = __pyx_t_5; @@ -17504,9 +17608,9 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera __pyx_generator->resume_label = 5; return __pyx_r; __pyx_L14_resume_from_yield:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/lattice.pxi":71 + /* "lattice.pxi":79 * yield '%d -> %d [label="%s"];' % (i, i+delta, label.replace('"', '\\"')) * yield '%d [shape=doublecircle]' % len(self) * yield '}' # <<<<<<<<<<<<<< @@ -17521,9 +17625,9 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera __pyx_generator->resume_label = 6; return __pyx_r; __pyx_L15_resume_from_yield:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/lattice.pxi":63 + /* "lattice.pxi":71 * def todot(self): * """lattice.todot() -> Representation of the lattice in GraphViz dot format.""" * def lines(): # <<<<<<<<<<<<<< @@ -17551,7 +17655,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_7Lattice_5todot_2generator24(__pyx_Genera return NULL; } -/* "cdec/lattice.pxi":61 +/* "lattice.pxi":69 * yield self[i] * * def todot(self): # <<<<<<<<<<<<<< @@ -17580,19 +17684,19 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_19todot(struct __pyx_obj_4cdec_5 __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); - /* "cdec/lattice.pxi":63 + /* "lattice.pxi":71 * def todot(self): * """lattice.todot() -> Representation of the lattice in GraphViz dot format.""" * def lines(): # <<<<<<<<<<<<<< * yield 'digraph lattice {' * yield 'rankdir = LR;' */ - __pyx_t_1 = __Pyx_CyFunction_NewEx(&__pyx_mdef_4cdec_5_cdec_7Lattice_5todot_1lines, 0, __pyx_n_s_todot_locals_lines, ((PyObject*)__pyx_cur_scope), __pyx_n_s_cdec__cdec, __pyx_d, ((PyObject *)__pyx_codeobj__15)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_CyFunction_NewEx(&__pyx_mdef_4cdec_5_cdec_7Lattice_5todot_1lines, 0, __pyx_n_s_todot_locals_lines, ((PyObject*)__pyx_cur_scope), __pyx_n_s_cdec__cdec, __pyx_d, ((PyObject *)__pyx_codeobj__15)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_lines = __pyx_t_1; __pyx_t_1 = 0; - /* "cdec/lattice.pxi":72 + /* "lattice.pxi":80 * yield '%d [shape=doublecircle]' % len(self) * yield '}' * return '\n'.join(lines()).encode('utf8') # <<<<<<<<<<<<<< @@ -17600,22 +17704,22 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_19todot(struct __pyx_obj_4cdec_5 * def as_hypergraph(self): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __pyx_pf_4cdec_5_cdec_7Lattice_5todot_lines(__pyx_v_lines); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __pyx_pf_4cdec_5_cdec_7Lattice_5todot_lines(__pyx_v_lines); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyString_Join(__pyx_kp_s__16, __pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyString_Join(__pyx_kp_s__16, __pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_encode); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_encode); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__17, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__17, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0; - /* "cdec/lattice.pxi":61 + /* "lattice.pxi":69 * yield self[i] * * def todot(self): # <<<<<<<<<<<<<< @@ -17637,7 +17741,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_19todot(struct __pyx_obj_4cdec_5 return __pyx_r; } -/* "cdec/lattice.pxi":74 +/* "lattice.pxi":82 * return '\n'.join(lines()).encode('utf8') * * def as_hypergraph(self): # <<<<<<<<<<<<<< @@ -17672,20 +17776,20 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_21as_hypergraph(struct __pyx_obj int __pyx_clineno = 0; __Pyx_RefNannySetupContext("as_hypergraph", 0); - /* "cdec/lattice.pxi":76 + /* "lattice.pxi":84 * def as_hypergraph(self): * """lattice.as_hypergraph() -> Hypergraph representation of the lattice.""" * cdef Hypergraph result = Hypergraph.__new__(Hypergraph) # <<<<<<<<<<<<<< * result.hg = new hypergraph.Hypergraph() * cdef bytes plf = str(self) */ - __pyx_t_1 = __pyx_tp_new_4cdec_5_cdec_Hypergraph(((PyTypeObject *)((PyObject*)__pyx_ptype_4cdec_5_cdec_Hypergraph)), __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __pyx_tp_new_4cdec_5_cdec_Hypergraph(((PyTypeObject *)((PyObject*)__pyx_ptype_4cdec_5_cdec_Hypergraph)), __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 84; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_4cdec_5_cdec_Hypergraph)))) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_4cdec_5_cdec_Hypergraph)))) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 84; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_result = ((struct __pyx_obj_4cdec_5_cdec_Hypergraph *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/lattice.pxi":77 + /* "lattice.pxi":85 * """lattice.as_hypergraph() -> Hypergraph representation of the lattice.""" * cdef Hypergraph result = Hypergraph.__new__(Hypergraph) * result.hg = new hypergraph.Hypergraph() # <<<<<<<<<<<<<< @@ -17694,35 +17798,35 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_21as_hypergraph(struct __pyx_obj */ __pyx_v_result->hg = new Hypergraph(); - /* "cdec/lattice.pxi":78 + /* "lattice.pxi":86 * cdef Hypergraph result = Hypergraph.__new__(Hypergraph) * result.hg = new hypergraph.Hypergraph() * cdef bytes plf = str(self) # <<<<<<<<<<<<<< * hypergraph.ReadFromPLF(plf, result.hg) * return result */ - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(((PyObject *)__pyx_v_self)); PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_self)); __Pyx_GIVEREF(((PyObject *)__pyx_v_self)); - __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), __pyx_t_1, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), __pyx_t_1, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(PyBytes_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_2)->tp_name), 0))) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(PyBytes_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_2)->tp_name), 0))) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_plf = ((PyObject*)__pyx_t_2); __pyx_t_2 = 0; - /* "cdec/lattice.pxi":79 + /* "lattice.pxi":87 * result.hg = new hypergraph.Hypergraph() * cdef bytes plf = str(self) * hypergraph.ReadFromPLF(plf, result.hg) # <<<<<<<<<<<<<< * return result */ - __pyx_t_3 = __pyx_convert_string_from_py_(__pyx_v_plf); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_convert_string_from_py_std__in_string(__pyx_v_plf); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} HypergraphIO::ReadFromPLF(__pyx_t_3, __pyx_v_result->hg); - /* "cdec/lattice.pxi":80 + /* "lattice.pxi":88 * cdef bytes plf = str(self) * hypergraph.ReadFromPLF(plf, result.hg) * return result # <<<<<<<<<<<<<< @@ -17732,7 +17836,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_21as_hypergraph(struct __pyx_obj __pyx_r = ((PyObject *)__pyx_v_result); goto __pyx_L0; - /* "cdec/lattice.pxi":74 + /* "lattice.pxi":82 * return '\n'.join(lines()).encode('utf8') * * def as_hypergraph(self): # <<<<<<<<<<<<<< @@ -17754,7 +17858,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Lattice_21as_hypergraph(struct __pyx_obj return __pyx_r; } -/* "cdec/mteval.pxi":3 +/* "mteval.pxi":3 * cimport mteval * * cdef SufficientStats as_stats(x, y): # <<<<<<<<<<<<<< @@ -17776,7 +17880,7 @@ static struct __pyx_obj_4cdec_5_cdec_SufficientStats *__pyx_f_4cdec_5_cdec_as_st int __pyx_clineno = 0; __Pyx_RefNannySetupContext("as_stats", 0); - /* "cdec/mteval.pxi":4 + /* "mteval.pxi":4 * * cdef SufficientStats as_stats(x, y): * if isinstance(x, SufficientStats): # <<<<<<<<<<<<<< @@ -17787,7 +17891,7 @@ static struct __pyx_obj_4cdec_5_cdec_SufficientStats *__pyx_f_4cdec_5_cdec_as_st __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "cdec/mteval.pxi":5 + /* "mteval.pxi":5 * cdef SufficientStats as_stats(x, y): * if isinstance(x, SufficientStats): * return x # <<<<<<<<<<<<<< @@ -17801,7 +17905,7 @@ static struct __pyx_obj_4cdec_5_cdec_SufficientStats *__pyx_f_4cdec_5_cdec_as_st goto __pyx_L0; } - /* "cdec/mteval.pxi":6 + /* "mteval.pxi":6 * if isinstance(x, SufficientStats): * return x * elif x == 0 and isinstance(y, SufficientStats): # <<<<<<<<<<<<<< @@ -17812,19 +17916,17 @@ static struct __pyx_obj_4cdec_5_cdec_SufficientStats *__pyx_f_4cdec_5_cdec_as_st __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_1) { - goto __pyx_L5_next_and; } else { __pyx_t_2 = __pyx_t_1; goto __pyx_L4_bool_binop_done; } - __pyx_L5_next_and:; __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_y, ((PyObject*)__pyx_ptype_4cdec_5_cdec_SufficientStats)); __pyx_t_4 = (__pyx_t_1 != 0); __pyx_t_2 = __pyx_t_4; __pyx_L4_bool_binop_done:; if (__pyx_t_2) { - /* "cdec/mteval.pxi":7 + /* "mteval.pxi":7 * return x * elif x == 0 and isinstance(y, SufficientStats): * stats = SufficientStats() # <<<<<<<<<<<<<< @@ -17836,7 +17938,7 @@ static struct __pyx_obj_4cdec_5_cdec_SufficientStats *__pyx_f_4cdec_5_cdec_as_st __pyx_v_stats = ((struct __pyx_obj_4cdec_5_cdec_SufficientStats *)__pyx_t_3); __pyx_t_3 = 0; - /* "cdec/mteval.pxi":8 + /* "mteval.pxi":8 * elif x == 0 and isinstance(y, SufficientStats): * stats = SufficientStats() * stats.stats = new mteval.SufficientStats() # <<<<<<<<<<<<<< @@ -17845,7 +17947,7 @@ static struct __pyx_obj_4cdec_5_cdec_SufficientStats *__pyx_f_4cdec_5_cdec_as_st */ __pyx_v_stats->stats = new SufficientStats(); - /* "cdec/mteval.pxi":9 + /* "mteval.pxi":9 * stats = SufficientStats() * stats.stats = new mteval.SufficientStats() * stats.metric = (<SufficientStats> y).metric # <<<<<<<<<<<<<< @@ -17855,7 +17957,7 @@ static struct __pyx_obj_4cdec_5_cdec_SufficientStats *__pyx_f_4cdec_5_cdec_as_st __pyx_t_5 = ((struct __pyx_obj_4cdec_5_cdec_SufficientStats *)__pyx_v_y)->metric; __pyx_v_stats->metric = __pyx_t_5; - /* "cdec/mteval.pxi":10 + /* "mteval.pxi":10 * stats.stats = new mteval.SufficientStats() * stats.metric = (<SufficientStats> y).metric * return stats # <<<<<<<<<<<<<< @@ -17868,7 +17970,7 @@ static struct __pyx_obj_4cdec_5_cdec_SufficientStats *__pyx_f_4cdec_5_cdec_as_st goto __pyx_L0; } - /* "cdec/mteval.pxi":3 + /* "mteval.pxi":3 * cimport mteval * * cdef SufficientStats as_stats(x, y): # <<<<<<<<<<<<<< @@ -17890,7 +17992,7 @@ static struct __pyx_obj_4cdec_5_cdec_SufficientStats *__pyx_f_4cdec_5_cdec_as_st return __pyx_r; } -/* "cdec/mteval.pxi":17 +/* "mteval.pxi":17 * * property words: * def __get__(self): # <<<<<<<<<<<<<< @@ -17922,7 +18024,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_9Candidate_5words___get__(struct __pyx_ob int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/mteval.pxi":18 + /* "mteval.pxi":18 * property words: * def __get__(self): * return unicode(GetString(self.candidate.ewords).c_str(), encoding='utf8') # <<<<<<<<<<<<<< @@ -17948,7 +18050,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_9Candidate_5words___get__(struct __pyx_ob __pyx_t_3 = 0; goto __pyx_L0; - /* "cdec/mteval.pxi":17 + /* "mteval.pxi":17 * * property words: * def __get__(self): # <<<<<<<<<<<<<< @@ -17969,7 +18071,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_9Candidate_5words___get__(struct __pyx_ob return __pyx_r; } -/* "cdec/mteval.pxi":21 +/* "mteval.pxi":21 * * property fmap: * def __get__(self): # <<<<<<<<<<<<<< @@ -18000,7 +18102,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_9Candidate_4fmap___get__(struct __pyx_obj int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/mteval.pxi":22 + /* "mteval.pxi":22 * property fmap: * def __get__(self): * cdef SparseVector fmap = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< @@ -18013,7 +18115,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_9Candidate_4fmap___get__(struct __pyx_obj __pyx_v_fmap = ((struct __pyx_obj_4cdec_5_cdec_SparseVector *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/mteval.pxi":23 + /* "mteval.pxi":23 * def __get__(self): * cdef SparseVector fmap = SparseVector.__new__(SparseVector) * fmap.vector = new FastSparseVector[weight_t](self.candidate.fmap) # <<<<<<<<<<<<<< @@ -18022,7 +18124,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_9Candidate_4fmap___get__(struct __pyx_obj */ __pyx_v_fmap->vector = new FastSparseVector<weight_t> (__pyx_v_self->candidate->fmap); - /* "cdec/mteval.pxi":24 + /* "mteval.pxi":24 * cdef SparseVector fmap = SparseVector.__new__(SparseVector) * fmap.vector = new FastSparseVector[weight_t](self.candidate.fmap) * return fmap # <<<<<<<<<<<<<< @@ -18034,7 +18136,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_9Candidate_4fmap___get__(struct __pyx_obj __pyx_r = ((PyObject *)__pyx_v_fmap); goto __pyx_L0; - /* "cdec/mteval.pxi":21 + /* "mteval.pxi":21 * * property fmap: * def __get__(self): # <<<<<<<<<<<<<< @@ -18054,7 +18156,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_9Candidate_4fmap___get__(struct __pyx_obj return __pyx_r; } -/* "cdec/mteval.pxi":14 +/* "mteval.pxi":14 * cdef class Candidate: * cdef mteval.const_Candidate* candidate * cdef public float score # <<<<<<<<<<<<<< @@ -18136,7 +18238,7 @@ static int __pyx_pf_4cdec_5_cdec_9Candidate_5score_2__set__(struct __pyx_obj_4cd return __pyx_r; } -/* "cdec/mteval.pxi":30 +/* "mteval.pxi":30 * cdef mteval.EvaluationMetric* metric * * def __cinit__(self): # <<<<<<<<<<<<<< @@ -18165,7 +18267,7 @@ static int __pyx_pf_4cdec_5_cdec_15SufficientStats___cinit__(struct __pyx_obj_4c __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__cinit__", 0); - /* "cdec/mteval.pxi":31 + /* "mteval.pxi":31 * * def __cinit__(self): * self.stats = new mteval.SufficientStats() # <<<<<<<<<<<<<< @@ -18174,7 +18276,7 @@ static int __pyx_pf_4cdec_5_cdec_15SufficientStats___cinit__(struct __pyx_obj_4c */ __pyx_v_self->stats = new SufficientStats(); - /* "cdec/mteval.pxi":30 + /* "mteval.pxi":30 * cdef mteval.EvaluationMetric* metric * * def __cinit__(self): # <<<<<<<<<<<<<< @@ -18188,7 +18290,7 @@ static int __pyx_pf_4cdec_5_cdec_15SufficientStats___cinit__(struct __pyx_obj_4c return __pyx_r; } -/* "cdec/mteval.pxi":33 +/* "mteval.pxi":33 * self.stats = new mteval.SufficientStats() * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -18211,7 +18313,7 @@ static void __pyx_pf_4cdec_5_cdec_15SufficientStats_2__dealloc__(struct __pyx_ob __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "cdec/mteval.pxi":34 + /* "mteval.pxi":34 * * def __dealloc__(self): * del self.stats # <<<<<<<<<<<<<< @@ -18220,7 +18322,7 @@ static void __pyx_pf_4cdec_5_cdec_15SufficientStats_2__dealloc__(struct __pyx_ob */ delete __pyx_v_self->stats; - /* "cdec/mteval.pxi":33 + /* "mteval.pxi":33 * self.stats = new mteval.SufficientStats() * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -18232,7 +18334,7 @@ static void __pyx_pf_4cdec_5_cdec_15SufficientStats_2__dealloc__(struct __pyx_ob __Pyx_RefNannyFinishContext(); } -/* "cdec/mteval.pxi":37 +/* "mteval.pxi":37 * * property score: * def __get__(self): # <<<<<<<<<<<<<< @@ -18262,7 +18364,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_5score___get__(struct _ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/mteval.pxi":38 + /* "mteval.pxi":38 * property score: * def __get__(self): * return self.metric.ComputeScore(self.stats[0]) # <<<<<<<<<<<<<< @@ -18276,7 +18378,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_5score___get__(struct _ __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/mteval.pxi":37 + /* "mteval.pxi":37 * * property score: * def __get__(self): # <<<<<<<<<<<<<< @@ -18295,7 +18397,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_5score___get__(struct _ return __pyx_r; } -/* "cdec/mteval.pxi":41 +/* "mteval.pxi":41 * * property detail: * def __get__(self): # <<<<<<<<<<<<<< @@ -18326,7 +18428,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_6detail___get__(struct int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "cdec/mteval.pxi":42 + /* "mteval.pxi":42 * property detail: * def __get__(self): * return str(self.metric.DetailedScore(self.stats[0]).c_str()) # <<<<<<<<<<<<<< @@ -18348,7 +18450,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_6detail___get__(struct __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/mteval.pxi":41 + /* "mteval.pxi":41 * * property detail: * def __get__(self): # <<<<<<<<<<<<<< @@ -18368,7 +18470,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_6detail___get__(struct return __pyx_r; } -/* "cdec/mteval.pxi":44 +/* "mteval.pxi":44 * return str(self.metric.DetailedScore(self.stats[0]).c_str()) * * def __len__(self): # <<<<<<<<<<<<<< @@ -18394,7 +18496,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_15SufficientStats_4__len__(struct __pyx_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__len__", 0); - /* "cdec/mteval.pxi":45 + /* "mteval.pxi":45 * * def __len__(self): * return self.stats.size() # <<<<<<<<<<<<<< @@ -18404,7 +18506,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_15SufficientStats_4__len__(struct __pyx_ __pyx_r = __pyx_v_self->stats->size(); goto __pyx_L0; - /* "cdec/mteval.pxi":44 + /* "mteval.pxi":44 * return str(self.metric.DetailedScore(self.stats[0]).c_str()) * * def __len__(self): # <<<<<<<<<<<<<< @@ -18419,7 +18521,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_15SufficientStats_4__len__(struct __pyx_ } static PyObject *__pyx_gb_4cdec_5_cdec_15SufficientStats_8generator19(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/mteval.pxi":47 +/* "mteval.pxi":47 * return self.stats.size() * * def __iter__(self): # <<<<<<<<<<<<<< @@ -18496,7 +18598,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_15SufficientStats_8generator19(__pyx_Gene __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/mteval.pxi":48 + /* "mteval.pxi":48 * * def __iter__(self): * for i in range(len(self)): # <<<<<<<<<<<<<< @@ -18507,7 +18609,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_15SufficientStats_8generator19(__pyx_Gene for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "cdec/mteval.pxi":49 + /* "mteval.pxi":49 * def __iter__(self): * for i in range(len(self)): * yield self[i] # <<<<<<<<<<<<<< @@ -18531,7 +18633,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_15SufficientStats_8generator19(__pyx_Gene if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/mteval.pxi":47 + /* "mteval.pxi":47 * return self.stats.size() * * def __iter__(self): # <<<<<<<<<<<<<< @@ -18553,7 +18655,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_15SufficientStats_8generator19(__pyx_Gene return NULL; } -/* "cdec/mteval.pxi":51 +/* "mteval.pxi":51 * yield self[i] * * def __getitem__(self, int index): # <<<<<<<<<<<<<< @@ -18599,7 +18701,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_9__getitem__(struct __p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__getitem__", 0); - /* "cdec/mteval.pxi":52 + /* "mteval.pxi":52 * * def __getitem__(self, int index): * if not 0 <= index < len(self): # <<<<<<<<<<<<<< @@ -18614,7 +18716,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_9__getitem__(struct __p __pyx_t_3 = ((!(__pyx_t_1 != 0)) != 0); if (__pyx_t_3) { - /* "cdec/mteval.pxi":53 + /* "mteval.pxi":53 * def __getitem__(self, int index): * if not 0 <= index < len(self): * raise IndexError('sufficient stats vector index out of range') # <<<<<<<<<<<<<< @@ -18628,7 +18730,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_9__getitem__(struct __p {__pyx_filename = __pyx_f[5]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/mteval.pxi":54 + /* "mteval.pxi":54 * if not 0 <= index < len(self): * raise IndexError('sufficient stats vector index out of range') * return self.stats[0][index] # <<<<<<<<<<<<<< @@ -18642,7 +18744,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_9__getitem__(struct __p __pyx_t_4 = 0; goto __pyx_L0; - /* "cdec/mteval.pxi":51 + /* "mteval.pxi":51 * yield self[i] * * def __getitem__(self, int index): # <<<<<<<<<<<<<< @@ -18661,7 +18763,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_9__getitem__(struct __p return __pyx_r; } -/* "cdec/mteval.pxi":56 +/* "mteval.pxi":56 * return self.stats[0][index] * * def __iadd__(SufficientStats self, SufficientStats other): # <<<<<<<<<<<<<< @@ -18695,7 +18797,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_11__iadd__(struct __pyx __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__iadd__", 0); - /* "cdec/mteval.pxi":57 + /* "mteval.pxi":57 * * def __iadd__(SufficientStats self, SufficientStats other): * self.stats[0] += other.stats[0] # <<<<<<<<<<<<<< @@ -18704,7 +18806,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_11__iadd__(struct __pyx */ (__pyx_v_self->stats[0]) += (__pyx_v_other->stats[0]); - /* "cdec/mteval.pxi":58 + /* "mteval.pxi":58 * def __iadd__(SufficientStats self, SufficientStats other): * self.stats[0] += other.stats[0] * return self # <<<<<<<<<<<<<< @@ -18716,7 +18818,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_11__iadd__(struct __pyx __pyx_r = ((PyObject *)__pyx_v_self); goto __pyx_L0; - /* "cdec/mteval.pxi":56 + /* "mteval.pxi":56 * return self.stats[0][index] * * def __iadd__(SufficientStats self, SufficientStats other): # <<<<<<<<<<<<<< @@ -18731,7 +18833,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_11__iadd__(struct __pyx return __pyx_r; } -/* "cdec/mteval.pxi":60 +/* "mteval.pxi":60 * return self * * def __add__(x, y): # <<<<<<<<<<<<<< @@ -18765,7 +18867,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_13__add__(PyObject *__p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__add__", 0); - /* "cdec/mteval.pxi":61 + /* "mteval.pxi":61 * * def __add__(x, y): * cdef SufficientStats sx = as_stats(x, y) # <<<<<<<<<<<<<< @@ -18777,7 +18879,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_13__add__(PyObject *__p __pyx_v_sx = ((struct __pyx_obj_4cdec_5_cdec_SufficientStats *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/mteval.pxi":62 + /* "mteval.pxi":62 * def __add__(x, y): * cdef SufficientStats sx = as_stats(x, y) * cdef SufficientStats sy = as_stats(y, x) # <<<<<<<<<<<<<< @@ -18789,7 +18891,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_13__add__(PyObject *__p __pyx_v_sy = ((struct __pyx_obj_4cdec_5_cdec_SufficientStats *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/mteval.pxi":63 + /* "mteval.pxi":63 * cdef SufficientStats sx = as_stats(x, y) * cdef SufficientStats sy = as_stats(y, x) * cdef SufficientStats result = SufficientStats() # <<<<<<<<<<<<<< @@ -18801,7 +18903,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_13__add__(PyObject *__p __pyx_v_result = ((struct __pyx_obj_4cdec_5_cdec_SufficientStats *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/mteval.pxi":64 + /* "mteval.pxi":64 * cdef SufficientStats sy = as_stats(y, x) * cdef SufficientStats result = SufficientStats() * result.stats = new mteval.SufficientStats(mteval.add(sx.stats[0], sy.stats[0])) # <<<<<<<<<<<<<< @@ -18810,7 +18912,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_13__add__(PyObject *__p */ __pyx_v_result->stats = new SufficientStats(operator+((__pyx_v_sx->stats[0]), (__pyx_v_sy->stats[0]))); - /* "cdec/mteval.pxi":65 + /* "mteval.pxi":65 * cdef SufficientStats result = SufficientStats() * result.stats = new mteval.SufficientStats(mteval.add(sx.stats[0], sy.stats[0])) * result.metric = sx.metric # <<<<<<<<<<<<<< @@ -18820,7 +18922,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_13__add__(PyObject *__p __pyx_t_2 = __pyx_v_sx->metric; __pyx_v_result->metric = __pyx_t_2; - /* "cdec/mteval.pxi":66 + /* "mteval.pxi":66 * result.stats = new mteval.SufficientStats(mteval.add(sx.stats[0], sy.stats[0])) * result.metric = sx.metric * return result # <<<<<<<<<<<<<< @@ -18832,7 +18934,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_13__add__(PyObject *__p __pyx_r = ((PyObject *)__pyx_v_result); goto __pyx_L0; - /* "cdec/mteval.pxi":60 + /* "mteval.pxi":60 * return self * * def __add__(x, y): # <<<<<<<<<<<<<< @@ -18854,7 +18956,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_15SufficientStats_13__add__(PyObject *__p return __pyx_r; } -/* "cdec/mteval.pxi":73 +/* "mteval.pxi":73 * cdef mteval.CandidateSet* cs * * def __cinit__(self, SegmentEvaluator evaluator): # <<<<<<<<<<<<<< @@ -18925,7 +19027,7 @@ static int __pyx_pf_4cdec_5_cdec_12CandidateSet___cinit__(struct __pyx_obj_4cdec EvaluationMetric *__pyx_t_1; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "cdec/mteval.pxi":74 + /* "mteval.pxi":74 * * def __cinit__(self, SegmentEvaluator evaluator): * self.scorer = new shared_ptr[mteval.SegmentEvaluator](evaluator.scorer[0]) # <<<<<<<<<<<<<< @@ -18934,7 +19036,7 @@ static int __pyx_pf_4cdec_5_cdec_12CandidateSet___cinit__(struct __pyx_obj_4cdec */ __pyx_v_self->scorer = new boost::shared_ptr<SegmentEvaluator> ((__pyx_v_evaluator->scorer[0])); - /* "cdec/mteval.pxi":75 + /* "mteval.pxi":75 * def __cinit__(self, SegmentEvaluator evaluator): * self.scorer = new shared_ptr[mteval.SegmentEvaluator](evaluator.scorer[0]) * self.metric = evaluator.metric # <<<<<<<<<<<<<< @@ -18944,7 +19046,7 @@ static int __pyx_pf_4cdec_5_cdec_12CandidateSet___cinit__(struct __pyx_obj_4cdec __pyx_t_1 = __pyx_v_evaluator->metric; __pyx_v_self->metric = __pyx_t_1; - /* "cdec/mteval.pxi":76 + /* "mteval.pxi":76 * self.scorer = new shared_ptr[mteval.SegmentEvaluator](evaluator.scorer[0]) * self.metric = evaluator.metric * self.cs = new mteval.CandidateSet() # <<<<<<<<<<<<<< @@ -18953,7 +19055,7 @@ static int __pyx_pf_4cdec_5_cdec_12CandidateSet___cinit__(struct __pyx_obj_4cdec */ __pyx_v_self->cs = new training::CandidateSet(); - /* "cdec/mteval.pxi":73 + /* "mteval.pxi":73 * cdef mteval.CandidateSet* cs * * def __cinit__(self, SegmentEvaluator evaluator): # <<<<<<<<<<<<<< @@ -18967,7 +19069,7 @@ static int __pyx_pf_4cdec_5_cdec_12CandidateSet___cinit__(struct __pyx_obj_4cdec return __pyx_r; } -/* "cdec/mteval.pxi":78 +/* "mteval.pxi":78 * self.cs = new mteval.CandidateSet() * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -18990,7 +19092,7 @@ static void __pyx_pf_4cdec_5_cdec_12CandidateSet_2__dealloc__(struct __pyx_obj_4 __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "cdec/mteval.pxi":79 + /* "mteval.pxi":79 * * def __dealloc__(self): * del self.scorer # <<<<<<<<<<<<<< @@ -18999,7 +19101,7 @@ static void __pyx_pf_4cdec_5_cdec_12CandidateSet_2__dealloc__(struct __pyx_obj_4 */ delete __pyx_v_self->scorer; - /* "cdec/mteval.pxi":80 + /* "mteval.pxi":80 * def __dealloc__(self): * del self.scorer * del self.cs # <<<<<<<<<<<<<< @@ -19008,7 +19110,7 @@ static void __pyx_pf_4cdec_5_cdec_12CandidateSet_2__dealloc__(struct __pyx_obj_4 */ delete __pyx_v_self->cs; - /* "cdec/mteval.pxi":78 + /* "mteval.pxi":78 * self.cs = new mteval.CandidateSet() * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -19020,7 +19122,7 @@ static void __pyx_pf_4cdec_5_cdec_12CandidateSet_2__dealloc__(struct __pyx_obj_4 __Pyx_RefNannyFinishContext(); } -/* "cdec/mteval.pxi":82 +/* "mteval.pxi":82 * del self.cs * * def __len__(self): # <<<<<<<<<<<<<< @@ -19046,7 +19148,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_12CandidateSet_4__len__(struct __pyx_obj __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__len__", 0); - /* "cdec/mteval.pxi":83 + /* "mteval.pxi":83 * * def __len__(self): * return self.cs.size() # <<<<<<<<<<<<<< @@ -19056,7 +19158,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_12CandidateSet_4__len__(struct __pyx_obj __pyx_r = __pyx_v_self->cs->size(); goto __pyx_L0; - /* "cdec/mteval.pxi":82 + /* "mteval.pxi":82 * del self.cs * * def __len__(self): # <<<<<<<<<<<<<< @@ -19070,7 +19172,7 @@ static Py_ssize_t __pyx_pf_4cdec_5_cdec_12CandidateSet_4__len__(struct __pyx_obj return __pyx_r; } -/* "cdec/mteval.pxi":85 +/* "mteval.pxi":85 * return self.cs.size() * * def __getitem__(self,int k): # <<<<<<<<<<<<<< @@ -19116,7 +19218,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12CandidateSet_6__getitem__(struct __pyx_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__getitem__", 0); - /* "cdec/mteval.pxi":86 + /* "mteval.pxi":86 * * def __getitem__(self,int k): * if not 0 <= k < self.cs.size(): # <<<<<<<<<<<<<< @@ -19130,7 +19232,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12CandidateSet_6__getitem__(struct __pyx_ __pyx_t_2 = ((!(__pyx_t_1 != 0)) != 0); if (__pyx_t_2) { - /* "cdec/mteval.pxi":87 + /* "mteval.pxi":87 * def __getitem__(self,int k): * if not 0 <= k < self.cs.size(): * raise IndexError('candidate set index out of range') # <<<<<<<<<<<<<< @@ -19144,7 +19246,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12CandidateSet_6__getitem__(struct __pyx_ {__pyx_filename = __pyx_f[5]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/mteval.pxi":88 + /* "mteval.pxi":88 * if not 0 <= k < self.cs.size(): * raise IndexError('candidate set index out of range') * cdef Candidate candidate = Candidate() # <<<<<<<<<<<<<< @@ -19156,7 +19258,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12CandidateSet_6__getitem__(struct __pyx_ __pyx_v_candidate = ((struct __pyx_obj_4cdec_5_cdec_Candidate *)__pyx_t_3); __pyx_t_3 = 0; - /* "cdec/mteval.pxi":89 + /* "mteval.pxi":89 * raise IndexError('candidate set index out of range') * cdef Candidate candidate = Candidate() * candidate.candidate = &self.cs[0][k] # <<<<<<<<<<<<<< @@ -19165,7 +19267,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12CandidateSet_6__getitem__(struct __pyx_ */ __pyx_v_candidate->candidate = (&((__pyx_v_self->cs[0])[__pyx_v_k])); - /* "cdec/mteval.pxi":90 + /* "mteval.pxi":90 * cdef Candidate candidate = Candidate() * candidate.candidate = &self.cs[0][k] * candidate.score = self.metric.ComputeScore(self.cs[0][k].eval_feats) # <<<<<<<<<<<<<< @@ -19174,7 +19276,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12CandidateSet_6__getitem__(struct __pyx_ */ __pyx_v_candidate->score = __pyx_v_self->metric->ComputeScore(((__pyx_v_self->cs[0])[__pyx_v_k]).eval_feats); - /* "cdec/mteval.pxi":91 + /* "mteval.pxi":91 * candidate.candidate = &self.cs[0][k] * candidate.score = self.metric.ComputeScore(self.cs[0][k].eval_feats) * return candidate # <<<<<<<<<<<<<< @@ -19186,7 +19288,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12CandidateSet_6__getitem__(struct __pyx_ __pyx_r = ((PyObject *)__pyx_v_candidate); goto __pyx_L0; - /* "cdec/mteval.pxi":85 + /* "mteval.pxi":85 * return self.cs.size() * * def __getitem__(self,int k): # <<<<<<<<<<<<<< @@ -19207,7 +19309,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12CandidateSet_6__getitem__(struct __pyx_ } static PyObject *__pyx_gb_4cdec_5_cdec_12CandidateSet_10generator20(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "cdec/mteval.pxi":93 +/* "mteval.pxi":93 * return candidate * * def __iter__(self): # <<<<<<<<<<<<<< @@ -19284,7 +19386,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_12CandidateSet_10generator20(__pyx_Genera __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/mteval.pxi":95 + /* "mteval.pxi":95 * def __iter__(self): * cdef unsigned i * for i in range(len(self)): # <<<<<<<<<<<<<< @@ -19295,7 +19397,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_12CandidateSet_10generator20(__pyx_Genera for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "cdec/mteval.pxi":96 + /* "mteval.pxi":96 * cdef unsigned i * for i in range(len(self)): * yield self[i] # <<<<<<<<<<<<<< @@ -19319,7 +19421,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_12CandidateSet_10generator20(__pyx_Genera if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 96; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "cdec/mteval.pxi":93 + /* "mteval.pxi":93 * return candidate * * def __iter__(self): # <<<<<<<<<<<<<< @@ -19341,7 +19443,7 @@ static PyObject *__pyx_gb_4cdec_5_cdec_12CandidateSet_10generator20(__pyx_Genera return NULL; } -/* "cdec/mteval.pxi":98 +/* "mteval.pxi":98 * yield self[i] * * def add_kbest(self, Hypergraph hypergraph, unsigned k): # <<<<<<<<<<<<<< @@ -19421,7 +19523,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12CandidateSet_11add_kbest(struct __pyx_o __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("add_kbest", 0); - /* "cdec/mteval.pxi":101 + /* "mteval.pxi":101 * """cs.add_kbest(Hypergraph hypergraph, int k) -> Extract K-best hypotheses * from the hypergraph and add them to the candidate set.""" * self.cs.AddKBestCandidates(hypergraph.hg[0], k, self.scorer.get()) # <<<<<<<<<<<<<< @@ -19430,7 +19532,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12CandidateSet_11add_kbest(struct __pyx_o */ __pyx_v_self->cs->AddKBestCandidates((__pyx_v_hypergraph->hg[0]), __pyx_v_k, __pyx_v_self->scorer->get()); - /* "cdec/mteval.pxi":98 + /* "mteval.pxi":98 * yield self[i] * * def add_kbest(self, Hypergraph hypergraph, unsigned k): # <<<<<<<<<<<<<< @@ -19445,7 +19547,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_12CandidateSet_11add_kbest(struct __pyx_o return __pyx_r; } -/* "cdec/mteval.pxi":107 +/* "mteval.pxi":107 * cdef mteval.EvaluationMetric* metric * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -19468,7 +19570,7 @@ static void __pyx_pf_4cdec_5_cdec_16SegmentEvaluator___dealloc__(struct __pyx_ob __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "cdec/mteval.pxi":108 + /* "mteval.pxi":108 * * def __dealloc__(self): * del self.scorer # <<<<<<<<<<<<<< @@ -19477,7 +19579,7 @@ static void __pyx_pf_4cdec_5_cdec_16SegmentEvaluator___dealloc__(struct __pyx_ob */ delete __pyx_v_self->scorer; - /* "cdec/mteval.pxi":107 + /* "mteval.pxi":107 * cdef mteval.EvaluationMetric* metric * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -19489,7 +19591,7 @@ static void __pyx_pf_4cdec_5_cdec_16SegmentEvaluator___dealloc__(struct __pyx_ob __Pyx_RefNannyFinishContext(); } -/* "cdec/mteval.pxi":110 +/* "mteval.pxi":110 * del self.scorer * * def evaluate(self, sentence): # <<<<<<<<<<<<<< @@ -19526,7 +19628,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("evaluate", 0); - /* "cdec/mteval.pxi":113 + /* "mteval.pxi":113 * """se.evaluate(sentence) -> SufficientStats for the given hypothesis.""" * cdef vector[WordID] hyp * cdef SufficientStats sf = SufficientStats() # <<<<<<<<<<<<<< @@ -19538,7 +19640,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx __pyx_v_sf = ((struct __pyx_obj_4cdec_5_cdec_SufficientStats *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/mteval.pxi":114 + /* "mteval.pxi":114 * cdef vector[WordID] hyp * cdef SufficientStats sf = SufficientStats() * sf.metric = self.metric # <<<<<<<<<<<<<< @@ -19548,7 +19650,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx __pyx_t_2 = __pyx_v_self->metric; __pyx_v_sf->metric = __pyx_t_2; - /* "cdec/mteval.pxi":115 + /* "mteval.pxi":115 * cdef SufficientStats sf = SufficientStats() * sf.metric = self.metric * sf.stats = new mteval.SufficientStats() # <<<<<<<<<<<<<< @@ -19557,7 +19659,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx */ __pyx_v_sf->stats = new SufficientStats(); - /* "cdec/mteval.pxi":116 + /* "mteval.pxi":116 * sf.metric = self.metric * sf.stats = new mteval.SufficientStats() * ConvertSentence(as_str(sentence.strip()), &hyp) # <<<<<<<<<<<<<< @@ -19587,11 +19689,11 @@ static PyObject *__pyx_pf_4cdec_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx __pyx_t_3 = __pyx_f_4cdec_5_cdec_as_str(__pyx_t_1, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_5 = __pyx_convert_string_from_py_(__pyx_t_3); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __pyx_convert_string_from_py_std__in_string(__pyx_t_3); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; TD::ConvertSentence(__pyx_t_5, (&__pyx_v_hyp)); - /* "cdec/mteval.pxi":117 + /* "mteval.pxi":117 * sf.stats = new mteval.SufficientStats() * ConvertSentence(as_str(sentence.strip()), &hyp) * self.scorer.get().Evaluate(hyp, sf.stats) # <<<<<<<<<<<<<< @@ -19600,7 +19702,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx */ __pyx_v_self->scorer->get()->Evaluate(__pyx_v_hyp, __pyx_v_sf->stats); - /* "cdec/mteval.pxi":118 + /* "mteval.pxi":118 * ConvertSentence(as_str(sentence.strip()), &hyp) * self.scorer.get().Evaluate(hyp, sf.stats) * return sf # <<<<<<<<<<<<<< @@ -19612,7 +19714,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx __pyx_r = ((PyObject *)__pyx_v_sf); goto __pyx_L0; - /* "cdec/mteval.pxi":110 + /* "mteval.pxi":110 * del self.scorer * * def evaluate(self, sentence): # <<<<<<<<<<<<<< @@ -19634,7 +19736,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_16SegmentEvaluator_2evaluate(struct __pyx return __pyx_r; } -/* "cdec/mteval.pxi":120 +/* "mteval.pxi":120 * return sf * * def candidate_set(self): # <<<<<<<<<<<<<< @@ -19666,7 +19768,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_16SegmentEvaluator_4candidate_set(struct int __pyx_clineno = 0; __Pyx_RefNannySetupContext("candidate_set", 0); - /* "cdec/mteval.pxi":122 + /* "mteval.pxi":122 * def candidate_set(self): * """se.candidate_set() -> Candidate set using this segment evaluator for scoring.""" * return CandidateSet(self) # <<<<<<<<<<<<<< @@ -19686,7 +19788,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_16SegmentEvaluator_4candidate_set(struct __pyx_t_2 = 0; goto __pyx_L0; - /* "cdec/mteval.pxi":120 + /* "mteval.pxi":120 * return sf * * def candidate_set(self): # <<<<<<<<<<<<<< @@ -19706,7 +19808,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_16SegmentEvaluator_4candidate_set(struct return __pyx_r; } -/* "cdec/mteval.pxi":128 +/* "mteval.pxi":128 * cdef mteval.EvaluationMetric* metric * * def __cinit__(self, bytes name=None): # <<<<<<<<<<<<<< @@ -19787,7 +19889,7 @@ static int __pyx_pf_4cdec_5_cdec_6Scorer___cinit__(struct __pyx_obj_4cdec_5_cdec int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "cdec/mteval.pxi":129 + /* "mteval.pxi":129 * * def __cinit__(self, bytes name=None): * if name: # <<<<<<<<<<<<<< @@ -19797,7 +19899,7 @@ static int __pyx_pf_4cdec_5_cdec_6Scorer___cinit__(struct __pyx_obj_4cdec_5_cdec __pyx_t_1 = (__pyx_v_name != Py_None) && (PyBytes_GET_SIZE(__pyx_v_name) != 0); if (__pyx_t_1) { - /* "cdec/mteval.pxi":130 + /* "mteval.pxi":130 * def __cinit__(self, bytes name=None): * if name: * self.name = new string(name) # <<<<<<<<<<<<<< @@ -19813,7 +19915,7 @@ static int __pyx_pf_4cdec_5_cdec_6Scorer___cinit__(struct __pyx_obj_4cdec_5_cdec } __pyx_v_self->name = __pyx_t_3; - /* "cdec/mteval.pxi":131 + /* "mteval.pxi":131 * if name: * self.name = new string(name) * self.metric = mteval.MetricInstance(self.name[0]) # <<<<<<<<<<<<<< @@ -19825,7 +19927,7 @@ static int __pyx_pf_4cdec_5_cdec_6Scorer___cinit__(struct __pyx_obj_4cdec_5_cdec } __pyx_L3:; - /* "cdec/mteval.pxi":128 + /* "mteval.pxi":128 * cdef mteval.EvaluationMetric* metric * * def __cinit__(self, bytes name=None): # <<<<<<<<<<<<<< @@ -19844,7 +19946,7 @@ static int __pyx_pf_4cdec_5_cdec_6Scorer___cinit__(struct __pyx_obj_4cdec_5_cdec return __pyx_r; } -/* "cdec/mteval.pxi":133 +/* "mteval.pxi":133 * self.metric = mteval.MetricInstance(self.name[0]) * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -19867,7 +19969,7 @@ static void __pyx_pf_4cdec_5_cdec_6Scorer_2__dealloc__(struct __pyx_obj_4cdec_5_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "cdec/mteval.pxi":134 + /* "mteval.pxi":134 * * def __dealloc__(self): * del self.name # <<<<<<<<<<<<<< @@ -19876,7 +19978,7 @@ static void __pyx_pf_4cdec_5_cdec_6Scorer_2__dealloc__(struct __pyx_obj_4cdec_5_ */ delete __pyx_v_self->name; - /* "cdec/mteval.pxi":133 + /* "mteval.pxi":133 * self.metric = mteval.MetricInstance(self.name[0]) * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -19888,7 +19990,7 @@ static void __pyx_pf_4cdec_5_cdec_6Scorer_2__dealloc__(struct __pyx_obj_4cdec_5_ __Pyx_RefNannyFinishContext(); } -/* "cdec/mteval.pxi":136 +/* "mteval.pxi":136 * del self.name * * def __call__(self, refs): # <<<<<<<<<<<<<< @@ -19973,7 +20075,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_4__call__(struct __pyx_obj_4cdec_ __Pyx_RefNannySetupContext("__call__", 0); __Pyx_INCREF(__pyx_v_refs); - /* "cdec/mteval.pxi":137 + /* "mteval.pxi":137 * * def __call__(self, refs): * if isinstance(refs, basestring): # <<<<<<<<<<<<<< @@ -19984,7 +20086,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_4__call__(struct __pyx_obj_4cdec_ __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "cdec/mteval.pxi":138 + /* "mteval.pxi":138 * def __call__(self, refs): * if isinstance(refs, basestring): * refs = [refs] # <<<<<<<<<<<<<< @@ -20002,7 +20104,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_4__call__(struct __pyx_obj_4cdec_ } __pyx_L3:; - /* "cdec/mteval.pxi":139 + /* "mteval.pxi":139 * if isinstance(refs, basestring): * refs = [refs] * cdef vector[vector[WordID]]* refsv = new vector[vector[WordID]]() # <<<<<<<<<<<<<< @@ -20017,7 +20119,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_4__call__(struct __pyx_obj_4cdec_ } __pyx_v_refsv = __pyx_t_4; - /* "cdec/mteval.pxi":141 + /* "mteval.pxi":141 * cdef vector[vector[WordID]]* refsv = new vector[vector[WordID]]() * cdef vector[WordID]* refv * for ref in refs: # <<<<<<<<<<<<<< @@ -20064,7 +20166,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_4__call__(struct __pyx_obj_4cdec_ __Pyx_XDECREF_SET(__pyx_v_ref, __pyx_t_7); __pyx_t_7 = 0; - /* "cdec/mteval.pxi":142 + /* "mteval.pxi":142 * cdef vector[WordID]* refv * for ref in refs: * refv = new vector[WordID]() # <<<<<<<<<<<<<< @@ -20079,7 +20181,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_4__call__(struct __pyx_obj_4cdec_ } __pyx_v_refv = __pyx_t_8; - /* "cdec/mteval.pxi":143 + /* "mteval.pxi":143 * for ref in refs: * refv = new vector[WordID]() * ConvertSentence(as_str(ref.strip()), refv) # <<<<<<<<<<<<<< @@ -20109,20 +20211,25 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_4__call__(struct __pyx_obj_4cdec_ __pyx_t_9 = __pyx_f_4cdec_5_cdec_as_str(__pyx_t_7, NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_11 = __pyx_convert_string_from_py_(__pyx_t_9); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = __pyx_convert_string_from_py_std__in_string(__pyx_t_9); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; TD::ConvertSentence(__pyx_t_11, __pyx_v_refv); - /* "cdec/mteval.pxi":144 + /* "mteval.pxi":144 * refv = new vector[WordID]() * ConvertSentence(as_str(ref.strip()), refv) * refsv.push_back(refv[0]) # <<<<<<<<<<<<<< * del refv * cdef unsigned i */ - __pyx_v_refsv->push_back((__pyx_v_refv[0])); + try { + __pyx_v_refsv->push_back((__pyx_v_refv[0])); + } catch(...) { + __Pyx_CppExn2PyErr(); + {__pyx_filename = __pyx_f[5]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } - /* "cdec/mteval.pxi":145 + /* "mteval.pxi":145 * ConvertSentence(as_str(ref.strip()), refv) * refsv.push_back(refv[0]) * del refv # <<<<<<<<<<<<<< @@ -20131,7 +20238,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_4__call__(struct __pyx_obj_4cdec_ */ delete __pyx_v_refv; - /* "cdec/mteval.pxi":141 + /* "mteval.pxi":141 * cdef vector[vector[WordID]]* refsv = new vector[vector[WordID]]() * cdef vector[WordID]* refv * for ref in refs: # <<<<<<<<<<<<<< @@ -20141,7 +20248,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_4__call__(struct __pyx_obj_4cdec_ } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "cdec/mteval.pxi":147 + /* "mteval.pxi":147 * del refv * cdef unsigned i * cdef SegmentEvaluator evaluator = SegmentEvaluator() # <<<<<<<<<<<<<< @@ -20153,7 +20260,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_4__call__(struct __pyx_obj_4cdec_ __pyx_v_evaluator = ((struct __pyx_obj_4cdec_5_cdec_SegmentEvaluator *)__pyx_t_3); __pyx_t_3 = 0; - /* "cdec/mteval.pxi":148 + /* "mteval.pxi":148 * cdef unsigned i * cdef SegmentEvaluator evaluator = SegmentEvaluator() * evaluator.metric = self.metric # <<<<<<<<<<<<<< @@ -20163,7 +20270,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_4__call__(struct __pyx_obj_4cdec_ __pyx_t_12 = __pyx_v_self->metric; __pyx_v_evaluator->metric = __pyx_t_12; - /* "cdec/mteval.pxi":149 + /* "mteval.pxi":149 * cdef SegmentEvaluator evaluator = SegmentEvaluator() * evaluator.metric = self.metric * evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator]( # <<<<<<<<<<<<<< @@ -20172,7 +20279,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_4__call__(struct __pyx_obj_4cdec_ */ __pyx_v_evaluator->scorer = new boost::shared_ptr<SegmentEvaluator> (__pyx_v_self->metric->CreateSegmentEvaluator((__pyx_v_refsv[0]))); - /* "cdec/mteval.pxi":151 + /* "mteval.pxi":151 * evaluator.scorer = new shared_ptr[mteval.SegmentEvaluator]( * self.metric.CreateSegmentEvaluator(refsv[0])) * del refsv # in theory should not delete but store in SegmentEvaluator # <<<<<<<<<<<<<< @@ -20181,7 +20288,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_4__call__(struct __pyx_obj_4cdec_ */ delete __pyx_v_refsv; - /* "cdec/mteval.pxi":152 + /* "mteval.pxi":152 * self.metric.CreateSegmentEvaluator(refsv[0])) * del refsv # in theory should not delete but store in SegmentEvaluator * return evaluator # <<<<<<<<<<<<<< @@ -20193,7 +20300,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_4__call__(struct __pyx_obj_4cdec_ __pyx_r = ((PyObject *)__pyx_v_evaluator); goto __pyx_L0; - /* "cdec/mteval.pxi":136 + /* "mteval.pxi":136 * del self.name * * def __call__(self, refs): # <<<<<<<<<<<<<< @@ -20218,7 +20325,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_4__call__(struct __pyx_obj_4cdec_ return __pyx_r; } -/* "cdec/mteval.pxi":154 +/* "mteval.pxi":154 * return evaluator * * def __str__(self): # <<<<<<<<<<<<<< @@ -20249,7 +20356,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_6__str__(struct __pyx_obj_4cdec_5 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__str__", 0); - /* "cdec/mteval.pxi":155 + /* "mteval.pxi":155 * * def __str__(self): * return str(self.name.c_str()) # <<<<<<<<<<<<<< @@ -20271,7 +20378,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_6__str__(struct __pyx_obj_4cdec_5 __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/mteval.pxi":154 + /* "mteval.pxi":154 * return evaluator * * def __str__(self): # <<<<<<<<<<<<<< @@ -20291,7 +20398,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Scorer_6__str__(struct __pyx_obj_4cdec_5 return __pyx_r; } -/* "cdec/mteval.pxi":157 +/* "mteval.pxi":157 * return str(self.name.c_str()) * * cdef float _compute_score(void* metric_, mteval.SufficientStats* stats): # <<<<<<<<<<<<<< @@ -20318,7 +20425,7 @@ static float __pyx_f_4cdec_5_cdec__compute_score(void *__pyx_v_metric_, Sufficie int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_compute_score", 0); - /* "cdec/mteval.pxi":158 + /* "mteval.pxi":158 * * cdef float _compute_score(void* metric_, mteval.SufficientStats* stats): * cdef Metric metric = <Metric> metric_ # <<<<<<<<<<<<<< @@ -20330,7 +20437,7 @@ static float __pyx_f_4cdec_5_cdec__compute_score(void *__pyx_v_metric_, Sufficie __pyx_v_metric = ((struct __pyx_obj_4cdec_5_cdec_Metric *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/mteval.pxi":159 + /* "mteval.pxi":159 * cdef float _compute_score(void* metric_, mteval.SufficientStats* stats): * cdef Metric metric = <Metric> metric_ * cdef list ss = [] # <<<<<<<<<<<<<< @@ -20342,7 +20449,7 @@ static float __pyx_f_4cdec_5_cdec__compute_score(void *__pyx_v_metric_, Sufficie __pyx_v_ss = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/mteval.pxi":161 + /* "mteval.pxi":161 * cdef list ss = [] * cdef unsigned i * for i in range(stats.size()): # <<<<<<<<<<<<<< @@ -20353,7 +20460,7 @@ static float __pyx_f_4cdec_5_cdec__compute_score(void *__pyx_v_metric_, Sufficie for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "cdec/mteval.pxi":162 + /* "mteval.pxi":162 * cdef unsigned i * for i in range(stats.size()): * ss.append(stats[0][i]) # <<<<<<<<<<<<<< @@ -20366,7 +20473,7 @@ static float __pyx_f_4cdec_5_cdec__compute_score(void *__pyx_v_metric_, Sufficie __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } - /* "cdec/mteval.pxi":163 + /* "mteval.pxi":163 * for i in range(stats.size()): * ss.append(stats[0][i]) * return metric.score(ss) # <<<<<<<<<<<<<< @@ -20405,7 +20512,7 @@ static float __pyx_f_4cdec_5_cdec__compute_score(void *__pyx_v_metric_, Sufficie __pyx_r = __pyx_t_8; goto __pyx_L0; - /* "cdec/mteval.pxi":157 + /* "mteval.pxi":157 * return str(self.name.c_str()) * * cdef float _compute_score(void* metric_, mteval.SufficientStats* stats): # <<<<<<<<<<<<<< @@ -20428,7 +20535,7 @@ static float __pyx_f_4cdec_5_cdec__compute_score(void *__pyx_v_metric_, Sufficie return __pyx_r; } -/* "cdec/mteval.pxi":165 +/* "mteval.pxi":165 * return metric.score(ss) * * cdef void _compute_sufficient_stats(void* metric_, # <<<<<<<<<<<<<< @@ -20457,7 +20564,7 @@ static void __pyx_f_4cdec_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_compute_sufficient_stats", 0); - /* "cdec/mteval.pxi":169 + /* "mteval.pxi":169 * vector[string]* refs, * mteval.SufficientStats* out): * cdef Metric metric = <Metric> metric_ # <<<<<<<<<<<<<< @@ -20469,7 +20576,7 @@ static void __pyx_f_4cdec_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_ __pyx_v_metric = ((struct __pyx_obj_4cdec_5_cdec_Metric *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/mteval.pxi":170 + /* "mteval.pxi":170 * mteval.SufficientStats* out): * cdef Metric metric = <Metric> metric_ * cdef list refs_ = [] # <<<<<<<<<<<<<< @@ -20481,7 +20588,7 @@ static void __pyx_f_4cdec_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_ __pyx_v_refs_ = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/mteval.pxi":172 + /* "mteval.pxi":172 * cdef list refs_ = [] * cdef unsigned i * for i in range(refs.size()): # <<<<<<<<<<<<<< @@ -20492,7 +20599,7 @@ static void __pyx_f_4cdec_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_ for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "cdec/mteval.pxi":173 + /* "mteval.pxi":173 * cdef unsigned i * for i in range(refs.size()): * refs_.append(str(refs[0][i].c_str())) # <<<<<<<<<<<<<< @@ -20513,7 +20620,7 @@ static void __pyx_f_4cdec_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } - /* "cdec/mteval.pxi":174 + /* "mteval.pxi":174 * for i in range(refs.size()): * refs_.append(str(refs[0][i].c_str())) * cdef list ss = metric.evaluate(str(hyp.c_str()), refs_) # <<<<<<<<<<<<<< @@ -20563,7 +20670,7 @@ static void __pyx_f_4cdec_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_ __pyx_v_ss = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/mteval.pxi":175 + /* "mteval.pxi":175 * refs_.append(str(refs[0][i].c_str())) * cdef list ss = metric.evaluate(str(hyp.c_str()), refs_) * out.fields.resize(len(ss)) # <<<<<<<<<<<<<< @@ -20575,9 +20682,14 @@ static void __pyx_f_4cdec_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_ {__pyx_filename = __pyx_f[5]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_8 = PyList_GET_SIZE(__pyx_v_ss); if (unlikely(__pyx_t_8 == -1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_out->fields.resize(__pyx_t_8); + try { + __pyx_v_out->fields.resize(__pyx_t_8); + } catch(...) { + __Pyx_CppExn2PyErr(); + {__pyx_filename = __pyx_f[5]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } - /* "cdec/mteval.pxi":176 + /* "mteval.pxi":176 * cdef list ss = metric.evaluate(str(hyp.c_str()), refs_) * out.fields.resize(len(ss)) * for i in range(len(ss)): # <<<<<<<<<<<<<< @@ -20592,7 +20704,7 @@ static void __pyx_f_4cdec_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_ for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_8; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "cdec/mteval.pxi":177 + /* "mteval.pxi":177 * out.fields.resize(len(ss)) * for i in range(len(ss)): * out.fields[i] = ss[i] # <<<<<<<<<<<<<< @@ -20610,7 +20722,7 @@ static void __pyx_f_4cdec_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_ (__pyx_v_out->fields[__pyx_v_i]) = __pyx_t_10; } - /* "cdec/mteval.pxi":165 + /* "mteval.pxi":165 * return metric.score(ss) * * cdef void _compute_sufficient_stats(void* metric_, # <<<<<<<<<<<<<< @@ -20634,7 +20746,7 @@ static void __pyx_f_4cdec_5_cdec__compute_sufficient_stats(void *__pyx_v_metric_ __Pyx_RefNannyFinishContext(); } -/* "cdec/mteval.pxi":181 +/* "mteval.pxi":181 * cdef class Metric: * cdef Scorer scorer * def __cinit__(self): # <<<<<<<<<<<<<< @@ -20671,7 +20783,7 @@ static int __pyx_pf_4cdec_5_cdec_6Metric___cinit__(struct __pyx_obj_4cdec_5_cdec int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "cdec/mteval.pxi":182 + /* "mteval.pxi":182 * cdef Scorer scorer * def __cinit__(self): * self.scorer = Scorer() # <<<<<<<<<<<<<< @@ -20686,7 +20798,7 @@ static int __pyx_pf_4cdec_5_cdec_6Metric___cinit__(struct __pyx_obj_4cdec_5_cdec __pyx_v_self->scorer = ((struct __pyx_obj_4cdec_5_cdec_Scorer *)__pyx_t_1); __pyx_t_1 = 0; - /* "cdec/mteval.pxi":183 + /* "mteval.pxi":183 * def __cinit__(self): * self.scorer = Scorer() * cdef bytes class_name = self.__class__.__name__ # <<<<<<<<<<<<<< @@ -20702,7 +20814,7 @@ static int __pyx_pf_4cdec_5_cdec_6Metric___cinit__(struct __pyx_obj_4cdec_5_cdec __pyx_v_class_name = ((PyObject*)__pyx_t_2); __pyx_t_2 = 0; - /* "cdec/mteval.pxi":184 + /* "mteval.pxi":184 * self.scorer = Scorer() * cdef bytes class_name = self.__class__.__name__ * self.scorer.name = new string(class_name) # <<<<<<<<<<<<<< @@ -20718,7 +20830,7 @@ static int __pyx_pf_4cdec_5_cdec_6Metric___cinit__(struct __pyx_obj_4cdec_5_cdec } __pyx_v_self->scorer->name = __pyx_t_4; - /* "cdec/mteval.pxi":185 + /* "mteval.pxi":185 * cdef bytes class_name = self.__class__.__name__ * self.scorer.name = new string(class_name) * self.scorer.metric = mteval.PyMetricInstance(self.scorer.name[0], # <<<<<<<<<<<<<< @@ -20727,7 +20839,7 @@ static int __pyx_pf_4cdec_5_cdec_6Metric___cinit__(struct __pyx_obj_4cdec_5_cdec */ __pyx_v_self->scorer->metric = PythonEvaluationMetric::Instance((__pyx_v_self->scorer->name[0]), ((void *)__pyx_v_self), __pyx_f_4cdec_5_cdec__compute_sufficient_stats, __pyx_f_4cdec_5_cdec__compute_score); - /* "cdec/mteval.pxi":181 + /* "mteval.pxi":181 * cdef class Metric: * cdef Scorer scorer * def __cinit__(self): # <<<<<<<<<<<<<< @@ -20749,7 +20861,7 @@ static int __pyx_pf_4cdec_5_cdec_6Metric___cinit__(struct __pyx_obj_4cdec_5_cdec return __pyx_r; } -/* "cdec/mteval.pxi":188 +/* "mteval.pxi":188 * <void*> self, _compute_sufficient_stats, _compute_score) * * def __call__(self, refs): # <<<<<<<<<<<<<< @@ -20821,7 +20933,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Metric_2__call__(struct __pyx_obj_4cdec_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__call__", 0); - /* "cdec/mteval.pxi":189 + /* "mteval.pxi":189 * * def __call__(self, refs): * return self.scorer(refs) # <<<<<<<<<<<<<< @@ -20859,7 +20971,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Metric_2__call__(struct __pyx_obj_4cdec_ __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/mteval.pxi":188 + /* "mteval.pxi":188 * <void*> self, _compute_sufficient_stats, _compute_score) * * def __call__(self, refs): # <<<<<<<<<<<<<< @@ -20881,7 +20993,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Metric_2__call__(struct __pyx_obj_4cdec_ return __pyx_r; } -/* "cdec/mteval.pxi":191 +/* "mteval.pxi":191 * return self.scorer(refs) * * def score(SufficientStats stats): # <<<<<<<<<<<<<< @@ -20907,7 +21019,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Metric_4score(CYTHON_UNUSED struct __pyx __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("score", 0); - /* "cdec/mteval.pxi":192 + /* "mteval.pxi":192 * * def score(SufficientStats stats): * return 0 # <<<<<<<<<<<<<< @@ -20919,7 +21031,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Metric_4score(CYTHON_UNUSED struct __pyx __pyx_r = __pyx_int_0; goto __pyx_L0; - /* "cdec/mteval.pxi":191 + /* "mteval.pxi":191 * return self.scorer(refs) * * def score(SufficientStats stats): # <<<<<<<<<<<<<< @@ -20934,7 +21046,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Metric_4score(CYTHON_UNUSED struct __pyx return __pyx_r; } -/* "cdec/mteval.pxi":194 +/* "mteval.pxi":194 * return 0 * * def evaluate(self, hyp, refs): # <<<<<<<<<<<<<< @@ -21012,7 +21124,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Metric_6evaluate(CYTHON_UNUSED struct __ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("evaluate", 0); - /* "cdec/mteval.pxi":195 + /* "mteval.pxi":195 * * def evaluate(self, hyp, refs): * return [] # <<<<<<<<<<<<<< @@ -21026,7 +21138,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_6Metric_6evaluate(CYTHON_UNUSED struct __ __pyx_t_1 = 0; goto __pyx_L0; - /* "cdec/mteval.pxi":194 + /* "mteval.pxi":194 * return 0 * * def evaluate(self, hyp, refs): # <<<<<<<<<<<<<< @@ -22042,60 +22154,46 @@ static int __pyx_pf_4cdec_5_cdec_7Decoder___init__(struct __pyx_obj_4cdec_5_cdec __pyx_t_3 = __pyx_v_formalism; __pyx_t_1 = (__Pyx_PyString_Equals(__pyx_t_3, __pyx_n_s_scfg, Py_NE)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - goto __pyx_L12_next_and; } else { __pyx_t_2 = __pyx_t_1; goto __pyx_L5_bool_binop_done; } - __pyx_L12_next_and:; __pyx_t_1 = (__Pyx_PyString_Equals(__pyx_t_3, __pyx_n_s_fst, Py_NE)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - goto __pyx_L11_next_and; } else { __pyx_t_2 = __pyx_t_1; goto __pyx_L5_bool_binop_done; } - __pyx_L11_next_and:; __pyx_t_1 = (__Pyx_PyString_Equals(__pyx_t_3, __pyx_n_s_lextrans, Py_NE)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - goto __pyx_L10_next_and; } else { __pyx_t_2 = __pyx_t_1; goto __pyx_L5_bool_binop_done; } - __pyx_L10_next_and:; __pyx_t_1 = (__Pyx_PyString_Equals(__pyx_t_3, __pyx_n_s_pb, Py_NE)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - goto __pyx_L9_next_and; } else { __pyx_t_2 = __pyx_t_1; goto __pyx_L5_bool_binop_done; } - __pyx_L9_next_and:; __pyx_t_1 = (__Pyx_PyString_Equals(__pyx_t_3, __pyx_n_s_csplit, Py_NE)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - goto __pyx_L8_next_and; } else { __pyx_t_2 = __pyx_t_1; goto __pyx_L5_bool_binop_done; } - __pyx_L8_next_and:; __pyx_t_1 = (__Pyx_PyString_Equals(__pyx_t_3, __pyx_n_s_tagger, Py_NE)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - goto __pyx_L7_next_and; } else { __pyx_t_2 = __pyx_t_1; goto __pyx_L5_bool_binop_done; } - __pyx_L7_next_and:; __pyx_t_1 = (__Pyx_PyString_Equals(__pyx_t_3, __pyx_n_s_lexalign, Py_NE)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - goto __pyx_L6_next_and; } else { __pyx_t_2 = __pyx_t_1; goto __pyx_L5_bool_binop_done; } - __pyx_L6_next_and:; __pyx_t_1 = (__Pyx_PyString_Equals(__pyx_t_3, __pyx_n_s_t2s, Py_NE)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_2 = __pyx_t_1; __pyx_L5_bool_binop_done:; @@ -23429,7 +23527,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Decoder_6translate(struct __pyx_obj_4cde */ __pyx_t_3 = __pyx_f_4cdec_5_cdec_as_str(__pyx_v_grammar, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = __pyx_convert_string_from_py_(__pyx_t_3); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __pyx_convert_string_from_py_std__in_string(__pyx_t_3); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_self->dec->AddSupplementalGrammarFromString(__pyx_t_6); goto __pyx_L5; @@ -23475,7 +23573,7 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Decoder_6translate(struct __pyx_obj_4cde * if observer.hypergraph == NULL: * raise ParseFailed() */ - __pyx_t_6 = __pyx_convert_string_from_py_(__pyx_v_input_str); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 112; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __pyx_convert_string_from_py_std__in_string(__pyx_v_input_str); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 112; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_self->dec->Decode(__pyx_t_6, (&__pyx_v_observer)); /* "cdec/_cdec.pyx":113 @@ -23575,13 +23673,13 @@ static PyObject *__pyx_pf_4cdec_5_cdec_7Decoder_6translate(struct __pyx_obj_4cde /* "string.from_py":13 * - * @cname("__pyx_convert_string_from_py_") - * cdef string __pyx_convert_string_from_py_(object o) except *: # <<<<<<<<<<<<<< + * @cname("__pyx_convert_string_from_py_std__in_string") + * cdef string __pyx_convert_string_from_py_std__in_string(object o) except *: # <<<<<<<<<<<<<< * cdef Py_ssize_t length * cdef char* data = __Pyx_PyObject_AsStringAndSize(o, &length) */ -static std::string __pyx_convert_string_from_py_(PyObject *__pyx_v_o) { +static std::string __pyx_convert_string_from_py_std__in_string(PyObject *__pyx_v_o) { Py_ssize_t __pyx_v_length; char *__pyx_v_data; std::string __pyx_r; @@ -23590,10 +23688,10 @@ static std::string __pyx_convert_string_from_py_(PyObject *__pyx_v_o) { int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__pyx_convert_string_from_py_", 0); + __Pyx_RefNannySetupContext("__pyx_convert_string_from_py_std__in_string", 0); /* "string.from_py":15 - * cdef string __pyx_convert_string_from_py_(object o) except *: + * cdef string __pyx_convert_string_from_py_std__in_string(object o) except *: * cdef Py_ssize_t length * cdef char* data = __Pyx_PyObject_AsStringAndSize(o, &length) # <<<<<<<<<<<<<< * return string(data, length) @@ -23614,15 +23712,15 @@ static std::string __pyx_convert_string_from_py_(PyObject *__pyx_v_o) { /* "string.from_py":13 * - * @cname("__pyx_convert_string_from_py_") - * cdef string __pyx_convert_string_from_py_(object o) except *: # <<<<<<<<<<<<<< + * @cname("__pyx_convert_string_from_py_std__in_string") + * cdef string __pyx_convert_string_from_py_std__in_string(object o) except *: # <<<<<<<<<<<<<< * cdef Py_ssize_t length * cdef char* data = __Pyx_PyObject_AsStringAndSize(o, &length) */ /* function exit code */ __pyx_L1_error:; - __Pyx_AddTraceback("string.from_py.__pyx_convert_string_from_py_", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("string.from_py.__pyx_convert_string_from_py_std__in_string", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; @@ -29343,9 +29441,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_TER, __pyx_k_TER, sizeof(__pyx_k_TER), 0, 0, 1, 1}, {&__pyx_n_s_TRule___get, __pyx_k_TRule___get, sizeof(__pyx_k_TRule___get), 0, 0, 1, 1}, {&__pyx_n_s_TypeError, __pyx_k_TypeError, sizeof(__pyx_k_TypeError), 0, 0, 1, 1}, - {&__pyx_kp_s_Users_waziz_workspace_mtm14_my, __pyx_k_Users_waziz_workspace_mtm14_my, sizeof(__pyx_k_Users_waziz_workspace_mtm14_my), 0, 0, 1, 0}, - {&__pyx_kp_s_Users_waziz_workspace_mtm14_my_2, __pyx_k_Users_waziz_workspace_mtm14_my_2, sizeof(__pyx_k_Users_waziz_workspace_mtm14_my_2), 0, 0, 1, 0}, - {&__pyx_kp_s_Users_waziz_workspace_mtm14_my_3, __pyx_k_Users_waziz_workspace_mtm14_my_3, sizeof(__pyx_k_Users_waziz_workspace_mtm14_my_3), 0, 0, 1, 0}, {&__pyx_n_s_ValueError, __pyx_k_ValueError, sizeof(__pyx_k_ValueError), 0, 0, 1, 1}, {&__pyx_kp_s__10, __pyx_k__10, sizeof(__pyx_k__10), 0, 0, 1, 0}, {&__pyx_kp_s__11, __pyx_k__11, sizeof(__pyx_k__11), 0, 0, 1, 0}, @@ -29403,6 +29498,9 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_get, __pyx_k_get, sizeof(__pyx_k_get), 0, 0, 1, 1}, {&__pyx_n_s_get_2, __pyx_k_get_2, sizeof(__pyx_k_get_2), 0, 0, 1, 1}, {&__pyx_n_s_grammar, __pyx_k_grammar, sizeof(__pyx_k_grammar), 0, 0, 1, 1}, + {&__pyx_kp_s_home_pks_src_cdec_dtrain_python, __pyx_k_home_pks_src_cdec_dtrain_python, sizeof(__pyx_k_home_pks_src_cdec_dtrain_python), 0, 0, 1, 0}, + {&__pyx_kp_s_home_pks_src_cdec_dtrain_python_2, __pyx_k_home_pks_src_cdec_dtrain_python_2, sizeof(__pyx_k_home_pks_src_cdec_dtrain_python_2), 0, 0, 1, 0}, + {&__pyx_kp_s_home_pks_src_cdec_dtrain_python_3, __pyx_k_home_pks_src_cdec_dtrain_python_3, sizeof(__pyx_k_home_pks_src_cdec_dtrain_python_3), 0, 0, 1, 0}, {&__pyx_n_s_hyp, __pyx_k_hyp, sizeof(__pyx_k_hyp), 0, 0, 1, 1}, {&__pyx_n_s_hypergraph, __pyx_k_hypergraph, sizeof(__pyx_k_hypergraph), 0, 0, 1, 1}, {&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1}, @@ -29523,7 +29621,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - /* "cdec/vectors.pxi":95 + /* "vectors.pxi":95 * elif op == 3: # != * return not (x == y) * raise NotImplemented('comparison not implemented for SparseVector') # <<<<<<<<<<<<<< @@ -29534,7 +29632,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "cdec/grammar.pxi":6 + /* "grammar.pxi":6 * * def _phrase(phrase): * return ' '.join(w.encode('utf8') if isinstance(w, unicode) else str(w) for w in phrase) # <<<<<<<<<<<<<< @@ -29545,7 +29643,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - /* "cdec/grammar.pxi":232 + /* "grammar.pxi":232 * trule = convert_rule(trule) * elif not isinstance(trule, TRule): * raise ValueError('the grammar should contain TRule objects') # <<<<<<<<<<<<<< @@ -29555,7 +29653,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__5); __Pyx_GIVEREF(__pyx_tuple__5); - /* "cdec/hypergraph.pxi":307 + /* "hypergraph.pxi":307 * elif op == 3: # != * return not (x == y) * raise NotImplemented('comparison not implemented for HypergraphEdge') # <<<<<<<<<<<<<< @@ -29566,7 +29664,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__6); __Pyx_GIVEREF(__pyx_tuple__6); - /* "cdec/hypergraph.pxi":348 + /* "hypergraph.pxi":348 * elif op == 3: # != * return not (x == y) * raise NotImplemented('comparison not implemented for HypergraphNode') # <<<<<<<<<<<<<< @@ -29575,7 +29673,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__7); __Pyx_GIVEREF(__pyx_tuple__7); - /* "cdec/lattice.pxi":26 + /* "lattice.pxi":26 * def __getitem__(self, int index): * if not 0 <= index < len(self): * raise IndexError('lattice index out of range') # <<<<<<<<<<<<<< @@ -29586,52 +29684,52 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__8); __Pyx_GIVEREF(__pyx_tuple__8); - /* "cdec/lattice.pxi":39 + /* "lattice.pxi":43 * def __setitem__(self, int index, tuple arcs): * if not 0 <= index < len(self): * raise IndexError('lattice index out of range') # <<<<<<<<<<<<<< * cdef lattice.LatticeArc* arc - * for (label, cost, dist2next) in arcs: + * cdef FastSparseVector[double]* vp */ - __pyx_tuple__9 = PyTuple_Pack(1, __pyx_kp_s_lattice_index_out_of_range); if (unlikely(!__pyx_tuple__9)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__9 = PyTuple_Pack(1, __pyx_kp_s_lattice_index_out_of_range); if (unlikely(!__pyx_tuple__9)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__9); __Pyx_GIVEREF(__pyx_tuple__9); - /* "cdec/lattice.pxi":69 + /* "lattice.pxi":77 * for i in range(len(self)): * for label, weight, delta in self[i]: * yield '%d -> %d [label="%s"];' % (i, i+delta, label.replace('"', '\\"')) # <<<<<<<<<<<<<< * yield '%d [shape=doublecircle]' % len(self) * yield '}' */ - __pyx_tuple__12 = PyTuple_Pack(2, __pyx_kp_s__10, __pyx_kp_s__11); if (unlikely(!__pyx_tuple__12)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__12 = PyTuple_Pack(2, __pyx_kp_s__10, __pyx_kp_s__11); if (unlikely(!__pyx_tuple__12)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__12); __Pyx_GIVEREF(__pyx_tuple__12); - /* "cdec/lattice.pxi":63 + /* "lattice.pxi":71 * def todot(self): * """lattice.todot() -> Representation of the lattice in GraphViz dot format.""" * def lines(): # <<<<<<<<<<<<<< * yield 'digraph lattice {' * yield 'rankdir = LR;' */ - __pyx_tuple__14 = PyTuple_Pack(4, __pyx_n_s_i, __pyx_n_s_label, __pyx_n_s_weight, __pyx_n_s_delta); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__14 = PyTuple_Pack(4, __pyx_n_s_i, __pyx_n_s_label, __pyx_n_s_weight, __pyx_n_s_delta); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__14); __Pyx_GIVEREF(__pyx_tuple__14); - __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(0, 0, 4, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_waziz_workspace_mtm14_my, __pyx_n_s_lines, 63, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(0, 0, 4, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_pks_src_cdec_dtrain_python, __pyx_n_s_lines, 71, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/lattice.pxi":72 + /* "lattice.pxi":80 * yield '%d [shape=doublecircle]' % len(self) * yield '}' * return '\n'.join(lines()).encode('utf8') # <<<<<<<<<<<<<< * * def as_hypergraph(self): */ - __pyx_tuple__17 = PyTuple_Pack(1, __pyx_n_s_utf8); if (unlikely(!__pyx_tuple__17)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__17 = PyTuple_Pack(1, __pyx_n_s_utf8); if (unlikely(!__pyx_tuple__17)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__17); __Pyx_GIVEREF(__pyx_tuple__17); - /* "cdec/mteval.pxi":53 + /* "mteval.pxi":53 * def __getitem__(self, int index): * if not 0 <= index < len(self): * raise IndexError('sufficient stats vector index out of range') # <<<<<<<<<<<<<< @@ -29642,7 +29740,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__18); __Pyx_GIVEREF(__pyx_tuple__18); - /* "cdec/mteval.pxi":87 + /* "mteval.pxi":87 * def __getitem__(self,int k): * if not 0 <= k < self.cs.size(): * raise IndexError('candidate set index out of range') # <<<<<<<<<<<<<< @@ -29675,7 +29773,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__22); __Pyx_GIVEREF(__pyx_tuple__22); - /* "cdec/grammar.pxi":5 + /* "grammar.pxi":5 * import cdec.sa._sa as _sa * * def _phrase(phrase): # <<<<<<<<<<<<<< @@ -29685,9 +29783,9 @@ static int __Pyx_InitCachedConstants(void) { __pyx_tuple__24 = PyTuple_Pack(3, __pyx_n_s_phrase_2, __pyx_n_s_genexpr, __pyx_n_s_genexpr); if (unlikely(!__pyx_tuple__24)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 5; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__24); __Pyx_GIVEREF(__pyx_tuple__24); - __pyx_codeobj__25 = (PyObject*)__Pyx_PyCode_New(1, 0, 3, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__24, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_waziz_workspace_mtm14_my_2, __pyx_n_s_phrase, 5, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__25)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 5; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__25 = (PyObject*)__Pyx_PyCode_New(1, 0, 3, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__24, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_pks_src_cdec_dtrain_python_2, __pyx_n_s_phrase, 5, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__25)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 5; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "cdec/mteval.pxi":197 + /* "mteval.pxi":197 * return [] * * BLEU = Scorer('IBM_BLEU') # <<<<<<<<<<<<<< @@ -29698,7 +29796,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__26); __Pyx_GIVEREF(__pyx_tuple__26); - /* "cdec/mteval.pxi":198 + /* "mteval.pxi":198 * * BLEU = Scorer('IBM_BLEU') * QCRI = Scorer('QCRI_BLEU') # <<<<<<<<<<<<<< @@ -29709,7 +29807,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__27); __Pyx_GIVEREF(__pyx_tuple__27); - /* "cdec/mteval.pxi":199 + /* "mteval.pxi":199 * BLEU = Scorer('IBM_BLEU') * QCRI = Scorer('QCRI_BLEU') * TER = Scorer('TER') # <<<<<<<<<<<<<< @@ -29720,7 +29818,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__28); __Pyx_GIVEREF(__pyx_tuple__28); - /* "cdec/mteval.pxi":200 + /* "mteval.pxi":200 * QCRI = Scorer('QCRI_BLEU') * TER = Scorer('TER') * CER = Scorer('CER') # <<<<<<<<<<<<<< @@ -29730,7 +29828,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__29); __Pyx_GIVEREF(__pyx_tuple__29); - /* "cdec/mteval.pxi":201 + /* "mteval.pxi":201 * TER = Scorer('TER') * CER = Scorer('CER') * SSK = Scorer('SSK') # <<<<<<<<<<<<<< @@ -29749,7 +29847,7 @@ static int __Pyx_InitCachedConstants(void) { __pyx_tuple__31 = PyTuple_Pack(1, __pyx_n_s_yn); if (unlikely(!__pyx_tuple__31)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__31); __Pyx_GIVEREF(__pyx_tuple__31); - __pyx_codeobj__32 = (PyObject*)__Pyx_PyCode_New(1, 0, 1, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__31, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_waziz_workspace_mtm14_my_3, __pyx_n_s_set_silent, 28, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__32)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__32 = (PyObject*)__Pyx_PyCode_New(1, 0, 1, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__31, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_pks_src_cdec_dtrain_python_3, __pyx_n_s_set_silent, 28, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__32)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* "cdec/_cdec.pyx":32 * SetSilent(yn) @@ -29761,7 +29859,7 @@ static int __Pyx_InitCachedConstants(void) { __pyx_tuple__33 = PyTuple_Pack(5, __pyx_n_s_config, __pyx_n_s_key, __pyx_n_s_value, __pyx_n_s_name, __pyx_n_s_info); if (unlikely(!__pyx_tuple__33)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__33); __Pyx_GIVEREF(__pyx_tuple__33); - __pyx_codeobj__34 = (PyObject*)__Pyx_PyCode_New(1, 0, 5, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__33, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_waziz_workspace_mtm14_my_3, __pyx_n_s_make_config, 32, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__34)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__34 = (PyObject*)__Pyx_PyCode_New(1, 0, 5, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__33, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_pks_src_cdec_dtrain_python_3, __pyx_n_s_make_config, 32, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__34)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -30106,13 +30204,13 @@ PyMODINIT_FUNC PyInit__cdec(void) if (PyType_Ready(&__pyx_type_4cdec_5_cdec___pyx_scope_struct_21___get__) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_type_4cdec_5_cdec___pyx_scope_struct_21___get__.tp_print = 0; __pyx_ptype_4cdec_5_cdec___pyx_scope_struct_21___get__ = &__pyx_type_4cdec_5_cdec___pyx_scope_struct_21___get__; - if (PyType_Ready(&__pyx_type_4cdec_5_cdec___pyx_scope_struct_22___iter__) < 0) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_4cdec_5_cdec___pyx_scope_struct_22___iter__) < 0) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_type_4cdec_5_cdec___pyx_scope_struct_22___iter__.tp_print = 0; __pyx_ptype_4cdec_5_cdec___pyx_scope_struct_22___iter__ = &__pyx_type_4cdec_5_cdec___pyx_scope_struct_22___iter__; - if (PyType_Ready(&__pyx_type_4cdec_5_cdec___pyx_scope_struct_23_todot) < 0) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_4cdec_5_cdec___pyx_scope_struct_23_todot) < 0) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_type_4cdec_5_cdec___pyx_scope_struct_23_todot.tp_print = 0; __pyx_ptype_4cdec_5_cdec___pyx_scope_struct_23_todot = &__pyx_type_4cdec_5_cdec___pyx_scope_struct_23_todot; - if (PyType_Ready(&__pyx_type_4cdec_5_cdec___pyx_scope_struct_24_lines) < 0) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_4cdec_5_cdec___pyx_scope_struct_24_lines) < 0) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_type_4cdec_5_cdec___pyx_scope_struct_24_lines.tp_print = 0; __pyx_ptype_4cdec_5_cdec___pyx_scope_struct_24_lines = &__pyx_type_4cdec_5_cdec___pyx_scope_struct_24_lines; if (PyType_Ready(&__pyx_type_4cdec_5_cdec___pyx_scope_struct_25___iter__) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -30149,7 +30247,7 @@ PyMODINIT_FUNC PyInit__cdec(void) Py_DECREF(__pyx_t_1); __pyx_t_1 = 0; /*--- Execution code ---*/ - /* "cdec/grammar.pxi":3 + /* "grammar.pxi":3 * cimport grammar * cimport cdec.sa._sa as _sa * import cdec.sa._sa as _sa # <<<<<<<<<<<<<< @@ -30167,7 +30265,7 @@ PyMODINIT_FUNC PyInit__cdec(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_sa, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "cdec/grammar.pxi":5 + /* "grammar.pxi":5 * import cdec.sa._sa as _sa * * def _phrase(phrase): # <<<<<<<<<<<<<< @@ -30179,7 +30277,7 @@ PyMODINIT_FUNC PyInit__cdec(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_phrase, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 5; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "cdec/mteval.pxi":197 + /* "mteval.pxi":197 * return [] * * BLEU = Scorer('IBM_BLEU') # <<<<<<<<<<<<<< @@ -30191,7 +30289,7 @@ PyMODINIT_FUNC PyInit__cdec(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_BLEU, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 197; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "cdec/mteval.pxi":198 + /* "mteval.pxi":198 * * BLEU = Scorer('IBM_BLEU') * QCRI = Scorer('QCRI_BLEU') # <<<<<<<<<<<<<< @@ -30203,7 +30301,7 @@ PyMODINIT_FUNC PyInit__cdec(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_QCRI, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "cdec/mteval.pxi":199 + /* "mteval.pxi":199 * BLEU = Scorer('IBM_BLEU') * QCRI = Scorer('QCRI_BLEU') * TER = Scorer('TER') # <<<<<<<<<<<<<< @@ -30215,7 +30313,7 @@ PyMODINIT_FUNC PyInit__cdec(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_TER, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 199; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "cdec/mteval.pxi":200 + /* "mteval.pxi":200 * QCRI = Scorer('QCRI_BLEU') * TER = Scorer('TER') * CER = Scorer('CER') # <<<<<<<<<<<<<< @@ -30226,7 +30324,7 @@ PyMODINIT_FUNC PyInit__cdec(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_CER, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 200; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "cdec/mteval.pxi":201 + /* "mteval.pxi":201 * TER = Scorer('TER') * CER = Scorer('CER') * SSK = Scorer('SSK') # <<<<<<<<<<<<<< @@ -30338,8 +30436,8 @@ PyMODINIT_FUNC PyInit__cdec(void) /* "string.from_py":13 * - * @cname("__pyx_convert_string_from_py_") - * cdef string __pyx_convert_string_from_py_(object o) except *: # <<<<<<<<<<<<<< + * @cname("__pyx_convert_string_from_py_std__in_string") + * cdef string __pyx_convert_string_from_py_std__in_string(object o) except *: # <<<<<<<<<<<<<< * cdef Py_ssize_t length * cdef char* data = __Pyx_PyObject_AsStringAndSize(o, &length) */ @@ -30356,7 +30454,6 @@ PyMODINIT_FUNC PyInit__cdec(void) if (__pyx_m) { if (__pyx_d) { __Pyx_AddTraceback("init cdec._cdec", __pyx_clineno, __pyx_lineno, __pyx_filename); - Py_DECREF(__pyx_d); __pyx_d = 0; } Py_DECREF(__pyx_m); __pyx_m = 0; } else if (!PyErr_Occurred()) { @@ -30371,7 +30468,7 @@ PyMODINIT_FUNC PyInit__cdec(void) #endif } -/* Runtime support code */ +/* --- Runtime support code --- */ #if CYTHON_REFNANNY static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { PyObject *m = NULL, *p = NULL; @@ -30635,6 +30732,13 @@ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject } PyErr_SetObject(type, value); if (tb) { +#if CYTHON_COMPILING_IN_PYPY + PyObject *tmp_type, *tmp_value, *tmp_tb; + PyErr_Fetch(tmp_type, tmp_value, tmp_tb); + Py_INCREF(tb); + PyErr_Restore(tmp_type, tmp_value, tb); + Py_XDECREF(tmp_tb); +#else PyThreadState *tstate = PyThreadState_GET(); PyObject* tmp_tb = tstate->curexc_traceback; if (tb != tmp_tb) { @@ -30642,6 +30746,7 @@ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject tstate->curexc_traceback = tb; Py_XDECREF(tmp_tb); } +#endif } bad: Py_XDECREF(owned_instance); @@ -31564,9 +31669,6 @@ static PyGetSetDef __pyx_CyFunction_getsets[] = { {(char *) "__annotations__", (getter)__Pyx_CyFunction_get_annotations, (setter)__Pyx_CyFunction_set_annotations, 0, 0}, {0, 0, 0, 0, 0} }; -#ifndef PY_WRITE_RESTRICTED -#define PY_WRITE_RESTRICTED WRITE_RESTRICTED -#endif static PyMemberDef __pyx_CyFunction_members[] = { {(char *) "__module__", T_OBJECT, offsetof(__pyx_CyFunctionObject, func.m_module), PY_WRITE_RESTRICTED, 0}, {0, 0, 0, 0, 0} @@ -31685,12 +31787,11 @@ static PyObject *__Pyx_CyFunction_descr_get(PyObject *func, PyObject *obj, PyObj if (m->flags & __Pyx_CYFUNCTION_CLASSMETHOD) { if (type == NULL) type = (PyObject *)(Py_TYPE(obj)); - return PyMethod_New(func, - type, (PyObject *)(Py_TYPE(type))); + return __Pyx_PyMethod_New(func, type, (PyObject *)(Py_TYPE(type))); } if (obj == Py_None) obj = NULL; - return PyMethod_New(func, obj, type); + return __Pyx_PyMethod_New(func, obj, type); } static PyObject* __Pyx_CyFunction_repr(__pyx_CyFunctionObject *op) @@ -31722,7 +31823,7 @@ static PyObject * __Pyx_CyFunction_Call(PyObject *func, PyObject *arg, PyObject if (size == 0) return (*meth)(self, NULL); PyErr_Format(PyExc_TypeError, - "%.200s() takes no arguments (%zd given)", + "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", f->m_ml->ml_name, size); return NULL; } @@ -31733,7 +31834,7 @@ static PyObject * __Pyx_CyFunction_Call(PyObject *func, PyObject *arg, PyObject if (size == 1) return (*meth)(self, PyTuple_GET_ITEM(arg, 0)); PyErr_Format(PyExc_TypeError, - "%.200s() takes exactly one argument (%zd given)", + "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", f->m_ml->ml_name, size); return NULL; } @@ -32333,32 +32434,6 @@ bad: Py_XDECREF(py_frame); } -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value) { - const unsigned int neg_one = (unsigned int) -1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; - if (is_unsigned) { - if (sizeof(unsigned int) < sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(unsigned int) <= sizeof(unsigned long)) { - return PyLong_FromUnsignedLong((unsigned long) value); - } else if (sizeof(unsigned int) <= sizeof(unsigned long long)) { - return PyLong_FromUnsignedLongLong((unsigned long long) value); - } - } else { - if (sizeof(unsigned int) <= sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(unsigned int) <= sizeof(long long)) { - return PyLong_FromLongLong((long long) value); - } - } - { - int one = 1; int little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&value; - return _PyLong_FromByteArray(bytes, sizeof(unsigned int), - little, !is_unsigned); - } -} - #define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value) \ { \ func_type value = func_value; \ @@ -32380,101 +32455,6 @@ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value) #endif #endif -static CYTHON_INLINE unsigned int __Pyx_PyInt_As_unsigned_int(PyObject *x) { - const unsigned int neg_one = (unsigned int) -1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if (sizeof(unsigned int) < sizeof(long)) { - __PYX_VERIFY_RETURN_INT(unsigned int, long, PyInt_AS_LONG(x)) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - goto raise_neg_overflow; - } - return (unsigned int) val; - } - } else -#endif - if (likely(PyLong_Check(x))) { - if (is_unsigned) { -#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 - #if CYTHON_USE_PYLONG_INTERNALS - switch (Py_SIZE(x)) { - case 0: return 0; - case 1: __PYX_VERIFY_RETURN_INT(unsigned int, digit, ((PyLongObject*)x)->ob_digit[0]); - } - #endif -#endif - if (unlikely(Py_SIZE(x) < 0)) { - goto raise_neg_overflow; - } - if (sizeof(unsigned int) <= sizeof(unsigned long)) { - __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, PyLong_AsUnsignedLong(x)) - } else if (sizeof(unsigned int) <= sizeof(unsigned long long)) { - __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long long, PyLong_AsUnsignedLongLong(x)) - } - } else { -#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 - #if CYTHON_USE_PYLONG_INTERNALS - switch (Py_SIZE(x)) { - case 0: return 0; - case 1: __PYX_VERIFY_RETURN_INT(unsigned int, digit, +(((PyLongObject*)x)->ob_digit[0])); - case -1: __PYX_VERIFY_RETURN_INT(unsigned int, sdigit, -(sdigit) ((PyLongObject*)x)->ob_digit[0]); - } - #endif -#endif - if (sizeof(unsigned int) <= sizeof(long)) { - __PYX_VERIFY_RETURN_INT(unsigned int, long, PyLong_AsLong(x)) - } else if (sizeof(unsigned int) <= sizeof(long long)) { - __PYX_VERIFY_RETURN_INT(unsigned int, long long, PyLong_AsLongLong(x)) - } - } - { -#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) - PyErr_SetString(PyExc_RuntimeError, - "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); -#else - unsigned int val; - PyObject *v = __Pyx_PyNumber_Int(x); - #if PY_MAJOR_VERSION < 3 - if (likely(v) && !PyLong_Check(v)) { - PyObject *tmp = v; - v = PyNumber_Long(tmp); - Py_DECREF(tmp); - } - #endif - if (likely(v)) { - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - int ret = _PyLong_AsByteArray((PyLongObject *)v, - bytes, sizeof(val), - is_little, !is_unsigned); - Py_DECREF(v); - if (likely(!ret)) - return val; - } -#endif - return (unsigned int) -1; - } - } else { - unsigned int val; - PyObject *tmp = __Pyx_PyNumber_Int(x); - if (!tmp) return (unsigned int) -1; - val = __Pyx_PyInt_As_unsigned_int(tmp); - Py_DECREF(tmp); - return val; - } -raise_overflow: - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to unsigned int"); - return (unsigned int) -1; -raise_neg_overflow: - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to unsigned int"); - return (unsigned int) -1; -} - static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { const int neg_one = (int) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; @@ -32643,6 +32623,127 @@ bad: return module; } +static CYTHON_INLINE unsigned int __Pyx_PyInt_As_unsigned_int(PyObject *x) { + const unsigned int neg_one = (unsigned int) -1, const_zero = 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(unsigned int) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(unsigned int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (unsigned int) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + switch (Py_SIZE(x)) { + case 0: return 0; + case 1: __PYX_VERIFY_RETURN_INT(unsigned int, digit, ((PyLongObject*)x)->ob_digit[0]); + } + #endif +#endif + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } + if (sizeof(unsigned int) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, PyLong_AsUnsignedLong(x)) + } else if (sizeof(unsigned int) <= sizeof(unsigned long long)) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long long, PyLong_AsUnsignedLongLong(x)) + } + } else { +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + switch (Py_SIZE(x)) { + case 0: return 0; + case 1: __PYX_VERIFY_RETURN_INT(unsigned int, digit, +(((PyLongObject*)x)->ob_digit[0])); + case -1: __PYX_VERIFY_RETURN_INT(unsigned int, sdigit, -(sdigit) ((PyLongObject*)x)->ob_digit[0]); + } + #endif +#endif + if (sizeof(unsigned int) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT(unsigned int, long, PyLong_AsLong(x)) + } else if (sizeof(unsigned int) <= sizeof(long long)) { + __PYX_VERIFY_RETURN_INT(unsigned int, long long, PyLong_AsLongLong(x)) + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + unsigned int val; + PyObject *v = __Pyx_PyNumber_Int(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (unsigned int) -1; + } + } else { + unsigned int val; + PyObject *tmp = __Pyx_PyNumber_Int(x); + if (!tmp) return (unsigned int) -1; + val = __Pyx_PyInt_As_unsigned_int(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to unsigned int"); + return (unsigned int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to unsigned int"); + return (unsigned int) -1; +} + +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value) { + const unsigned int neg_one = (unsigned int) -1, const_zero = 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(unsigned int) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(unsigned int) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); + } else if (sizeof(unsigned int) <= sizeof(unsigned long long)) { + return PyLong_FromUnsignedLongLong((unsigned long long) value); + } + } else { + if (sizeof(unsigned int) <= sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(unsigned int) <= sizeof(long long)) { + return PyLong_FromLongLong((long long) value); + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(unsigned int), + little, !is_unsigned); + } +} + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { const int neg_one = (int) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; diff --git a/python/cdec/lattice.pxd b/python/cdec/lattice.pxd index 8ad710e5..2ba9ac31 100644 --- a/python/cdec/lattice.pxd +++ b/python/cdec/lattice.pxd @@ -1,14 +1,14 @@ from libcpp.vector cimport vector from libcpp.string cimport string -from utils cimport WordID +from utils cimport * cdef extern from "decoder/lattice.h": cdef cppclass LatticeArc: WordID label - double cost + FastSparseVector[double] features int dist2next LatticeArc() - LatticeArc(WordID w, double c, int i) + LatticeArc(WordID w, FastSparseVector[double] c, int i) cdef cppclass Lattice(vector): # (vector[vector[LatticeArc]]) Lattice() diff --git a/python/cdec/lattice.pxi b/python/cdec/lattice.pxi index 8000b61e..077bfe69 100644 --- a/python/cdec/lattice.pxi +++ b/python/cdec/lattice.pxi @@ -27,20 +27,28 @@ cdef class Lattice: arcs = [] cdef vector[lattice.LatticeArc] arc_vector = self.lattice[0][index] cdef lattice.LatticeArc* arc + cdef FastSparseVector[double]* vp + cdef SparseVector v = SparseVector.__new__(SparseVector) cdef unsigned i for i in range(arc_vector.size()): arc = &arc_vector[i] + vp = new FastSparseVector[double](arc.features) + v.vector = vp label = unicode(TDConvert(arc.label).c_str(), 'utf8') - arcs.append((label, arc.cost, arc.dist2next)) + arcs.append((label, v, arc.dist2next)) return tuple(arcs) def __setitem__(self, int index, tuple arcs): if not 0 <= index < len(self): raise IndexError('lattice index out of range') cdef lattice.LatticeArc* arc - for (label, cost, dist2next) in arcs: + cdef FastSparseVector[double]* vp + cdef SparseVector v = SparseVector.__new__(SparseVector) + for (label, features, dist2next) in arcs: label_str = as_str(label) - arc = new lattice.LatticeArc(TDConvert(label_str), cost, dist2next) + v = features + vp = v.vector + arc = new lattice.LatticeArc(TDConvert(label_str), vp[0], dist2next) self.lattice[0][index].push_back(arc[0]) del arc diff --git a/training/dtrain/CMakeLists.txt b/training/dtrain/CMakeLists.txt index 027c80e4..eac7fc72 100644 --- a/training/dtrain/CMakeLists.txt +++ b/training/dtrain/CMakeLists.txt @@ -5,12 +5,10 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../../decoder) set(dtrain_SRCS dtrain.cc - score.cc dtrain.h - kbestget.h - ksampler.h - pairsampling.h - score.h) + sample.h + score.h + update.h) add_executable(dtrain ${dtrain_SRCS}) target_link_libraries(dtrain libcdec ksearch mteval utils klm klm_util klm_util_double ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${BZIP2_LIBRARIES} ${LIBLZMA_LIBRARIES} ${LIBDL_LIBRARIES}) diff --git a/training/dtrain/README.md b/training/dtrain/README.md index aa1ab3e7..dc473568 100644 --- a/training/dtrain/README.md +++ b/training/dtrain/README.md @@ -1,35 +1,46 @@ This is a simple (and parallelizable) tuning method for cdec -which is able to train the weights of very many (sparse) features -on the training set. +which enables training weights of very many (sparse) features +on the full training set. -It was used in these papers: +Please cite as: > "Joint Feature Selection in Distributed Stochastic > Learning for Large-Scale Discriminative Training in > SMT" (Simianer, Riezler, Dyer; ACL 2012) > -> "Multi-Task Learning for Improved Discriminative -> Training in SMT" (Simianer, Riezler; WMT 2013) -> - Building -------- -Builds when building cdec, see ../BUILDING . -To build only parts needed for dtrain do -``` - autoreconf -ifv - ./configure - cd training/dtrain/; make -``` +Builds when building cdec, see ../../BUILDING . Running ------- -See directories under examples/ . +Download runnable examples for all use cases from [1] and extract here. + +TODO +---- + * "stop_after" stop after X inputs + * "select_weights" average, best, last + * "rescale" rescale weight vector + * implement SVM objective? + * other variants of l1 regularization? + * l2 regularization? + * l1/l2 regularization? + * scale updates by bleu difference + * AdaGrad, per-coordinate learning rates + * batch update + * "repeat" iterate over k-best lists + * show k-best loss improvement + * "quiet" + * "verbose" + * fix output Legal ----- -Copyright (c) 2012-2013 by Patrick Simianer <p@simianer.de> +Copyright (c) 2012-2015 by Patrick Simianer <p@simianer.de> See the file LICENSE.txt in the root folder for the licensing terms that this software is released under. + +[1] http://simianer.de/dtrain-example.tar.gz + diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc index ccb50af2..b488e661 100644 --- a/training/dtrain/dtrain.cc +++ b/training/dtrain/dtrain.cc @@ -1,698 +1,434 @@ #include "dtrain.h" +#include "sample.h" #include "score.h" -#include "kbestget.h" -#include "ksampler.h" -#include "pairsampling.h" +#include "update.h" using namespace dtrain; - -bool -dtrain_init(int argc, char** argv, po::variables_map* cfg) -{ - po::options_description ini("Configuration File Options"); - ini.add_options() - ("input", po::value<string>(), "input file (src)") - ("refs,r", po::value<string>(), "references") - ("bitext,b", po::value<string>(), "bitext: 'src ||| tgt'") - ("output", po::value<string>()->default_value("-"), "output weights file, '-' for STDOUT") - ("input_weights", po::value<string>(), "input weights file (e.g. from previous iteration)") - ("decoder_config", po::value<string>(), "configuration file for cdec") - ("print_weights", po::value<string>(), "weights to print on each iteration") - ("stop_after", po::value<unsigned>()->default_value(0), "stop after X input sentences") - ("keep", po::value<bool>()->zero_tokens(), "keep weights files for each iteration") - ("epochs", po::value<unsigned>()->default_value(10), "# of iterations T (per shard)") - ("k", po::value<unsigned>()->default_value(100), "how many translations to sample") - ("sample_from", po::value<string>()->default_value("kbest"), "where to sample translations from: 'kbest', 'forest'") - ("filter", po::value<string>()->default_value("uniq"), "filter kbest list: 'not', 'uniq'") - ("pair_sampling", po::value<string>()->default_value("XYX"), "how to sample pairs: 'all', 'XYX' or 'PRO'") - ("hi_lo", po::value<float>()->default_value(0.1), "hi and lo (X) for XYX (default 0.1), <= 0.5") - ("pair_threshold", po::value<score_t>()->default_value(0.), "bleu [0,1] threshold to filter pairs") - ("N", po::value<unsigned>()->default_value(4), "N for Ngrams (BLEU)") - ("scorer", po::value<string>()->default_value("stupid_bleu"), "scoring: bleu, stupid_, smooth_, approx_, lc_") - ("learning_rate", po::value<weight_t>()->default_value(1.0), "learning rate") - ("gamma", po::value<weight_t>()->default_value(0.), "gamma for SVM (0 for perceptron)") - ("select_weights", po::value<string>()->default_value("last"), "output best, last, avg weights ('VOID' to throw away)") - ("rescale", po::value<bool>()->zero_tokens(), "rescale weight vector after each input") - ("l1_reg", po::value<string>()->default_value("none"), "apply l1 regularization as in 'Tsuroka et al' (2010) UNTESTED") - ("l1_reg_strength", po::value<weight_t>(), "l1 regularization strength") - ("fselect", po::value<weight_t>()->default_value(-1), "select top x percent (or by threshold) of features after each epoch NOT IMPLEMENTED") // TODO - ("approx_bleu_d", po::value<score_t>()->default_value(0.9), "discount for approx. BLEU") - ("scale_bleu_diff", po::value<bool>()->zero_tokens(), "learning rate <- bleu diff of a misranked pair") - ("loss_margin", po::value<weight_t>()->default_value(0.), "update if no error in pref pair but model scores this near") - ("max_pairs", po::value<unsigned>()->default_value(std::numeric_limits<unsigned>::max()), "max. # of pairs per Sent.") - ("pclr", po::value<string>()->default_value("no"), "use a (simple|adagrad) per-coordinate learning rate") - ("batch", po::value<bool>()->zero_tokens(), "do batch optimization") - ("repeat", po::value<unsigned>()->default_value(1), "repeat optimization over kbest list this number of times") - ("check", po::value<bool>()->zero_tokens(), "produce list of loss differentials") - ("noup", po::value<bool>()->zero_tokens(), "do not update weights"); - po::options_description cl("Command Line Options"); - cl.add_options() - ("config,c", po::value<string>(), "dtrain config file") - ("quiet,q", po::value<bool>()->zero_tokens(), "be quiet") - ("verbose,v", po::value<bool>()->zero_tokens(), "be verbose"); - cl.add(ini); - po::store(parse_command_line(argc, argv, cl), *cfg); - if (cfg->count("config")) { - ifstream ini_f((*cfg)["config"].as<string>().c_str()); - po::store(po::parse_config_file(ini_f, ini), *cfg); - } - po::notify(*cfg); - if (!cfg->count("decoder_config")) { - cerr << cl << endl; - return false; - } - if ((*cfg)["sample_from"].as<string>() != "kbest" - && (*cfg)["sample_from"].as<string>() != "forest") { - cerr << "Wrong 'sample_from' param: '" << (*cfg)["sample_from"].as<string>() << "', use 'kbest' or 'forest'." << endl; - return false; - } - if ((*cfg)["sample_from"].as<string>() == "kbest" && (*cfg)["filter"].as<string>() != "uniq" && - (*cfg)["filter"].as<string>() != "not") { - cerr << "Wrong 'filter' param: '" << (*cfg)["filter"].as<string>() << "', use 'uniq' or 'not'." << endl; - return false; - } - if ((*cfg)["pair_sampling"].as<string>() != "all" && (*cfg)["pair_sampling"].as<string>() != "XYX" && - (*cfg)["pair_sampling"].as<string>() != "PRO") { - cerr << "Wrong 'pair_sampling' param: '" << (*cfg)["pair_sampling"].as<string>() << "'." << endl; - return false; - } - if (cfg->count("hi_lo") && (*cfg)["pair_sampling"].as<string>() != "XYX") { - cerr << "Warning: hi_lo only works with pair_sampling XYX." << endl; - } - if ((*cfg)["hi_lo"].as<float>() > 0.5 || (*cfg)["hi_lo"].as<float>() < 0.01) { - cerr << "hi_lo must lie in [0.01, 0.5]" << endl; - return false; - } - if ((cfg->count("input")>0 || cfg->count("refs")>0) && cfg->count("bitext")>0) { - cerr << "Provide 'input' and 'refs' or 'bitext', not both." << endl; - return false; - } - if ((*cfg)["pair_threshold"].as<score_t>() < 0) { - cerr << "The threshold must be >= 0!" << endl; - return false; - } - if ((*cfg)["select_weights"].as<string>() != "last" && (*cfg)["select_weights"].as<string>() != "best" && - (*cfg)["select_weights"].as<string>() != "avg" && (*cfg)["select_weights"].as<string>() != "VOID") { - cerr << "Wrong 'select_weights' param: '" << (*cfg)["select_weights"].as<string>() << "', use 'last' or 'best'." << endl; - return false; - } - return true; -} - int main(int argc, char** argv) { - // handle most parameters - po::variables_map cfg; - if (!dtrain_init(argc, argv, &cfg)) exit(1); // something is wrong - bool quiet = false; - if (cfg.count("quiet")) quiet = true; - bool verbose = false; - if (cfg.count("verbose")) verbose = true; - bool noup = false; - if (cfg.count("noup")) noup = true; - bool rescale = false; - if (cfg.count("rescale")) rescale = true; - bool keep = false; - if (cfg.count("keep")) keep = true; - - const unsigned k = cfg["k"].as<unsigned>(); - const unsigned N = cfg["N"].as<unsigned>(); - const unsigned T = cfg["epochs"].as<unsigned>(); - const unsigned stop_after = cfg["stop_after"].as<unsigned>(); - const string filter_type = cfg["filter"].as<string>(); - const string sample_from = cfg["sample_from"].as<string>(); - const string pair_sampling = cfg["pair_sampling"].as<string>(); - const score_t pair_threshold = cfg["pair_threshold"].as<score_t>(); - const string select_weights = cfg["select_weights"].as<string>(); - const float hi_lo = cfg["hi_lo"].as<float>(); - const score_t approx_bleu_d = cfg["approx_bleu_d"].as<score_t>(); - const unsigned max_pairs = cfg["max_pairs"].as<unsigned>(); - int repeat = cfg["repeat"].as<unsigned>(); - bool check = false; - if (cfg.count("check")) check = true; - weight_t loss_margin = cfg["loss_margin"].as<weight_t>(); - bool batch = false; - if (cfg.count("batch")) batch = true; - if (loss_margin > 9998.) loss_margin = std::numeric_limits<float>::max(); - bool scale_bleu_diff = false; - if (cfg.count("scale_bleu_diff")) scale_bleu_diff = true; - const string pclr = cfg["pclr"].as<string>(); - bool average = false; - if (select_weights == "avg") - average = true; + // get configuration + po::variables_map conf; + if (!dtrain_init(argc, argv, &conf)) + return 1; + const size_t k = conf["k"].as<size_t>(); + const bool unique_kbest = conf["unique_kbest"].as<bool>(); + const bool forest_sample = conf["forest_sample"].as<bool>(); + const string score_name = conf["score"].as<string>(); + const weight_t nakov_fix = conf["nakov_fix"].as<weight_t>(); + const weight_t chiang_decay = conf["chiang_decay"].as<weight_t>(); + const size_t N = conf["N"].as<size_t>(); + const size_t T = conf["iterations"].as<size_t>(); + const weight_t eta = conf["learning_rate"].as<weight_t>(); + const weight_t margin = conf["margin"].as<weight_t>(); + const weight_t cut = conf["cut"].as<weight_t>(); + const bool adjust_cut = conf["adjust"].as<bool>(); + const bool all_pairs = cut==0; + const bool average = conf["average"].as<bool>(); + const bool pro = conf["pro_sampling"].as<bool>(); + const bool structured = conf["structured"].as<bool>(); + const weight_t threshold = conf["threshold"].as<weight_t>(); + const size_t max_up = conf["max_pairs"].as<size_t>(); + const weight_t l1_reg = conf["l1_reg"].as<weight_t>(); + const bool keep = conf["keep"].as<bool>(); + const bool noup = conf["disable_learning"].as<bool>(); + const string output_fn = conf["output"].as<string>(); vector<string> print_weights; - if (cfg.count("print_weights")) - boost::split(print_weights, cfg["print_weights"].as<string>(), boost::is_any_of(" ")); + boost::split(print_weights, conf["print_weights"].as<string>(), + boost::is_any_of(" ")); + const string output_updates_fn = conf["output_updates"].as<string>(); + const bool output_updates = output_updates_fn!=""; + const string output_raw_fn = conf["output_raw"].as<string>(); + const bool output_raw = output_raw_fn!=""; + const bool use_adadelta = conf["adadelta"].as<bool>(); + const weight_t adadelta_decay = conf["adadelta_decay"].as<weight_t>(); + const weight_t adadelta_eta = 0.000001; + const string adadelta_input = conf["adadelta_input"].as<string>(); + const string adadelta_output = conf["adadelta_output"].as<string>(); + const size_t max_input = conf["stop_after"].as<size_t>(); + const bool batch = conf["batch"].as<bool>(); // setup decoder register_feature_functions(); SetSilent(true); - ReadFile ini_rf(cfg["decoder_config"].as<string>()); - if (!quiet) - cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as<string>() << "'" << endl; - Decoder decoder(ini_rf.stream()); - - // scoring metric/scorer - string scorer_str = cfg["scorer"].as<string>(); - LocalScorer* scorer; - if (scorer_str == "bleu") { - scorer = static_cast<BleuScorer*>(new BleuScorer); - } else if (scorer_str == "stupid_bleu") { - scorer = static_cast<StupidBleuScorer*>(new StupidBleuScorer); - } else if (scorer_str == "fixed_stupid_bleu") { - scorer = static_cast<FixedStupidBleuScorer*>(new FixedStupidBleuScorer); - } else if (scorer_str == "smooth_bleu") { - scorer = static_cast<SmoothBleuScorer*>(new SmoothBleuScorer); - } else if (scorer_str == "sum_bleu") { - scorer = static_cast<SumBleuScorer*>(new SumBleuScorer); - } else if (scorer_str == "sumexp_bleu") { - scorer = static_cast<SumExpBleuScorer*>(new SumExpBleuScorer); - } else if (scorer_str == "sumwhatever_bleu") { - scorer = static_cast<SumWhateverBleuScorer*>(new SumWhateverBleuScorer); - } else if (scorer_str == "approx_bleu") { - scorer = static_cast<ApproxBleuScorer*>(new ApproxBleuScorer(N, approx_bleu_d)); - } else if (scorer_str == "lc_bleu") { - scorer = static_cast<LinearBleuScorer*>(new LinearBleuScorer(N)); + ReadFile f(conf["decoder_conf"].as<string>()); + Decoder decoder(f.stream()); + + // setup scorer & observer + Scorer* scorer; + if (score_name == "nakov") { + scorer = static_cast<NakovBleuScorer*>(new NakovBleuScorer(N, nakov_fix)); + } else if (score_name == "papineni") { + scorer = static_cast<PapineniBleuScorer*>(new PapineniBleuScorer(N)); + } else if (score_name == "lin") { + scorer = static_cast<LinBleuScorer*>(new LinBleuScorer(N)); + } else if (score_name == "liang") { + scorer = static_cast<LiangBleuScorer*>(new LiangBleuScorer(N)); + } else if (score_name == "chiang") { + scorer = static_cast<ChiangBleuScorer*>(new ChiangBleuScorer(N)); + } else if (score_name == "sum") { + scorer = static_cast<SumBleuScorer*>(new SumBleuScorer(N)); } else { - cerr << "Don't know scoring metric: '" << scorer_str << "', exiting." << endl; - exit(1); + assert(false); } - vector<score_t> bleu_weights; - scorer->Init(N, bleu_weights); - - // setup decoder observer - MT19937 rng; // random number generator, only for forest sampling HypSampler* observer; - if (sample_from == "kbest") - observer = static_cast<KBestGetter*>(new KBestGetter(k, filter_type)); + if (forest_sample) + observer = new KSampler(k, scorer); + else if (unique_kbest) + observer = new KBestSampler(k, scorer); else - observer = static_cast<KSampler*>(new KSampler(k, &rng)); - observer->SetScorer(scorer); + observer = new KBestNoFilterSampler(k, scorer); - // init weights + // weights vector<weight_t>& decoder_weights = decoder.CurrentWeightVector(); - SparseVector<weight_t> lambdas, cumulative_penalties, w_average; - if (cfg.count("input_weights")) Weights::InitFromFile(cfg["input_weights"].as<string>(), &decoder_weights); - Weights::InitSparseVector(decoder_weights, &lambdas); - - // meta params for perceptron, SVM - weight_t eta = cfg["learning_rate"].as<weight_t>(); - weight_t gamma = cfg["gamma"].as<weight_t>(); - - // faster perceptron: consider only misranked pairs, see - bool faster_perceptron = false; - if (gamma==0 && loss_margin==0) faster_perceptron = true; - - // l1 regularization - bool l1naive = false; - bool l1clip = false; - bool l1cumul = false; - weight_t l1_reg = 0; - if (cfg["l1_reg"].as<string>() != "none") { - string s = cfg["l1_reg"].as<string>(); - if (s == "naive") l1naive = true; - else if (s == "clip") l1clip = true; - else if (s == "cumul") l1cumul = true; - l1_reg = cfg["l1_reg_strength"].as<weight_t>(); + SparseVector<weight_t> lambdas, w_average; + if (conf.count("input_weights")) { + Weights::InitFromFile(conf["input_weights"].as<string>(), &decoder_weights); + Weights::InitSparseVector(decoder_weights, &lambdas); } - // output - string output_fn = cfg["output"].as<string>(); // input - bool read_bitext = false; - string input_fn; - if (cfg.count("bitext")) { - read_bitext = true; - input_fn = cfg["bitext"].as<string>(); + string input_fn = conf["bitext"].as<string>(); + ReadFile input(input_fn); + vector<string> buf; // decoder only accepts strings as input + vector<vector<Ngrams> > buffered_ngrams; // compute ngrams and lengths of references + vector<vector<size_t> > buffered_lengths; // (just once) + size_t input_sz = 0; + + // output configuration + cerr << fixed << setprecision(4); + cerr << "Parameters:" << endl; + cerr << setw(25) << "bitext " << "'" << input_fn << "'" << endl; + cerr << setw(25) << "k " << k << endl; + if (unique_kbest && !forest_sample) + cerr << setw(25) << "unique k-best " << unique_kbest << endl; + if (forest_sample) + cerr << setw(25) << "forest " << forest_sample << endl; + if (all_pairs) + cerr << setw(25) << "all pairs " << all_pairs << endl; + else if (pro) + cerr << setw(25) << "PRO " << pro << endl; + cerr << setw(25) << "score " << "'" << score_name << "'" << endl; + if (score_name == "nakov") + cerr << setw(25) << "nakov fix " << nakov_fix << endl; + if (score_name == "chiang") + cerr << setw(25) << "chiang decay " << chiang_decay << endl; + cerr << setw(25) << "N " << N << endl; + cerr << setw(25) << "T " << T << endl; + cerr << scientific << setw(25) << "learning rate " << eta << endl; + cerr << setw(25) << "margin " << margin << endl; + if (!structured) { + cerr << fixed << setw(25) << "cut " << round(cut*100) << "%" << endl; + cerr << setw(25) << "adjust " << adjust_cut << endl; } else { - input_fn = cfg["input"].as<string>(); + cerr << setw(25) << "struct. obj " << structured << endl; } - ReadFile input(input_fn); - // buffer input for t > 0 - vector<string> src_str_buf; // source strings (decoder takes only strings) - vector<vector<WordID> > ref_ids_buf; // references as WordID vecs - ReadFile refs; - string refs_fn; - if (!read_bitext) { - refs_fn = cfg["refs"].as<string>(); - refs.Init(refs_fn); + if (threshold > 0) + cerr << setw(25) << "threshold " << threshold << endl; + if (max_up != numeric_limits<size_t>::max()) + cerr << setw(25) << "max up. " << max_up << endl; + if (noup) + cerr << setw(25) << "no up. " << noup << endl; + cerr << setw(25) << "average " << average << endl; + cerr << scientific << setw(25) << "l1 reg. " << l1_reg << endl; + cerr << setw(25) << "decoder conf " << "'" + << conf["decoder_conf"].as<string>() << "'" << endl; + cerr << setw(25) << "input " << "'" << input_fn << "'" << endl; + cerr << setw(25) << "output " << "'" << output_fn << "'" << endl; + if (conf.count("input_weights")) { + cerr << setw(25) << "weights in " << "'" + << conf["input_weights"].as<string>() << "'" << endl; } + cerr << setw(25) << "batch " << batch << endl; + if (noup) + cerr << setw(25) << "no updates!" << endl; + if (use_adadelta) { + cerr << setw(25) << "adadelta " << use_adadelta << endl; + cerr << setw(25) << " decay " << adadelta_decay << endl; + if (adadelta_input != "") + cerr << setw(25) << "-input " << adadelta_input << endl; + if (adadelta_output != "") + cerr << setw(25) << "-output " << adadelta_output << endl; + } + cerr << "(1 dot per processed input)" << endl; + + // meta + weight_t best=0., gold_prev=0.; + size_t best_iteration = 0; + time_t total_time = 0.; - unsigned in_sz = std::numeric_limits<unsigned>::max(); // input index, input size - vector<pair<score_t, score_t> > all_scores; - score_t max_score = 0.; - unsigned best_it = 0; - float overall_time = 0.; - - // output cfg - if (!quiet) { - cerr << _p5; - cerr << endl << "dtrain" << endl << "Parameters:" << endl; - cerr << setw(25) << "k " << k << endl; - cerr << setw(25) << "N " << N << endl; - cerr << setw(25) << "T " << T << endl; - cerr << setw(25) << "batch " << batch << endl; - cerr << setw(26) << "scorer '" << scorer_str << "'" << endl; - if (scorer_str == "approx_bleu") - cerr << setw(25) << "approx. B discount " << approx_bleu_d << endl; - cerr << setw(25) << "sample from " << "'" << sample_from << "'" << endl; - if (sample_from == "kbest") - cerr << setw(25) << "filter " << "'" << filter_type << "'" << endl; - if (!scale_bleu_diff) cerr << setw(25) << "learning rate " << eta << endl; - else cerr << setw(25) << "learning rate " << "bleu diff" << endl; - cerr << setw(25) << "gamma " << gamma << endl; - cerr << setw(25) << "loss margin " << loss_margin << endl; - cerr << setw(25) << "faster perceptron " << faster_perceptron << endl; - cerr << setw(25) << "pairs " << "'" << pair_sampling << "'" << endl; - if (pair_sampling == "XYX") - cerr << setw(25) << "hi lo " << hi_lo << endl; - cerr << setw(25) << "pair threshold " << pair_threshold << endl; - cerr << setw(25) << "select weights " << "'" << select_weights << "'" << endl; - if (cfg.count("l1_reg")) - cerr << setw(25) << "l1 reg " << l1_reg << " '" << cfg["l1_reg"].as<string>() << "'" << endl; - if (rescale) - cerr << setw(25) << "rescale " << rescale << endl; - cerr << setw(25) << "pclr " << pclr << endl; - cerr << setw(25) << "max pairs " << max_pairs << endl; - cerr << setw(25) << "repeat " << repeat << endl; - //cerr << setw(25) << "test k-best " << test_k_best << endl; - cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as<string>() << "'" << endl; - cerr << setw(25) << "input " << "'" << input_fn << "'" << endl; - if (!read_bitext) - cerr << setw(25) << "refs " << "'" << refs_fn << "'" << endl; - cerr << setw(25) << "output " << "'" << output_fn << "'" << endl; - if (cfg.count("input_weights")) - cerr << setw(25) << "weights in " << "'" << cfg["input_weights"].as<string>() << "'" << endl; - if (stop_after > 0) - cerr << setw(25) << "stop_after " << stop_after << endl; - if (!verbose) cerr << "(a dot represents " << DTRAIN_DOTS << " inputs)" << endl; + // output + WriteFile out_up, out_raw; + if (output_raw) { + out_raw.Init(output_raw_fn); + *out_raw << setprecision(numeric_limits<double>::digits10+1); + } + if (output_updates) { + out_up.Init(output_updates_fn); + *out_up << setprecision(numeric_limits<double>::digits10+1); } - // pclr - SparseVector<weight_t> learning_rates; - // batch - SparseVector<weight_t> batch_updates; - score_t batch_loss; + // adadelta + SparseVector<weight_t> gradient_accum, update_accum; + if (use_adadelta && adadelta_input!="") { + vector<weight_t> grads_tmp; + Weights::InitFromFile(adadelta_input+".gradient.gz", &grads_tmp); + Weights::InitSparseVector(grads_tmp, &gradient_accum); + vector<weight_t> update_tmp; + Weights::InitFromFile(adadelta_input+".update.gz", &update_tmp); + Weights::InitSparseVector(update_tmp, &update_accum); + } - for (unsigned t = 0; t < T; t++) // T epochs + for (size_t t = 0; t < T; t++) // T iterations { + // batch update + SparseVector<weight_t> batch_update; + time_t start, end; time(&start); - score_t score_sum = 0.; - score_t model_sum(0); - unsigned ii = 0, rank_errors = 0, margin_violations = 0, npairs = 0, f_count = 0, list_sz = 0, kbest_loss_improve = 0; - batch_loss = 0.; - if (!quiet) cerr << "Iteration #" << t+1 << " of " << T << "." << endl; + weight_t gold_sum=0., model_sum=0.; + size_t i=0, num_up=0, feature_count=0, list_sz=0; + + cerr << "Iteration #" << t+1 << " of " << T << "." << endl; while(true) { + bool next = true; - string in; - string ref; - bool next = false, stop = false; // next iteration or premature stop + // getting input if (t == 0) { - if(!getline(*input, in)) next = true; - if(read_bitext) { - vector<string> strs; - boost::algorithm::split_regex(strs, in, boost::regex(" \\|\\|\\| ")); - in = strs[0]; - ref = strs[1]; - } - } else { - if (ii == in_sz) next = true; // stop if we reach the end of our input - } - // stop after X sentences (but still go on for those) - if (stop_after > 0 && stop_after == ii && !next) stop = true; - - // produce some pretty output - if (!quiet && !verbose) { - if (ii == 0) cerr << " "; - if ((ii+1) % (DTRAIN_DOTS) == 0) { - cerr << "."; - cerr.flush(); - } - if ((ii+1) % (20*DTRAIN_DOTS) == 0) { - cerr << " " << ii+1 << endl; - if (!next && !stop) cerr << " "; - } - if (stop) { - if (ii % (20*DTRAIN_DOTS) != 0) cerr << " " << ii << endl; - cerr << "Stopping after " << stop_after << " input sentences." << endl; + string in; + if(!getline(*input, in)) { + next = false; } else { - if (next) { - if (ii % (20*DTRAIN_DOTS) != 0) cerr << " " << ii << endl; + vector<string> parts; + boost::algorithm::split_regex(parts, in, boost::regex(" \\|\\|\\| ")); + buf.push_back(parts[0]); + parts.erase(parts.begin()); + buffered_ngrams.push_back({}); + buffered_lengths.push_back({}); + for (auto s: parts) { + vector<WordID> r; + vector<string> toks; + boost::split(toks, s, boost::is_any_of(" ")); + for (auto tok: toks) + r.push_back(TD::Convert(tok)); + buffered_ngrams.back().emplace_back(ngrams(r, N)); + buffered_lengths.back().push_back(r.size()); } } + } else { + next = i<input_sz; } - // next iteration - if (next || stop) break; - - // weights - lambdas.init_vector(&decoder_weights); + if (max_input == i) + next = false; - // getting input - vector<WordID> ref_ids; // reference as vector<WordID> - if (t == 0) { - if (!read_bitext) { - getline(*refs, ref); - } - vector<string> ref_tok; - boost::split(ref_tok, ref, boost::is_any_of(" ")); - register_and_convert(ref_tok, ref_ids); - ref_ids_buf.push_back(ref_ids); - src_str_buf.push_back(in); + // produce some pretty output + if (next) { + if (i%20 == 0) + cerr << " "; + cerr << "."; + if ((i+1)%20==0) + cerr << " " << i+1 << endl; } else { - ref_ids = ref_ids_buf[ii]; - } - observer->SetRef(ref_ids); - if (t == 0) - decoder.Decode(in, observer); - else - decoder.Decode(src_str_buf[ii], observer); - - // get (scored) samples - vector<ScoredHyp>* samples = observer->GetSamples(); - - if (verbose) { - cerr << "--- ref for " << ii << ": "; - if (t > 0) printWordIDVec(ref_ids_buf[ii]); - else printWordIDVec(ref_ids); - cerr << endl; - for (unsigned u = 0; u < samples->size(); u++) { - cerr << _p2 << _np << "[" << u << ". '"; - printWordIDVec((*samples)[u].w); - cerr << "'" << endl; - cerr << "SCORE=" << (*samples)[u].score << ",model="<< (*samples)[u].model << endl; - cerr << "F{" << (*samples)[u].f << "} ]" << endl << endl; - } - } - - if (repeat == 1) { - score_sum += (*samples)[0].score; // stats for 1best - model_sum += (*samples)[0].model; + if (i%20 != 0) + cerr << " " << i << endl; } + cerr.flush(); - f_count += observer->get_f_count(); - list_sz += observer->get_sz(); - - // weight updates - if (!noup) { - // get pairs - vector<pair<ScoredHyp,ScoredHyp> > pairs; - if (pair_sampling == "all") - all_pairs(samples, pairs, pair_threshold, max_pairs, faster_perceptron); - if (pair_sampling == "XYX") - partXYX(samples, pairs, pair_threshold, max_pairs, faster_perceptron, hi_lo); - if (pair_sampling == "PRO") - PROsampling(samples, pairs, pair_threshold, max_pairs); - int cur_npairs = pairs.size(); - npairs += cur_npairs; - - score_t kbest_loss_first = 0.0, kbest_loss_last = 0.0; - - if (check) repeat = 2; - vector<float> losses; // for check - - for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin(); - it != pairs.end(); it++) { - score_t model_diff = it->first.model - it->second.model; - score_t loss = max(0.0, -1.0 * model_diff); - losses.push_back(loss); - kbest_loss_first += loss; - } - - score_t kbest_loss = 0.0; - for (int ki=0; ki < repeat; ki++) { + // stop iterating + if (!next) break; - SparseVector<weight_t> lambdas_copy; // for l1 regularization - SparseVector<weight_t> sum_up; // for pclr - if (l1naive||l1clip||l1cumul) lambdas_copy = lambdas; + // decode + if (t > 0 || i > 0) + lambdas.init_vector(&decoder_weights); + observer->reference_ngrams = &buffered_ngrams[i]; + observer->reference_lengths = &buffered_lengths[i]; + decoder.Decode(buf[i], observer); + vector<Hyp>* sample = &(observer->sample); - unsigned pair_idx = 0; // for check - for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin(); - it != pairs.end(); it++) { - score_t model_diff = it->first.model - it->second.model; - score_t loss = max(0.0, -1.0 * model_diff); + // stats for 1-best + gold_sum += sample->front().gold; + model_sum += sample->front().model; + feature_count += observer->feature_count; + list_sz += observer->effective_size; - if (check && ki==repeat-1) cout << losses[pair_idx] - loss << endl; - pair_idx++; + if (output_raw) + output_sample(sample, out_raw, i); - if (repeat > 1) { - model_diff = lambdas.dot(it->first.f) - lambdas.dot(it->second.f); - kbest_loss += loss; - } - bool rank_error = false; - score_t margin; - if (faster_perceptron) { // we only have considering misranked pairs - rank_error = true; // pair sampling already did this for us - margin = std::numeric_limits<float>::max(); - } else { - rank_error = model_diff<=0.0; - margin = fabs(model_diff); - if (!rank_error && margin < loss_margin) margin_violations++; - } - if (rank_error && ki==0) rank_errors++; - if (scale_bleu_diff) eta = it->first.score - it->second.score; - if (rank_error || margin < loss_margin) { - SparseVector<weight_t> diff_vec = it->first.f - it->second.f; - if (batch) { - batch_loss += max(0., -1.0 * model_diff); - batch_updates += diff_vec; - continue; - } - if (pclr != "no") { - sum_up += diff_vec; - } else { - lambdas.plus_eq_v_times_s(diff_vec, eta); - if (gamma) lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./cur_npairs)); - } - } - } + // update model + if (!noup) { - // per-coordinate learning rate - if (pclr != "no") { - SparseVector<weight_t>::iterator it = sum_up.begin(); - for (; it != sum_up.end(); ++it) { - if (pclr == "simple") { - lambdas[it->first] += it->second / max(1.0, learning_rates[it->first]); - learning_rates[it->first]++; - } else if (pclr == "adagrad") { - if (learning_rates[it->first] == 0) { - lambdas[it->first] += it->second * eta; - } else { - lambdas[it->first] += it->second * eta * learning_rates[it->first]; - } - learning_rates[it->first] += pow(it->second, 2.0); - } - } - } + SparseVector<weight_t> updates; + if (structured) + num_up += update_structured(sample, updates, margin, + out_up, i); + else if (all_pairs) + num_up += updates_all(sample, updates, max_up, threshold, + out_up, i); + else if (pro) + num_up += updates_pro(sample, updates, cut, max_up, threshold, + out_up, i); + else + num_up += updates_multipartite(sample, updates, cut, margin, + max_up, threshold, adjust_cut, + out_up, i); + + SparseVector<weight_t> lambdas_copy; + if (l1_reg) + lambdas_copy = lambdas; + + if (use_adadelta) { // adadelta update + SparseVector<weight_t> squared; + for (auto it: updates) + squared[it.first] = pow(it.second, 2.0); + gradient_accum *= adadelta_decay; + squared *= 1.0-adadelta_decay; + gradient_accum += squared; + SparseVector<weight_t> u = gradient_accum + update_accum; + for (auto it: u) + u[it.first] = -1.0*( + sqrt(update_accum[it.first]+adadelta_eta) + / + sqrt(gradient_accum[it.first]+adadelta_eta) + ) * updates[it.first]; + lambdas += u; + update_accum *= adadelta_decay; + for (auto it: u) + u[it.first] = pow(it.second, 2.0); + update_accum = update_accum + (u*(1.0-adadelta_decay)); + } else if (batch) { + batch_update += updates; + } else { // regular update + lambdas.plus_eq_v_times_s(updates, eta); + } - // l1 regularization - // please note that this regularizations happen - // after a _sentence_ -- not after each example/pair! - if (l1naive) { - SparseVector<weight_t>::iterator it = lambdas.begin(); - for (; it != lambdas.end(); ++it) { - if (!lambdas_copy.get(it->first) || lambdas_copy.get(it->first)!=it->second) { - it->second *= max(0.0000001, eta/(eta+learning_rates[it->first])); // FIXME - learning_rates[it->first]++; - it->second -= sign(it->second) * l1_reg; - } - } - } else if (l1clip) { - SparseVector<weight_t>::iterator it = lambdas.begin(); - for (; it != lambdas.end(); ++it) { - if (!lambdas_copy.get(it->first) || lambdas_copy.get(it->first)!=it->second) { - if (it->second != 0) { - weight_t v = it->second; - if (v > 0) { - it->second = max(0., v - l1_reg); - } else { - it->second = min(0., v + l1_reg); - } - } - } - } - } else if (l1cumul) { - weight_t acc_penalty = (ii+1) * l1_reg; // ii is the index of the current input - SparseVector<weight_t>::iterator it = lambdas.begin(); - for (; it != lambdas.end(); ++it) { - if (!lambdas_copy.get(it->first) || lambdas_copy.get(it->first)!=it->second) { - if (it->second != 0) { - weight_t v = it->second; - weight_t penalized = 0.; - if (v > 0) { - penalized = max(0., v-(acc_penalty + cumulative_penalties.get(it->first))); - } else { - penalized = min(0., v+(acc_penalty - cumulative_penalties.get(it->first))); - } - it->second = penalized; - cumulative_penalties.set_value(it->first, cumulative_penalties.get(it->first)+penalized); - } - } + // update context for Chiang's approx. BLEU + if (score_name == "chiang") { + for (auto it: *sample) { + if (it.rank == 0) { + scorer->update_context(it.w, buffered_ngrams[i], + buffered_lengths[i], chiang_decay); + break; } } + } - if (ki==repeat-1) { // done - kbest_loss_last = kbest_loss; - if (repeat > 1) { - score_t best_model = -std::numeric_limits<score_t>::max(); - unsigned best_idx = 0; - for (unsigned i=0; i < samples->size(); i++) { - score_t s = lambdas.dot((*samples)[i].f); - if (s > best_model) { - best_idx = i; - best_model = s; - } + // \ell_1 regularization + // NB: regularization is done after each sentence, + // not after every single pair! + if (l1_reg) { + SparseVector<weight_t>::iterator it = lambdas.begin(); + for (; it != lambdas.end(); ++it) { + weight_t v = it->second; + if (!v) + continue; + if (!lambdas_copy.get(it->first) // new or.. + || lambdas_copy.get(it->first)!=v) // updated feature + { + if (v > 0) { + it->second = max(0., v - l1_reg); + } else { + it->second = min(0., v + l1_reg); } - score_sum += (*samples)[best_idx].score; - model_sum += best_model; } } - } // repeat - - if ((kbest_loss_first - kbest_loss_last) >= 0) kbest_loss_improve++; + } } // noup - if (rescale) lambdas /= lambdas.l2norm(); - - ++ii; + i++; } // input loop - if (t == 0) in_sz = ii; // remember size of input (# lines) - + if (t == 0) + input_sz = i; // remember size of input (# lines) + // batch if (batch) { - lambdas.plus_eq_v_times_s(batch_updates, eta); - if (gamma) lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./npairs)); - batch_updates.clear(); + batch_update /= (weight_t)num_up; + lambdas.plus_eq_v_times_s(batch_update, eta); + lambdas.init_vector(&decoder_weights); } - if (average) w_average += lambdas; - - if (scorer_str == "approx_bleu" || scorer_str == "lc_bleu") scorer->Reset(); - - // print some stats - score_t score_avg = score_sum/(score_t)in_sz; - score_t model_avg = model_sum/(score_t)in_sz; - score_t score_diff, model_diff; - if (t > 0) { - score_diff = score_avg - all_scores[t-1].first; - model_diff = model_avg - all_scores[t-1].second; - } else { - score_diff = score_avg; - model_diff = model_avg; + // update average + if (average) + w_average += lambdas; + + if (adadelta_output != "") { + WriteFile g(adadelta_output+".gradient.gz"); + for (auto it: gradient_accum) + *g << FD::Convert(it.first) << " " << it.second << endl; + WriteFile u(adadelta_output+".update.gz"); + for (auto it: update_accum) + *u << FD::Convert(it.first) << " " << it.second << endl; } - unsigned nonz = 0; - if (!quiet) nonz = (unsigned)lambdas.num_nonzero(); - - if (!quiet) { - cerr << _p5 << _p << "WEIGHTS" << endl; - for (vector<string>::iterator it = print_weights.begin(); it != print_weights.end(); it++) { - cerr << setw(18) << *it << " = " << lambdas.get(FD::Convert(*it)) << endl; + // stats + weight_t gold_avg = gold_sum/(weight_t)input_sz; + cerr << setiosflags(ios::showpos) << scientific << "WEIGHTS" << endl; + for (auto name: print_weights) { + cerr << setw(18) << name << " = " + << lambdas.get(FD::Convert(name)); + if (use_adadelta) { + weight_t rate = -1.0*(sqrt(update_accum[FD::Convert(name)]+adadelta_eta) + / sqrt(gradient_accum[FD::Convert(name)]+adadelta_eta)); + cerr << " {" << rate << "}"; } - cerr << " ---" << endl; - cerr << _np << " 1best avg score: " << score_avg; - cerr << _p << " (" << score_diff << ")" << endl; - cerr << _np << " 1best avg model score: " << model_avg; - cerr << _p << " (" << model_diff << ")" << endl; - cerr << " avg # pairs: "; - cerr << _np << npairs/(float)in_sz << endl; - cerr << " avg # rank err: "; - cerr << rank_errors/(float)in_sz; - if (faster_perceptron) cerr << " (meaningless)"; cerr << endl; - cerr << " avg # margin viol: "; - cerr << margin_violations/(float)in_sz << endl; - if (batch) cerr << " batch loss: " << batch_loss << endl; - cerr << " k-best loss imp: " << ((float)kbest_loss_improve/in_sz)*100 << "%" << endl; - cerr << " non0 feature count: " << nonz << endl; - cerr << " avg list sz: " << list_sz/(float)in_sz << endl; - cerr << " avg f count: " << f_count/(float)list_sz << endl; - } - - pair<score_t,score_t> remember; - remember.first = score_avg; - remember.second = model_avg; - all_scores.push_back(remember); - if (score_avg > max_score) { - max_score = score_avg; - best_it = t; } - time (&end); - float time_diff = difftime(end, start); - overall_time += time_diff; - if (!quiet) { - cerr << _p2 << _np << "(time " << time_diff/60. << " min, "; - cerr << time_diff/in_sz << " s/S)" << endl; + cerr << " ---" << endl; + cerr << resetiosflags(ios::showpos) + << " 1best avg score: " << gold_avg*100; + cerr << setiosflags(ios::showpos) << fixed << " (" + << (gold_avg-gold_prev)*100 << ")" << endl; + cerr << scientific << " 1best avg model score: " + << model_sum/(weight_t)input_sz << endl; + cerr << fixed; + cerr << " avg # updates: "; + cerr << resetiosflags(ios::showpos) << num_up/(float)input_sz << endl; + cerr << " non-0 feature count: " << lambdas.num_nonzero() << endl; + cerr << " avg f count: " << feature_count/(float)list_sz << endl; + cerr << " avg list sz: " << list_sz/(float)input_sz << endl; + + if (gold_avg > best) { + best = gold_avg; + best_iteration = t; } - if (t+1 != T && !quiet) cerr << endl; + gold_prev = gold_avg; - if (noup) break; + time (&end); + time_t time_diff = difftime(end, start); + total_time += time_diff; + cerr << "(time " << time_diff/60. << " min, "; + cerr << time_diff/input_sz << " s/S)" << endl; + if (t+1 != T) cerr << endl; - // write weights to file - if (select_weights == "best" || keep) { + if (keep) { // keep intermediate weights lambdas.init_vector(&decoder_weights); string w_fn = "weights." + boost::lexical_cast<string>(t) + ".gz"; Weights::WriteToFile(w_fn, decoder_weights, true); } - if (check) cout << "---" << endl; - } // outer loop - if (average) w_average /= (weight_t)T; - - if (!noup) { - if (!quiet) cerr << endl << "Writing weights file to '" << output_fn << "' ..." << endl; - if (select_weights == "last" || average) { // last, average - WriteFile of(output_fn); // works with '-' - ostream& o = *of.stream(); - o.precision(17); - o << _np; - if (average) { - for (SparseVector<weight_t>::iterator it = w_average.begin(); it != w_average.end(); ++it) { - if (it->second == 0) continue; - o << FD::Convert(it->first) << '\t' << it->second << endl; - } - } else { - for (SparseVector<weight_t>::iterator it = lambdas.begin(); it != lambdas.end(); ++it) { - if (it->second == 0) continue; - o << FD::Convert(it->first) << '\t' << it->second << endl; - } - } - } else if (select_weights == "VOID") { // do nothing with the weights - } else { // best - if (output_fn != "-") { - CopyFile("weights."+boost::lexical_cast<string>(best_it)+".gz", output_fn); - } else { - ReadFile bestw("weights."+boost::lexical_cast<string>(best_it)+".gz"); - string o; - cout.precision(17); - cout << _np; - while(getline(*bestw, o)) cout << o << endl; - } - if (!keep) { - for (unsigned i = 0; i < T; i++) { - string s = "weights." + boost::lexical_cast<string>(i) + ".gz"; - unlink(s.c_str()); - } - } - } - if (!quiet) cerr << "done" << endl; + // final weights + if (average) { + w_average /= T; + w_average.init_vector(decoder_weights); + } else if (!keep) { + lambdas.init_vector(decoder_weights); } + if (average || !keep) + Weights::WriteToFile(output_fn, decoder_weights, true); - if (!quiet) { - cerr << _p5 << _np << endl << "---" << endl << "Best iteration: "; - cerr << best_it+1 << " [SCORE '" << scorer_str << "'=" << max_score << "]." << endl; - cerr << "This took " << overall_time/60. << " min." << endl; - } + cerr << endl << "---" << endl << "Best iteration: "; + cerr << best_iteration+1 << " [GOLD = " << best*100 << "]." << endl; + cerr << "This took " << total_time/60. << " min." << endl; + + return 0; } diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h index 07bd9b65..883e6028 100644 --- a/training/dtrain/dtrain.h +++ b/training/dtrain/dtrain.h @@ -1,9 +1,6 @@ #ifndef _DTRAIN_H_ #define _DTRAIN_H_ -#define DTRAIN_DOTS 10 // after how many inputs to display a '.' -#define DTRAIN_SCALE 100000 - #include <iomanip> #include <climits> #include <string.h> @@ -25,113 +22,125 @@ namespace po = boost::program_options; namespace dtrain { - -inline void register_and_convert(const vector<string>& strs, vector<WordID>& ids) -{ - vector<string>::const_iterator it; - for (it = strs.begin(); it < strs.end(); it++) - ids.push_back(TD::Convert(*it)); -} - -inline string gettmpf(const string path, const string infix) -{ - char fn[path.size() + infix.size() + 8]; - strcpy(fn, path.c_str()); - strcat(fn, "/"); - strcat(fn, infix.c_str()); - strcat(fn, "-XXXXXX"); - if (!mkstemp(fn)) { - cerr << "Cannot make temp file in" << path << " , exiting." << endl; - exit(1); - } - return string(fn); -} - -typedef double score_t; - -struct ScoredHyp +struct Hyp { - vector<WordID> w; - SparseVector<double> f; - score_t model; - score_t score; - unsigned rank; + Hyp() {} + Hyp(vector<WordID> w, SparseVector<weight_t> f, weight_t model, weight_t gold, + size_t rank) : w(w), f(f), model(model), gold(gold), rank(rank) {} + + vector<WordID> w; + SparseVector<weight_t> f; + weight_t model, gold; + size_t rank; }; -struct LocalScorer +bool +dtrain_init(int argc, + char** argv, + po::variables_map* conf) { - unsigned N_; - vector<score_t> w_; - - virtual score_t - Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned rank, const unsigned src_len)=0; - - virtual void Reset() {} // only for ApproxBleuScorer, LinearBleuScorer - - inline void - Init(unsigned N, vector<score_t> weights) - { - assert(N > 0); - N_ = N; - if (weights.empty()) for (unsigned i = 0; i < N_; i++) w_.push_back(1./N_); - else w_ = weights; + po::options_description opts("Configuration File Options"); + opts.add_options() + ("bitext,b", po::value<string>(), + "bitext, source and references in a single file [e ||| f]") + ("decoder_conf,C", po::value<string>(), + "decoder configuration file") + ("iterations,T", po::value<size_t>()->default_value(15), + "number of iterations T") + ("k", po::value<size_t>()->default_value(100), + "sample size per input (e.g. size of k-best lists)") + ("unique_kbest", po::bool_switch()->default_value(true), + "unique k-best lists") + ("forest_sample", po::bool_switch()->default_value(false), + "sample k hyptheses from forest instead of using k-best list") + ("learning_rate,l", po::value<weight_t>()->default_value(0.00001), + "learning rate [only meaningful if margin>0 or input weights are given]") + ("l1_reg,r", po::value<weight_t>()->default_value(0.), + "l1 regularization strength [see Tsuruoka, Tsujii and Ananiadou (2009)]") + ("adadelta,D", po::bool_switch()->default_value(false), + "use AdaDelta dynamic learning rates") + ("adadelta_decay", po::value<weight_t>()->default_value(0.9), + "decay for AdaDelta algorithm") + ("adadelta_input", po::value<string>()->default_value(""), + "input for AdaDelta's parameters, two files: file.gradient, and file.update") + ("adadelta_output", po::value<string>()->default_value(""), + "prefix for outputting AdaDelta's parameters") + ("margin,m", po::value<weight_t>()->default_value(1.0), + "margin for margin perceptron [set =0 for standard perceptron]") + ("cut,u", po::value<weight_t>()->default_value(0.1), + "use top/bottom 10% (default) of k-best as 'good' and 'bad' for pair sampling, 0 to use all pairs") + ("adjust,A", po::bool_switch()->default_value(false), + "adjust cut for optimal pos. in k-best to cut") + ("score,s", po::value<string>()->default_value("nakov"), + "per-sentence BLEU (approx.)") + ("nakov_fix", po::value<weight_t>()->default_value(1.0), + "add to reference length [see score.h]") + ("chiang_decay", po::value<weight_t>()->default_value(0.9), + "decaying factor for Chiang's approx. BLEU") + ("N", po::value<size_t>()->default_value(4), + "N for BLEU approximation") + ("input_weights,w", po::value<string>(), + "weights to initialize model") + ("average,a", po::bool_switch()->default_value(true), + "output average weights") + ("keep,K", po::bool_switch()->default_value(false), + "output a weight file per iteration [as weights.T.gz]") + ("structured,S", po::bool_switch()->default_value(false), + "structured prediction objective [hope/fear] w/ SGD") + ("pro_sampling", po::bool_switch()->default_value(false), + "updates from pairs selected as shown in Fig.4 of (Hopkins and May, 2011) [Gamma=max_pairs (default 5000), Xi=cut (default 50); threshold default 0.05]") + ("threshold", po::value<weight_t>()->default_value(0.), + "(min.) threshold in terms of gold score for pair selection") + ("max_pairs", + po::value<size_t>()->default_value(numeric_limits<size_t>::max()), + "max. number of updates/pairs") + ("batch,B", po::bool_switch()->default_value(false), + "perform batch updates") + ("output,o", po::value<string>()->default_value("-"), + "output weights file, '-' for STDOUT") + ("disable_learning,X", po::bool_switch()->default_value(false), + "fix model") + ("output_updates,U", po::value<string>()->default_value(""), + "output updates (diff. vectors) [to filename]") + ("output_raw,R", po::value<string>()->default_value(""), + "output raw data (e.g. k-best lists) [to filename]") + ("stop_after", po::value<size_t>()->default_value(numeric_limits<size_t>::max()), + "only look at this number of segments") + ("print_weights,P", po::value<string>()->default_value("EgivenFCoherent SampleCountF CountEF MaxLexFgivenE MaxLexEgivenF IsSingletonF IsSingletonFE Glue WordPenalty PassThrough LanguageModel LanguageModel_OOV"), + "list of weights to print after each iteration"); + po::options_description clopts("Command Line Options"); + clopts.add_options() + ("conf,c", po::value<string>(), "dtrain configuration file") + ("help,h", po::bool_switch(), "display options"); + opts.add(clopts); + po::store(parse_command_line(argc, argv, opts), *conf); + cerr << "*dtrain*" << endl << endl; + if ((*conf)["help"].as<bool>()) { + cerr << setprecision(3) << opts << endl; + + return false; } - - inline score_t - brevity_penalty(const unsigned hyp_len, const unsigned ref_len) - { - if (hyp_len > ref_len) return 1; - return exp(1 - (score_t)ref_len/hyp_len); + if (conf->count("conf")) { + ifstream f((*conf)["conf"].as<string>().c_str()); + po::store(po::parse_config_file(f, opts), *conf); } -}; - -struct HypSampler : public DecoderObserver -{ - LocalScorer* scorer_; - vector<WordID>* ref_; - unsigned f_count_, sz_; - virtual vector<ScoredHyp>* GetSamples()=0; - inline void SetScorer(LocalScorer* scorer) { scorer_ = scorer; } - inline void SetRef(vector<WordID>& ref) { ref_ = &ref; } - inline unsigned get_f_count() { return f_count_; } - inline unsigned get_sz() { return sz_; } -}; - -struct HSReporter -{ - string task_id_; - - HSReporter(string task_id) : task_id_(task_id) {} + po::notify(*conf); + if (!conf->count("decoder_conf")) { + cerr << "Missing decoder configuration." << endl; + cerr << opts << endl; - inline void update_counter(string name, unsigned amount) { - cerr << "reporter:counter:" << task_id_ << "," << name << "," << amount << endl; + return false; } - inline void update_gcounter(string name, unsigned amount) { - cerr << "reporter:counter:Global," << name << "," << amount << endl; - } -}; + if (!conf->count("bitext")) { + cerr << "No input bitext." << endl; + cerr << opts << endl; -inline ostream& _np(ostream& out) { return out << resetiosflags(ios::showpos); } -inline ostream& _p(ostream& out) { return out << setiosflags(ios::showpos); } -inline ostream& _p2(ostream& out) { return out << setprecision(2); } -inline ostream& _p5(ostream& out) { return out << setprecision(5); } - -inline void printWordIDVec(vector<WordID>& v, ostream& os=cerr) -{ - for (unsigned i = 0; i < v.size(); i++) { - os << TD::Convert(v[i]); - if (i < v.size()-1) os << " "; + return false; } -} -template<typename T> -inline T sign(T z) -{ - if (z == 0) return 0; - return z < 0 ? -1 : +1; + return true; } - } // namespace #endif diff --git a/training/dtrain/examples/parallelized/README b/training/dtrain/examples/parallelized/README deleted file mode 100644 index 89715105..00000000 --- a/training/dtrain/examples/parallelized/README +++ /dev/null @@ -1,5 +0,0 @@ -run for example - ../../parallelize.rb ./dtrain.ini 4 false 2 2 ./in ./refs - -final weights will be in the file work/weights.3 - diff --git a/training/dtrain/examples/parallelized/cdec.ini b/training/dtrain/examples/parallelized/cdec.ini deleted file mode 100644 index 5773029a..00000000 --- a/training/dtrain/examples/parallelized/cdec.ini +++ /dev/null @@ -1,22 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -intersection_strategy=cube_pruning -cubepruning_pop_limit=200 -scfg_max_span_limit=15 -feature_function=WordPenalty -feature_function=KLanguageModel ../standard//nc-wmt11.en.srilm.gz -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -#feature_function=RuleIdentityFeatures -#feature_function=RuleNgramFeatures -#feature_function=RuleShape -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures diff --git a/training/dtrain/examples/parallelized/dtrain.ini b/training/dtrain/examples/parallelized/dtrain.ini deleted file mode 100644 index 0b0932d6..00000000 --- a/training/dtrain/examples/parallelized/dtrain.ini +++ /dev/null @@ -1,14 +0,0 @@ -k=100 -N=4 -learning_rate=0.0001 -gamma=0 -loss_margin=1.0 -epochs=1 -scorer=stupid_bleu -sample_from=kbest -filter=uniq -pair_sampling=XYX -hi_lo=0.1 -select_weights=last -print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough -decoder_config=cdec.ini diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz Binary files differdeleted file mode 100644 index 1e28a24b..00000000 --- a/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz +++ /dev/null diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz Binary files differdeleted file mode 100644 index 372f5675..00000000 --- a/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz +++ /dev/null diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz Binary files differdeleted file mode 100644 index 145d0dc0..00000000 --- a/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz +++ /dev/null diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz Binary files differdeleted file mode 100644 index 105593ff..00000000 --- a/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz +++ /dev/null diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz Binary files differdeleted file mode 100644 index 30781f48..00000000 --- a/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz +++ /dev/null diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz Binary files differdeleted file mode 100644 index 834ee759..00000000 --- a/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz +++ /dev/null diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz Binary files differdeleted file mode 100644 index 2e76f348..00000000 --- a/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz +++ /dev/null diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz Binary files differdeleted file mode 100644 index 3741a887..00000000 --- a/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz +++ /dev/null diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz Binary files differdeleted file mode 100644 index ebf6bd0c..00000000 --- a/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz +++ /dev/null diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz Binary files differdeleted file mode 100644 index c1791059..00000000 --- a/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz +++ /dev/null diff --git a/training/dtrain/examples/parallelized/in b/training/dtrain/examples/parallelized/in deleted file mode 100644 index 51d01fe7..00000000 --- a/training/dtrain/examples/parallelized/in +++ /dev/null @@ -1,10 +0,0 @@ -<seg grammar="grammar/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg> -<seg grammar="grammar/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg> -<seg grammar="grammar/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg> -<seg grammar="grammar/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg> -<seg grammar="grammar/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg> -<seg grammar="grammar/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg> -<seg grammar="grammar/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg> -<seg grammar="grammar/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg> -<seg grammar="grammar/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg> -<seg grammar="grammar/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg> diff --git a/training/dtrain/examples/parallelized/refs b/training/dtrain/examples/parallelized/refs deleted file mode 100644 index 632e27b0..00000000 --- a/training/dtrain/examples/parallelized/refs +++ /dev/null @@ -1,10 +0,0 @@ -europe 's divided racial house -a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge . -the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them . -while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon . -an aging population at home and ever more open borders imply increasing racial fragmentation in european countries . -mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear . -it will not , as america 's racial history clearly shows . -race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes . -the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths . -this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us . diff --git a/training/dtrain/examples/parallelized/work/out.0.0 b/training/dtrain/examples/parallelized/work/out.0.0 deleted file mode 100644 index c559dd4d..00000000 --- a/training/dtrain/examples/parallelized/work/out.0.0 +++ /dev/null @@ -1,62 +0,0 @@ - cdec cfg 'cdec.ini' -Loading the LM will be faster if you build a binary file. -Reading ../standard//nc-wmt11.en.srilm.gz -----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 -**************************************************************************************************** -Seeding random number sequence to 405292278 - -dtrain -Parameters: - k 100 - N 4 - T 1 - scorer 'stupid_bleu' - sample from 'kbest' - filter 'uniq' - learning rate 0.0001 - gamma 0 - loss margin 1 - faster perceptron 0 - pairs 'XYX' - hi lo 0.1 - pair threshold 0 - select weights 'last' - l1 reg 0 'none' - max pairs 4294967295 - cdec cfg 'cdec.ini' - input 'work/shard.0.0.in' - refs 'work/shard.0.0.refs' - output 'work/weights.0.0' -(a dot represents 10 inputs) -Iteration #1 of 1. - 5 -WEIGHTS - Glue = +0.2663 - WordPenalty = -0.0079042 - LanguageModel = +0.44782 - LanguageModel_OOV = -0.0401 - PhraseModel_0 = -0.193 - PhraseModel_1 = +0.71321 - PhraseModel_2 = +0.85196 - PhraseModel_3 = -0.43986 - PhraseModel_4 = -0.44803 - PhraseModel_5 = -0.0538 - PhraseModel_6 = -0.1788 - PassThrough = -0.1477 - --- - 1best avg score: 0.17521 (+0.17521) - 1best avg model score: 21.556 (+21.556) - avg # pairs: 1671.2 - avg # rank err: 1118.6 - avg # margin viol: 552.6 - non0 feature count: 12 - avg list sz: 100 - avg f count: 11.32 -(time 0.35 min, 4.2 s/S) - -Writing weights file to 'work/weights.0.0' ... -done - ---- -Best iteration: 1 [SCORE 'stupid_bleu'=0.17521]. -This took 0.35 min. diff --git a/training/dtrain/examples/parallelized/work/out.0.1 b/training/dtrain/examples/parallelized/work/out.0.1 deleted file mode 100644 index 8bc7ea9c..00000000 --- a/training/dtrain/examples/parallelized/work/out.0.1 +++ /dev/null @@ -1,63 +0,0 @@ - cdec cfg 'cdec.ini' -Loading the LM will be faster if you build a binary file. -Reading ../standard//nc-wmt11.en.srilm.gz -----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 -**************************************************************************************************** -Seeding random number sequence to 43859692 - -dtrain -Parameters: - k 100 - N 4 - T 1 - scorer 'stupid_bleu' - sample from 'kbest' - filter 'uniq' - learning rate 0.0001 - gamma 0 - loss margin 1 - faster perceptron 0 - pairs 'XYX' - hi lo 0.1 - pair threshold 0 - select weights 'last' - l1 reg 0 'none' - max pairs 4294967295 - cdec cfg 'cdec.ini' - input 'work/shard.0.0.in' - refs 'work/shard.0.0.refs' - output 'work/weights.0.1' - weights in 'work/weights.0' -(a dot represents 10 inputs) -Iteration #1 of 1. - 5 -WEIGHTS - Glue = -0.2699 - WordPenalty = +0.080605 - LanguageModel = -0.026572 - LanguageModel_OOV = -0.30025 - PhraseModel_0 = -0.32076 - PhraseModel_1 = +0.67451 - PhraseModel_2 = +0.92 - PhraseModel_3 = -0.36402 - PhraseModel_4 = -0.592 - PhraseModel_5 = -0.0269 - PhraseModel_6 = -0.28755 - PassThrough = -0.33285 - --- - 1best avg score: 0.26638 (+0.26638) - 1best avg model score: 53.197 (+53.197) - avg # pairs: 2028.6 - avg # rank err: 998.2 - avg # margin viol: 918.8 - non0 feature count: 12 - avg list sz: 100 - avg f count: 10.496 -(time 0.35 min, 4.2 s/S) - -Writing weights file to 'work/weights.0.1' ... -done - ---- -Best iteration: 1 [SCORE 'stupid_bleu'=0.26638]. -This took 0.35 min. diff --git a/training/dtrain/examples/parallelized/work/out.1.0 b/training/dtrain/examples/parallelized/work/out.1.0 deleted file mode 100644 index 65d1e7dc..00000000 --- a/training/dtrain/examples/parallelized/work/out.1.0 +++ /dev/null @@ -1,62 +0,0 @@ - cdec cfg 'cdec.ini' -Loading the LM will be faster if you build a binary file. -Reading ../standard//nc-wmt11.en.srilm.gz -----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 -**************************************************************************************************** -Seeding random number sequence to 4126799437 - -dtrain -Parameters: - k 100 - N 4 - T 1 - scorer 'stupid_bleu' - sample from 'kbest' - filter 'uniq' - learning rate 0.0001 - gamma 0 - loss margin 1 - faster perceptron 0 - pairs 'XYX' - hi lo 0.1 - pair threshold 0 - select weights 'last' - l1 reg 0 'none' - max pairs 4294967295 - cdec cfg 'cdec.ini' - input 'work/shard.1.0.in' - refs 'work/shard.1.0.refs' - output 'work/weights.1.0' -(a dot represents 10 inputs) -Iteration #1 of 1. - 5 -WEIGHTS - Glue = -0.3815 - WordPenalty = +0.20064 - LanguageModel = +0.95304 - LanguageModel_OOV = -0.264 - PhraseModel_0 = -0.22362 - PhraseModel_1 = +0.12254 - PhraseModel_2 = +0.26328 - PhraseModel_3 = +0.38018 - PhraseModel_4 = -0.48654 - PhraseModel_5 = +0 - PhraseModel_6 = -0.3645 - PassThrough = -0.2216 - --- - 1best avg score: 0.10863 (+0.10863) - 1best avg model score: -4.9841 (-4.9841) - avg # pairs: 1345.4 - avg # rank err: 822.4 - avg # margin viol: 501 - non0 feature count: 11 - avg list sz: 100 - avg f count: 11.814 -(time 0.43 min, 5.2 s/S) - -Writing weights file to 'work/weights.1.0' ... -done - ---- -Best iteration: 1 [SCORE 'stupid_bleu'=0.10863]. -This took 0.43333 min. diff --git a/training/dtrain/examples/parallelized/work/out.1.1 b/training/dtrain/examples/parallelized/work/out.1.1 deleted file mode 100644 index f479fbbc..00000000 --- a/training/dtrain/examples/parallelized/work/out.1.1 +++ /dev/null @@ -1,63 +0,0 @@ - cdec cfg 'cdec.ini' -Loading the LM will be faster if you build a binary file. -Reading ../standard//nc-wmt11.en.srilm.gz -----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 -**************************************************************************************************** -Seeding random number sequence to 2112412848 - -dtrain -Parameters: - k 100 - N 4 - T 1 - scorer 'stupid_bleu' - sample from 'kbest' - filter 'uniq' - learning rate 0.0001 - gamma 0 - loss margin 1 - faster perceptron 0 - pairs 'XYX' - hi lo 0.1 - pair threshold 0 - select weights 'last' - l1 reg 0 'none' - max pairs 4294967295 - cdec cfg 'cdec.ini' - input 'work/shard.1.0.in' - refs 'work/shard.1.0.refs' - output 'work/weights.1.1' - weights in 'work/weights.0' -(a dot represents 10 inputs) -Iteration #1 of 1. - 5 -WEIGHTS - Glue = -0.3178 - WordPenalty = +0.11092 - LanguageModel = +0.17269 - LanguageModel_OOV = -0.13485 - PhraseModel_0 = -0.45371 - PhraseModel_1 = +0.38789 - PhraseModel_2 = +0.75311 - PhraseModel_3 = -0.38163 - PhraseModel_4 = -0.58817 - PhraseModel_5 = -0.0269 - PhraseModel_6 = -0.27315 - PassThrough = -0.16745 - --- - 1best avg score: 0.13169 (+0.13169) - 1best avg model score: 24.226 (+24.226) - avg # pairs: 1951.2 - avg # rank err: 985.4 - avg # margin viol: 951 - non0 feature count: 12 - avg list sz: 100 - avg f count: 11.224 -(time 0.45 min, 5.4 s/S) - -Writing weights file to 'work/weights.1.1' ... -done - ---- -Best iteration: 1 [SCORE 'stupid_bleu'=0.13169]. -This took 0.45 min. diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.in b/training/dtrain/examples/parallelized/work/shard.0.0.in deleted file mode 100644 index 92f9c78e..00000000 --- a/training/dtrain/examples/parallelized/work/shard.0.0.in +++ /dev/null @@ -1,5 +0,0 @@ -<seg grammar="grammar/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg> -<seg grammar="grammar/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg> -<seg grammar="grammar/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg> -<seg grammar="grammar/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg> -<seg grammar="grammar/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg> diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.refs b/training/dtrain/examples/parallelized/work/shard.0.0.refs deleted file mode 100644 index bef68fee..00000000 --- a/training/dtrain/examples/parallelized/work/shard.0.0.refs +++ /dev/null @@ -1,5 +0,0 @@ -europe 's divided racial house -a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge . -the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them . -while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon . -an aging population at home and ever more open borders imply increasing racial fragmentation in european countries . diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.in b/training/dtrain/examples/parallelized/work/shard.1.0.in deleted file mode 100644 index b7695ce7..00000000 --- a/training/dtrain/examples/parallelized/work/shard.1.0.in +++ /dev/null @@ -1,5 +0,0 @@ -<seg grammar="grammar/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg> -<seg grammar="grammar/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg> -<seg grammar="grammar/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg> -<seg grammar="grammar/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg> -<seg grammar="grammar/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg> diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.refs b/training/dtrain/examples/parallelized/work/shard.1.0.refs deleted file mode 100644 index 6076f6d5..00000000 --- a/training/dtrain/examples/parallelized/work/shard.1.0.refs +++ /dev/null @@ -1,5 +0,0 @@ -mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear . -it will not , as america 's racial history clearly shows . -race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes . -the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths . -this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us . diff --git a/training/dtrain/examples/parallelized/work/weights.0 b/training/dtrain/examples/parallelized/work/weights.0 deleted file mode 100644 index ddd595a8..00000000 --- a/training/dtrain/examples/parallelized/work/weights.0 +++ /dev/null @@ -1,12 +0,0 @@ -LanguageModel 0.7004298992212881 -PhraseModel_2 0.5576194336478857 -PhraseModel_1 0.41787318415343155 -PhraseModel_4 -0.46728502545635164 -PhraseModel_3 -0.029839521598455515 -Glue -0.05760000000000068 -PhraseModel_6 -0.2716499999999978 -PhraseModel_0 -0.20831031065605327 -LanguageModel_OOV -0.15205000000000077 -PassThrough -0.1846500000000006 -WordPenalty 0.09636994553433414 -PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/parallelized/work/weights.0.0 b/training/dtrain/examples/parallelized/work/weights.0.0 deleted file mode 100644 index c9370b18..00000000 --- a/training/dtrain/examples/parallelized/work/weights.0.0 +++ /dev/null @@ -1,12 +0,0 @@ -WordPenalty -0.0079041595706392243 -LanguageModel 0.44781580828279532 -LanguageModel_OOV -0.04010000000000042 -Glue 0.26629999999999948 -PhraseModel_0 -0.19299677809125185 -PhraseModel_1 0.71321026861732773 -PhraseModel_2 0.85195540993310537 -PhraseModel_3 -0.43986310822842656 -PhraseModel_4 -0.44802855630415955 -PhraseModel_5 -0.053800000000000514 -PhraseModel_6 -0.17879999999999835 -PassThrough -0.14770000000000036 diff --git a/training/dtrain/examples/parallelized/work/weights.0.1 b/training/dtrain/examples/parallelized/work/weights.0.1 deleted file mode 100644 index 8fad3de8..00000000 --- a/training/dtrain/examples/parallelized/work/weights.0.1 +++ /dev/null @@ -1,12 +0,0 @@ -WordPenalty 0.080605055841244472 -LanguageModel -0.026571720531022844 -LanguageModel_OOV -0.30024999999999141 -Glue -0.26989999999999842 -PhraseModel_2 0.92000295209089566 -PhraseModel_1 0.67450748692470841 -PhraseModel_4 -0.5920000014976784 -PhraseModel_3 -0.36402437203127397 -PhraseModel_6 -0.28754999999999603 -PhraseModel_0 -0.32076244202907672 -PassThrough -0.33284999999999004 -PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/parallelized/work/weights.1 b/training/dtrain/examples/parallelized/work/weights.1 deleted file mode 100644 index 03058a16..00000000 --- a/training/dtrain/examples/parallelized/work/weights.1 +++ /dev/null @@ -1,12 +0,0 @@ -PhraseModel_2 0.8365578543552836 -PhraseModel_4 -0.5900840266009169 -PhraseModel_1 0.5312000609786991 -PhraseModel_0 -0.3872342271319619 -PhraseModel_3 -0.3728279676912084 -Glue -0.2938500000000036 -PhraseModel_6 -0.2803499999999967 -PassThrough -0.25014999999999626 -LanguageModel_OOV -0.21754999999999702 -LanguageModel 0.07306061161169894 -WordPenalty 0.09576193325966899 -PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/parallelized/work/weights.1.0 b/training/dtrain/examples/parallelized/work/weights.1.0 deleted file mode 100644 index 6a6a65c1..00000000 --- a/training/dtrain/examples/parallelized/work/weights.1.0 +++ /dev/null @@ -1,11 +0,0 @@ -WordPenalty 0.20064405063930751 -LanguageModel 0.9530439901597807 -LanguageModel_OOV -0.26400000000000112 -Glue -0.38150000000000084 -PhraseModel_0 -0.22362384322085468 -PhraseModel_1 0.12253609968953538 -PhraseModel_2 0.26328345736266612 -PhraseModel_3 0.38018406503151553 -PhraseModel_4 -0.48654149460854373 -PhraseModel_6 -0.36449999999999722 -PassThrough -0.22160000000000085 diff --git a/training/dtrain/examples/parallelized/work/weights.1.1 b/training/dtrain/examples/parallelized/work/weights.1.1 deleted file mode 100644 index f56ea4a2..00000000 --- a/training/dtrain/examples/parallelized/work/weights.1.1 +++ /dev/null @@ -1,12 +0,0 @@ -WordPenalty 0.1109188106780935 -LanguageModel 0.17269294375442074 -LanguageModel_OOV -0.13485000000000266 -Glue -0.3178000000000088 -PhraseModel_2 0.75311275661967159 -PhraseModel_1 0.38789263503268989 -PhraseModel_4 -0.58816805170415531 -PhraseModel_3 -0.38163156335114284 -PhraseModel_6 -0.27314999999999739 -PhraseModel_0 -0.45370601223484697 -PassThrough -0.16745000000000249 -PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/standard/README b/training/dtrain/examples/standard/README deleted file mode 100644 index ce37d31a..00000000 --- a/training/dtrain/examples/standard/README +++ /dev/null @@ -1,2 +0,0 @@ -Call `dtrain` from this folder with ../../dtrain -c dtrain.ini . - diff --git a/training/dtrain/examples/standard/cdec.ini b/training/dtrain/examples/standard/cdec.ini deleted file mode 100644 index 3330dd71..00000000 --- a/training/dtrain/examples/standard/cdec.ini +++ /dev/null @@ -1,27 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -scfg_max_span_limit=15 -intersection_strategy=cube_pruning -cubepruning_pop_limit=200 -grammar=nc-wmt11.grammar.gz -feature_function=WordPenalty -feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz -# all currently working feature functions for translation: -# (with those features active that were used in the ACL paper) -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -feature_function=RuleIdentityFeatures -feature_function=RuleSourceBigramFeatures -feature_function=RuleTargetBigramFeatures -feature_function=RuleShape -feature_function=LexicalFeatures 1 1 1 -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures diff --git a/training/dtrain/examples/standard/dtrain.ini b/training/dtrain/examples/standard/dtrain.ini deleted file mode 100644 index a515db02..00000000 --- a/training/dtrain/examples/standard/dtrain.ini +++ /dev/null @@ -1,27 +0,0 @@ -#input=./nc-wmt11.de.gz -#refs=./nc-wmt11.en.gz -bitext=./nc-wmt11.gz -output=- # a weights file (add .gz for gzip compression) or STDOUT '-' -select_weights=avg # output average (over epochs) weight vector -decoder_config=./cdec.ini # config for cdec -# weights for these features will be printed on each iteration -print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough -# newer version of the grammar extractor use different feature names: -#print_weights= EgivenFCoherent SampleCountF CountEF MaxLexFgivenE MaxLexEgivenF IsSingletonF IsSingletonFE Glue WordPenalty PassThrough LanguageModel LanguageModel_OOV -stop_after=10 # stop epoch after 10 inputs - -# interesting stuff -epochs=3 # run over input 3 times -k=100 # use 100best lists -N=4 # optimize (approx) BLEU4 -scorer=fixed_stupid_bleu # use 'stupid' BLEU+1 -learning_rate=0.1 # learning rate, don't care if gamma=0 (perceptron) and loss_margin=0 (not margin perceptron) -gamma=0 # use SVM reg -sample_from=kbest # use kbest lists (as opposed to forest) -filter=uniq # only unique entries in kbest (surface form) -pair_sampling=XYX # -hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here -pair_threshold=0 # minimum distance in BLEU (here: > 0) -loss_margin=0 # update if correctly ranked, but within this margin -repeat=1 # repeat training on a kbest list 1 times -#batch=true # batch tuning, update after accumulating over all sentences and all kbest lists diff --git a/training/dtrain/examples/standard/expected-output b/training/dtrain/examples/standard/expected-output deleted file mode 100644 index 2460cfbb..00000000 --- a/training/dtrain/examples/standard/expected-output +++ /dev/null @@ -1,123 +0,0 @@ - cdec cfg './cdec.ini' -Loading the LM will be faster if you build a binary file. -Reading ./nc-wmt11.en.srilm.gz -----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 -**************************************************************************************************** - Example feature: Shape_S00000_T00000 -T=1 I=1 D=1 -Seeding random number sequence to 2327685089 - -dtrain -Parameters: - k 100 - N 4 - T 3 - batch 0 - scorer 'fixed_stupid_bleu' - sample from 'kbest' - filter 'uniq' - learning rate 0.1 - gamma 0 - loss margin 0 - faster perceptron 1 - pairs 'XYX' - hi lo 0.1 - pair threshold 0 - select weights 'avg' - l1 reg 0 'none' - pclr no - max pairs 4294967295 - repeat 1 - cdec cfg './cdec.ini' - input './nc-wmt11.gz' - output '-' - stop_after 10 -(a dot represents 10 inputs) -Iteration #1 of 3. - . 10 -Stopping after 10 input sentences. -WEIGHTS - Glue = +6.9 - WordPenalty = -46.426 - LanguageModel = +535.12 - LanguageModel_OOV = -123.5 - PhraseModel_0 = -160.73 - PhraseModel_1 = -350.13 - PhraseModel_2 = -187.81 - PhraseModel_3 = +172.04 - PhraseModel_4 = +0.90108 - PhraseModel_5 = +21.6 - PhraseModel_6 = +67.2 - PassThrough = -149.7 - --- - 1best avg score: 0.23327 (+0.23327) - 1best avg model score: -9084.9 (-9084.9) - avg # pairs: 780.7 - avg # rank err: 0 (meaningless) - avg # margin viol: 0 - k-best loss imp: 100% - non0 feature count: 1389 - avg list sz: 91.3 - avg f count: 146.2 -(time 0.37 min, 2.2 s/S) - -Iteration #2 of 3. - . 10 -WEIGHTS - Glue = -43 - WordPenalty = -22.019 - LanguageModel = +591.53 - LanguageModel_OOV = -252.1 - PhraseModel_0 = -120.21 - PhraseModel_1 = -43.589 - PhraseModel_2 = +73.53 - PhraseModel_3 = +113.7 - PhraseModel_4 = -223.81 - PhraseModel_5 = +64 - PhraseModel_6 = +54.8 - PassThrough = -331.1 - --- - 1best avg score: 0.29568 (+0.062413) - 1best avg model score: -15879 (-6794.1) - avg # pairs: 566.1 - avg # rank err: 0 (meaningless) - avg # margin viol: 0 - k-best loss imp: 100% - non0 feature count: 1931 - avg list sz: 91.3 - avg f count: 139.89 -(time 0.33 min, 2 s/S) - -Iteration #3 of 3. - . 10 -WEIGHTS - Glue = -44.3 - WordPenalty = -131.85 - LanguageModel = +230.91 - LanguageModel_OOV = -285.4 - PhraseModel_0 = -194.27 - PhraseModel_1 = -294.83 - PhraseModel_2 = -92.043 - PhraseModel_3 = -140.24 - PhraseModel_4 = +85.613 - PhraseModel_5 = +238.1 - PhraseModel_6 = +158.7 - PassThrough = -359.6 - --- - 1best avg score: 0.37375 (+0.078067) - 1best avg model score: -14519 (+1359.7) - avg # pairs: 545.4 - avg # rank err: 0 (meaningless) - avg # margin viol: 0 - k-best loss imp: 100% - non0 feature count: 2218 - avg list sz: 91.3 - avg f count: 137.77 -(time 0.35 min, 2.1 s/S) - -Writing weights file to '-' ... -done - ---- -Best iteration: 3 [SCORE 'fixed_stupid_bleu'=0.37375]. -This took 1.05 min. diff --git a/training/dtrain/examples/standard/nc-wmt11.de.gz b/training/dtrain/examples/standard/nc-wmt11.de.gz Binary files differdeleted file mode 100644 index 0741fd92..00000000 --- a/training/dtrain/examples/standard/nc-wmt11.de.gz +++ /dev/null diff --git a/training/dtrain/examples/standard/nc-wmt11.en.gz b/training/dtrain/examples/standard/nc-wmt11.en.gz Binary files differdeleted file mode 100644 index 1c0bd401..00000000 --- a/training/dtrain/examples/standard/nc-wmt11.en.gz +++ /dev/null diff --git a/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz b/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz Binary files differdeleted file mode 100644 index 7ce81057..00000000 --- a/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz +++ /dev/null diff --git a/training/dtrain/examples/standard/nc-wmt11.grammar.gz b/training/dtrain/examples/standard/nc-wmt11.grammar.gz Binary files differdeleted file mode 100644 index ce4024a1..00000000 --- a/training/dtrain/examples/standard/nc-wmt11.grammar.gz +++ /dev/null diff --git a/training/dtrain/examples/standard/nc-wmt11.gz b/training/dtrain/examples/standard/nc-wmt11.gz Binary files differdeleted file mode 100644 index c39c5aef..00000000 --- a/training/dtrain/examples/standard/nc-wmt11.gz +++ /dev/null diff --git a/training/dtrain/examples/toy/cdec.ini b/training/dtrain/examples/toy/cdec.ini deleted file mode 100644 index e6c19abe..00000000 --- a/training/dtrain/examples/toy/cdec.ini +++ /dev/null @@ -1,4 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -grammar=grammar.gz -#add_extra_pass_through_features=6 diff --git a/training/dtrain/examples/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini deleted file mode 100644 index ef956df7..00000000 --- a/training/dtrain/examples/toy/dtrain.ini +++ /dev/null @@ -1,13 +0,0 @@ -decoder_config=cdec.ini -input=src -refs=tgt -output=- -print_weights=logp shell_rule house_rule small_rule little_rule PassThrough PassThrough_1 PassThrough_2 PassThrough_3 PassThrough_4 PassThrough_5 PassThrough_6 -k=4 -N=4 -epochs=2 -scorer=bleu -sample_from=kbest -filter=uniq -pair_sampling=all -learning_rate=1 diff --git a/training/dtrain/examples/toy/expected-output b/training/dtrain/examples/toy/expected-output deleted file mode 100644 index 1da2aadd..00000000 --- a/training/dtrain/examples/toy/expected-output +++ /dev/null @@ -1,77 +0,0 @@ -Warning: hi_lo only works with pair_sampling XYX. - cdec cfg 'cdec.ini' -Seeding random number sequence to 1664825829 - -dtrain -Parameters: - k 4 - N 4 - T 2 - scorer 'bleu' - sample from 'kbest' - filter 'uniq' - learning rate 1 - gamma 0 - loss margin 0 - pairs 'all' - pair threshold 0 - select weights 'last' - l1 reg 0 'none' - max pairs 4294967295 - cdec cfg 'cdec.ini' - input 'src' - refs 'tgt' - output '-' -(a dot represents 10 inputs) -Iteration #1 of 2. - 2 -WEIGHTS - logp = +0 - shell_rule = -1 - house_rule = +2 - small_rule = -2 - little_rule = +3 - PassThrough = -5 - --- - 1best avg score: 0.5 (+0.5) - 1best avg model score: 2.5 (+2.5) - avg # pairs: 4 - avg # rank err: 1.5 - avg # margin viol: 0 - non0 feature count: 6 - avg list sz: 4 - avg f count: 2.875 -(time 0 min, 0 s/S) - -Iteration #2 of 2. - 2 -WEIGHTS - logp = +0 - shell_rule = -1 - house_rule = +2 - small_rule = -2 - little_rule = +3 - PassThrough = -5 - --- - 1best avg score: 1 (+0.5) - 1best avg model score: 5 (+2.5) - avg # pairs: 5 - avg # rank err: 0 - avg # margin viol: 0 - non0 feature count: 6 - avg list sz: 4 - avg f count: 3 -(time 0 min, 0 s/S) - -Writing weights file to '-' ... -house_rule 2 -little_rule 3 -Glue -4 -PassThrough -5 -small_rule -2 -shell_rule -1 -done - ---- -Best iteration: 2 [SCORE 'bleu'=1]. -This took 0 min. diff --git a/training/dtrain/examples/toy/grammar.gz b/training/dtrain/examples/toy/grammar.gz Binary files differdeleted file mode 100644 index 8eb0d29e..00000000 --- a/training/dtrain/examples/toy/grammar.gz +++ /dev/null diff --git a/training/dtrain/examples/toy/src b/training/dtrain/examples/toy/src deleted file mode 100644 index 87e39ef2..00000000 --- a/training/dtrain/examples/toy/src +++ /dev/null @@ -1,2 +0,0 @@ -ich sah ein kleines haus -ich fand ein kleines haus diff --git a/training/dtrain/examples/toy/tgt b/training/dtrain/examples/toy/tgt deleted file mode 100644 index 174926b3..00000000 --- a/training/dtrain/examples/toy/tgt +++ /dev/null @@ -1,2 +0,0 @@ -i saw a little house -i found a little house diff --git a/training/dtrain/kbestget.h b/training/dtrain/kbestget.h deleted file mode 100644 index 85252db3..00000000 --- a/training/dtrain/kbestget.h +++ /dev/null @@ -1,88 +0,0 @@ -#ifndef _DTRAIN_KBESTGET_H_ -#define _DTRAIN_KBESTGET_H_ - -#include "kbest.h" - -namespace dtrain -{ - - -struct KBestGetter : public HypSampler -{ - const unsigned k_; - const string filter_type_; - vector<ScoredHyp> s_; - unsigned src_len_; - - KBestGetter(const unsigned k, const string filter_type) : - k_(k), filter_type_(filter_type) {} - - virtual void - NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) - { - src_len_ = smeta.GetSourceLength(); - KBestScored(*hg); - } - - vector<ScoredHyp>* GetSamples() { return &s_; } - - void - KBestScored(const Hypergraph& forest) - { - if (filter_type_ == "uniq") { - KBestUnique(forest); - } else if (filter_type_ == "not") { - KBestNoFilter(forest); - } - } - - void - KBestUnique(const Hypergraph& forest) - { - s_.clear(); sz_ = f_count_ = 0; - KBest::KBestDerivations<vector<WordID>, ESentenceTraversal, - KBest::FilterUnique, prob_t, EdgeProb> kbest(forest, k_); - for (unsigned i = 0; i < k_; ++i) { - const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal, KBest::FilterUnique, - prob_t, EdgeProb>::Derivation* d = - kbest.LazyKthBest(forest.nodes_.size() - 1, i); - if (!d) break; - ScoredHyp h; - h.w = d->yield; - h.f = d->feature_values; - h.model = log(d->score); - h.rank = i; - h.score = scorer_->Score(h.w, *ref_, i, src_len_); - s_.push_back(h); - sz_++; - f_count_ += h.f.size(); - } - } - - void - KBestNoFilter(const Hypergraph& forest) - { - s_.clear(); sz_ = f_count_ = 0; - KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(forest, k_); - for (unsigned i = 0; i < k_; ++i) { - const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d = - kbest.LazyKthBest(forest.nodes_.size() - 1, i); - if (!d) break; - ScoredHyp h; - h.w = d->yield; - h.f = d->feature_values; - h.model = log(d->score); - h.rank = i; - h.score = scorer_->Score(h.w, *ref_, i, src_len_); - s_.push_back(h); - sz_++; - f_count_ += h.f.size(); - } - } -}; - - -} // namespace - -#endif - diff --git a/training/dtrain/ksampler.h b/training/dtrain/ksampler.h deleted file mode 100644 index 29dab667..00000000 --- a/training/dtrain/ksampler.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef _DTRAIN_KSAMPLER_H_ -#define _DTRAIN_KSAMPLER_H_ - -#include "hg_sampler.h" - -namespace dtrain -{ - - -bool -cmp_hyp_by_model_d(ScoredHyp a, ScoredHyp b) -{ - return a.model > b.model; -} - -struct KSampler : public HypSampler -{ - const unsigned k_; - vector<ScoredHyp> s_; - MT19937* prng_; - score_t (*scorer)(NgramCounts&, const unsigned, const unsigned, unsigned, vector<score_t>); - unsigned src_len_; - - explicit KSampler(const unsigned k, MT19937* prng) : - k_(k), prng_(prng) {} - - virtual void - NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) - { - src_len_ = smeta.GetSourceLength(); - ScoredSamples(*hg); - } - - vector<ScoredHyp>* GetSamples() { return &s_; } - - void ScoredSamples(const Hypergraph& forest) { - s_.clear(); sz_ = f_count_ = 0; - std::vector<HypergraphSampler::Hypothesis> samples; - HypergraphSampler::sample_hypotheses(forest, k_, prng_, &samples); - for (unsigned i = 0; i < k_; ++i) { - ScoredHyp h; - h.w = samples[i].words; - h.f = samples[i].fmap; - h.model = log(samples[i].model_score); - h.rank = i; - h.score = scorer_->Score(h.w, *ref_, i, src_len_); - s_.push_back(h); - sz_++; - f_count_ += h.f.size(); - } - sort(s_.begin(), s_.end(), cmp_hyp_by_model_d); - for (unsigned i = 0; i < s_.size(); i++) s_[i].rank = i; - } -}; - - -} // namespace - -#endif - diff --git a/training/dtrain/lplp.rb b/training/dtrain/lplp.rb index 86e835e8..ac3fb758 100755 --- a/training/dtrain/lplp.rb +++ b/training/dtrain/lplp.rb @@ -1,4 +1,6 @@ -# lplp.rb +#!/usr/bin/env ruby + +require 'zipf' # norms def l0(feature_column, n) @@ -19,7 +21,8 @@ end # stats def median(feature_column, n) - return feature_column.concat(0.step(n-feature_column.size-1).map{|i|0}).sort[feature_column.size/2] + return feature_column.concat(0.step(n-feature_column.size-1).map{|i|0}) + .sort[feature_column.size/2] end def mean(feature_column, n) @@ -28,7 +31,7 @@ end # selection def select_k(weights, norm_fun, n, k=10000) - weights.sort{|a,b| norm_fun.call(b[1], n) <=> norm_fun.call(a[1], n)}.each { |p| + weights.sort{|a,b| norm_fun.call(b[1], n)<=>norm_fun.call(a[1], n)}.each { |p| puts "#{p[0]}\t#{mean(p[1], n)}" k -= 1 if k == 0 then break end @@ -84,19 +87,24 @@ def _test() end #_test() - def usage() - puts "lplp.rb <l0,l1,l2,linfty,mean,median> <cut|select_k> <k|threshold> <#shards> < <input>" + puts "lplp.rb <l0,l1,l2,linfty,mean,median,/path/to/file> <cut|select_k|feature_names> <k|threshold|--> <#shards> < <input>" puts " l0...: norms for selection" puts "select_k: only output top k (according to the norm of their column vector) features" puts " cut: output features with weight >= threshold" - puts " n: if we do not have a shard count use this number for averaging" + puts " n: number of shards for averaging" exit 1 end -if ARGV.size < 4 then usage end -norm_fun = method(ARGV[0].to_sym) +usage if ARGV.size<4 +norm_fun = nil +feature_names = nil type = ARGV[1] +if type == 'feature_names' + feature_names = ARGV[0] +else + norm_fun = method(ARGV[0].to_sym) +end x = ARGV[2].to_f shard_count = ARGV[3].to_f @@ -117,6 +125,17 @@ if type == 'cut' cut(w, norm_fun, shard_count, x) elsif type == 'select_k' select_k(w, norm_fun, shard_count, x) +elsif type == 'feature_names' + a = ReadFile.readlines_strip "#{fnames}" + h = {} + a.each { |i| + h[i] = true + } + w.each_pair { |k,v| + if h[k] + puts "#{k}\t#{mean(v, shard_count)}" + end + } else puts "oh oh" end diff --git a/training/dtrain/pairsampling.h b/training/dtrain/pairsampling.h deleted file mode 100644 index 1a3c498c..00000000 --- a/training/dtrain/pairsampling.h +++ /dev/null @@ -1,141 +0,0 @@ -#ifndef _DTRAIN_PAIRSAMPLING_H_ -#define _DTRAIN_PAIRSAMPLING_H_ - -namespace dtrain -{ - - -bool -accept_pair(score_t a, score_t b, score_t threshold) -{ - if (fabs(a - b) < threshold) return false; - return true; -} - -bool -cmp_hyp_by_score_d(ScoredHyp a, ScoredHyp b) -{ - return a.score > b.score; -} - -inline void -all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool misranked_only, float _unused=1) -{ - sort(s->begin(), s->end(), cmp_hyp_by_score_d); - unsigned sz = s->size(); - bool b = false; - unsigned count = 0; - for (unsigned i = 0; i < sz-1; i++) { - for (unsigned j = i+1; j < sz; j++) { - if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue; - if (threshold > 0) { - if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) - training.push_back(make_pair((*s)[i], (*s)[j])); - } else { - if ((*s)[i].score != (*s)[j].score) - training.push_back(make_pair((*s)[i], (*s)[j])); - } - if (++count == max) { - b = true; - break; - } - } - if (b) break; - } -} - -/* - * multipartite ranking - * sort (descending) by bleu - * compare top X to middle Y and low X - * cmp middle Y to low X - */ - -inline void -partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool misranked_only, float hi_lo) -{ - unsigned sz = s->size(); - if (sz < 2) return; - sort(s->begin(), s->end(), cmp_hyp_by_score_d); - unsigned sep = round(sz*hi_lo); - unsigned sep_hi = sep; - if (sz > 4) while (sep_hi < sz && (*s)[sep_hi-1].score == (*s)[sep_hi].score) ++sep_hi; - else sep_hi = 1; - bool b = false; - unsigned count = 0; - for (unsigned i = 0; i < sep_hi; i++) { - for (unsigned j = sep_hi; j < sz; j++) { - if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue; - if (threshold > 0) { - if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) - training.push_back(make_pair((*s)[i], (*s)[j])); - } else { - if ((*s)[i].score != (*s)[j].score) - training.push_back(make_pair((*s)[i], (*s)[j])); - } - if (++count == max) { - b = true; - break; - } - } - if (b) break; - } - unsigned sep_lo = sz-sep; - while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo; - for (unsigned i = sep_hi; i < sz-sep_lo; i++) { - for (unsigned j = sz-sep_lo; j < sz; j++) { - if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue; - if (threshold > 0) { - if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) - training.push_back(make_pair((*s)[i], (*s)[j])); - } else { - if ((*s)[i].score != (*s)[j].score) - training.push_back(make_pair((*s)[i], (*s)[j])); - } - if (++count == max) return; - } - } -} - -/* - * pair sampling as in - * 'Tuning as Ranking' (Hopkins & May, 2011) - * count = 5000 - * threshold = 5% BLEU (0.05 for param 3) - * cut = top 50 - */ -bool -_PRO_cmp_pair_by_diff_d(pair<ScoredHyp,ScoredHyp> a, pair<ScoredHyp,ScoredHyp> b) -{ - return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score)); -} -inline void -PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool _unused=false, float _also_unused=0) -{ - sort(s->begin(), s->end(), cmp_hyp_by_score_d); - unsigned max_count = 5000, count = 0, sz = s->size(); - bool b = false; - for (unsigned i = 0; i < sz-1; i++) { - for (unsigned j = i+1; j < sz; j++) { - if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) { - training.push_back(make_pair((*s)[i], (*s)[j])); - if (++count == max_count) { - b = true; - break; - } - } - } - if (b) break; - } - if (training.size() > 50) { - sort(training.begin(), training.end(), _PRO_cmp_pair_by_diff_d); - training.erase(training.begin()+50, training.end()); - } - return; -} - - -} // namespace - -#endif - diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index 82600009..3159a888 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -1,170 +1,181 @@ #!/usr/bin/env ruby require 'trollop' +require 'zipf' -def usage - STDERR.write "Usage: " - STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"] [--extra_qsub \"-l virtual_free=24G\"]\n" - exit 1 +conf = Trollop::options do + opt :conf, "dtrain configuration", :type => :string, :short => '-c' + opt :input, "input as bitext (f ||| e)", :type => :string, :short => '-i' + opt :epochs, "number of epochs", :type => :int, :default => 10, :short => '-e' + opt :randomize, "randomize shards once", :type => :bool, :default => false, :short => '-z' + opt :reshard, "randomize after each epoch", :type => :bool, :default => false, :short => '-y' + opt :shards, "number of shards", :type => :int, :short => '-s' + opt :weights, "input weights for first epoch", :type => :string, :default => '', :short => '-w' + opt :lplp_args, "arguments for lplp.rb", :type => :string, :default => "l2 select_k 100000", :short => '-l' + opt :per_shard_decoder_configs, "give custom decoder config per shard", :type => :string, :short => '-o' + opt :processes_at_once, "jobs to run at oce", :type => :int, :default => 9999, :short => '-p' + opt :qsub, "use qsub", :type => :bool, :default => false, :short => '-q' + opt :qsub_args, "extra args for qsub", :type => :string, :default => "h_vmem=5G", :short => '-r' + opt :dtrain_binary, "path to dtrain binary", :type => :string, :short => '-d' + opt :adadelta, "use adadelta", :type => :bool, :default => false, :short => '-D' end -opts = Trollop::options do - opt :config, "dtrain config file", :type => :string - opt :epochs, "number of epochs", :type => :int, :default => 10 - opt :lplp_args, "arguments for lplp.rb", :type => :string, :default => "l2 select_k 100000" - opt :randomize, "randomize shards before each epoch", :type => :bool, :short => '-z', :default => false - opt :reshard, "reshard after each epoch", :type => :bool, :short => '-y', :default => false - opt :shards, "number of shards", :type => :int - opt :processes_at_once, "have this number (max) running at the same time", :type => :int, :default => 9999 - opt :input, "input", :type => :string - opt :references, "references", :type => :string - opt :qsub, "use qsub", :type => :bool, :default => false - opt :dtrain_binary, "path to dtrain binary", :type => :string - opt :extra_qsub, "extra qsub args", :type => :string, :default => "" - opt :per_shard_decoder_configs, "give special decoder config per shard", :type => :string, :short => '-o' - opt :first_input_weights, "input weights for first iter", :type => :string, :default => '', :short => '-w' -end -usage if not opts[:config]&&opts[:shards]&&opts[:input]&&opts[:references] - dtrain_dir = File.expand_path File.dirname(__FILE__) -if not opts[:dtrain_binary] +if not conf[:dtrain_binary] dtrain_bin = "#{dtrain_dir}/dtrain" else - dtrain_bin = opts[:dtrain_binary] + dtrain_bin = conf[:dtrain_binary] end -ruby = '/usr/bin/ruby' lplp_rb = "#{dtrain_dir}/lplp.rb" -lplp_args = opts[:lplp_args] -cat = '/bin/cat' +lplp_args = conf[:lplp_args] -ini = opts[:config] -epochs = opts[:epochs] -rand = opts[:randomize] -reshard = opts[:reshard] -predefined_shards = false +dtrain_conf = conf[:conf] +epochs = conf[:epochs] +rand = conf[:randomize] +reshard = conf[:reshard] +predefined_shards = false per_shard_decoder_configs = false -if opts[:shards] == 0 +if conf[:shards] == 0 predefined_shards = true num_shards = 0 - per_shard_decoder_configs = true if opts[:per_shard_decoder_configs] + per_shard_decoder_configs = true if conf[:per_shard_decoder_configs] else - num_shards = opts[:shards] + num_shards = conf[:shards] end -input = opts[:input] -refs = opts[:references] -use_qsub = opts[:qsub] -shards_at_once = opts[:processes_at_once] -first_input_weights = opts[:first_input_weights] -opts[:extra_qsub] = "-l #{opts[:extra_qsub]}" if opts[:extra_qsub]!="" +input = conf[:input] +use_qsub = conf[:qsub] +shards_at_once = conf[:processes_at_once] +first_input_weights = conf[:weights] +use_adadelta = conf[:adadelta] `mkdir work` -def make_shards(input, refs, num_shards, epoch, rand) +def make_shards input, num_shards, epoch, rand lc = `wc -l #{input}`.split.first.to_i index = (0..lc-1).to_a index.reverse! index.shuffle! if rand shard_sz = (lc / num_shards.to_f).round 0 leftover = lc - (num_shards*shard_sz) - leftover = 0 if leftover < 0 + leftover = [0, leftover].max in_f = File.new input, 'r' in_lines = in_f.readlines - refs_f = File.new refs, 'r' - refs_lines = refs_f.readlines shard_in_files = [] - shard_refs_files = [] in_fns = [] - refs_fns = [] - new_num_shards = 0 + real_num_shards = 0 0.upto(num_shards-1) { |shard| break if index.size==0 - new_num_shards += 1 - in_fn = "work/shard.#{shard}.#{epoch}.in" - shard_in = File.new in_fn, 'w+' + real_num_shards += 1 + in_fn = "work/shard.#{shard}.#{epoch}.gz" + shard_in = WriteFile.new in_fn in_fns << in_fn - refs_fn = "work/shard.#{shard}.#{epoch}.refs" - shard_refs = File.new refs_fn, 'w+' - refs_fns << refs_fn 0.upto(shard_sz-1) { |i| j = index.pop + break if !j shard_in.write in_lines[j] - shard_refs.write refs_lines[j] } shard_in_files << shard_in - shard_refs_files << shard_refs } while leftover > 0 j = index.pop + break if !j shard_in_files[-1].write in_lines[j] - shard_refs_files[-1].write refs_lines[j] leftover -= 1 end - (shard_in_files + shard_refs_files).each do |f| f.close end + shard_in_files.each do |f| f.close end in_f.close - refs_f.close - return in_fns, refs_fns, new_num_shards + return in_fns, real_num_shards end input_files = [] -refs_files = [] if predefined_shards - input_files = File.new(input).readlines.map {|i| i.strip } - refs_files = File.new(refs).readlines.map {|i| i.strip } + input_files = File.new(input).readlines.map { |i| i.strip } if per_shard_decoder_configs - decoder_configs = File.new(opts[:per_shard_decoder_configs]).readlines.map {|i| i.strip} + decoder_configs = ReadFile.readlines_strip(conf[:per_shard_decoder_configs] + ).map { |i| i.strip } end num_shards = input_files.size else - input_files, refs_files, num_shards = make_shards input, refs, num_shards, 0, rand + input_files, num_shards = make_shards input, num_shards, 0, rand end 0.upto(epochs-1) { |epoch| puts "epoch #{epoch+1}" pids = [] input_weights = '' - if epoch > 0 then input_weights = "--input_weights work/weights.#{epoch-1}" end - weights_files = [] + input_weights = "--input_weights work/weights.#{epoch-1}.gz" if epoch>0 shard = 0 remaining_shards = num_shards while remaining_shards > 0 shards_at_once.times { break if remaining_shards==0 - qsub_str_start = qsub_str_end = '' - local_end = '' + qsub_str_start = qsub_str_end = local_end = '' if use_qsub - qsub_str_start = "qsub #{opts[:extra_qsub]} -cwd -sync y -b y -j y -o work/out.#{shard}.#{epoch} -N dtrain.#{shard}.#{epoch} \"" + qsub_str_start = "qsub -l #{conf[:qsub_args]} -cwd -sync y -b y -j y\ + -o work/out.#{shard}.#{epoch}\ + -N dtrain.#{shard}.#{epoch} \"" qsub_str_end = "\"" local_end = '' else local_end = "2>work/out.#{shard}.#{epoch}" end if per_shard_decoder_configs - cdec_cfg = "--decoder_config #{decoder_configs[shard]}" + cdec_conf = "--decoder_conf #{decoder_configs[shard]}" else - cdec_cfg = "" + cdec_conf = "" end - if first_input_weights!='' && epoch == 0 + adadelta_input = "" + adadelta_output = "" + if use_adadelta + adadelta_output = "--adadelta_output work/adadelta.#{shard}.#{epoch}" + if epoch > 0 + adadelta_input = "--adadelta_input work/adadelta.#{epoch-1}" + end + end + if first_input_weights != '' && epoch == 0 input_weights = "--input_weights #{first_input_weights}" end pids << Kernel.fork { - `#{qsub_str_start}#{dtrain_bin} -c #{ini} #{cdec_cfg} #{input_weights}\ - --input #{input_files[shard]}\ - --refs #{refs_files[shard]}\ - --output work/weights.#{shard}.#{epoch}#{qsub_str_end} #{local_end}` + `#{qsub_str_start}#{dtrain_bin} -c #{dtrain_conf} #{cdec_conf}\ + #{input_weights}\ + #{adadelta_output} #{adadelta_input}\ + --bitext #{input_files[shard]}\ + --output work/weights.#{shard}.#{epoch}.gz#{qsub_str_end} #{local_end}` } - weights_files << "work/weights.#{shard}.#{epoch}" shard += 1 remaining_shards -= 1 } pids.each { |pid| Process.wait(pid) } pids.clear end - `#{cat} work/weights.*.#{epoch} > work/weights_cat` - `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat > work/weights.#{epoch}` + `zcat work/weights.*.#{epoch}.gz \ + | ruby #{lplp_rb} #{lplp_args} #{num_shards} \ + | gzip -c \ + > work/weights.#{epoch}.gz` + if use_adadelta + h = {} + ReadFile.readlines_strip("work/weights.#{epoch}.gz").map { |line| + h[line.split.first] = true + } + max = (2**(0.size * 8 -2) -1) + ["gradient", "update"].each { |i| + `zcat work/adadelta.*.#{epoch}.#{i}.gz \ + | ruby #{lplp_rb} l0 select_k #{max} #{num_shards} \ + | gzip -c \ + > work/adadelta_avg.#{i}.gz` + o = WriteFile.new "work/adadelta.#{epoch}.#{i}.gz" + ReadFile.readlines_strip("work/adadelta_avg.#{i}.gz").each { |line| + k,v = line.split + if h.has_key? k + o.write "#{k} #{v}\n" + end + } + `rm work/adadelta_avg.#{i}.gz` + o.close + } + end if rand and reshard and epoch+1!=epochs - input_files, refs_files, num_shards = make_shards input, refs, num_shards, epoch+1, rand + input_files, num_shards = make_shards input, num_shards, epoch+1, rand end } -`rm work/weights_cat` - diff --git a/training/dtrain/sample.h b/training/dtrain/sample.h new file mode 100644 index 00000000..860904fd --- /dev/null +++ b/training/dtrain/sample.h @@ -0,0 +1,131 @@ +#ifndef _DTRAIN_SAMPLE_H_ +#define _DTRAIN_SAMPLE_H_ + +#include "kbest.h" +#include "hg_sampler.h" + +#include "score.h" + +namespace dtrain +{ + +struct HypSampler : public DecoderObserver +{ + size_t feature_count, effective_size; + vector<Hyp> sample; + vector<Ngrams>* reference_ngrams; + vector<size_t>* reference_lengths; + + void + reset() + { + sample.clear(); + effective_size = feature_count = 0; + } +}; + +struct KBestSampler : public HypSampler +{ + size_t k; + bool unique; + Scorer* scorer; + + KBestSampler() {} + KBestSampler(const size_t k, Scorer* scorer) : + k(k), scorer(scorer) {} + + virtual void + NotifyTranslationForest(const SentenceMetadata& /*smeta*/, Hypergraph* hg) + { + reset(); + KBest::KBestDerivations<vector<WordID>, ESentenceTraversal, + KBest::FilterUnique, prob_t, EdgeProb> kbest(*hg, k); + for (size_t i=0; i<k; ++i) { + KBest::KBestDerivations<vector<WordID>, ESentenceTraversal, + KBest::FilterUnique, prob_t, EdgeProb>::Derivation* d = + kbest.LazyKthBest(hg->nodes_.size() - 1, i); + if (!d) break; + sample.emplace_back( + d->yield, + d->feature_values, + log(d->score), + scorer->score(d->yield, *reference_ngrams, *reference_lengths), + i + ); + effective_size++; + feature_count += sample.back().f.size(); + } + } +}; + +struct KBestNoFilterSampler : public KBestSampler +{ + size_t k; + bool unique; + Scorer* scorer; + + KBestNoFilterSampler(const size_t k, Scorer* scorer) : + k(k), scorer(scorer) {} + + virtual void + NotifyTranslationForest(const SentenceMetadata& /*smeta*/, Hypergraph* hg) + { + reset(); + KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(*hg, k); + for (size_t i=0; i<k; ++i) { + const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d = + kbest.LazyKthBest(hg->nodes_.size() - 1, i); + if (!d) break; + sample.emplace_back( + d->yield, + d->feature_values, + log(d->score), + scorer->score(d->yield, *reference_ngrams, *reference_lengths), + i + ); + effective_size++; + feature_count += sample.back().f.size(); + } + } +}; + +struct KSampler : public HypSampler +{ + const size_t k; + Scorer* scorer; + MT19937 rng; + + explicit KSampler(const unsigned k, Scorer* scorer) : + k(k), scorer(scorer) {} + + virtual void + NotifyTranslationForest(const SentenceMetadata& /*smeta*/, Hypergraph* hg) + { + reset(); + std::vector<HypergraphSampler::Hypothesis> hs; + HypergraphSampler::sample_hypotheses(*hg, k, &rng, &hs); + for (size_t i=0; i<k; ++i) { + sample.emplace_back( + hs[i].words, + hs[i].fmap, + log(hs[i].model_score), + 0, + 0 + ); + effective_size++; + feature_count += sample.back().f.size(); + } + sort(sample.begin(), sample.end(), [](Hyp first, Hyp second) { + return first.model > second.model; + }); + for (unsigned i=0; i<sample.size(); i++) { + sample[i].rank=i; + scorer->score(sample[i].w, *reference_ngrams, *reference_lengths); + } + } +}; + +} // namespace + +#endif + diff --git a/training/dtrain/score.cc b/training/dtrain/score.cc deleted file mode 100644 index 127f34d2..00000000 --- a/training/dtrain/score.cc +++ /dev/null @@ -1,283 +0,0 @@ -#include "score.h" - -namespace dtrain -{ - - -/* - * bleu - * - * as in "BLEU: a Method for Automatic Evaluation - * of Machine Translation" - * (Papineni et al. '02) - * - * NOTE: 0 if for one n \in {1..N} count is 0 - */ -score_t -BleuScorer::Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len) -{ - if (hyp_len == 0 || ref_len == 0) return 0.; - unsigned M = N_; - vector<score_t> v = w_; - if (ref_len < N_) { - M = ref_len; - for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); - } - score_t sum = 0; - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) return 0.; - sum += v[i] * log((score_t)counts.clipped_[i]/counts.sum_[i]); - } - return brevity_penalty(hyp_len, ref_len) * exp(sum); -} - -score_t -BleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, ref, N_); - return Bleu(counts, hyp_len, ref_len); -} - -/* - * 'stupid' bleu - * - * as in "ORANGE: a Method for Evaluating - * Automatic Evaluation Metrics - * for Machine Translation" - * (Lin & Och '04) - * - * NOTE: 0 iff no 1gram match ('grounded') - */ -score_t -StupidBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, ref, N_); - unsigned M = N_; - vector<score_t> v = w_; - if (ref_len < N_) { - M = ref_len; - for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); - } - score_t sum = 0, add = 0; - for (unsigned i = 0; i < M; i++) { - if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; - if (i == 1) add = 1; - sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add))); - } - return brevity_penalty(hyp_len, ref_len) * exp(sum); -} - -/* - * fixed 'stupid' bleu - * - * as in "Optimizing for Sentence-Level BLEU+1 - * Yields Short Translations" - * (Nakov et al. '12) - */ -score_t -FixedStupidBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, ref, N_); - unsigned M = N_; - vector<score_t> v = w_; - if (ref_len < N_) { - M = ref_len; - for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); - } - score_t sum = 0, add = 0; - for (unsigned i = 0; i < M; i++) { - if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; - if (i == 1) add = 1; - sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add))); - } - return brevity_penalty(hyp_len, ref_len+1) * exp(sum); // <- fix -} - -/* - * smooth bleu - * - * as in "An End-to-End Discriminative Approach - * to Machine Translation" - * (Liang et al. '06) - * - * NOTE: max is 0.9375 (with N=4) - */ -score_t -SmoothBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, ref, N_); - unsigned M = N_; - if (ref_len < N_) M = ref_len; - score_t sum = 0.; - vector<score_t> i_bleu; - for (unsigned i = 0; i < M; i++) i_bleu.push_back(0.); - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) { - break; - } else { - score_t i_ng = log((score_t)counts.clipped_[i]/counts.sum_[i]); - for (unsigned j = i; j < M; j++) { - i_bleu[j] += (1/((score_t)j+1)) * i_ng; - } - } - sum += exp(i_bleu[i])/pow(2.0, (double)(N_-i)); - } - return brevity_penalty(hyp_len, ref_len) * sum; -} - -/* - * 'sum' bleu - * - * sum up Ngram precisions - */ -score_t -SumBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, ref, N_); - unsigned M = N_; - if (ref_len < N_) M = ref_len; - score_t sum = 0.; - unsigned j = 1; - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; - sum += ((score_t)counts.clipped_[i]/counts.sum_[i])/pow(2.0, (double) (N_-j+1)); - j++; - } - return brevity_penalty(hyp_len, ref_len) * sum; -} - -/* - * 'sum' (exp) bleu - * - * sum up exp(Ngram precisions) - */ -score_t -SumExpBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, ref, N_); - unsigned M = N_; - if (ref_len < N_) M = ref_len; - score_t sum = 0.; - unsigned j = 1; - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; - sum += exp(((score_t)counts.clipped_[i]/counts.sum_[i]))/pow(2.0, (double) (N_-j+1)); - j++; - } - return brevity_penalty(hyp_len, ref_len) * sum; -} - -/* - * 'sum' (whatever) bleu - * - * sum up exp(weight * log(Ngram precisions)) - */ -score_t -SumWhateverBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, ref, N_); - unsigned M = N_; - vector<score_t> v = w_; - if (ref_len < N_) { - M = ref_len; - for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); - } - score_t sum = 0.; - unsigned j = 1; - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; - sum += exp(v[i] * log(((score_t)counts.clipped_[i]/counts.sum_[i])))/pow(2.0, (double) (N_-j+1)); - j++; - } - return brevity_penalty(hyp_len, ref_len) * sum; -} - -/* - * approx. bleu - * - * as in "Online Large-Margin Training of Syntactic - * and Structural Translation Features" - * (Chiang et al. '08) - * - * NOTE: Needs some more code in dtrain.cc . - * No scaling by src len. - */ -score_t -ApproxBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref, - const unsigned rank, const unsigned src_len) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (ref_len == 0) return 0.; - score_t score = 0.; - NgramCounts counts(N_); - if (hyp_len > 0) { - counts = make_ngram_counts(hyp, ref, N_); - NgramCounts tmp = glob_onebest_counts_ + counts; - score = Bleu(tmp, hyp_len, ref_len); - } - if (rank == 0) { // 'context of 1best translations' - glob_onebest_counts_ += counts; - glob_onebest_counts_ *= discount_; - glob_hyp_len_ = discount_ * (glob_hyp_len_ + hyp_len); - glob_ref_len_ = discount_ * (glob_ref_len_ + ref_len); - glob_src_len_ = discount_ * (glob_src_len_ + src_len); - } - return score; -} - -/* - * Linear (Corpus) Bleu - * - * as in "Lattice Minimum Bayes-Risk Decoding - * for Statistical Machine Translation" - * (Tromble et al. '08) - * - */ -score_t -LinearBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref, - const unsigned rank, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (ref_len == 0) return 0.; - unsigned M = N_; - if (ref_len < N_) M = ref_len; - NgramCounts counts(M); - if (hyp_len > 0) - counts = make_ngram_counts(hyp, ref, M); - score_t ret = 0.; - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || onebest_counts_.sum_[i] == 0) break; - ret += counts.sum_[i]/onebest_counts_.sum_[i]; - } - ret = -(hyp_len/(score_t)onebest_len_) + (1./M) * ret; - if (rank == 0) { - onebest_len_ += hyp_len; - onebest_counts_ += counts; - } - return ret; -} - - -} // namespace - diff --git a/training/dtrain/score.h b/training/dtrain/score.h index 1cdd3fa9..748b3317 100644 --- a/training/dtrain/score.h +++ b/training/dtrain/score.h @@ -6,215 +6,480 @@ namespace dtrain { - struct NgramCounts { - unsigned N_; - map<unsigned, score_t> clipped_; - map<unsigned, score_t> sum_; + size_t N_; + map<size_t, weight_t> clipped; + map<size_t, weight_t> sum; + + NgramCounts() {} - NgramCounts(const unsigned N) : N_(N) { Zero(); } + NgramCounts(const size_t N) : N_(N) { zero(); } inline void operator+=(const NgramCounts& rhs) { - if (rhs.N_ > N_) Resize(rhs.N_); - for (unsigned i = 0; i < N_; i++) { - this->clipped_[i] += rhs.clipped_.find(i)->second; - this->sum_[i] += rhs.sum_.find(i)->second; + if (rhs.N_ > N_) resize(rhs.N_); + for (size_t i = 0; i < N_; i++) { + this->clipped[i] += rhs.clipped.find(i)->second; + this->sum[i] += rhs.sum.find(i)->second; } } - inline const NgramCounts - operator+(const NgramCounts &other) const - { - NgramCounts result = *this; - result += other; - return result; - } - inline void - operator*=(const score_t rhs) + operator*=(const weight_t rhs) { - for (unsigned i = 0; i < N_; i++) { - this->clipped_[i] *= rhs; - this->sum_[i] *= rhs; + for (size_t i=0; i<N_; i++) { + this->clipped[i] *= rhs; + this->sum[i] *= rhs; } } inline void - Add(const unsigned count, const unsigned ref_count, const unsigned i) + add(const size_t count, + const size_t count_ref, + const size_t i) { assert(i < N_); - if (count > ref_count) { - clipped_[i] += ref_count; + if (count > count_ref) { + clipped[i] += count_ref; } else { - clipped_[i] += count; + clipped[i] += count; } - sum_[i] += count; + sum[i] += count; } inline void - Zero() + zero() { - for (unsigned i = 0; i < N_; i++) { - clipped_[i] = 0.; - sum_[i] = 0.; + for (size_t i=0; i<N_; i++) { + clipped[i] = 0.; + sum[i] = 0.; } } inline void - One() + one() { - for (unsigned i = 0; i < N_; i++) { - clipped_[i] = 1.; - sum_[i] = 1.; + for (size_t i=0; i<N_; i++) { + clipped[i] = 1.; + sum[i] = 1.; } } inline void - Print() - { - for (unsigned i = 0; i < N_; i++) { - cout << i+1 << "grams (clipped):\t" << clipped_[i] << endl; - cout << i+1 << "grams:\t\t\t" << sum_[i] << endl; - } - } - - inline void Resize(unsigned N) + resize(size_t N) { if (N == N_) return; else if (N > N_) { - for (unsigned i = N_; i < N; i++) { - clipped_[i] = 0.; - sum_[i] = 0.; + for (size_t i = N_; i < N; i++) { + clipped[i] = 0.; + sum[i] = 0.; } } else { // N < N_ - for (unsigned i = N_-1; i > N-1; i--) { - clipped_.erase(i); - sum_.erase(i); + for (size_t i = N_-1; i > N-1; i--) { + clipped.erase(i); + sum.erase(i); } } N_ = N; } }; -typedef map<vector<WordID>, unsigned> Ngrams; +typedef map<vector<WordID>, size_t> Ngrams; inline Ngrams -make_ngrams(const vector<WordID>& s, const unsigned N) +ngrams(const vector<WordID>& vw, + const size_t N) { - Ngrams ngrams; + Ngrams r; vector<WordID> ng; - for (size_t i = 0; i < s.size(); i++) { + for (size_t i=0; i<vw.size(); i++) { ng.clear(); - for (unsigned j = i; j < min(i+N, s.size()); j++) { - ng.push_back(s[j]); - ngrams[ng]++; + for (size_t j=i; j<min(i+N, vw.size()); j++) { + ng.push_back(vw[j]); + r[ng]++; } } - return ngrams; + + return r; } inline NgramCounts -make_ngram_counts(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned N) +ngram_counts(const vector<WordID>& hyp, + const vector<Ngrams>& ngrams_ref, + const size_t N) { - Ngrams hyp_ngrams = make_ngrams(hyp, N); - Ngrams ref_ngrams = make_ngrams(ref, N); + Ngrams ngrams_hyp = ngrams(hyp, N); NgramCounts counts(N); - Ngrams::iterator it; - Ngrams::iterator ti; - for (it = hyp_ngrams.begin(); it != hyp_ngrams.end(); it++) { - ti = ref_ngrams.find(it->first); - if (ti != ref_ngrams.end()) { - counts.Add(it->second, ti->second, it->first.size() - 1); - } else { - counts.Add(it->second, 0, it->first.size() - 1); + Ngrams::iterator it, ti; + for (it = ngrams_hyp.begin(); it != ngrams_hyp.end(); it++) { + size_t max_ref_count = 0; + for (auto r: ngrams_ref) { + ti = r.find(it->first); + if (ti != r.end()) + max_ref_count = max(max_ref_count, ti->second); } + counts.add(it->second, min(it->second, max_ref_count), it->first.size()-1); } + return counts; } -struct BleuScorer : public LocalScorer +class Scorer { - score_t Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len); - score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); - void Reset() {} -}; + protected: + const size_t N_; + vector<weight_t> w_; + + public: + Scorer(size_t n): N_(n) + { + for (size_t i = 1; i <= N_; i++) + w_.push_back(1.0/N_); + } -struct StupidBleuScorer : public LocalScorer -{ - score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); - void Reset() {} -}; + inline bool + init(const vector<WordID>& hyp, + const vector<Ngrams>& reference_ngrams, + const vector<size_t>& reference_lengths, + size_t& hl, + size_t& rl, + size_t& M, + vector<weight_t>& v, + NgramCounts& counts) + { + hl = hyp.size(); + if (hl == 0) + return false; + rl = best_match_length(hl, reference_lengths); + if (rl == 0) + return false; + counts = ngram_counts(hyp, reference_ngrams, N_); + if (rl < N_) { + M = rl; + for (size_t i = 0; i < M; i++) v.push_back(1/((weight_t)M)); + } else { + M = N_; + v = w_; + } -struct FixedStupidBleuScorer : public LocalScorer -{ - score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); - void Reset() {} + return true; + } + + inline weight_t + brevity_penalty(const size_t hl, + const size_t rl) + { + if (hl > rl) + return 1; + + return exp(1 - (weight_t)rl/hl); + } + + inline size_t + best_match_length(const size_t hl, + const vector<size_t>& reference_lengths) + { + size_t m; + if (reference_lengths.size() == 1) { + m = reference_lengths.front(); + } else { + size_t i = 0, best_idx = 0; + size_t best = numeric_limits<size_t>::max(); + for (auto l: reference_lengths) { + size_t d = abs(hl-l); + if (d < best) { + best_idx = i; + best = d; + } + i += 1; + } + m = reference_lengths[best_idx]; + } + + return m; + } + + virtual weight_t + score(const vector<WordID>&, + const vector<Ngrams>&, + const vector<size_t>&) = 0; + + void + update_context(const vector<WordID>& /*hyp*/, + const vector<Ngrams>& /*reference_ngrams*/, + const vector<size_t>& /*reference_lengths*/, + weight_t /*decay*/) {} }; -struct SmoothBleuScorer : public LocalScorer +/* + * ['fixed'] per-sentence BLEU + * simply add 'fix' (1) to reference length for calculation of BP + * to avoid short translations + * + * as in "Optimizing for Sentence-Level BLEU+1 + * Yields Short Translations" + * (Nakov et al. '12) + * + */ +class NakovBleuScorer : public Scorer { - score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); - void Reset() {} + weight_t fix; + + public: + NakovBleuScorer(size_t n, weight_t fix) : Scorer(n), fix(fix) {} + + weight_t + score(const vector<WordID>& hyp, + const vector<Ngrams>& reference_ngrams, + const vector<size_t>& reference_lengths) + { + size_t hl, rl, M; + vector<weight_t> v; + NgramCounts counts; + if (!init(hyp, reference_ngrams, reference_lengths, hl, rl, M, v, counts)) + return 0.; + weight_t sum=0, add=0; + for (size_t i=0; i<M; i++) { + if (i == 0 && (counts.sum[i]==0 || counts.clipped[i]==0)) return 0.; + if (i > 0) add = 1; + sum += v[i] * log(((weight_t)counts.clipped[i] + add) + / ((counts.sum[i] + add))); + } + + return brevity_penalty(hl, rl+1) * exp(sum); + } }; -struct SumBleuScorer : public LocalScorer +/* + * BLEU + * 0 if for one n \in {1..N} count is 0 + * + * as in "BLEU: a Method for Automatic Evaluation + * of Machine Translation" + * (Papineni et al. '02) + * + */ +class PapineniBleuScorer : public Scorer { - score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); - void Reset() {} + public: + PapineniBleuScorer(size_t n) : Scorer(n) {} + + weight_t + score(const vector<WordID>& hyp, + const vector<Ngrams>& reference_ngrams, + const vector<size_t>& reference_lengths) + { + size_t hl, rl, M; + vector<weight_t> v; + NgramCounts counts; + if (!init(hyp, reference_ngrams, reference_lengths, hl, rl, M, v, counts)) + return 0.; + weight_t sum = 0; + for (size_t i=0; i<M; i++) { + if (counts.sum[i] == 0 || counts.clipped[i] == 0) return 0.; + sum += v[i] * log((weight_t)counts.clipped[i]/counts.sum[i]); + } + + return brevity_penalty(hl, rl) * exp(sum); + } }; -struct SumExpBleuScorer : public LocalScorer +/* + * original BLEU+1 + * 0 iff no 1gram match ('grounded') + * + * as in "ORANGE: a Method for Evaluating + * Automatic Evaluation Metrics + * for Machine Translation" + * (Lin & Och '04) + * + */ +class LinBleuScorer : public Scorer { - score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); - void Reset() {} + public: + LinBleuScorer(size_t n) : Scorer(n) {} + + weight_t + score(const vector<WordID>& hyp, + const vector<Ngrams>& reference_ngrams, + const vector<size_t>& reference_lengths) + { + size_t hl, rl, M; + vector<weight_t> v; + NgramCounts counts; + if (!init(hyp, reference_ngrams, reference_lengths, hl, rl, M, v, counts)) + return 0.; + weight_t sum=0, add=0; + for (size_t i=0; i<M; i++) { + if (i == 0 && (counts.sum[i]==0 || counts.clipped[i]==0)) return 0.; + if (i == 1) add = 1; + sum += v[i] * log(((weight_t)counts.clipped[i] + add) + / ((counts.sum[i] + add))); + } + + return brevity_penalty(hl, rl) * exp(sum); + } }; -struct SumWhateverBleuScorer : public LocalScorer +/* + * smooth BLEU + * max is 0.9375 (with N=4) + * + * as in "An End-to-End Discriminative Approach + * to Machine Translation" + * (Liang et al. '06) + * + */ +class LiangBleuScorer : public Scorer { - score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/); - void Reset() {}; + public: + LiangBleuScorer(size_t n) : Scorer(n) {} + + weight_t + score(const vector<WordID>& hyp, + const vector<Ngrams>& reference_ngrams, + const vector<size_t>& reference_lengths) + { + size_t hl=hyp.size(), rl=best_match_length(hl, reference_lengths); + if (hl == 0 || rl == 0) return 0.; + NgramCounts counts = ngram_counts(hyp, reference_ngrams, N_); + size_t M = N_; + if (rl < N_) M = rl; + weight_t sum = 0.; + vector<weight_t> i_bleu; + for (size_t i=0; i<M; i++) + i_bleu.push_back(0.); + for (size_t i=0; i<M; i++) { + if (counts.sum[i]==0 || counts.clipped[i]==0) { + break; + } else { + weight_t i_score = log((weight_t)counts.clipped[i]/counts.sum[i]); + for (size_t j=i; j<M; j++) { + i_bleu[j] += (1/((weight_t)j+1)) * i_score; + } + } + sum += exp(i_bleu[i])/pow(2.0, (double)(N_-i)); + } + + return brevity_penalty(hl, rl) * sum; + } }; -struct ApproxBleuScorer : public BleuScorer +/* + * approx. bleu + * Needs some more code in dtrain.cc . + * We do not scale by source length, as hypotheses are compared only + * within single k-best lists, not globally (as in batch algorithms). + * TODO: reset after one iteration? + * TODO: maybe scale by source length? + * + * as in "Online Large-Margin Training of Syntactic + * and Structural Translation Features" + * (Chiang et al. '08) + * + */ +class ChiangBleuScorer : public Scorer { - NgramCounts glob_onebest_counts_; - unsigned glob_hyp_len_, glob_ref_len_, glob_src_len_; - score_t discount_; - - ApproxBleuScorer(unsigned N, score_t d) : glob_onebest_counts_(NgramCounts(N)), discount_(d) - { - glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0; - } + private: + NgramCounts context; + weight_t hyp_sz_sum; + weight_t ref_sz_sum; + + public: + ChiangBleuScorer(size_t n) : + Scorer(n), context(n), hyp_sz_sum(0), ref_sz_sum(0) {} + + weight_t + score(const vector<WordID>& hyp, + const vector<Ngrams>& reference_ngrams, + const vector<size_t>& reference_lengths) + { + size_t hl, rl, M; + vector<weight_t> v; + NgramCounts counts; + if (!init(hyp, reference_ngrams, reference_lengths, hl, rl, M, v, counts)) + return 0.; + counts += context; + weight_t sum = 0; + for (size_t i = 0; i < M; i++) { + if (counts.sum[i]==0 || counts.clipped[i]==0) return 0.; + sum += v[i] * log((weight_t)counts.clipped[i] / counts.sum[i]); + } - inline void Reset() { - glob_onebest_counts_.Zero(); - glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0.; - } + return brevity_penalty(hyp_sz_sum+hl, ref_sz_sum+rl) * exp(sum); + } - score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned rank, const unsigned src_len); + void + update_context(const vector<WordID>& hyp, + const vector<Ngrams>& reference_ngrams, + const vector<size_t>& reference_lengths, + weight_t decay=0.9) + { + size_t hl, rl, M; + vector<weight_t> v; + NgramCounts counts; + init(hyp, reference_ngrams, reference_lengths, hl, rl, M, v, counts); + + context += counts; + context *= decay; + hyp_sz_sum += hl; + hyp_sz_sum *= decay; + ref_sz_sum += rl; + ref_sz_sum *= decay; + } }; -struct LinearBleuScorer : public BleuScorer +/* + * 'sum' bleu + * + * Merely sum up Ngram precisions + */ +class SumBleuScorer : public Scorer { - unsigned onebest_len_; - NgramCounts onebest_counts_; - - LinearBleuScorer(unsigned N) : onebest_len_(1), onebest_counts_(N) - { - onebest_counts_.One(); - } - - score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned rank, const unsigned /*src_len*/); + public: + SumBleuScorer(size_t n) : Scorer(n) {} + + weight_t + score(const vector<WordID>& hyp, + const vector<Ngrams>& reference_ngrams, + const vector<size_t>& reference_lengths) + { + size_t hl, rl, M; + vector<weight_t> v; + NgramCounts counts; + if (!init(hyp, reference_ngrams, reference_lengths, hl, rl, M, v, counts)) + return 0.; + weight_t sum = 0.; + size_t j = 1; + for (size_t i=0; i<M; i++) { + if (counts.sum[i]==0 || counts.clipped[i]==0) break; + sum += ((weight_t)counts.clipped[i]/counts.sum[i]) + / pow(2.0, (weight_t) (N_-j+1)); + //sum += exp(((score_t)counts.clipped[i]/counts.sum[i])) + // / pow(2.0, (weight_t) (N_-j+1)); + //sum += exp(v[i] * log(((score_t)counts.clipped[i]/counts.sum[i]))) + // / pow(2.0, (weight_t) (N_-j+1)); + j++; + } - inline void Reset() { - onebest_len_ = 1; - onebest_counts_.One(); - } + return brevity_penalty(hl, rl) * sum; + } }; +/* + * Linear (Corpus) Bleu + * TODO + * + * as in "Lattice Minimum Bayes-Risk Decoding + * for Statistical Machine Translation" + * (Tromble et al. '08) + * or "Hope and fear for discriminative training of + * statistical translation models" + * (Chiang '12) + * + */ } // namespace diff --git a/training/dtrain/update.h b/training/dtrain/update.h new file mode 100644 index 00000000..405a3f76 --- /dev/null +++ b/training/dtrain/update.h @@ -0,0 +1,235 @@ +#ifndef _DTRAIN_UPDATE_H_ +#define _DTRAIN_UPDATE_H_ + +namespace dtrain +{ + +/* + * multipartite [multi=3] ranking + * partitions are determined by the 'cut' parameter + * 0. sort sample (descending) by bleu + * 1. compare top X(=sz*cut) to middle Y(=sz-2*(sz*cut)) and bottom X + * -"- middle Y to bottom X + * + */ +inline size_t +updates_multipartite(vector<Hyp>* sample, + SparseVector<weight_t>& updates, + weight_t cut, + weight_t margin, + size_t max_up, + weight_t threshold, + bool adjust, + WriteFile& output, + size_t id) +{ + size_t up = 0; + size_t sz = sample->size(); + if (sz < 2) return 0; + sort(sample->begin(), sample->end(), [](Hyp first, Hyp second) + { + return first.gold > second.gold; + }); + size_t sep = round(sz*cut); + + size_t sep_hi = sep; + if (adjust) { + if (sz > 4) { + while (sep_hi<sz && (*sample)[sep_hi-1].gold==(*sample)[sep_hi].gold) + ++sep_hi; + } else { + sep_hi = 1; + } + } + for (size_t i = 0; i < sep_hi; i++) { + for (size_t j = sep_hi; j < sz; j++) { + Hyp& first=(*sample)[i], second=(*sample)[j]; + if ((first.model-second.model)>margin + || (first.gold==second.gold) + || (threshold && (first.gold-second.gold < threshold))) + continue; + if (output) + *output << id << "\t" << first.f-second.f << endl; + updates += first.f-second.f; + if (++up==max_up) + return up; + } + } + + size_t sep_lo = sz-sep; + if (adjust) { + while (sep_lo>0 && (*sample)[sep_lo-1].gold==(*sample)[sep_lo].gold) + --sep_lo; + } + for (size_t i = sep_hi; i < sep_lo; i++) { + for (size_t j = sep_lo; j < sz; j++) { + Hyp& first=(*sample)[i], second=(*sample)[j]; + if ((first.model-second.model)>margin + || (first.gold==second.gold) + || (threshold && (first.gold-second.gold < threshold))) + continue; + if (output) + *output << id << "\t" << first.f-second.f << endl; + updates += first.f-second.f; + if (++up==max_up) + break; + } + } + + return up; +} + +/* + * all pairs + * only ignore a pair if gold scores are + * identical + * + */ +inline size_t +updates_all(vector<Hyp>* sample, + SparseVector<weight_t>& updates, + size_t max_up, + weight_t threshold, + WriteFile output, + size_t id) +{ + size_t up = 0; + size_t sz = sample->size(); + sort(sample->begin(), sample->end(), [](Hyp first, Hyp second) + { + return first.gold > second.gold; + }); + for (size_t i = 0; i < sz-1; i++) { + for (size_t j = i+1; j < sz; j++) { + Hyp& first=(*sample)[i], second=(*sample)[j]; + if ((first.gold == second.gold) + || (threshold && (first.gold-second.gold < threshold))) + continue; + if (output) + *output << id << "\t" << first.f-second.f << endl; + updates += first.f-second.f; + if (++up==max_up) + break; + } + } + + return up; +} + +/* + * hope/fear + * just one pair: hope - fear + * + */ +inline size_t +update_structured(vector<Hyp>* sample, + SparseVector<weight_t>& updates, + weight_t margin, + WriteFile output, + size_t id) +{ + // hope + sort(sample->begin(), sample->end(), [](Hyp first, Hyp second) + { + return (first.model+first.gold) > (second.model+second.gold); + }); + Hyp hope = (*sample)[0]; + // fear + sort(sample->begin(), sample->end(), [](Hyp first, Hyp second) + { + return (first.model-first.gold) > (second.model-second.gold); + }); + Hyp fear = (*sample)[0]; + + if (hope.gold != fear.gold) { + updates += hope.f - fear.f; + if (output) + *output << id << "\t" << hope.f << "\t" << fear.f << endl; + + return 1; + } + + if (output) + *output << endl; + + return 0; +} + + +/* + * pair sampling as in + * 'Tuning as Ranking' (Hopkins & May, 2011) + * count = 5000 [maxs] + * threshold = 5% BLEU [threshold=0.05] + * cut = top 50 [max_up] + */ +inline size_t +updates_pro(vector<Hyp>* sample, + SparseVector<weight_t>& updates, + size_t maxs, + size_t max_up, + weight_t threshold, + WriteFile& output, + size_t id) +{ + + size_t sz = sample->size(), s; + vector<pair<Hyp*,Hyp*> > g; + while (s < maxs) { + size_t i=rand()%sz, j=rand()%sz; + Hyp& first=(*sample)[i], second=(*sample)[j]; + if (i==j || fabs(first.gold-second.gold)<threshold) + continue; + if (first.gold > second.gold) + g.emplace_back(make_pair(&first,&second)); + else + g.emplace_back(make_pair(&second,&first)); + s++; + } + + if (g.size() > max_up) { + sort(g.begin(), g.end(), [](pair<Hyp*,Hyp*> a, pair<Hyp*,Hyp*> b) + { + return fabs(a.first->gold-a.second->gold) + > fabs(b.first->gold-b.second->gold); + }); + g.erase(g.begin()+max_up, g.end()); + } + + for (auto i: g) { + if (output) + *output << id << "\t" << i.first->f-i.second->f << endl; + updates += i.first->f-i.second->f; + } + + return g.size(); +} + +/* + * output (sorted) items in sample (k-best list) + * + */ +inline void +output_sample(vector<Hyp>* sample, + WriteFile& output, + size_t id=0, + bool sorted=true) +{ + if (sorted) { + sort(sample->begin(), sample->end(), [](Hyp first, Hyp second) + { + return first.gold > second.gold; + }); + } + size_t j = 0; + for (auto k: *sample) { + *output << id << "\t" << j << "\t" << k.gold << "\t" << k.model + << "\t" << k.f << endl; + j++; + } +} + +} // namespace + +#endif + diff --git a/training/mira/kbest_cut_mira.cc b/training/mira/kbest_cut_mira.cc index 5d8385c2..353ebe0e 100644 --- a/training/mira/kbest_cut_mira.cc +++ b/training/mira/kbest_cut_mira.cc @@ -96,7 +96,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { ("weights_output,O",po::value<string>(),"Directory to write weights to") ("output_dir,D",po::value<string>(),"Directory to place output in") ("decoder_config,c",po::value<string>(),"Decoder configuration file") - ("verbose,v",po::value<bool>()->zero_tokens(),"verbose stderr output"); + ("verbose,v",po::value<bool>()->zero_tokens(),"Verbose stderr output"); po::options_description clo("Command line options"); clo.add_options() ("config", po::value<string>(), "Configuration file") @@ -104,7 +104,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description dconfig_options, dcmdline_options; dconfig_options.add(opts); dcmdline_options.add(opts).add(clo); - + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); if (conf->count("config")) { ifstream config((*conf)["config"].as<string>().c_str()); @@ -229,14 +229,15 @@ void CuttingPlane(vector<boost::shared_ptr<HypothesisInfo> >* cur_c, bool* again } -double ComputeDelta(vector<boost::shared_ptr<HypothesisInfo> >* cur_p, double max_step_size,vector<weight_t> dense_weights ) +double ComputeDelta(vector<boost::shared_ptr<HypothesisInfo> >* cur_p, double max_step_size,vector<weight_t> dense_weights, bool verbose = true ) { vector<boost::shared_ptr<HypothesisInfo> >& cur_pair = *cur_p; double loss = cur_pair[0]->oracle_loss - cur_pair[1]->oracle_loss; double margin = -(cur_pair[0]->oracleN->features.dot(dense_weights)- cur_pair[0]->features.dot(dense_weights)) + (cur_pair[1]->oracleN->features.dot(dense_weights) - cur_pair[1]->features.dot(dense_weights)); const double num = margin + loss; - cerr << "LOSS: " << num << " Margin:" << margin << " BLEUL:" << loss << " " << cur_pair[1]->features.dot(dense_weights) << " " << cur_pair[0]->features.dot(dense_weights) <<endl; + if (verbose) + cerr << "LOSS: " << num << " Margin:" << margin << " BLEUL:" << loss << " " << cur_pair[1]->features.dot(dense_weights) << " " << cur_pair[0]->features.dot(dense_weights) <<endl; SparseVector<double> diff = cur_pair[0]->features; @@ -704,7 +705,8 @@ int main(int argc, char** argv) { SparseVector<double> old_lambdas = lambdas; tot.clear(); tot += lambdas; - cerr << "PASS " << cur_pass << " " << endl << lambdas << endl; + if (VERBOSE) + cerr << "PASS " << cur_pass << " " << endl << lambdas << endl; ScoreP acc, acc_h, acc_f; while(*in) { @@ -841,7 +843,7 @@ int main(int argc, char** argv) { cur_pair.clear(); cur_pair.push_back(cur_constraint[j]); cur_pair.push_back(cur_constraint[i]); - double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights); + double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights, VERBOSE); if (delta == 0) optimize_again = false; cur_constraint[j]->alpha += delta; @@ -865,7 +867,7 @@ int main(int argc, char** argv) { } else if(optimizer == 2 || optimizer == 3) //PA and Cutting Plane MIRA update { - bool DEBUG_SMO= true; + bool DEBUG_SMO= false; vector<boost::shared_ptr<HypothesisInfo> > cur_constraint; cur_constraint.push_back(cur_good_v[0]); //add oracle to constraint set bool optimize_again = true; @@ -914,7 +916,7 @@ int main(int argc, char** argv) { continue; } //pair is undefined so we are done with this smo - double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights); + double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights, VERBOSE); cur_pair[0]->alpha += delta; cur_pair[1]->alpha -= delta; @@ -928,7 +930,7 @@ int main(int argc, char** argv) { //reload weights based on update dense_weights.clear(); lambdas.init_vector(&dense_weights); - if (dense_weights.size() < 500) + if (VERBOSE && dense_weights.size() < 500) ShowLargestFeatures(dense_weights); dense_w_local = dense_weights; iter++; @@ -968,12 +970,14 @@ int main(int argc, char** argv) { for(int u=0;u!=cur_constraint.size();u++) { - cerr << "alpha=" << cur_constraint[u]->alpha << " hope=" << cur_constraint[u]->hope << " fear=" << cur_constraint[u]->fear << endl; + if (VERBOSE) + cerr << "alpha=" << cur_constraint[u]->alpha << " hope=" << cur_constraint[u]->hope << " fear=" << cur_constraint[u]->fear << endl; temp_objective += cur_constraint[u]->alpha * cur_constraint[u]->fear; } objective += temp_objective; - cerr << "SENT OBJ: " << temp_objective << " NEW OBJ: " << objective << endl; + if (VERBOSE) + cerr << "SENT OBJ: " << temp_objective << " NEW OBJ: " << objective << endl; } diff --git a/training/mira/kbest_mira.cc b/training/mira/kbest_mira.cc index 2868de0c..07718a7f 100644 --- a/training/mira/kbest_mira.cc +++ b/training/mira/kbest_mira.cc @@ -57,7 +57,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { ("sample_forest,f", "Instead of a k-best list, sample k hypotheses from the decoder's forest") ("sample_forest_unit_weight_vector,x", "Before sampling (must use -f option), rescale the weight vector used so it has unit length; this may improve the quality of the samples") ("random_seed,S", po::value<uint32_t>(), "Random seed (if not specified, /dev/random will be used)") - ("decoder_config,c",po::value<string>(),"Decoder configuration file"); + ("decoder_config,c",po::value<string>(),"Decoder configuration file") + ("verbose,v", po::value<bool>()->zero_tokens(), "verbose stderr output"); po::options_description clo("Command line options"); clo.add_options() ("config", po::value<string>(), "Configuration file") @@ -188,6 +189,8 @@ int main(int argc, char** argv) { po::variables_map conf; if (!InitCommandLine(argc, argv, &conf)) return 1; + const bool VERBOSE = conf.count("verbose"); + if (conf.count("random_seed")) rng.reset(new MT19937(conf["random_seed"].as<uint32_t>())); else @@ -254,7 +257,8 @@ int main(int argc, char** argv) { if ((cur_sent * 40 / corpus.size()) > dots) { ++dots; cerr << '.'; } if (corpus.size() == cur_sent) { cerr << " [AVG METRIC LAST PASS=" << (tot_loss / corpus.size()) << "]\n"; - Weights::ShowLargestFeatures(dense_weights); + if (VERBOSE) + Weights::ShowLargestFeatures(dense_weights); cur_sent = 0; tot_loss = 0; dots = 0; diff --git a/training/pro/pro.pl b/training/pro/pro.pl index a059477d..0517a781 100755 --- a/training/pro/pro.pl +++ b/training/pro/pro.pl @@ -73,6 +73,7 @@ if (GetOptions( "weights=s" => \$initial_weights, "devset=s" => \$devset, "jobs=i" => \$jobs, + "max-iterations=i" => \$max_iterations, "metric=s" => \$metric, "pass-suffix=s" => \$pass_suffix, "qsub" => \$useqsub, |