From 0b0616c6f7400ce52d07350f7a7054a2513d9813 Mon Sep 17 00:00:00 2001
From: Victor Chahuneau <vchahune@cs.cmu.edu>
Date: Sat, 28 Jul 2012 17:12:40 -0400
Subject: [python] Suffix array compiler can read bitext (-b)

---
 python/src/cdec.sa._sa.pxd     |    1 +
 python/src/sa/_sa.c            | 4291 +++++++++++++++++++++++++---------------
 python/src/sa/data_array.pxi   |   38 +-
 python/src/sa/suffix_array.pxi |    8 +-
 4 files changed, 2762 insertions(+), 1576 deletions(-)
 create mode 120000 python/src/cdec.sa._sa.pxd

(limited to 'python/src')

diff --git a/python/src/cdec.sa._sa.pxd b/python/src/cdec.sa._sa.pxd
new file mode 120000
index 00000000..3613f643
--- /dev/null
+++ b/python/src/cdec.sa._sa.pxd
@@ -0,0 +1 @@
+sa/_sa.pxd
\ No newline at end of file
diff --git a/python/src/sa/_sa.c b/python/src/sa/_sa.c
index b7f3627a..0f9b0d22 100644
--- a/python/src/sa/_sa.c
+++ b/python/src/sa/_sa.c
@@ -1,4 +1,4 @@
-/* Generated by Cython 0.17.beta1 on Fri Jul 27 23:31:04 2012 */
+/* Generated by Cython 0.17.beta1 on Sat Jul 28 17:07:03 2012 */
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
@@ -383,6 +383,7 @@ static const char *__pyx_f[] = {
 
 /*--- Type declarations ---*/
 struct __pyx_obj_3_sa_HieroCachingRuleFactory;
+struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats;
 struct __pyx_obj_3_sa_IntList;
 struct __pyx_obj_3_sa_VEBIterator;
 struct __pyx_obj_3_sa_BiLex;
@@ -390,23 +391,24 @@ struct __pyx_obj_3_sa_VEB;
 struct __pyx_obj_3_sa_LCP;
 struct __pyx_obj_3_sa_DataArray;
 struct __pyx_obj_3_sa_BitSetIterator;
+struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext;
 struct __pyx_obj_3_sa_Precomputation;
 struct __pyx_obj_3_sa_SuffixArray;
+struct __pyx_obj_3_sa___pyx_scope_struct_4_input;
 struct __pyx_obj_3_sa_Alphabet;
 struct __pyx_obj_3_sa_Rule;
 struct __pyx_obj_3_sa_PhraseLocation;
-struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats;
-struct __pyx_obj_3_sa___pyx_scope_struct_2_input;
+struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__;
 struct __pyx_obj_3_sa_Alignment;
 struct __pyx_obj_3_sa_BitSet;
 struct __pyx_obj_3_sa_Sampler;
 struct __pyx_obj_3_sa_StringMap;
-struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__;
 struct __pyx_obj_3_sa_TrieNode;
 struct __pyx_obj_3_sa_ExtendedTrieNode;
 struct __pyx_obj_3_sa_TrieMap;
 struct __pyx_obj_3_sa_Phrase;
 struct __pyx_obj_3_sa_TrieTable;
+struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr;
 struct __pyx_obj_3_sa_FloatList;
 struct __pyx_t_3_sa__node;
 struct __pyx_t_3_sa__BitSet;
@@ -563,6 +565,37 @@ struct __pyx_obj_3_sa_HieroCachingRuleFactory {
 };
 
 
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":36
+ *         logger.info("LCP array completed")
+ * 
+ *     def compute_stats(self, int max_n):             # <<<<<<<<<<<<<<
+ *         """Note: the output of this function is not exact.  In
+ *         particular, the frequency associated with each word is
+ */
+struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats {
+  PyObject_HEAD
+  int __pyx_v_N;
+  int __pyx_v_freq;
+  int __pyx_v_h;
+  int __pyx_v_i;
+  int __pyx_v_ii;
+  int __pyx_v_iii;
+  int __pyx_v_j;
+  int __pyx_v_k;
+  int __pyx_v_max_n;
+  int __pyx_v_n;
+  PyObject *__pyx_v_ngram;
+  struct __pyx_obj_3_sa_IntList *__pyx_v_ngram_start;
+  PyObject *__pyx_v_ngram_starts;
+  int __pyx_v_rs;
+  struct __pyx_obj_3_sa_IntList *__pyx_v_run_start;
+  struct __pyx_obj_3_sa_LCP *__pyx_v_self;
+  int __pyx_v_valid;
+  struct __pyx_obj_3_sa_VEB *__pyx_v_veb;
+  int __pyx_t_0;
+};
+
+
 /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":9
  * from libc.string cimport memset, memcpy
  * 
@@ -676,6 +709,20 @@ struct __pyx_obj_3_sa_BitSetIterator {
 };
 
 
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":71
+ *             self.read_text_data(fp)
+ * 
+ *     def read_bitext(self, char* filename, int side):             # <<<<<<<<<<<<<<
+ *         with gzip_or_text(filename) as fp:
+ *             data = (line.split(' ||| ')[side] for line in fp)
+ */
+struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext {
+  PyObject_HEAD
+  PyObject *__pyx_v_fp;
+  int __pyx_v_side;
+};
+
+
 /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":188
  * 
  * 
@@ -713,84 +760,6 @@ struct __pyx_obj_3_sa_SuffixArray {
 };
 
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":7
- * cdef int INDEX_MASK = (1<<INDEX_SHIFT)-1
- * 
- * cdef class Alphabet:             # <<<<<<<<<<<<<<
- *     cdef readonly StringMap terminals, nonterminals
- *     cdef int first_nonterminal, last_nonterminal
- */
-struct __pyx_obj_3_sa_Alphabet {
-  PyObject_HEAD
-  struct __pyx_vtabstruct_3_sa_Alphabet *__pyx_vtab;
-  struct __pyx_obj_3_sa_StringMap *terminals;
-  struct __pyx_obj_3_sa_StringMap *nonterminals;
-  int first_nonterminal;
-  int last_nonterminal;
-  PyObject *id2sym;
-};
-
-struct __pyx_obj_3_sa_Rule {
-  PyObject_HEAD
-  int lhs;
-  struct __pyx_obj_3_sa_Phrase *f;
-  struct __pyx_obj_3_sa_Phrase *e;
-  float *cscores;
-  int n_scores;
-  PyObject *word_alignments;
-};
-
-
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":56
- * # in the suffix array; if discontiguous, it is the set of
- * # actual locations (packed into an array)
- * cdef class PhraseLocation:             # <<<<<<<<<<<<<<
- *     cdef int sa_low
- *     cdef int sa_high
- */
-struct __pyx_obj_3_sa_PhraseLocation {
-  PyObject_HEAD
-  struct __pyx_vtabstruct_3_sa_PhraseLocation *__pyx_vtab;
-  int sa_low;
-  int sa_high;
-  int arr_low;
-  int arr_high;
-  struct __pyx_obj_3_sa_IntList *arr;
-  int num_subpatterns;
-};
-
-
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":36
- *         logger.info("LCP array completed")
- * 
- *     def compute_stats(self, int max_n):             # <<<<<<<<<<<<<<
- *         """Note: the output of this function is not exact.  In
- *         particular, the frequency associated with each word is
- */
-struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats {
-  PyObject_HEAD
-  int __pyx_v_N;
-  int __pyx_v_freq;
-  int __pyx_v_h;
-  int __pyx_v_i;
-  int __pyx_v_ii;
-  int __pyx_v_iii;
-  int __pyx_v_j;
-  int __pyx_v_k;
-  int __pyx_v_max_n;
-  int __pyx_v_n;
-  PyObject *__pyx_v_ngram;
-  struct __pyx_obj_3_sa_IntList *__pyx_v_ngram_start;
-  PyObject *__pyx_v_ngram_starts;
-  int __pyx_v_rs;
-  struct __pyx_obj_3_sa_IntList *__pyx_v_run_start;
-  struct __pyx_obj_3_sa_LCP *__pyx_v_self;
-  int __pyx_v_valid;
-  struct __pyx_obj_3_sa_VEB *__pyx_v_veb;
-  int __pyx_t_0;
-};
-
-
 /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":919
  *         return sorted(result);
  * 
@@ -798,7 +767,7 @@ struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats {
  *         '''When this function is called on the RuleFactory,
  *         it looks up all of the rules that can be used to translate
  */
-struct __pyx_obj_3_sa___pyx_scope_struct_2_input {
+struct __pyx_obj_3_sa___pyx_scope_struct_4_input {
   PyObject_HEAD
   PyObject *__pyx_v_alignment;
   PyObject *__pyx_v_als;
@@ -877,6 +846,68 @@ struct __pyx_obj_3_sa___pyx_scope_struct_2_input {
 };
 
 
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":7
+ * cdef int INDEX_MASK = (1<<INDEX_SHIFT)-1
+ * 
+ * cdef class Alphabet:             # <<<<<<<<<<<<<<
+ *     cdef readonly StringMap terminals, nonterminals
+ *     cdef int first_nonterminal, last_nonterminal
+ */
+struct __pyx_obj_3_sa_Alphabet {
+  PyObject_HEAD
+  struct __pyx_vtabstruct_3_sa_Alphabet *__pyx_vtab;
+  struct __pyx_obj_3_sa_StringMap *terminals;
+  struct __pyx_obj_3_sa_StringMap *nonterminals;
+  int first_nonterminal;
+  int last_nonterminal;
+  PyObject *id2sym;
+};
+
+struct __pyx_obj_3_sa_Rule {
+  PyObject_HEAD
+  int lhs;
+  struct __pyx_obj_3_sa_Phrase *f;
+  struct __pyx_obj_3_sa_Phrase *e;
+  float *cscores;
+  int n_scores;
+  PyObject *word_alignments;
+};
+
+
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":56
+ * # in the suffix array; if discontiguous, it is the set of
+ * # actual locations (packed into an array)
+ * cdef class PhraseLocation:             # <<<<<<<<<<<<<<
+ *     cdef int sa_low
+ *     cdef int sa_high
+ */
+struct __pyx_obj_3_sa_PhraseLocation {
+  PyObject_HEAD
+  struct __pyx_vtabstruct_3_sa_PhraseLocation *__pyx_vtab;
+  int sa_low;
+  int sa_high;
+  int arr_low;
+  int arr_high;
+  struct __pyx_obj_3_sa_IntList *arr;
+  int num_subpatterns;
+};
+
+
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":141
+ *         return self.syms[i]
+ * 
+ *     def __iter__(self):             # <<<<<<<<<<<<<<
+ *         cdef int i
+ *         for i from 0 <= i < self.n:
+ */
+struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ {
+  PyObject_HEAD
+  int __pyx_v_i;
+  struct __pyx_obj_3_sa_Phrase *__pyx_v_self;
+  int __pyx_t_0;
+};
+
+
 /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":8
  * # May need to revisit if things get really tight, though.
  * 
@@ -933,21 +964,6 @@ struct __pyx_obj_3_sa_StringMap {
 };
 
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":141
- *         return self.syms[i]
- * 
- *     def __iter__(self):             # <<<<<<<<<<<<<<
- *         cdef int i
- *         for i from 0 <= i < self.n:
- */
-struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ {
-  PyObject_HEAD
-  int __pyx_v_i;
-  struct __pyx_obj_3_sa_Phrase *__pyx_v_self;
-  int __pyx_t_0;
-};
-
-
 /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":18
  * cdef int EPSILON = sym_fromstring('*EPS*', True)
  * 
@@ -1021,6 +1037,23 @@ struct __pyx_obj_3_sa_TrieTable {
 };
 
 
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":73
+ *     def read_bitext(self, char* filename, int side):
+ *         with gzip_or_text(filename) as fp:
+ *             data = (line.split(' ||| ')[side] for line in fp)             # <<<<<<<<<<<<<<
+ *             self.read_text_data(data)
+ * 
+ */
+struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr {
+  PyObject_HEAD
+  struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *__pyx_outer_scope;
+  PyObject *__pyx_v_line;
+  PyObject *__pyx_t_0;
+  Py_ssize_t __pyx_t_1;
+  PyObject *(*__pyx_t_2)(PyObject *);
+};
+
+
 /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":9
  * from libc.string cimport memset, strcpy, strlen
  * 
@@ -1539,6 +1572,8 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_Append(PyObject* L, PyObject* x) {
 
 static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb); /*proto*/
 
+static CYTHON_INLINE void __Pyx_RaiseClosureNameError(const char *varname);
+
 static CYTHON_INLINE long __Pyx_div_long(long, long); /* proto */
 
 static CYTHON_INLINE long __Pyx_mod_long(long, long); /* proto */
@@ -1746,6 +1781,18 @@ static void __Pyx_ExceptionReset(PyObject *type, PyObject *value, PyObject *tb);
 
 static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, long level); /*proto*/
 
+#include <string.h>
+
+static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/
+
+static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/
+
+#if PY_MAJOR_VERSION >= 3
+#define __Pyx_PyString_Equals __Pyx_PyUnicode_Equals
+#else
+#define __Pyx_PyString_Equals __Pyx_PyBytes_Equals
+#endif
+
 static CYTHON_INLINE unsigned char __Pyx_PyInt_AsUnsignedChar(PyObject *);
 
 static CYTHON_INLINE unsigned short __Pyx_PyInt_AsUnsignedShort(PyObject *);
@@ -1868,9 +1915,11 @@ static PyTypeObject *__pyx_ptype_3_sa_TrieTable = 0;
 static PyTypeObject *__pyx_ptype_3_sa_PhraseLocation = 0;
 static PyTypeObject *__pyx_ptype_3_sa_Sampler = 0;
 static PyTypeObject *__pyx_ptype_3_sa_HieroCachingRuleFactory = 0;
-static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct__compute_stats = 0;
-static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct_1___iter__ = 0;
-static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct_2_input = 0;
+static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct__read_bitext = 0;
+static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct_1_genexpr = 0;
+static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct_2_compute_stats = 0;
+static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct_3___iter__ = 0;
+static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct_4_input = 0;
 static int __pyx_v_3_sa_MIN_BOTTOM_SIZE;
 static int __pyx_v_3_sa_MIN_BOTTOM_BITS;
 static int __pyx_v_3_sa_LOWER_MASK[32];
@@ -1957,7 +2006,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_28write(struct __pyx_obj_3_sa_IntList *_
 static PyObject *__pyx_pf_3_sa_7IntList_30read(struct __pyx_obj_3_sa_IntList *__pyx_v_self, char *__pyx_v_filename); /* proto */
 static int __pyx_pf_3_sa_9StringMap___cinit__(struct __pyx_obj_3_sa_StringMap *__pyx_v_self); /* proto */
 static void __pyx_pf_3_sa_9StringMap_2__dealloc__(struct __pyx_obj_3_sa_StringMap *__pyx_v_self); /* proto */
-static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text, int __pyx_v_use_sent_id); /* proto */
+static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text, PyObject *__pyx_v_side, int __pyx_v_use_sent_id); /* proto */
 static Py_ssize_t __pyx_pf_3_sa_9DataArray_2__len__(struct __pyx_obj_3_sa_DataArray *__pyx_v_self); /* proto */
 static PyObject *__pyx_pf_3_sa_9DataArray_4getSentId(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_i); /* proto */
 static PyObject *__pyx_pf_3_sa_9DataArray_6getSent(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_i); /* proto */
@@ -1966,10 +2015,13 @@ static PyObject *__pyx_pf_3_sa_9DataArray_10get_id(struct __pyx_obj_3_sa_DataArr
 static PyObject *__pyx_pf_3_sa_9DataArray_12get_word(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_id); /* proto */
 static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */
 static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */
-static PyObject *__pyx_pf_3_sa_9DataArray_18read_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */
-static PyObject *__pyx_pf_3_sa_9DataArray_20write_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */
-static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_f); /* proto */
-static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */
+static PyObject *__pyx_pf_3_sa_9DataArray_11read_bitext_genexpr(PyObject *__pyx_self); /* proto */
+static PyObject *__pyx_pf_3_sa_9DataArray_18read_bitext(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename, int __pyx_v_side); /* proto */
+static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_data); /* proto */
+static PyObject *__pyx_pf_3_sa_9DataArray_22read_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */
+static PyObject *__pyx_pf_3_sa_9DataArray_24write_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */
+static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_f); /* proto */
+static PyObject *__pyx_pf_3_sa_9DataArray_28write_enhanced(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */
 static PyObject *__pyx_pf_3_sa_9Alignment_unlink(CYTHON_UNUSED struct __pyx_obj_3_sa_Alignment *__pyx_v_self, PyObject *__pyx_v_link); /* proto */
 static PyObject *__pyx_pf_3_sa_9Alignment_2get_sent_links(struct __pyx_obj_3_sa_Alignment *__pyx_v_self, int __pyx_v_sent_id); /* proto */
 static int __pyx_pf_3_sa_9Alignment_4__cinit__(struct __pyx_obj_3_sa_Alignment *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text); /* proto */
@@ -2061,12 +2113,12 @@ static int __pyx_pf_3_sa_14Precomputation___cinit__(struct __pyx_obj_3_sa_Precom
 static PyObject *__pyx_pf_3_sa_14Precomputation_2read_binary(struct __pyx_obj_3_sa_Precomputation *__pyx_v_self, char *__pyx_v_filename); /* proto */
 static PyObject *__pyx_pf_3_sa_14Precomputation_4write_binary(struct __pyx_obj_3_sa_Precomputation *__pyx_v_self, char *__pyx_v_filename); /* proto */
 static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_sa_Precomputation *__pyx_v_self, PyObject *__pyx_v_stats, struct __pyx_obj_3_sa_SuffixArray *__pyx_v_sarray); /* proto */
-static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text); /* proto */
+static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text, PyObject *__pyx_v_side); /* proto */
 static PyObject *__pyx_pf_3_sa_11SuffixArray_2__getitem__(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_i); /* proto */
 static PyObject *__pyx_pf_3_sa_11SuffixArray_4getSentId(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_i); /* proto */
 static PyObject *__pyx_pf_3_sa_11SuffixArray_6getSent(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_i); /* proto */
 static PyObject *__pyx_pf_3_sa_11SuffixArray_8getSentPos(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_loc); /* proto */
-static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, char *__pyx_v_filename); /* proto */
+static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_filename, PyObject *__pyx_v_side); /* proto */
 static PyObject *__pyx_pf_3_sa_11SuffixArray_12q3sort(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, int __pyx_v_i, int __pyx_v_j, int __pyx_v_h, struct __pyx_obj_3_sa_IntList *__pyx_v_isa, PyObject *__pyx_v_pad); /* proto */
 static PyObject *__pyx_pf_3_sa_11SuffixArray_14write_text(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, char *__pyx_v_filename); /* proto */
 static PyObject *__pyx_pf_3_sa_11SuffixArray_16read_binary(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, char *__pyx_v_filename); /* proto */
@@ -2121,49 +2173,47 @@ static char __pyx_k_8[] = "Requested index %d:%d of %d-length IntList";
 static char __pyx_k_9[] = "Illegal key type %s for IntList";
 static char __pyx_k_13[] = "%s ";
 static char __pyx_k_14[] = "\n";
-static char __pyx_k_18[] = "%d ";
-static char __pyx_k_22[] = "%s %d ";
-static char __pyx_k_24[] = "write_enhanced_handle";
-static char __pyx_k_28[] = "-";
-static char __pyx_k_32[] = "%d-%d ";
-static char __pyx_k_39[] = "%d-%d out of bounds (I=%d,J=%d) in line %d\n";
-static char __pyx_k_42[] = "";
-static char __pyx_k_43[] = "Sort error in CLex";
-static char __pyx_k_45[] = "    ";
-static char __pyx_k_47[] = "%d %f %f ";
-static char __pyx_k_49[] = "%d %s ";
-static char __pyx_k_53[] = "%s %s %.6f %.6f\n";
-static char __pyx_k_55[] = "  (";
-static char __pyx_k_56[] = ")";
-static char __pyx_k_57[] = "Constructing LCP array";
-static char __pyx_k_59[] = "LCP array completed";
-static char __pyx_k_61[] = "[%s,%d]";
-static char __pyx_k_62[] = "[%s]";
-static char __pyx_k_63[] = "\\";
-static char __pyx_k_64[] = " ";
-static char __pyx_k_65[] = "Invalid LHS symbol: %d";
-static char __pyx_k_66[] = "%d-%d";
-static char __pyx_k_67[] = " ||| ";
-static char __pyx_k_68[] = "precompute_secondary_rank";
-static char __pyx_k_69[] = "train_max_initial_size";
-static char __pyx_k_70[] = "Precomputing frequent intersections";
-static char __pyx_k_72[] = "    Computing inverted indexes...";
-static char __pyx_k_74[] = "    Computing collocations...";
-static char __pyx_k_76[] = "        %d sentences";
-static char __pyx_k_81[] = "X ";
-static char __pyx_k_82[] = "ERROR: unexpected pattern %s in set of precomputed collocations";
-static char __pyx_k_83[] = "RANK %d\tCOUNT, COST: %d    %d\tCUMUL: %d, %d";
-static char __pyx_k_84[] = "Precomputed collocations for %d patterns out of %d possible (upper bound %d)";
-static char __pyx_k_85[] = "Precomputed inverted index for %d patterns ";
-static char __pyx_k_86[] = "Precomputation took %f seconds";
-static char __pyx_k_87[] = "    Bucket sort took %f seconds";
-static char __pyx_k_88[] = "    Refining, sort depth = %d";
-static char __pyx_k_89[] = "    Refinement took %f seconds";
-static char __pyx_k_90[] = "    Finalizing sort...";
-static char __pyx_k_92[] = "Suffix array construction took %f seconds";
-static char __pyx_k_93[] = "Unexpected condition found in q3sort: sort from %d to %d";
-static char __pyx_k_98[] = "Sampling strategy: uniform, max sample size = %d";
-static char __pyx_k_99[] = "Sampling strategy: no sampling";
+static char __pyx_k_18[] = " ||| ";
+static char __pyx_k_21[] = "%d ";
+static char __pyx_k_25[] = "%s %d ";
+static char __pyx_k_27[] = "write_enhanced_handle";
+static char __pyx_k_31[] = "-";
+static char __pyx_k_35[] = "%d-%d ";
+static char __pyx_k_42[] = "%d-%d out of bounds (I=%d,J=%d) in line %d\n";
+static char __pyx_k_45[] = "";
+static char __pyx_k_46[] = "Sort error in CLex";
+static char __pyx_k_48[] = "    ";
+static char __pyx_k_50[] = "%d %f %f ";
+static char __pyx_k_52[] = "%d %s ";
+static char __pyx_k_56[] = "%s %s %.6f %.6f\n";
+static char __pyx_k_58[] = "  (";
+static char __pyx_k_59[] = ")";
+static char __pyx_k_60[] = "Constructing LCP array";
+static char __pyx_k_62[] = "LCP array completed";
+static char __pyx_k_64[] = "[%s,%d]";
+static char __pyx_k_65[] = "[%s]";
+static char __pyx_k_66[] = "\\";
+static char __pyx_k_67[] = " ";
+static char __pyx_k_68[] = "Invalid LHS symbol: %d";
+static char __pyx_k_69[] = "%d-%d";
+static char __pyx_k_70[] = "precompute_secondary_rank";
+static char __pyx_k_71[] = "train_max_initial_size";
+static char __pyx_k_72[] = "Precomputing frequent intersections";
+static char __pyx_k_74[] = "    Computing inverted indexes...";
+static char __pyx_k_76[] = "    Computing collocations...";
+static char __pyx_k_78[] = "        %d sentences";
+static char __pyx_k_83[] = "X ";
+static char __pyx_k_84[] = "ERROR: unexpected pattern %s in set of precomputed collocations";
+static char __pyx_k_85[] = "RANK %d\tCOUNT, COST: %d    %d\tCUMUL: %d, %d";
+static char __pyx_k_86[] = "Precomputed collocations for %d patterns out of %d possible (upper bound %d)";
+static char __pyx_k_87[] = "Precomputed inverted index for %d patterns ";
+static char __pyx_k_88[] = "Precomputation took %f seconds";
+static char __pyx_k_89[] = "    Bucket sort took %f seconds";
+static char __pyx_k_90[] = "    Refining, sort depth = %d";
+static char __pyx_k_91[] = "    Refinement took %f seconds";
+static char __pyx_k_92[] = "    Finalizing sort...";
+static char __pyx_k_94[] = "Suffix array construction took %f seconds";
+static char __pyx_k_95[] = "Unexpected condition found in q3sort: sort from %d to %d";
 static char __pyx_k__0[] = "0";
 static char __pyx_k__1[] = "1";
 static char __pyx_k__e[] = "e";
@@ -2173,39 +2223,41 @@ static char __pyx_k__i[] = "i";
 static char __pyx_k__j[] = "j";
 static char __pyx_k__r[] = "r";
 static char __pyx_k__w[] = "w";
-static char __pyx_k_101[] = "require_aligned_terminal";
-static char __pyx_k_102[] = "require_aligned_chunks";
-static char __pyx_k_103[] = "[X]";
-static char __pyx_k_104[] = "Must specify an alignment object";
-static char __pyx_k_106[] = "Reading precomputed data from file %s... ";
-static char __pyx_k_107[] = "Precomputation done with max nonterminals %d, decoder uses %d";
-static char __pyx_k_108[] = "Precomputation done with max terminals %d, decoder uses %d";
-static char __pyx_k_109[] = "Precomputation done with max initial size %d, decoder uses %d";
-static char __pyx_k_110[] = "Precomputation done with min gap size %d, decoder uses %d";
-static char __pyx_k_111[] = "Converting %d hash keys on precomputed inverted index... ";
-static char __pyx_k_112[] = "Converting %d hash keys on precomputed collocations... ";
-static char __pyx_k_113[] = "Processing precomputations took %f seconds";
-static char __pyx_k_114[] = "{";
-  static char __pyx_k_115[] = "(";
-static char __pyx_k_116[] = "}";
-static char __pyx_k_117[] = "get_precomputed_collocation";
-static char __pyx_k_118[] = "double binary";
-static char __pyx_k_119[] = "Keyword trie error";
-static char __pyx_k_121[] = "get_all_nodes_isteps_away";
-static char __pyx_k_122[] = "Total time for rule lookup, extraction, and scoring = %f seconds";
-static char __pyx_k_123[] = "    Extract time = %f seconds";
-static char __pyx_k_124[] = "No aligned terminals";
-static char __pyx_k_125[] = "Unaligned chunk";
-static char __pyx_k_126[] = "Gaps are not tight phrases";
-static char __pyx_k_127[] = "Inside edges of preceding subphrase are not tight";
-static char __pyx_k_128[] = "Inside edges of following subphrase are not tight";
-static char __pyx_k_129[] = "Subphrase [%d, %d] failed integrity check";
-static char __pyx_k_130[] = "Didn't extract anything from [%d, %d] -> [%d, %d]";
-static char __pyx_k_131[] = "Unable to extract basic phrase";
-static char __pyx_k_134[] = "/Users/vchahun/Sandbox/cdec/python/src/sa/_sa.pyx";
-static char __pyx_k_135[] = "cdec.sa";
-static char __pyx_k_139[] = "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi";
-static char __pyx_k_140[] = "*EPS*";
+static char __pyx_k_100[] = "Sampling strategy: uniform, max sample size = %d";
+static char __pyx_k_101[] = "Sampling strategy: no sampling";
+static char __pyx_k_103[] = "require_aligned_terminal";
+static char __pyx_k_104[] = "require_aligned_chunks";
+static char __pyx_k_105[] = "[X]";
+static char __pyx_k_106[] = "Must specify an alignment object";
+static char __pyx_k_108[] = "Reading precomputed data from file %s... ";
+static char __pyx_k_109[] = "Precomputation done with max nonterminals %d, decoder uses %d";
+static char __pyx_k_110[] = "Precomputation done with max terminals %d, decoder uses %d";
+static char __pyx_k_111[] = "Precomputation done with max initial size %d, decoder uses %d";
+static char __pyx_k_112[] = "Precomputation done with min gap size %d, decoder uses %d";
+static char __pyx_k_113[] = "Converting %d hash keys on precomputed inverted index... ";
+static char __pyx_k_114[] = "Converting %d hash keys on precomputed collocations... ";
+static char __pyx_k_115[] = "Processing precomputations took %f seconds";
+static char __pyx_k_116[] = "{";
+  static char __pyx_k_117[] = "(";
+static char __pyx_k_118[] = "}";
+static char __pyx_k_119[] = "get_precomputed_collocation";
+static char __pyx_k_120[] = "double binary";
+static char __pyx_k_121[] = "Keyword trie error";
+static char __pyx_k_123[] = "get_all_nodes_isteps_away";
+static char __pyx_k_124[] = "Total time for rule lookup, extraction, and scoring = %f seconds";
+static char __pyx_k_125[] = "    Extract time = %f seconds";
+static char __pyx_k_126[] = "No aligned terminals";
+static char __pyx_k_127[] = "Unaligned chunk";
+static char __pyx_k_128[] = "Gaps are not tight phrases";
+static char __pyx_k_129[] = "Inside edges of preceding subphrase are not tight";
+static char __pyx_k_130[] = "Inside edges of following subphrase are not tight";
+static char __pyx_k_131[] = "Subphrase [%d, %d] failed integrity check";
+static char __pyx_k_132[] = "Didn't extract anything from [%d, %d] -> [%d, %d]";
+static char __pyx_k_133[] = "Unable to extract basic phrase";
+static char __pyx_k_136[] = "/Users/vchahun/Sandbox/cdec/python/src/sa/_sa.pyx";
+static char __pyx_k_137[] = "cdec.sa";
+static char __pyx_k_141[] = "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi";
+static char __pyx_k_142[] = "*EPS*";
 static char __pyx_k__gc[] = "gc";
 static char __pyx_k__sa[] = "sa";
 static char __pyx_k___sa[] = "_sa";
@@ -2229,6 +2281,7 @@ static char __pyx_k__info[] = "info";
 static char __pyx_k__join[] = "join";
 static char __pyx_k__open[] = "open";
 static char __pyx_k__seek[] = "seek";
+static char __pyx_k__side[] = "side";
 static char __pyx_k__size[] = "size";
 static char __pyx_k__skip[] = "skip";
 static char __pyx_k__stop[] = "stop";
@@ -2266,6 +2319,7 @@ static char __pyx_k__sample[] = "sample";
 static char __pyx_k__sarray[] = "sarray";
 static char __pyx_k__scores[] = "scores";
 static char __pyx_k__sorted[] = "sorted";
+static char __pyx_k__source[] = "source";
 static char __pyx_k__string[] = "string";
 static char __pyx_k__unlink[] = "unlink";
 static char __pyx_k__advance[] = "advance";
@@ -2334,6 +2388,7 @@ static char __pyx_k__initial_len[] = "initial_len";
 static char __pyx_k__next_states[] = "next_states";
 static char __pyx_k__precomputed[] = "precomputed";
 static char __pyx_k__read_binary[] = "read_binary";
+static char __pyx_k__read_bitext[] = "read_bitext";
 static char __pyx_k__sample_size[] = "sample_size";
 static char __pyx_k__suffix_link[] = "suffix_link";
 static char __pyx_k__use_sent_id[] = "use_sent_id";
@@ -2344,6 +2399,7 @@ static char __pyx_k__StopIteration[] = "StopIteration";
 static char __pyx_k__alphabet_size[] = "alphabet_size";
 static char __pyx_k__tight_phrases[] = "tight_phrases";
 static char __pyx_k__pattern2phrase[] = "pattern2phrase";
+static char __pyx_k__read_text_data[] = "read_text_data";
 static char __pyx_k__sym_fromstring[] = "sym_fromstring";
 static char __pyx_k__by_slack_factor[] = "by_slack_factor";
 static char __pyx_k__get_next_states[] = "get_next_states";
@@ -2362,11 +2418,11 @@ static char __pyx_k__max_target_length[] = "max_target_length";
 static char __pyx_k__train_min_gap_size[] = "train_min_gap_size";
 static char __pyx_k__pattern2phrase_plus[] = "pattern2phrase_plus";
 static PyObject *__pyx_kp_s_1;
-static PyObject *__pyx_n_s_101;
-static PyObject *__pyx_n_s_102;
-static PyObject *__pyx_kp_s_104;
+static PyObject *__pyx_kp_s_100;
+static PyObject *__pyx_kp_s_101;
+static PyObject *__pyx_n_s_103;
+static PyObject *__pyx_n_s_104;
 static PyObject *__pyx_kp_s_106;
-static PyObject *__pyx_kp_s_107;
 static PyObject *__pyx_kp_s_108;
 static PyObject *__pyx_kp_s_109;
 static PyObject *__pyx_kp_s_110;
@@ -2376,12 +2432,12 @@ static PyObject *__pyx_kp_s_113;
 static PyObject *__pyx_kp_s_114;
 static PyObject *__pyx_kp_s_115;
 static PyObject *__pyx_kp_s_116;
-static PyObject *__pyx_n_s_117;
+static PyObject *__pyx_kp_s_117;
 static PyObject *__pyx_kp_s_118;
-static PyObject *__pyx_kp_s_119;
-static PyObject *__pyx_n_s_121;
-static PyObject *__pyx_kp_s_122;
-static PyObject *__pyx_kp_s_123;
+static PyObject *__pyx_n_s_119;
+static PyObject *__pyx_kp_s_120;
+static PyObject *__pyx_kp_s_121;
+static PyObject *__pyx_n_s_123;
 static PyObject *__pyx_kp_s_124;
 static PyObject *__pyx_kp_s_125;
 static PyObject *__pyx_kp_s_126;
@@ -2391,49 +2447,49 @@ static PyObject *__pyx_kp_s_129;
 static PyObject *__pyx_kp_s_13;
 static PyObject *__pyx_kp_s_130;
 static PyObject *__pyx_kp_s_131;
-static PyObject *__pyx_kp_s_134;
-static PyObject *__pyx_kp_s_135;
-static PyObject *__pyx_kp_s_139;
+static PyObject *__pyx_kp_s_132;
+static PyObject *__pyx_kp_s_133;
+static PyObject *__pyx_kp_s_136;
+static PyObject *__pyx_kp_s_137;
 static PyObject *__pyx_kp_s_14;
-static PyObject *__pyx_kp_s_140;
+static PyObject *__pyx_kp_s_141;
+static PyObject *__pyx_kp_s_142;
 static PyObject *__pyx_kp_s_18;
 static PyObject *__pyx_kp_s_2;
-static PyObject *__pyx_kp_s_22;
-static PyObject *__pyx_n_s_24;
-static PyObject *__pyx_kp_s_28;
+static PyObject *__pyx_kp_s_21;
+static PyObject *__pyx_kp_s_25;
+static PyObject *__pyx_n_s_27;
 static PyObject *__pyx_kp_s_3;
-static PyObject *__pyx_kp_s_32;
-static PyObject *__pyx_kp_s_39;
+static PyObject *__pyx_kp_s_31;
+static PyObject *__pyx_kp_s_35;
 static PyObject *__pyx_kp_s_4;
 static PyObject *__pyx_kp_s_42;
-static PyObject *__pyx_kp_s_43;
 static PyObject *__pyx_kp_s_45;
-static PyObject *__pyx_kp_s_47;
-static PyObject *__pyx_kp_s_49;
+static PyObject *__pyx_kp_s_46;
+static PyObject *__pyx_kp_s_48;
 static PyObject *__pyx_kp_s_5;
-static PyObject *__pyx_kp_s_53;
-static PyObject *__pyx_kp_s_55;
+static PyObject *__pyx_kp_s_50;
+static PyObject *__pyx_kp_s_52;
 static PyObject *__pyx_kp_s_56;
-static PyObject *__pyx_kp_s_57;
+static PyObject *__pyx_kp_s_58;
 static PyObject *__pyx_kp_s_59;
 static PyObject *__pyx_kp_s_6;
-static PyObject *__pyx_kp_s_61;
+static PyObject *__pyx_kp_s_60;
 static PyObject *__pyx_kp_s_62;
-static PyObject *__pyx_kp_s_63;
 static PyObject *__pyx_kp_s_64;
 static PyObject *__pyx_kp_s_65;
 static PyObject *__pyx_kp_s_66;
 static PyObject *__pyx_kp_s_67;
-static PyObject *__pyx_n_s_68;
-static PyObject *__pyx_n_s_69;
+static PyObject *__pyx_kp_s_68;
+static PyObject *__pyx_kp_s_69;
 static PyObject *__pyx_kp_s_7;
-static PyObject *__pyx_kp_s_70;
+static PyObject *__pyx_n_s_70;
+static PyObject *__pyx_n_s_71;
 static PyObject *__pyx_kp_s_72;
 static PyObject *__pyx_kp_s_74;
 static PyObject *__pyx_kp_s_76;
+static PyObject *__pyx_kp_s_78;
 static PyObject *__pyx_kp_s_8;
-static PyObject *__pyx_kp_s_81;
-static PyObject *__pyx_kp_s_82;
 static PyObject *__pyx_kp_s_83;
 static PyObject *__pyx_kp_s_84;
 static PyObject *__pyx_kp_s_85;
@@ -2443,10 +2499,10 @@ static PyObject *__pyx_kp_s_88;
 static PyObject *__pyx_kp_s_89;
 static PyObject *__pyx_kp_s_9;
 static PyObject *__pyx_kp_s_90;
+static PyObject *__pyx_kp_s_91;
 static PyObject *__pyx_kp_s_92;
-static PyObject *__pyx_kp_s_93;
-static PyObject *__pyx_kp_s_98;
-static PyObject *__pyx_kp_s_99;
+static PyObject *__pyx_kp_s_94;
+static PyObject *__pyx_kp_s_95;
 static PyObject *__pyx_kp_s__0;
 static PyObject *__pyx_kp_s__1;
 static PyObject *__pyx_n_s__END_OF_FILE;
@@ -2564,7 +2620,9 @@ static PyObject *__pyx_n_s__range;
 static PyObject *__pyx_n_s__reachable;
 static PyObject *__pyx_n_s__reachable_buffer;
 static PyObject *__pyx_n_s__read_binary;
+static PyObject *__pyx_n_s__read_bitext;
 static PyObject *__pyx_n_s__read_text;
+static PyObject *__pyx_n_s__read_text_data;
 static PyObject *__pyx_n_s__res;
 static PyObject *__pyx_n_s__reset;
 static PyObject *__pyx_n_s__resource;
@@ -2581,9 +2639,11 @@ static PyObject *__pyx_n_s__scores;
 static PyObject *__pyx_n_s__seek;
 static PyObject *__pyx_n_s__setdefault;
 static PyObject *__pyx_n_s__shortest;
+static PyObject *__pyx_n_s__side;
 static PyObject *__pyx_n_s__size;
 static PyObject *__pyx_n_s__skip;
 static PyObject *__pyx_n_s__sorted;
+static PyObject *__pyx_n_s__source;
 static PyObject *__pyx_n_s__spanlen;
 static PyObject *__pyx_n_s__split;
 static PyObject *__pyx_n_s__start;
@@ -2618,8 +2678,8 @@ static PyObject *__pyx_int_10;
 static PyObject *__pyx_int_20;
 static PyObject *__pyx_int_1000;
 static PyObject *__pyx_int_65536;
-static PyObject *__pyx_k_38;
-static PyObject *__pyx_k_97;
+static PyObject *__pyx_k_41;
+static PyObject *__pyx_k_99;
 static PyObject *__pyx_k_tuple_10;
 static PyObject *__pyx_k_tuple_11;
 static PyObject *__pyx_k_tuple_12;
@@ -2628,49 +2688,51 @@ static PyObject *__pyx_k_tuple_16;
 static PyObject *__pyx_k_tuple_17;
 static PyObject *__pyx_k_tuple_19;
 static PyObject *__pyx_k_tuple_20;
-static PyObject *__pyx_k_tuple_21;
+static PyObject *__pyx_k_tuple_22;
 static PyObject *__pyx_k_tuple_23;
-static PyObject *__pyx_k_tuple_25;
+static PyObject *__pyx_k_tuple_24;
 static PyObject *__pyx_k_tuple_26;
-static PyObject *__pyx_k_tuple_27;
+static PyObject *__pyx_k_tuple_28;
 static PyObject *__pyx_k_tuple_29;
 static PyObject *__pyx_k_tuple_30;
-static PyObject *__pyx_k_tuple_31;
+static PyObject *__pyx_k_tuple_32;
 static PyObject *__pyx_k_tuple_33;
 static PyObject *__pyx_k_tuple_34;
-static PyObject *__pyx_k_tuple_35;
 static PyObject *__pyx_k_tuple_36;
 static PyObject *__pyx_k_tuple_37;
+static PyObject *__pyx_k_tuple_38;
+static PyObject *__pyx_k_tuple_39;
 static PyObject *__pyx_k_tuple_40;
-static PyObject *__pyx_k_tuple_41;
+static PyObject *__pyx_k_tuple_43;
 static PyObject *__pyx_k_tuple_44;
-static PyObject *__pyx_k_tuple_46;
-static PyObject *__pyx_k_tuple_48;
-static PyObject *__pyx_k_tuple_50;
+static PyObject *__pyx_k_tuple_47;
+static PyObject *__pyx_k_tuple_49;
 static PyObject *__pyx_k_tuple_51;
-static PyObject *__pyx_k_tuple_52;
+static PyObject *__pyx_k_tuple_53;
 static PyObject *__pyx_k_tuple_54;
-static PyObject *__pyx_k_tuple_58;
-static PyObject *__pyx_k_tuple_60;
-static PyObject *__pyx_k_tuple_71;
+static PyObject *__pyx_k_tuple_55;
+static PyObject *__pyx_k_tuple_57;
+static PyObject *__pyx_k_tuple_61;
+static PyObject *__pyx_k_tuple_63;
 static PyObject *__pyx_k_tuple_73;
 static PyObject *__pyx_k_tuple_75;
 static PyObject *__pyx_k_tuple_77;
-static PyObject *__pyx_k_tuple_78;
 static PyObject *__pyx_k_tuple_79;
 static PyObject *__pyx_k_tuple_80;
-static PyObject *__pyx_k_tuple_91;
-static PyObject *__pyx_k_tuple_94;
-static PyObject *__pyx_k_tuple_95;
+static PyObject *__pyx_k_tuple_81;
+static PyObject *__pyx_k_tuple_82;
+static PyObject *__pyx_k_tuple_93;
 static PyObject *__pyx_k_tuple_96;
-static PyObject *__pyx_k_tuple_100;
-static PyObject *__pyx_k_tuple_105;
-static PyObject *__pyx_k_tuple_120;
-static PyObject *__pyx_k_tuple_132;
-static PyObject *__pyx_k_tuple_136;
-static PyObject *__pyx_k_tuple_137;
-static PyObject *__pyx_k_codeobj_133;
-static PyObject *__pyx_k_codeobj_138;
+static PyObject *__pyx_k_tuple_97;
+static PyObject *__pyx_k_tuple_98;
+static PyObject *__pyx_k_tuple_102;
+static PyObject *__pyx_k_tuple_107;
+static PyObject *__pyx_k_tuple_122;
+static PyObject *__pyx_k_tuple_134;
+static PyObject *__pyx_k_tuple_138;
+static PyObject *__pyx_k_tuple_139;
+static PyObject *__pyx_k_codeobj_135;
+static PyObject *__pyx_k_codeobj_140;
 
 /* "_sa.pyx":5
  * import gzip
@@ -6290,27 +6352,30 @@ static int __pyx_pw_3_sa_9DataArray_1__cinit__(PyObject *__pyx_v_self, PyObject
 static int __pyx_pw_3_sa_9DataArray_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
   PyObject *__pyx_v_from_binary = 0;
   PyObject *__pyx_v_from_text = 0;
+  PyObject *__pyx_v_side = 0;
   int __pyx_v_use_sent_id;
   int __pyx_r;
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0);
   {
-    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__from_binary,&__pyx_n_s__from_text,&__pyx_n_s__use_sent_id,0};
-    PyObject* values[3] = {0,0,0};
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__from_binary,&__pyx_n_s__from_text,&__pyx_n_s__side,&__pyx_n_s__use_sent_id,0};
+    PyObject* values[4] = {0,0,0,0};
 
     /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":17
  *     cdef bint use_sent_id
  * 
- *     def __cinit__(self, from_binary=None, from_text=None, bint use_sent_id=False):             # <<<<<<<<<<<<<<
+ *     def __cinit__(self, from_binary=None, from_text=None, side=None, bint use_sent_id=False):             # <<<<<<<<<<<<<<
  *         self.word2id = {"END_OF_FILE":0, "END_OF_LINE":1}
  *         self.id2word = ["END_OF_FILE", "END_OF_LINE"]
  */
     values[0] = ((PyObject *)Py_None);
     values[1] = ((PyObject *)Py_None);
+    values[2] = ((PyObject *)Py_None);
     if (unlikely(__pyx_kwds)) {
       Py_ssize_t kw_args;
       const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
       switch (pos_args) {
+        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
         case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
         case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
         case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
@@ -6331,15 +6396,21 @@ static int __pyx_pw_3_sa_9DataArray_1__cinit__(PyObject *__pyx_v_self, PyObject
         }
         case  2:
         if (kw_args > 0) {
-          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__use_sent_id);
+          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__side);
           if (value) { values[2] = value; kw_args--; }
         }
+        case  3:
+        if (kw_args > 0) {
+          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__use_sent_id);
+          if (value) { values[3] = value; kw_args--; }
+        }
       }
       if (unlikely(kw_args > 0)) {
         if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
       }
     } else {
       switch (PyTuple_GET_SIZE(__pyx_args)) {
+        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
         case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
         case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
         case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
@@ -6349,32 +6420,34 @@ static int __pyx_pw_3_sa_9DataArray_1__cinit__(PyObject *__pyx_v_self, PyObject
     }
     __pyx_v_from_binary = values[0];
     __pyx_v_from_text = values[1];
-    if (values[2]) {
-      __pyx_v_use_sent_id = __Pyx_PyObject_IsTrue(values[2]); if (unlikely((__pyx_v_use_sent_id == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    __pyx_v_side = values[2];
+    if (values[3]) {
+      __pyx_v_use_sent_id = __Pyx_PyObject_IsTrue(values[3]); if (unlikely((__pyx_v_use_sent_id == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
     } else {
       __pyx_v_use_sent_id = ((int)0);
     }
   }
   goto __pyx_L4_argument_unpacking_done;
   __pyx_L5_argtuple_error:;
-  __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 0, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+  __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 0, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
   __pyx_L3_error:;
   __Pyx_AddTraceback("_sa.DataArray.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __Pyx_RefNannyFinishContext();
   return -1;
   __pyx_L4_argument_unpacking_done:;
-  __pyx_r = __pyx_pf_3_sa_9DataArray___cinit__(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), __pyx_v_from_binary, __pyx_v_from_text, __pyx_v_use_sent_id);
+  __pyx_r = __pyx_pf_3_sa_9DataArray___cinit__(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), __pyx_v_from_binary, __pyx_v_from_text, __pyx_v_side, __pyx_v_use_sent_id);
   __Pyx_RefNannyFinishContext();
   return __pyx_r;
 }
 
-static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text, int __pyx_v_use_sent_id) {
+static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text, PyObject *__pyx_v_side, int __pyx_v_use_sent_id) {
   int __pyx_r;
   __Pyx_RefNannyDeclarations
   PyObject *__pyx_t_1 = NULL;
   int __pyx_t_2;
   PyObject *__pyx_t_3 = NULL;
   PyObject *__pyx_t_4 = NULL;
+  long __pyx_t_5;
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
@@ -6382,7 +6455,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":18
  * 
- *     def __cinit__(self, from_binary=None, from_text=None, bint use_sent_id=False):
+ *     def __cinit__(self, from_binary=None, from_text=None, side=None, bint use_sent_id=False):
  *         self.word2id = {"END_OF_FILE":0, "END_OF_LINE":1}             # <<<<<<<<<<<<<<
  *         self.id2word = ["END_OF_FILE", "END_OF_LINE"]
  *         self.data = IntList(1000,1000)
@@ -6398,7 +6471,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_
   __pyx_t_1 = 0;
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":19
- *     def __cinit__(self, from_binary=None, from_text=None, bint use_sent_id=False):
+ *     def __cinit__(self, from_binary=None, from_text=None, side=None, bint use_sent_id=False):
  *         self.word2id = {"END_OF_FILE":0, "END_OF_LINE":1}
  *         self.id2word = ["END_OF_FILE", "END_OF_LINE"]             # <<<<<<<<<<<<<<
  *         self.data = IntList(1000,1000)
@@ -6487,7 +6560,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_
  *         if from_binary:
  *             self.read_binary(from_binary)             # <<<<<<<<<<<<<<
  *         elif from_text:
- *             self.read_text(from_text)
+ *             if side:
  */
     __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_binary); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_1);
@@ -6508,8 +6581,8 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_
  *         if from_binary:
  *             self.read_binary(from_binary)
  *         elif from_text:             # <<<<<<<<<<<<<<
- *             self.read_text(from_text)
- * 
+ *             if side:
+ *                 self.read_bitext(from_text, (0 if side == 'source' else 1))
  */
   __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_from_text); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   if (__pyx_t_2) {
@@ -6517,22 +6590,68 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_
     /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":27
  *             self.read_binary(from_binary)
  *         elif from_text:
- *             self.read_text(from_text)             # <<<<<<<<<<<<<<
+ *             if side:             # <<<<<<<<<<<<<<
+ *                 self.read_bitext(from_text, (0 if side == 'source' else 1))
+ *             else:
+ */
+    __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_side); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    if (__pyx_t_2) {
+
+      /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":28
+ *         elif from_text:
+ *             if side:
+ *                 self.read_bitext(from_text, (0 if side == 'source' else 1))             # <<<<<<<<<<<<<<
+ *             else:
+ *                 self.read_text(from_text)
+ */
+      __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_bitext); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_4);
+      __pyx_t_2 = __Pyx_PyString_Equals(__pyx_v_side, ((PyObject *)__pyx_n_s__source), Py_EQ); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      if (__pyx_t_2) {
+        __pyx_t_5 = 0;
+      } else {
+        __pyx_t_5 = 1;
+      }
+      __pyx_t_3 = PyInt_FromLong(__pyx_t_5); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_1);
+      __Pyx_INCREF(__pyx_v_from_text);
+      PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_from_text);
+      __Pyx_GIVEREF(__pyx_v_from_text);
+      PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_t_3);
+      __Pyx_GIVEREF(__pyx_t_3);
+      __pyx_t_3 = 0;
+      __pyx_t_3 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+      __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      goto __pyx_L4;
+    }
+    /*else*/ {
+
+      /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":30
+ *                 self.read_bitext(from_text, (0 if side == 'source' else 1))
+ *             else:
+ *                 self.read_text(from_text)             # <<<<<<<<<<<<<<
  * 
  *     def __len__(self):
  */
-    __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_text); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_4);
-    __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_3);
-    __Pyx_INCREF(__pyx_v_from_text);
-    PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_from_text);
-    __Pyx_GIVEREF(__pyx_v_from_text);
-    __pyx_t_1 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __Pyx_GOTREF(__pyx_t_1);
-    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
-    __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
-    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+      __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_text); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_1);
+      __Pyx_INCREF(__pyx_v_from_text);
+      PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_from_text);
+      __Pyx_GIVEREF(__pyx_v_from_text);
+      __pyx_t_4 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_4);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+    }
+    __pyx_L4:;
     goto __pyx_L3;
   }
   __pyx_L3:;
@@ -6561,8 +6680,8 @@ static Py_ssize_t __pyx_pw_3_sa_9DataArray_3__len__(PyObject *__pyx_v_self) {
   return __pyx_r;
 }
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":29
- *             self.read_text(from_text)
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":32
+ *                 self.read_text(from_text)
  * 
  *     def __len__(self):             # <<<<<<<<<<<<<<
  *         return len(self.data)
@@ -6579,7 +6698,7 @@ static Py_ssize_t __pyx_pf_3_sa_9DataArray_2__len__(struct __pyx_obj_3_sa_DataAr
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__len__", 0);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":30
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":33
  * 
  *     def __len__(self):
  *         return len(self.data)             # <<<<<<<<<<<<<<
@@ -6588,7 +6707,7 @@ static Py_ssize_t __pyx_pf_3_sa_9DataArray_2__len__(struct __pyx_obj_3_sa_DataAr
  */
   __pyx_t_1 = ((PyObject *)__pyx_v_self->data);
   __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_2 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
   __pyx_r = __pyx_t_2;
   goto __pyx_L0;
@@ -6615,7 +6734,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_5getSentId(PyObject *__pyx_v_self, PyO
   return __pyx_r;
 }
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":32
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":35
  *         return len(self.data)
  * 
  *     def getSentId(self, i):             # <<<<<<<<<<<<<<
@@ -6633,7 +6752,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_4getSentId(struct __pyx_obj_3_sa_DataA
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("getSentId", 0);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":33
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":36
  * 
  *     def getSentId(self, i):
  *         return self.sent_id.arr[i]             # <<<<<<<<<<<<<<
@@ -6641,8 +6760,8 @@ static PyObject *__pyx_pf_3_sa_9DataArray_4getSentId(struct __pyx_obj_3_sa_DataA
  *     def getSent(self, i):
  */
   __Pyx_XDECREF(__pyx_r);
-  __pyx_t_1 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_1 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __pyx_t_2 = PyInt_FromLong((__pyx_v_self->sent_id->arr[__pyx_t_1])); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_1 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_2 = PyInt_FromLong((__pyx_v_self->sent_id->arr[__pyx_t_1])); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
   __pyx_r = __pyx_t_2;
   __pyx_t_2 = 0;
@@ -6671,7 +6790,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_7getSent(PyObject *__pyx_v_self, PyObj
   return __pyx_r;
 }
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":35
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":38
  *         return self.sent_id.arr[i]
  * 
  *     def getSent(self, i):             # <<<<<<<<<<<<<<
@@ -6696,42 +6815,42 @@ static PyObject *__pyx_pf_3_sa_9DataArray_6getSent(struct __pyx_obj_3_sa_DataArr
   __Pyx_RefNannySetupContext("getSent", 0);
   __Pyx_INCREF(__pyx_v_i);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":37
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":40
  *     def getSent(self, i):
  *         cdef int j, start, stop
  *         sent = []             # <<<<<<<<<<<<<<
  *         start = self.sent_index.arr[i]
  *         stop = self.sent_index.arr[i+1]
  */
-  __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __pyx_v_sent = __pyx_t_1;
   __pyx_t_1 = 0;
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":38
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":41
  *         cdef int j, start, stop
  *         sent = []
  *         start = self.sent_index.arr[i]             # <<<<<<<<<<<<<<
  *         stop = self.sent_index.arr[i+1]
  *         for i from start <= i < stop:
  */
-  __pyx_t_2 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_2 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_2 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_2 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_v_start = (__pyx_v_self->sent_index->arr[__pyx_t_2]);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":39
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":42
  *         sent = []
  *         start = self.sent_index.arr[i]
  *         stop = self.sent_index.arr[i+1]             # <<<<<<<<<<<<<<
  *         for i from start <= i < stop:
  *             sent.append(self.id2word[self.data.arr[i]])
  */
-  __pyx_t_1 = PyNumber_Add(__pyx_v_i, __pyx_int_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyNumber_Add(__pyx_v_i, __pyx_int_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_PyIndex_AsSsize_t(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_2 = __Pyx_PyIndex_AsSsize_t(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
   __pyx_v_stop = (__pyx_v_self->sent_index->arr[__pyx_t_2]);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":40
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":43
  *         start = self.sent_index.arr[i]
  *         stop = self.sent_index.arr[i+1]
  *         for i from start <= i < stop:             # <<<<<<<<<<<<<<
@@ -6740,41 +6859,41 @@ static PyObject *__pyx_pf_3_sa_9DataArray_6getSent(struct __pyx_obj_3_sa_DataArr
  */
   __pyx_t_3 = __pyx_v_stop;
   for (__pyx_t_4 = __pyx_v_start; __pyx_t_4 < __pyx_t_3; __pyx_t_4++) {
-    __pyx_t_1 = PyInt_FromLong(__pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_1 = PyInt_FromLong(__pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_1);
     __Pyx_DECREF(__pyx_v_i);
     __pyx_v_i = __pyx_t_1;
     __pyx_t_1 = 0;
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":41
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":44
  *         stop = self.sent_index.arr[i+1]
  *         for i from start <= i < stop:
  *             sent.append(self.id2word[self.data.arr[i]])             # <<<<<<<<<<<<<<
  *         return sent
  * 
  */
-    __pyx_t_2 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_2 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_self->id2word, (__pyx_v_self->data->arr[__pyx_t_2]), sizeof(int), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_2 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_self->id2word, (__pyx_v_self->data->arr[__pyx_t_2]), sizeof(int), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_1);
-    __pyx_t_5 = PyList_Append(__pyx_v_sent, __pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_5 = PyList_Append(__pyx_v_sent, __pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-    __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_v_i); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_v_i); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   }
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":40
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":43
  *         start = self.sent_index.arr[i]
  *         stop = self.sent_index.arr[i+1]
  *         for i from start <= i < stop:             # <<<<<<<<<<<<<<
  *             sent.append(self.id2word[self.data.arr[i]])
  *         return sent
  */
-  __pyx_t_1 = PyInt_FromLong(__pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyInt_FromLong(__pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __Pyx_DECREF(__pyx_v_i);
   __pyx_v_i = __pyx_t_1;
   __pyx_t_1 = 0;
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":42
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":45
  *         for i from start <= i < stop:
  *             sent.append(self.id2word[self.data.arr[i]])
  *         return sent             # <<<<<<<<<<<<<<
@@ -6811,7 +6930,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_9getSentPos(PyObject *__pyx_v_self, Py
   return __pyx_r;
 }
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":44
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":47
  *         return sent
  * 
  *     def getSentPos(self, loc):             # <<<<<<<<<<<<<<
@@ -6830,7 +6949,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_8getSentPos(struct __pyx_obj_3_sa_Data
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("getSentPos", 0);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":45
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":48
  * 
  *     def getSentPos(self, loc):
  *         return loc - self.sent_index.arr[self.sent_id.arr[loc]]             # <<<<<<<<<<<<<<
@@ -6838,10 +6957,10 @@ static PyObject *__pyx_pf_3_sa_9DataArray_8getSentPos(struct __pyx_obj_3_sa_Data
  *     def get_id(self, word):
  */
   __Pyx_XDECREF(__pyx_r);
-  __pyx_t_1 = __Pyx_PyIndex_AsSsize_t(__pyx_v_loc); if (unlikely((__pyx_t_1 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __pyx_t_2 = PyInt_FromLong((__pyx_v_self->sent_index->arr[(__pyx_v_self->sent_id->arr[__pyx_t_1])])); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = __Pyx_PyIndex_AsSsize_t(__pyx_v_loc); if (unlikely((__pyx_t_1 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_2 = PyInt_FromLong((__pyx_v_self->sent_index->arr[(__pyx_v_self->sent_id->arr[__pyx_t_1])])); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
-  __pyx_t_3 = PyNumber_Subtract(__pyx_v_loc, __pyx_t_2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_3 = PyNumber_Subtract(__pyx_v_loc, __pyx_t_2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_3);
   __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
   __pyx_r = __pyx_t_3;
@@ -6872,7 +6991,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_11get_id(PyObject *__pyx_v_self, PyObj
   return __pyx_r;
 }
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":47
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":50
  *         return loc - self.sent_index.arr[self.sent_id.arr[loc]]
  * 
  *     def get_id(self, word):             # <<<<<<<<<<<<<<
@@ -6892,18 +7011,18 @@ static PyObject *__pyx_pf_3_sa_9DataArray_10get_id(struct __pyx_obj_3_sa_DataArr
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("get_id", 0);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":48
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":51
  * 
  *     def get_id(self, word):
  *         if not word in self.word2id:             # <<<<<<<<<<<<<<
  *             self.word2id[word] = len(self.id2word)
  *             self.id2word.append(word)
  */
-  __pyx_t_1 = ((PySequence_Contains(__pyx_v_self->word2id, __pyx_v_word))); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = ((PySequence_Contains(__pyx_v_self->word2id, __pyx_v_word))); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_t_2 = (!__pyx_t_1);
   if (__pyx_t_2) {
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":49
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":52
  *     def get_id(self, word):
  *         if not word in self.word2id:
  *             self.word2id[word] = len(self.id2word)             # <<<<<<<<<<<<<<
@@ -6912,28 +7031,28 @@ static PyObject *__pyx_pf_3_sa_9DataArray_10get_id(struct __pyx_obj_3_sa_DataArr
  */
     __pyx_t_3 = __pyx_v_self->id2word;
     __Pyx_INCREF(__pyx_t_3);
-    __pyx_t_4 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_4 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-    __pyx_t_3 = PyInt_FromSsize_t(__pyx_t_4); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = PyInt_FromSsize_t(__pyx_t_4); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    if (PyObject_SetItem(__pyx_v_self->word2id, __pyx_v_word, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    if (PyObject_SetItem(__pyx_v_self->word2id, __pyx_v_word, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":50
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":53
  *         if not word in self.word2id:
  *             self.word2id[word] = len(self.id2word)
  *             self.id2word.append(word)             # <<<<<<<<<<<<<<
  *         return self.word2id[word]
  * 
  */
-    __pyx_t_3 = __Pyx_PyObject_Append(__pyx_v_self->id2word, __pyx_v_word); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = __Pyx_PyObject_Append(__pyx_v_self->id2word, __pyx_v_word); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
     __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
     goto __pyx_L3;
   }
   __pyx_L3:;
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":51
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":54
  *             self.word2id[word] = len(self.id2word)
  *             self.id2word.append(word)
  *         return self.word2id[word]             # <<<<<<<<<<<<<<
@@ -6941,7 +7060,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_10get_id(struct __pyx_obj_3_sa_DataArr
  *     def get_word(self, id):
  */
   __Pyx_XDECREF(__pyx_r);
-  __pyx_t_3 = PyObject_GetItem(__pyx_v_self->word2id, __pyx_v_word); if (!__pyx_t_3) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_3 = PyObject_GetItem(__pyx_v_self->word2id, __pyx_v_word); if (!__pyx_t_3) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_3);
   __pyx_r = __pyx_t_3;
   __pyx_t_3 = 0;
@@ -6970,7 +7089,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_13get_word(PyObject *__pyx_v_self, PyO
   return __pyx_r;
 }
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":53
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":56
  *         return self.word2id[word]
  * 
  *     def get_word(self, id):             # <<<<<<<<<<<<<<
@@ -6987,7 +7106,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_12get_word(struct __pyx_obj_3_sa_DataA
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("get_word", 0);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":54
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":57
  * 
  *     def get_word(self, id):
  *         return self.id2word[id]             # <<<<<<<<<<<<<<
@@ -6995,7 +7114,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_12get_word(struct __pyx_obj_3_sa_DataA
  *     def write_text(self, char* filename):
  */
   __Pyx_XDECREF(__pyx_r);
-  __pyx_t_1 = PyObject_GetItem(__pyx_v_self->id2word, __pyx_v_id); if (!__pyx_t_1) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyObject_GetItem(__pyx_v_self->id2word, __pyx_v_id); if (!__pyx_t_1) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __pyx_r = __pyx_t_1;
   __pyx_t_1 = 0;
@@ -7021,7 +7140,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_15write_text(PyObject *__pyx_v_self, P
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("write_text (wrapper)", 0);
   assert(__pyx_arg_filename); {
-    __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
   }
   goto __pyx_L4_argument_unpacking_done;
   __pyx_L3_error:;
@@ -7034,7 +7153,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_15write_text(PyObject *__pyx_v_self, P
   return __pyx_r;
 }
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":56
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":59
  *         return self.id2word[id]
  * 
  *     def write_text(self, char* filename):             # <<<<<<<<<<<<<<
@@ -7066,7 +7185,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("write_text", 0);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":57
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":60
  * 
  *     def write_text(self, char* filename):
  *         with open(filename, "w") as f:             # <<<<<<<<<<<<<<
@@ -7074,9 +7193,9 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat
  *                 if w_id > 1:
  */
   /*with:*/ {
-    __pyx_t_1 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_1 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(((PyObject *)__pyx_t_1));
-    __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_2);
     PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_t_1));
     __Pyx_GIVEREF(((PyObject *)__pyx_t_1));
@@ -7084,14 +7203,14 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat
     PyTuple_SET_ITEM(__pyx_t_2, 1, ((PyObject *)__pyx_n_s__w));
     __Pyx_GIVEREF(((PyObject *)__pyx_n_s__w));
     __pyx_t_1 = 0;
-    __pyx_t_1 = PyObject_Call(__pyx_builtin_open, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_1 = PyObject_Call(__pyx_builtin_open, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_1);
     __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
-    __pyx_t_3 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____exit__); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____exit__); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____enter__); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____enter__); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
     __Pyx_GOTREF(__pyx_t_2);
-    __pyx_t_4 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    __pyx_t_4 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
     __Pyx_GOTREF(__pyx_t_4);
     __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
     __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
@@ -7106,7 +7225,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat
           __pyx_v_f = __pyx_t_4;
           __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
 
-          /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":58
+          /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":61
  *     def write_text(self, char* filename):
  *         with open(filename, "w") as f:
  *             for w_id in self.data:             # <<<<<<<<<<<<<<
@@ -7117,7 +7236,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat
             __pyx_t_4 = ((PyObject *)__pyx_v_self->data); __Pyx_INCREF(__pyx_t_4); __pyx_t_8 = 0;
             __pyx_t_9 = NULL;
           } else {
-            __pyx_t_8 = -1; __pyx_t_4 = PyObject_GetIter(((PyObject *)__pyx_v_self->data)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+            __pyx_t_8 = -1; __pyx_t_4 = PyObject_GetIter(((PyObject *)__pyx_v_self->data)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(__pyx_t_4);
             __pyx_t_9 = Py_TYPE(__pyx_t_4)->tp_iternext;
           }
@@ -7127,21 +7246,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat
               #if CYTHON_COMPILING_IN_CPYTHON
               __pyx_t_1 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_8); __Pyx_INCREF(__pyx_t_1); __pyx_t_8++;
               #else
-              __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L7_error;};
+              __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L7_error;};
               #endif
             } else if (!__pyx_t_9 && PyTuple_CheckExact(__pyx_t_4)) {
               if (__pyx_t_8 >= PyTuple_GET_SIZE(__pyx_t_4)) break;
               #if CYTHON_COMPILING_IN_CPYTHON
               __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_8); __Pyx_INCREF(__pyx_t_1); __pyx_t_8++;
               #else
-              __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L7_error;};
+              __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L7_error;};
               #endif
             } else {
               __pyx_t_1 = __pyx_t_9(__pyx_t_4);
               if (unlikely(!__pyx_t_1)) {
                 if (PyErr_Occurred()) {
                   if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear();
-                  else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+                  else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
                 }
                 break;
               }
@@ -7151,48 +7270,48 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat
             __pyx_v_w_id = __pyx_t_1;
             __pyx_t_1 = 0;
 
-            /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":59
+            /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":62
  *         with open(filename, "w") as f:
  *             for w_id in self.data:
  *                 if w_id > 1:             # <<<<<<<<<<<<<<
  *                     f.write("%s " % self.get_word(w_id))
  *                 if w_id == 1:
  */
-            __pyx_t_1 = PyObject_RichCompare(__pyx_v_w_id, __pyx_int_1, Py_GT); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+            __pyx_t_1 = PyObject_RichCompare(__pyx_v_w_id, __pyx_int_1, Py_GT); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(__pyx_t_1);
-            __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_10 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+            __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_10 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
             if (__pyx_t_10) {
 
-              /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":60
+              /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":63
  *             for w_id in self.data:
  *                 if w_id > 1:
  *                     f.write("%s " % self.get_word(w_id))             # <<<<<<<<<<<<<<
  *                 if w_id == 1:
  *                     f.write("\n")
  */
-              __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+              __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
               __Pyx_GOTREF(__pyx_t_1);
-              __pyx_t_2 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__get_word); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+              __pyx_t_2 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__get_word); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
               __Pyx_GOTREF(__pyx_t_2);
-              __pyx_t_11 = PyTuple_New(1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+              __pyx_t_11 = PyTuple_New(1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
               __Pyx_GOTREF(__pyx_t_11);
               __Pyx_INCREF(__pyx_v_w_id);
               PyTuple_SET_ITEM(__pyx_t_11, 0, __pyx_v_w_id);
               __Pyx_GIVEREF(__pyx_v_w_id);
-              __pyx_t_12 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_t_11), NULL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+              __pyx_t_12 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_t_11), NULL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
               __Pyx_GOTREF(__pyx_t_12);
               __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
               __Pyx_DECREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0;
-              __pyx_t_11 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_13), __pyx_t_12); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+              __pyx_t_11 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_13), __pyx_t_12); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
               __Pyx_GOTREF(((PyObject *)__pyx_t_11));
               __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0;
-              __pyx_t_12 = PyTuple_New(1); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+              __pyx_t_12 = PyTuple_New(1); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
               __Pyx_GOTREF(__pyx_t_12);
               PyTuple_SET_ITEM(__pyx_t_12, 0, ((PyObject *)__pyx_t_11));
               __Pyx_GIVEREF(((PyObject *)__pyx_t_11));
               __pyx_t_11 = 0;
-              __pyx_t_11 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_12), NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+              __pyx_t_11 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_12), NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
               __Pyx_GOTREF(__pyx_t_11);
               __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
               __Pyx_DECREF(((PyObject *)__pyx_t_12)); __pyx_t_12 = 0;
@@ -7201,29 +7320,29 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat
             }
             __pyx_L18:;
 
-            /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":61
+            /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":64
  *                 if w_id > 1:
  *                     f.write("%s " % self.get_word(w_id))
  *                 if w_id == 1:             # <<<<<<<<<<<<<<
  *                     f.write("\n")
  * 
  */
-            __pyx_t_11 = PyObject_RichCompare(__pyx_v_w_id, __pyx_int_1, Py_EQ); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+            __pyx_t_11 = PyObject_RichCompare(__pyx_v_w_id, __pyx_int_1, Py_EQ); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(__pyx_t_11);
-            __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_11); if (unlikely(__pyx_t_10 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+            __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_11); if (unlikely(__pyx_t_10 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0;
             if (__pyx_t_10) {
 
-              /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":62
+              /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":65
  *                     f.write("%s " % self.get_word(w_id))
  *                 if w_id == 1:
  *                     f.write("\n")             # <<<<<<<<<<<<<<
  * 
  *     def read_text(self, char* filename):
  */
-              __pyx_t_11 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+              __pyx_t_11 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
               __Pyx_GOTREF(__pyx_t_11);
-              __pyx_t_12 = PyObject_Call(__pyx_t_11, ((PyObject *)__pyx_k_tuple_15), NULL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+              __pyx_t_12 = PyObject_Call(__pyx_t_11, ((PyObject *)__pyx_k_tuple_15), NULL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
               __Pyx_GOTREF(__pyx_t_12);
               __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0;
               __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0;
@@ -7244,7 +7363,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat
         __Pyx_XDECREF(__pyx_t_12); __pyx_t_12 = 0;
         __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
 
-        /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":57
+        /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":60
  * 
  *     def write_text(self, char* filename):
  *         with open(filename, "w") as f:             # <<<<<<<<<<<<<<
@@ -7253,11 +7372,11 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat
  */
         /*except:*/ {
           __Pyx_AddTraceback("_sa.DataArray.write_text", __pyx_clineno, __pyx_lineno, __pyx_filename);
-          if (__Pyx_GetException(&__pyx_t_4, &__pyx_t_12, &__pyx_t_11) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          if (__Pyx_GetException(&__pyx_t_4, &__pyx_t_12, &__pyx_t_11) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
           __Pyx_GOTREF(__pyx_t_4);
           __Pyx_GOTREF(__pyx_t_12);
           __Pyx_GOTREF(__pyx_t_11);
-          __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
           __Pyx_GOTREF(__pyx_t_1);
           __Pyx_INCREF(__pyx_t_4);
           PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_4);
@@ -7270,11 +7389,11 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat
           __Pyx_GIVEREF(__pyx_t_11);
           __pyx_t_13 = PyObject_Call(__pyx_t_3, __pyx_t_1, NULL);
           __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-          if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
           __Pyx_GOTREF(__pyx_t_13);
           __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_13);
           __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
-          if (unlikely(__pyx_t_10 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          if (unlikely(__pyx_t_10 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
           __pyx_t_14 = (!__pyx_t_10);
           if (__pyx_t_14) {
             __Pyx_GIVEREF(__pyx_t_4);
@@ -7282,7 +7401,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat
             __Pyx_GIVEREF(__pyx_t_11);
             __Pyx_ErrRestore(__pyx_t_4, __pyx_t_12, __pyx_t_11);
             __pyx_t_4 = 0; __pyx_t_12 = 0; __pyx_t_11 = 0; 
-            {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+            {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
             goto __pyx_L22;
           }
           __pyx_L22:;
@@ -7310,11 +7429,11 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat
       if (__pyx_t_3) {
         __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_16, NULL);
         __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-        if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_7);
         __pyx_t_14 = __Pyx_PyObject_IsTrue(__pyx_t_7);
         __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
-        if (unlikely(__pyx_t_14 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        if (unlikely(__pyx_t_14 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       }
     }
     goto __pyx_L23;
@@ -7350,7 +7469,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_17read_text(PyObject *__pyx_v_self, Py
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("read_text (wrapper)", 0);
   assert(__pyx_arg_filename); {
-    __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
   }
   goto __pyx_L4_argument_unpacking_done;
   __pyx_L3_error:;
@@ -7363,20 +7482,16 @@ static PyObject *__pyx_pw_3_sa_9DataArray_17read_text(PyObject *__pyx_v_self, Py
   return __pyx_r;
 }
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":64
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":67
  *                     f.write("\n")
  * 
  *     def read_text(self, char* filename):             # <<<<<<<<<<<<<<
- *         cdef int word_count = 0
  *         with gzip_or_text(filename) as fp:
+ *             self.read_text_data(fp)
  */
 
 static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename) {
-  int __pyx_v_word_count;
   PyObject *__pyx_v_fp = NULL;
-  PyObject *__pyx_v_line_num = NULL;
-  PyObject *__pyx_v_line = NULL;
-  PyObject *__pyx_v_word = NULL;
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
   PyObject *__pyx_t_1 = NULL;
@@ -7386,56 +7501,446 @@ static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_Data
   PyObject *__pyx_t_5 = NULL;
   PyObject *__pyx_t_6 = NULL;
   PyObject *__pyx_t_7 = NULL;
-  Py_ssize_t __pyx_t_8;
-  PyObject *(*__pyx_t_9)(PyObject *);
+  PyObject *__pyx_t_8 = NULL;
+  int __pyx_t_9;
   PyObject *__pyx_t_10 = NULL;
-  Py_ssize_t __pyx_t_11;
-  PyObject *(*__pyx_t_12)(PyObject *);
-  PyObject *__pyx_t_13 = NULL;
-  PyObject *__pyx_t_14 = NULL;
-  int __pyx_t_15;
-  PyObject *__pyx_t_16 = NULL;
-  int __pyx_t_17;
+  int __pyx_t_11;
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("read_text", 0);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":65
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":68
  * 
  *     def read_text(self, char* filename):
- *         cdef int word_count = 0             # <<<<<<<<<<<<<<
+ *         with gzip_or_text(filename) as fp:             # <<<<<<<<<<<<<<
+ *             self.read_text_data(fp)
+ * 
+ */
+  /*with:*/ {
+    __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__gzip_or_text); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_1);
+    __pyx_t_2 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(((PyObject *)__pyx_t_2));
+    __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_3);
+    PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_2));
+    __Pyx_GIVEREF(((PyObject *)__pyx_t_2));
+    __pyx_t_2 = 0;
+    __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
+    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
+    __pyx_t_4 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s____exit__); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_4);
+    __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s____enter__); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    __Pyx_GOTREF(__pyx_t_3);
+    __pyx_t_1 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    __Pyx_GOTREF(__pyx_t_1);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    /*try:*/ {
+      {
+        __Pyx_ExceptionSave(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7);
+        __Pyx_XGOTREF(__pyx_t_5);
+        __Pyx_XGOTREF(__pyx_t_6);
+        __Pyx_XGOTREF(__pyx_t_7);
+        /*try:*/ {
+          __Pyx_INCREF(__pyx_t_1);
+          __pyx_v_fp = __pyx_t_1;
+          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+          /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":69
+ *     def read_text(self, char* filename):
  *         with gzip_or_text(filename) as fp:
- *             for line_num, line in enumerate(fp):
+ *             self.read_text_data(fp)             # <<<<<<<<<<<<<<
+ * 
+ *     def read_bitext(self, char* filename, int side):
  */
-  __pyx_v_word_count = 0;
+          __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_text_data); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __Pyx_GOTREF(__pyx_t_1);
+          __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __Pyx_GOTREF(__pyx_t_2);
+          __Pyx_INCREF(__pyx_v_fp);
+          PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_fp);
+          __Pyx_GIVEREF(__pyx_v_fp);
+          __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __Pyx_GOTREF(__pyx_t_3);
+          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+          __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+          __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+        }
+        __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+        __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
+        __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
+        goto __pyx_L14_try_end;
+        __pyx_L7_error:;
+        __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
+        __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
+        __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":66
+        /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":68
+ * 
  *     def read_text(self, char* filename):
- *         cdef int word_count = 0
  *         with gzip_or_text(filename) as fp:             # <<<<<<<<<<<<<<
- *             for line_num, line in enumerate(fp):
- *                 self.sent_index.append(word_count)
+ *             self.read_text_data(fp)
+ * 
+ */
+        /*except:*/ {
+          __Pyx_AddTraceback("_sa.DataArray.read_text", __pyx_clineno, __pyx_lineno, __pyx_filename);
+          if (__Pyx_GetException(&__pyx_t_3, &__pyx_t_2, &__pyx_t_1) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          __Pyx_GOTREF(__pyx_t_3);
+          __Pyx_GOTREF(__pyx_t_2);
+          __Pyx_GOTREF(__pyx_t_1);
+          __pyx_t_8 = PyTuple_New(3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          __Pyx_GOTREF(__pyx_t_8);
+          __Pyx_INCREF(__pyx_t_3);
+          PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_3);
+          __Pyx_GIVEREF(__pyx_t_3);
+          __Pyx_INCREF(__pyx_t_2);
+          PyTuple_SET_ITEM(__pyx_t_8, 1, __pyx_t_2);
+          __Pyx_GIVEREF(__pyx_t_2);
+          __Pyx_INCREF(__pyx_t_1);
+          PyTuple_SET_ITEM(__pyx_t_8, 2, __pyx_t_1);
+          __Pyx_GIVEREF(__pyx_t_1);
+          __pyx_t_10 = PyObject_Call(__pyx_t_4, __pyx_t_8, NULL);
+          __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+          if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          __Pyx_GOTREF(__pyx_t_10);
+          __pyx_t_9 = __Pyx_PyObject_IsTrue(__pyx_t_10);
+          __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
+          if (unlikely(__pyx_t_9 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          __pyx_t_11 = (!__pyx_t_9);
+          if (__pyx_t_11) {
+            __Pyx_GIVEREF(__pyx_t_3);
+            __Pyx_GIVEREF(__pyx_t_2);
+            __Pyx_GIVEREF(__pyx_t_1);
+            __Pyx_ErrRestore(__pyx_t_3, __pyx_t_2, __pyx_t_1);
+            __pyx_t_3 = 0; __pyx_t_2 = 0; __pyx_t_1 = 0; 
+            {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+            goto __pyx_L18;
+          }
+          __pyx_L18:;
+          __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0;
+          __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+          __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+          goto __pyx_L8_exception_handled;
+        }
+        __pyx_L9_except_error:;
+        __Pyx_XGIVEREF(__pyx_t_5);
+        __Pyx_XGIVEREF(__pyx_t_6);
+        __Pyx_XGIVEREF(__pyx_t_7);
+        __Pyx_ExceptionReset(__pyx_t_5, __pyx_t_6, __pyx_t_7);
+        goto __pyx_L1_error;
+        __pyx_L8_exception_handled:;
+        __Pyx_XGIVEREF(__pyx_t_5);
+        __Pyx_XGIVEREF(__pyx_t_6);
+        __Pyx_XGIVEREF(__pyx_t_7);
+        __Pyx_ExceptionReset(__pyx_t_5, __pyx_t_6, __pyx_t_7);
+        __pyx_L14_try_end:;
+      }
+    }
+    /*finally:*/ {
+      if (__pyx_t_4) {
+        __pyx_t_7 = PyObject_Call(__pyx_t_4, __pyx_k_tuple_17, NULL);
+        __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+        if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __Pyx_GOTREF(__pyx_t_7);
+        __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_7);
+        __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+        if (unlikely(__pyx_t_11 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      }
+    }
+    goto __pyx_L19;
+    __pyx_L3_error:;
+    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+    goto __pyx_L1_error;
+    __pyx_L19:;
+  }
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_8);
+  __Pyx_AddTraceback("_sa.DataArray.read_text", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XDECREF(__pyx_v_fp);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_3_sa_9DataArray_19read_bitext(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static PyObject *__pyx_pw_3_sa_9DataArray_19read_bitext(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+  char *__pyx_v_filename;
+  int __pyx_v_side;
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("read_bitext (wrapper)", 0);
+  {
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__filename,&__pyx_n_s__side,0};
+    PyObject* values[2] = {0,0};
+    if (unlikely(__pyx_kwds)) {
+      Py_ssize_t kw_args;
+      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+      switch (pos_args) {
+        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+        case  0: break;
+        default: goto __pyx_L5_argtuple_error;
+      }
+      kw_args = PyDict_Size(__pyx_kwds);
+      switch (pos_args) {
+        case  0:
+        if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__filename)) != 0)) kw_args--;
+        else goto __pyx_L5_argtuple_error;
+        case  1:
+        if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__side)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("read_bitext", 1, 2, 2, 1); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+        }
+      }
+      if (unlikely(kw_args > 0)) {
+        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "read_bitext") < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+      }
+    } else if (PyTuple_GET_SIZE(__pyx_args) != 2) {
+      goto __pyx_L5_argtuple_error;
+    } else {
+      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+    }
+    __pyx_v_filename = PyBytes_AsString(values[0]); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    __pyx_v_side = __Pyx_PyInt_AsInt(values[1]); if (unlikely((__pyx_v_side == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+  }
+  goto __pyx_L4_argument_unpacking_done;
+  __pyx_L5_argtuple_error:;
+  __Pyx_RaiseArgtupleInvalid("read_bitext", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+  __pyx_L3_error:;
+  __Pyx_AddTraceback("_sa.DataArray.read_bitext", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __Pyx_RefNannyFinishContext();
+  return NULL;
+  __pyx_L4_argument_unpacking_done:;
+  __pyx_r = __pyx_pf_3_sa_9DataArray_18read_bitext(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), __pyx_v_filename, __pyx_v_side);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+static PyObject *__pyx_gb_3_sa_9DataArray_11read_bitext_2generator3(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */
+
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":73
+ *     def read_bitext(self, char* filename, int side):
+ *         with gzip_or_text(filename) as fp:
+ *             data = (line.split(' ||| ')[side] for line in fp)             # <<<<<<<<<<<<<<
+ *             self.read_text_data(data)
+ * 
+ */
+
+static PyObject *__pyx_pf_3_sa_9DataArray_11read_bitext_genexpr(PyObject *__pyx_self) {
+  struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *__pyx_cur_scope;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("genexpr", 0);
+  __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *)__pyx_ptype_3_sa___pyx_scope_struct_1_genexpr->tp_new(__pyx_ptype_3_sa___pyx_scope_struct_1_genexpr, __pyx_empty_tuple, NULL);
+  if (unlikely(!__pyx_cur_scope)) {
+    __Pyx_RefNannyFinishContext();
+    return NULL;
+  }
+  __Pyx_GOTREF(__pyx_cur_scope);
+  __pyx_cur_scope->__pyx_outer_scope = (struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *) __pyx_self;
+  __Pyx_INCREF(((PyObject *)__pyx_cur_scope->__pyx_outer_scope));
+  __Pyx_GIVEREF(__pyx_cur_scope->__pyx_outer_scope);
+  {
+    __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_3_sa_9DataArray_11read_bitext_2generator3, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_DECREF(__pyx_cur_scope);
+    __Pyx_RefNannyFinishContext();
+    return (PyObject *) gen;
+  }
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_AddTraceback("_sa.DataArray.read_bitext.genexpr", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_DECREF(((PyObject *)__pyx_cur_scope));
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_gb_3_sa_9DataArray_11read_bitext_2generator3(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */
+{
+  struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *__pyx_cur_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *)__pyx_generator->closure);
+  PyObject *__pyx_r = NULL;
+  PyObject *__pyx_t_1 = NULL;
+  Py_ssize_t __pyx_t_2;
+  PyObject *(*__pyx_t_3)(PyObject *);
+  PyObject *__pyx_t_4 = NULL;
+  PyObject *__pyx_t_5 = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("None", 0);
+  switch (__pyx_generator->resume_label) {
+    case 0: goto __pyx_L3_first_run;
+    case 1: goto __pyx_L6_resume_from_yield;
+    default: /* CPython raises the right error here */
+    __Pyx_RefNannyFinishContext();
+    return NULL;
+  }
+  __pyx_L3_first_run:;
+  if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  if (unlikely(!__pyx_cur_scope->__pyx_outer_scope->__pyx_v_fp)) { __Pyx_RaiseClosureNameError("fp"); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} }
+  if (PyList_CheckExact(__pyx_cur_scope->__pyx_outer_scope->__pyx_v_fp) || PyTuple_CheckExact(__pyx_cur_scope->__pyx_outer_scope->__pyx_v_fp)) {
+    __pyx_t_1 = __pyx_cur_scope->__pyx_outer_scope->__pyx_v_fp; __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0;
+    __pyx_t_3 = NULL;
+  } else {
+    __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_cur_scope->__pyx_outer_scope->__pyx_v_fp); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_1);
+    __pyx_t_3 = Py_TYPE(__pyx_t_1)->tp_iternext;
+  }
+  for (;;) {
+    if (!__pyx_t_3 && PyList_CheckExact(__pyx_t_1)) {
+      if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_1)) break;
+      #if CYTHON_COMPILING_IN_CPYTHON
+      __pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++;
+      #else
+      __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+      #endif
+    } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_1)) {
+      if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break;
+      #if CYTHON_COMPILING_IN_CPYTHON
+      __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++;
+      #else
+      __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+      #endif
+    } else {
+      __pyx_t_4 = __pyx_t_3(__pyx_t_1);
+      if (unlikely(!__pyx_t_4)) {
+        if (PyErr_Occurred()) {
+          if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear();
+          else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        }
+        break;
+      }
+      __Pyx_GOTREF(__pyx_t_4);
+    }
+    __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_line);
+    __Pyx_XDECREF(__pyx_cur_scope->__pyx_v_line);
+    __Pyx_GIVEREF(__pyx_t_4);
+    __pyx_cur_scope->__pyx_v_line = __pyx_t_4;
+    __pyx_t_4 = 0;
+    __pyx_t_4 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_line, __pyx_n_s__split); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_4);
+    __pyx_t_5 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_19), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_5);
+    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+    __pyx_t_4 = __Pyx_GetItemInt(__pyx_t_5, __pyx_cur_scope->__pyx_outer_scope->__pyx_v_side, sizeof(int), PyInt_FromLong); if (!__pyx_t_4) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_4);
+    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+    __pyx_r = __pyx_t_4;
+    __pyx_t_4 = 0;
+    __Pyx_XGIVEREF(__pyx_t_1);
+    __pyx_cur_scope->__pyx_t_0 = __pyx_t_1;
+    __pyx_cur_scope->__pyx_t_1 = __pyx_t_2;
+    __pyx_cur_scope->__pyx_t_2 = __pyx_t_3;
+    __Pyx_XGIVEREF(__pyx_r);
+    __Pyx_RefNannyFinishContext();
+    /* return from generator, yielding value */
+    __pyx_generator->resume_label = 1;
+    return __pyx_r;
+    __pyx_L6_resume_from_yield:;
+    __pyx_t_1 = __pyx_cur_scope->__pyx_t_0;
+    __pyx_cur_scope->__pyx_t_0 = 0;
+    __Pyx_XGOTREF(__pyx_t_1);
+    __pyx_t_2 = __pyx_cur_scope->__pyx_t_1;
+    __pyx_t_3 = __pyx_cur_scope->__pyx_t_2;
+    if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  }
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  PyErr_SetNone(PyExc_StopIteration);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_AddTraceback("genexpr", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_L0:;
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_generator->resume_label = -1;
+  __Pyx_Generator_clear((PyObject*)__pyx_generator);
+  __Pyx_RefNannyFinishContext();
+  return NULL;
+}
+
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":71
+ *             self.read_text_data(fp)
+ * 
+ *     def read_bitext(self, char* filename, int side):             # <<<<<<<<<<<<<<
+ *         with gzip_or_text(filename) as fp:
+ *             data = (line.split(' ||| ')[side] for line in fp)
+ */
+
+static PyObject *__pyx_pf_3_sa_9DataArray_18read_bitext(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename, int __pyx_v_side) {
+  struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *__pyx_cur_scope;
+  PyObject *__pyx_v_data = NULL;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  PyObject *__pyx_t_4 = NULL;
+  PyObject *__pyx_t_5 = NULL;
+  PyObject *__pyx_t_6 = NULL;
+  PyObject *__pyx_t_7 = NULL;
+  PyObject *__pyx_t_8 = NULL;
+  int __pyx_t_9;
+  PyObject *__pyx_t_10 = NULL;
+  int __pyx_t_11;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("read_bitext", 0);
+  __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *)__pyx_ptype_3_sa___pyx_scope_struct__read_bitext->tp_new(__pyx_ptype_3_sa___pyx_scope_struct__read_bitext, __pyx_empty_tuple, NULL);
+  if (unlikely(!__pyx_cur_scope)) {
+    __Pyx_RefNannyFinishContext();
+    return NULL;
+  }
+  __Pyx_GOTREF(__pyx_cur_scope);
+  __pyx_cur_scope->__pyx_v_side = __pyx_v_side;
+
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":72
+ * 
+ *     def read_bitext(self, char* filename, int side):
+ *         with gzip_or_text(filename) as fp:             # <<<<<<<<<<<<<<
+ *             data = (line.split(' ||| ')[side] for line in fp)
+ *             self.read_text_data(data)
  */
   /*with:*/ {
-    __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__gzip_or_text); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__gzip_or_text); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_1);
-    __pyx_t_2 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(((PyObject *)__pyx_t_2));
-    __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
     PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_2));
     __Pyx_GIVEREF(((PyObject *)__pyx_t_2));
     __pyx_t_2 = 0;
-    __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_2);
     __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
     __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
-    __pyx_t_4 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s____exit__); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_4 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s____exit__); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_4);
-    __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s____enter__); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s____enter__); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    __pyx_t_1 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    __pyx_t_1 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
     __Pyx_GOTREF(__pyx_t_1);
     __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
     __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
@@ -7447,315 +7952,97 @@ static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_Data
         __Pyx_XGOTREF(__pyx_t_7);
         /*try:*/ {
           __Pyx_INCREF(__pyx_t_1);
-          __pyx_v_fp = __pyx_t_1;
+          __Pyx_GIVEREF(__pyx_t_1);
+          __pyx_cur_scope->__pyx_v_fp = __pyx_t_1;
           __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
 
-          /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":67
- *         cdef int word_count = 0
- *         with gzip_or_text(filename) as fp:
- *             for line_num, line in enumerate(fp):             # <<<<<<<<<<<<<<
- *                 self.sent_index.append(word_count)
- *                 for word in line.split():
- */
-          __Pyx_INCREF(__pyx_int_0);
-          __pyx_t_1 = __pyx_int_0;
-          if (PyList_CheckExact(__pyx_v_fp) || PyTuple_CheckExact(__pyx_v_fp)) {
-            __pyx_t_2 = __pyx_v_fp; __Pyx_INCREF(__pyx_t_2); __pyx_t_8 = 0;
-            __pyx_t_9 = NULL;
-          } else {
-            __pyx_t_8 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_fp); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-            __Pyx_GOTREF(__pyx_t_2);
-            __pyx_t_9 = Py_TYPE(__pyx_t_2)->tp_iternext;
-          }
-          for (;;) {
-            if (!__pyx_t_9 && PyList_CheckExact(__pyx_t_2)) {
-              if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_2)) break;
-              #if CYTHON_COMPILING_IN_CPYTHON
-              __pyx_t_3 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_8); __Pyx_INCREF(__pyx_t_3); __pyx_t_8++;
-              #else
-              __pyx_t_3 = PySequence_ITEM(__pyx_t_2, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L7_error;};
-              #endif
-            } else if (!__pyx_t_9 && PyTuple_CheckExact(__pyx_t_2)) {
-              if (__pyx_t_8 >= PyTuple_GET_SIZE(__pyx_t_2)) break;
-              #if CYTHON_COMPILING_IN_CPYTHON
-              __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_8); __Pyx_INCREF(__pyx_t_3); __pyx_t_8++;
-              #else
-              __pyx_t_3 = PySequence_ITEM(__pyx_t_2, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L7_error;};
-              #endif
-            } else {
-              __pyx_t_3 = __pyx_t_9(__pyx_t_2);
-              if (unlikely(!__pyx_t_3)) {
-                if (PyErr_Occurred()) {
-                  if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear();
-                  else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-                }
-                break;
-              }
-              __Pyx_GOTREF(__pyx_t_3);
-            }
-            __Pyx_XDECREF(__pyx_v_line);
-            __pyx_v_line = __pyx_t_3;
-            __pyx_t_3 = 0;
-            __Pyx_INCREF(__pyx_t_1);
-            __Pyx_XDECREF(__pyx_v_line_num);
-            __pyx_v_line_num = __pyx_t_1;
-            __pyx_t_3 = PyNumber_Add(__pyx_t_1, __pyx_int_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-            __Pyx_GOTREF(__pyx_t_3);
-            __Pyx_DECREF(__pyx_t_1);
-            __pyx_t_1 = __pyx_t_3;
-            __pyx_t_3 = 0;
-
-            /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":68
+          /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":73
+ *     def read_bitext(self, char* filename, int side):
  *         with gzip_or_text(filename) as fp:
- *             for line_num, line in enumerate(fp):
- *                 self.sent_index.append(word_count)             # <<<<<<<<<<<<<<
- *                 for word in line.split():
- *                     self.data.append(self.get_id(word))
- */
-            __pyx_t_3 = PyInt_FromLong(__pyx_v_word_count); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-            __Pyx_GOTREF(__pyx_t_3);
-            __pyx_t_10 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_index), __pyx_t_3); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-            __Pyx_GOTREF(__pyx_t_10);
-            __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-            __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
-
-            /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":69
- *             for line_num, line in enumerate(fp):
- *                 self.sent_index.append(word_count)
- *                 for word in line.split():             # <<<<<<<<<<<<<<
- *                     self.data.append(self.get_id(word))
- *                     if self.use_sent_id:
- */
-            __pyx_t_10 = PyObject_GetAttr(__pyx_v_line, __pyx_n_s__split); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-            __Pyx_GOTREF(__pyx_t_10);
-            __pyx_t_3 = PyObject_Call(__pyx_t_10, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-            __Pyx_GOTREF(__pyx_t_3);
-            __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
-            if (PyList_CheckExact(__pyx_t_3) || PyTuple_CheckExact(__pyx_t_3)) {
-              __pyx_t_10 = __pyx_t_3; __Pyx_INCREF(__pyx_t_10); __pyx_t_11 = 0;
-              __pyx_t_12 = NULL;
-            } else {
-              __pyx_t_11 = -1; __pyx_t_10 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-              __Pyx_GOTREF(__pyx_t_10);
-              __pyx_t_12 = Py_TYPE(__pyx_t_10)->tp_iternext;
-            }
-            __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-            for (;;) {
-              if (!__pyx_t_12 && PyList_CheckExact(__pyx_t_10)) {
-                if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_10)) break;
-                #if CYTHON_COMPILING_IN_CPYTHON
-                __pyx_t_3 = PyList_GET_ITEM(__pyx_t_10, __pyx_t_11); __Pyx_INCREF(__pyx_t_3); __pyx_t_11++;
-                #else
-                __pyx_t_3 = PySequence_ITEM(__pyx_t_10, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;};
-                #endif
-              } else if (!__pyx_t_12 && PyTuple_CheckExact(__pyx_t_10)) {
-                if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_10)) break;
-                #if CYTHON_COMPILING_IN_CPYTHON
-                __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_10, __pyx_t_11); __Pyx_INCREF(__pyx_t_3); __pyx_t_11++;
-                #else
-                __pyx_t_3 = PySequence_ITEM(__pyx_t_10, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;};
-                #endif
-              } else {
-                __pyx_t_3 = __pyx_t_12(__pyx_t_10);
-                if (unlikely(!__pyx_t_3)) {
-                  if (PyErr_Occurred()) {
-                    if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear();
-                    else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-                  }
-                  break;
-                }
-                __Pyx_GOTREF(__pyx_t_3);
-              }
-              __Pyx_XDECREF(__pyx_v_word);
-              __pyx_v_word = __pyx_t_3;
-              __pyx_t_3 = 0;
-
-              /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":70
- *                 self.sent_index.append(word_count)
- *                 for word in line.split():
- *                     self.data.append(self.get_id(word))             # <<<<<<<<<<<<<<
- *                     if self.use_sent_id:
- *                         self.sent_id.append(line_num)
- */
-              __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__get_id); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-              __Pyx_GOTREF(__pyx_t_3);
-              __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-              __Pyx_GOTREF(__pyx_t_13);
-              __Pyx_INCREF(__pyx_v_word);
-              PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_v_word);
-              __Pyx_GIVEREF(__pyx_v_word);
-              __pyx_t_14 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_13), NULL); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-              __Pyx_GOTREF(__pyx_t_14);
-              __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-              __Pyx_DECREF(((PyObject *)__pyx_t_13)); __pyx_t_13 = 0;
-              __pyx_t_13 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->data), __pyx_t_14); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-              __Pyx_GOTREF(__pyx_t_13);
-              __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0;
-              __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
-
-              /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":71
- *                 for word in line.split():
- *                     self.data.append(self.get_id(word))
- *                     if self.use_sent_id:             # <<<<<<<<<<<<<<
- *                         self.sent_id.append(line_num)
- *                     word_count = word_count + 1
- */
-              if (__pyx_v_self->use_sent_id) {
-
-                /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":72
- *                     self.data.append(self.get_id(word))
- *                     if self.use_sent_id:
- *                         self.sent_id.append(line_num)             # <<<<<<<<<<<<<<
- *                     word_count = word_count + 1
- *                 self.data.append(1)
- */
-                __pyx_t_13 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_id), __pyx_v_line_num); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-                __Pyx_GOTREF(__pyx_t_13);
-                __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
-                goto __pyx_L20;
-              }
-              __pyx_L20:;
-
-              /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":73
- *                     if self.use_sent_id:
- *                         self.sent_id.append(line_num)
- *                     word_count = word_count + 1             # <<<<<<<<<<<<<<
- *                 self.data.append(1)
- *                 if self.use_sent_id:
- */
-              __pyx_v_word_count = (__pyx_v_word_count + 1);
-            }
-            __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
-
-            /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":74
- *                         self.sent_id.append(line_num)
- *                     word_count = word_count + 1
- *                 self.data.append(1)             # <<<<<<<<<<<<<<
- *                 if self.use_sent_id:
- *                     self.sent_id.append(line_num)
- */
-            __pyx_t_10 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->data), __pyx_int_1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-            __Pyx_GOTREF(__pyx_t_10);
-            __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
-
-            /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":75
- *                     word_count = word_count + 1
- *                 self.data.append(1)
- *                 if self.use_sent_id:             # <<<<<<<<<<<<<<
- *                     self.sent_id.append(line_num)
- *                 word_count = word_count + 1
- */
-            if (__pyx_v_self->use_sent_id) {
-
-              /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":76
- *                 self.data.append(1)
- *                 if self.use_sent_id:
- *                     self.sent_id.append(line_num)             # <<<<<<<<<<<<<<
- *                 word_count = word_count + 1
- *             self.data.append(0)
- */
-              __pyx_t_10 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_id), __pyx_v_line_num); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
-              __Pyx_GOTREF(__pyx_t_10);
-              __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
-              goto __pyx_L21;
-            }
-            __pyx_L21:;
-
-            /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":77
- *                 if self.use_sent_id:
- *                     self.sent_id.append(line_num)
- *                 word_count = word_count + 1             # <<<<<<<<<<<<<<
- *             self.data.append(0)
- *             self.sent_index.append(word_count)
- */
-            __pyx_v_word_count = (__pyx_v_word_count + 1);
-          }
-          __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
-          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-
-          /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":78
- *                     self.sent_id.append(line_num)
- *                 word_count = word_count + 1
- *             self.data.append(0)             # <<<<<<<<<<<<<<
- *             self.sent_index.append(word_count)
+ *             data = (line.split(' ||| ')[side] for line in fp)             # <<<<<<<<<<<<<<
+ *             self.read_text_data(data)
  * 
  */
-          __pyx_t_1 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->data), __pyx_int_0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_1 = __pyx_pf_3_sa_9DataArray_11read_bitext_genexpr(((PyObject*)__pyx_cur_scope)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_1);
-          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+          __pyx_v_data = __pyx_t_1;
+          __pyx_t_1 = 0;
 
-          /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":79
- *                 word_count = word_count + 1
- *             self.data.append(0)
- *             self.sent_index.append(word_count)             # <<<<<<<<<<<<<<
+          /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":74
+ *         with gzip_or_text(filename) as fp:
+ *             data = (line.split(' ||| ')[side] for line in fp)
+ *             self.read_text_data(data)             # <<<<<<<<<<<<<<
  * 
- *     def read_binary(self, char* filename):
+ *     def read_text_data(self, data):
  */
-          __pyx_t_1 = PyInt_FromLong(__pyx_v_word_count); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_text_data); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_2 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_index), __pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_2);
+          __Pyx_INCREF(__pyx_v_data);
+          PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_data);
+          __Pyx_GIVEREF(__pyx_v_data);
+          __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __Pyx_GOTREF(__pyx_t_3);
           __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-          __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+          __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+          __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
         }
         __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
         __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
         __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
         goto __pyx_L14_try_end;
         __pyx_L7_error:;
-        __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
-        __Pyx_XDECREF(__pyx_t_14); __pyx_t_14 = 0;
-        __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0;
-        __Pyx_XDECREF(__pyx_t_10); __pyx_t_10 = 0;
         __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
         __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
+        __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
 
-        /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":66
- *     def read_text(self, char* filename):
- *         cdef int word_count = 0
+        /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":72
+ * 
+ *     def read_bitext(self, char* filename, int side):
  *         with gzip_or_text(filename) as fp:             # <<<<<<<<<<<<<<
- *             for line_num, line in enumerate(fp):
- *                 self.sent_index.append(word_count)
+ *             data = (line.split(' ||| ')[side] for line in fp)
+ *             self.read_text_data(data)
  */
         /*except:*/ {
-          __Pyx_AddTraceback("_sa.DataArray.read_text", __pyx_clineno, __pyx_lineno, __pyx_filename);
-          if (__Pyx_GetException(&__pyx_t_2, &__pyx_t_1, &__pyx_t_10) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          __Pyx_AddTraceback("_sa.DataArray.read_bitext", __pyx_clineno, __pyx_lineno, __pyx_filename);
+          if (__Pyx_GetException(&__pyx_t_3, &__pyx_t_2, &__pyx_t_1) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          __Pyx_GOTREF(__pyx_t_3);
           __Pyx_GOTREF(__pyx_t_2);
           __Pyx_GOTREF(__pyx_t_1);
-          __Pyx_GOTREF(__pyx_t_10);
-          __pyx_t_13 = PyTuple_New(3); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
-          __Pyx_GOTREF(__pyx_t_13);
+          __pyx_t_8 = PyTuple_New(3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          __Pyx_GOTREF(__pyx_t_8);
+          __Pyx_INCREF(__pyx_t_3);
+          PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_3);
+          __Pyx_GIVEREF(__pyx_t_3);
           __Pyx_INCREF(__pyx_t_2);
-          PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_2);
+          PyTuple_SET_ITEM(__pyx_t_8, 1, __pyx_t_2);
           __Pyx_GIVEREF(__pyx_t_2);
           __Pyx_INCREF(__pyx_t_1);
-          PyTuple_SET_ITEM(__pyx_t_13, 1, __pyx_t_1);
+          PyTuple_SET_ITEM(__pyx_t_8, 2, __pyx_t_1);
           __Pyx_GIVEREF(__pyx_t_1);
-          __Pyx_INCREF(__pyx_t_10);
-          PyTuple_SET_ITEM(__pyx_t_13, 2, __pyx_t_10);
-          __Pyx_GIVEREF(__pyx_t_10);
-          __pyx_t_16 = PyObject_Call(__pyx_t_4, __pyx_t_13, NULL);
+          __pyx_t_10 = PyObject_Call(__pyx_t_4, __pyx_t_8, NULL);
           __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
-          if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
-          __Pyx_GOTREF(__pyx_t_16);
-          __pyx_t_15 = __Pyx_PyObject_IsTrue(__pyx_t_16);
-          __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0;
-          if (unlikely(__pyx_t_15 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
-          __pyx_t_17 = (!__pyx_t_15);
-          if (__pyx_t_17) {
+          if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          __Pyx_GOTREF(__pyx_t_10);
+          __pyx_t_9 = __Pyx_PyObject_IsTrue(__pyx_t_10);
+          __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
+          if (unlikely(__pyx_t_9 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          __pyx_t_11 = (!__pyx_t_9);
+          if (__pyx_t_11) {
+            __Pyx_GIVEREF(__pyx_t_3);
             __Pyx_GIVEREF(__pyx_t_2);
             __Pyx_GIVEREF(__pyx_t_1);
-            __Pyx_GIVEREF(__pyx_t_10);
-            __Pyx_ErrRestore(__pyx_t_2, __pyx_t_1, __pyx_t_10);
-            __pyx_t_2 = 0; __pyx_t_1 = 0; __pyx_t_10 = 0; 
-            {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
-            goto __pyx_L24;
+            __Pyx_ErrRestore(__pyx_t_3, __pyx_t_2, __pyx_t_1);
+            __pyx_t_3 = 0; __pyx_t_2 = 0; __pyx_t_1 = 0; 
+            {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+            goto __pyx_L18;
           }
-          __pyx_L24:;
-          __Pyx_DECREF(((PyObject *)__pyx_t_13)); __pyx_t_13 = 0;
+          __pyx_L18:;
+          __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0;
+          __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
           __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
           __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-          __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
           goto __pyx_L8_exception_handled;
         }
         __pyx_L9_except_error:;
@@ -7774,20 +8061,20 @@ static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_Data
     }
     /*finally:*/ {
       if (__pyx_t_4) {
-        __pyx_t_7 = PyObject_Call(__pyx_t_4, __pyx_k_tuple_17, NULL);
+        __pyx_t_7 = PyObject_Call(__pyx_t_4, __pyx_k_tuple_20, NULL);
         __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
-        if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_7);
-        __pyx_t_17 = __Pyx_PyObject_IsTrue(__pyx_t_7);
+        __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_7);
         __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
-        if (unlikely(__pyx_t_17 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        if (unlikely(__pyx_t_11 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       }
     }
-    goto __pyx_L25;
+    goto __pyx_L19;
     __pyx_L3_error:;
     __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
     goto __pyx_L1_error;
-    __pyx_L25:;
+    __pyx_L19:;
   }
 
   __pyx_r = Py_None; __Pyx_INCREF(Py_None);
@@ -7796,13 +8083,327 @@ static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_Data
   __Pyx_XDECREF(__pyx_t_1);
   __Pyx_XDECREF(__pyx_t_2);
   __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_8);
+  __Pyx_AddTraceback("_sa.DataArray.read_bitext", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XDECREF(__pyx_v_data);
+  __Pyx_DECREF(((PyObject *)__pyx_cur_scope));
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_3_sa_9DataArray_21read_text_data(PyObject *__pyx_v_self, PyObject *__pyx_v_data); /*proto*/
+static PyObject *__pyx_pw_3_sa_9DataArray_21read_text_data(PyObject *__pyx_v_self, PyObject *__pyx_v_data) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("read_text_data (wrapper)", 0);
+  __pyx_r = __pyx_pf_3_sa_9DataArray_20read_text_data(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((PyObject *)__pyx_v_data));
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":76
+ *             self.read_text_data(data)
+ * 
+ *     def read_text_data(self, data):             # <<<<<<<<<<<<<<
+ *         cdef int word_count = 0
+ *         for line_num, line in enumerate(data):
+ */
+
+static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_data) {
+  int __pyx_v_word_count;
+  PyObject *__pyx_v_line_num = NULL;
+  PyObject *__pyx_v_line = NULL;
+  PyObject *__pyx_v_word = NULL;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  PyObject *__pyx_t_2 = NULL;
+  Py_ssize_t __pyx_t_3;
+  PyObject *(*__pyx_t_4)(PyObject *);
+  PyObject *__pyx_t_5 = NULL;
+  PyObject *__pyx_t_6 = NULL;
+  Py_ssize_t __pyx_t_7;
+  PyObject *(*__pyx_t_8)(PyObject *);
+  PyObject *__pyx_t_9 = NULL;
+  PyObject *__pyx_t_10 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("read_text_data", 0);
+
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":77
+ * 
+ *     def read_text_data(self, data):
+ *         cdef int word_count = 0             # <<<<<<<<<<<<<<
+ *         for line_num, line in enumerate(data):
+ *             self.sent_index.append(word_count)
+ */
+  __pyx_v_word_count = 0;
+
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":78
+ *     def read_text_data(self, data):
+ *         cdef int word_count = 0
+ *         for line_num, line in enumerate(data):             # <<<<<<<<<<<<<<
+ *             self.sent_index.append(word_count)
+ *             for word in line.split():
+ */
+  __Pyx_INCREF(__pyx_int_0);
+  __pyx_t_1 = __pyx_int_0;
+  if (PyList_CheckExact(__pyx_v_data) || PyTuple_CheckExact(__pyx_v_data)) {
+    __pyx_t_2 = __pyx_v_data; __Pyx_INCREF(__pyx_t_2); __pyx_t_3 = 0;
+    __pyx_t_4 = NULL;
+  } else {
+    __pyx_t_3 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_data); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_t_4 = Py_TYPE(__pyx_t_2)->tp_iternext;
+  }
+  for (;;) {
+    if (!__pyx_t_4 && PyList_CheckExact(__pyx_t_2)) {
+      if (__pyx_t_3 >= PyList_GET_SIZE(__pyx_t_2)) break;
+      #if CYTHON_COMPILING_IN_CPYTHON
+      __pyx_t_5 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++;
+      #else
+      __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+      #endif
+    } else if (!__pyx_t_4 && PyTuple_CheckExact(__pyx_t_2)) {
+      if (__pyx_t_3 >= PyTuple_GET_SIZE(__pyx_t_2)) break;
+      #if CYTHON_COMPILING_IN_CPYTHON
+      __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++;
+      #else
+      __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+      #endif
+    } else {
+      __pyx_t_5 = __pyx_t_4(__pyx_t_2);
+      if (unlikely(!__pyx_t_5)) {
+        if (PyErr_Occurred()) {
+          if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear();
+          else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        }
+        break;
+      }
+      __Pyx_GOTREF(__pyx_t_5);
+    }
+    __Pyx_XDECREF(__pyx_v_line);
+    __pyx_v_line = __pyx_t_5;
+    __pyx_t_5 = 0;
+    __Pyx_INCREF(__pyx_t_1);
+    __Pyx_XDECREF(__pyx_v_line_num);
+    __pyx_v_line_num = __pyx_t_1;
+    __pyx_t_5 = PyNumber_Add(__pyx_t_1, __pyx_int_1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_5);
+    __Pyx_DECREF(__pyx_t_1);
+    __pyx_t_1 = __pyx_t_5;
+    __pyx_t_5 = 0;
+
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":79
+ *         cdef int word_count = 0
+ *         for line_num, line in enumerate(data):
+ *             self.sent_index.append(word_count)             # <<<<<<<<<<<<<<
+ *             for word in line.split():
+ *                 self.data.append(self.get_id(word))
+ */
+    __pyx_t_5 = PyInt_FromLong(__pyx_v_word_count); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_5);
+    __pyx_t_6 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_index), __pyx_t_5); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_6);
+    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":80
+ *         for line_num, line in enumerate(data):
+ *             self.sent_index.append(word_count)
+ *             for word in line.split():             # <<<<<<<<<<<<<<
+ *                 self.data.append(self.get_id(word))
+ *                 if self.use_sent_id:
+ */
+    __pyx_t_6 = PyObject_GetAttr(__pyx_v_line, __pyx_n_s__split); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_6);
+    __pyx_t_5 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_5);
+    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+    if (PyList_CheckExact(__pyx_t_5) || PyTuple_CheckExact(__pyx_t_5)) {
+      __pyx_t_6 = __pyx_t_5; __Pyx_INCREF(__pyx_t_6); __pyx_t_7 = 0;
+      __pyx_t_8 = NULL;
+    } else {
+      __pyx_t_7 = -1; __pyx_t_6 = PyObject_GetIter(__pyx_t_5); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_6);
+      __pyx_t_8 = Py_TYPE(__pyx_t_6)->tp_iternext;
+    }
+    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+    for (;;) {
+      if (!__pyx_t_8 && PyList_CheckExact(__pyx_t_6)) {
+        if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_6)) break;
+        #if CYTHON_COMPILING_IN_CPYTHON
+        __pyx_t_5 = PyList_GET_ITEM(__pyx_t_6, __pyx_t_7); __Pyx_INCREF(__pyx_t_5); __pyx_t_7++;
+        #else
+        __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+        #endif
+      } else if (!__pyx_t_8 && PyTuple_CheckExact(__pyx_t_6)) {
+        if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_6)) break;
+        #if CYTHON_COMPILING_IN_CPYTHON
+        __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_6, __pyx_t_7); __Pyx_INCREF(__pyx_t_5); __pyx_t_7++;
+        #else
+        __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+        #endif
+      } else {
+        __pyx_t_5 = __pyx_t_8(__pyx_t_6);
+        if (unlikely(!__pyx_t_5)) {
+          if (PyErr_Occurred()) {
+            if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear();
+            else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+          }
+          break;
+        }
+        __Pyx_GOTREF(__pyx_t_5);
+      }
+      __Pyx_XDECREF(__pyx_v_word);
+      __pyx_v_word = __pyx_t_5;
+      __pyx_t_5 = 0;
+
+      /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":81
+ *             self.sent_index.append(word_count)
+ *             for word in line.split():
+ *                 self.data.append(self.get_id(word))             # <<<<<<<<<<<<<<
+ *                 if self.use_sent_id:
+ *                     self.sent_id.append(line_num)
+ */
+      __pyx_t_5 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__get_id); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __pyx_t_9 = PyTuple_New(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_9);
+      __Pyx_INCREF(__pyx_v_word);
+      PyTuple_SET_ITEM(__pyx_t_9, 0, __pyx_v_word);
+      __Pyx_GIVEREF(__pyx_v_word);
+      __pyx_t_10 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_t_9), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_10);
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      __Pyx_DECREF(((PyObject *)__pyx_t_9)); __pyx_t_9 = 0;
+      __pyx_t_9 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->data), __pyx_t_10); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_9);
+      __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
+      __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
+
+      /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":82
+ *             for word in line.split():
+ *                 self.data.append(self.get_id(word))
+ *                 if self.use_sent_id:             # <<<<<<<<<<<<<<
+ *                     self.sent_id.append(line_num)
+ *                 word_count = word_count + 1
+ */
+      if (__pyx_v_self->use_sent_id) {
+
+        /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":83
+ *                 self.data.append(self.get_id(word))
+ *                 if self.use_sent_id:
+ *                     self.sent_id.append(line_num)             # <<<<<<<<<<<<<<
+ *                 word_count = word_count + 1
+ *             self.data.append(1)
+ */
+        __pyx_t_9 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_id), __pyx_v_line_num); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __Pyx_GOTREF(__pyx_t_9);
+        __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
+        goto __pyx_L7;
+      }
+      __pyx_L7:;
+
+      /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":84
+ *                 if self.use_sent_id:
+ *                     self.sent_id.append(line_num)
+ *                 word_count = word_count + 1             # <<<<<<<<<<<<<<
+ *             self.data.append(1)
+ *             if self.use_sent_id:
+ */
+      __pyx_v_word_count = (__pyx_v_word_count + 1);
+    }
+    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":85
+ *                     self.sent_id.append(line_num)
+ *                 word_count = word_count + 1
+ *             self.data.append(1)             # <<<<<<<<<<<<<<
+ *             if self.use_sent_id:
+ *                 self.sent_id.append(line_num)
+ */
+    __pyx_t_6 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->data), __pyx_int_1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 85; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_6);
+    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":86
+ *                 word_count = word_count + 1
+ *             self.data.append(1)
+ *             if self.use_sent_id:             # <<<<<<<<<<<<<<
+ *                 self.sent_id.append(line_num)
+ *             word_count = word_count + 1
+ */
+    if (__pyx_v_self->use_sent_id) {
+
+      /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":87
+ *             self.data.append(1)
+ *             if self.use_sent_id:
+ *                 self.sent_id.append(line_num)             # <<<<<<<<<<<<<<
+ *             word_count = word_count + 1
+ *         self.data.append(0)
+ */
+      __pyx_t_6 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_id), __pyx_v_line_num); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_6);
+      __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+      goto __pyx_L8;
+    }
+    __pyx_L8:;
+
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":88
+ *             if self.use_sent_id:
+ *                 self.sent_id.append(line_num)
+ *             word_count = word_count + 1             # <<<<<<<<<<<<<<
+ *         self.data.append(0)
+ *         self.sent_index.append(word_count)
+ */
+    __pyx_v_word_count = (__pyx_v_word_count + 1);
+  }
+  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":89
+ *                 self.sent_id.append(line_num)
+ *             word_count = word_count + 1
+ *         self.data.append(0)             # <<<<<<<<<<<<<<
+ *         self.sent_index.append(word_count)
+ * 
+ */
+  __pyx_t_1 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->data), __pyx_int_0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 89; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":90
+ *             word_count = word_count + 1
+ *         self.data.append(0)
+ *         self.sent_index.append(word_count)             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_t_1 = PyInt_FromLong(__pyx_v_word_count); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_index), __pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_2);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_9);
   __Pyx_XDECREF(__pyx_t_10);
-  __Pyx_XDECREF(__pyx_t_13);
-  __Pyx_XDECREF(__pyx_t_14);
-  __Pyx_AddTraceback("_sa.DataArray.read_text", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __Pyx_AddTraceback("_sa.DataArray.read_text_data", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __pyx_r = NULL;
   __pyx_L0:;
-  __Pyx_XDECREF(__pyx_v_fp);
   __Pyx_XDECREF(__pyx_v_line_num);
   __Pyx_XDECREF(__pyx_v_line);
   __Pyx_XDECREF(__pyx_v_word);
@@ -7812,14 +8413,14 @@ static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_Data
 }
 
 /* Python wrapper */
-static PyObject *__pyx_pw_3_sa_9DataArray_19read_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename); /*proto*/
-static PyObject *__pyx_pw_3_sa_9DataArray_19read_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename) {
+static PyObject *__pyx_pw_3_sa_9DataArray_23read_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename); /*proto*/
+static PyObject *__pyx_pw_3_sa_9DataArray_23read_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename) {
   char *__pyx_v_filename;
   PyObject *__pyx_r = 0;
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("read_binary (wrapper)", 0);
   assert(__pyx_arg_filename); {
-    __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
   }
   goto __pyx_L4_argument_unpacking_done;
   __pyx_L3_error:;
@@ -7827,26 +8428,26 @@ static PyObject *__pyx_pw_3_sa_9DataArray_19read_binary(PyObject *__pyx_v_self,
   __Pyx_RefNannyFinishContext();
   return NULL;
   __pyx_L4_argument_unpacking_done:;
-  __pyx_r = __pyx_pf_3_sa_9DataArray_18read_binary(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((char *)__pyx_v_filename));
+  __pyx_r = __pyx_pf_3_sa_9DataArray_22read_binary(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((char *)__pyx_v_filename));
   __Pyx_RefNannyFinishContext();
   return __pyx_r;
 }
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":81
- *             self.sent_index.append(word_count)
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":93
+ * 
  * 
  *     def read_binary(self, char* filename):             # <<<<<<<<<<<<<<
  *         cdef FILE* f
  *         f = fopen(filename, "r")
  */
 
-static PyObject *__pyx_pf_3_sa_9DataArray_18read_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename) {
+static PyObject *__pyx_pf_3_sa_9DataArray_22read_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename) {
   FILE *__pyx_v_f;
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("read_binary", 0);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":83
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":95
  *     def read_binary(self, char* filename):
  *         cdef FILE* f
  *         f = fopen(filename, "r")             # <<<<<<<<<<<<<<
@@ -7855,7 +8456,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_18read_binary(struct __pyx_obj_3_sa_Da
  */
   __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__r);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":84
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":96
  *         cdef FILE* f
  *         f = fopen(filename, "r")
  *         self.read_handle(f)             # <<<<<<<<<<<<<<
@@ -7864,7 +8465,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_18read_binary(struct __pyx_obj_3_sa_Da
  */
   ((struct __pyx_vtabstruct_3_sa_DataArray *)__pyx_v_self->__pyx_vtab)->read_handle(__pyx_v_self, __pyx_v_f);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":85
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":97
  *         f = fopen(filename, "r")
  *         self.read_handle(f)
  *         fclose(f)             # <<<<<<<<<<<<<<
@@ -7879,7 +8480,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_18read_binary(struct __pyx_obj_3_sa_Da
   return __pyx_r;
 }
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":87
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":99
  *         fclose(f)
  * 
  *     cdef void read_handle(self, FILE* f):             # <<<<<<<<<<<<<<
@@ -7904,7 +8505,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("read_handle", 0);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":92
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":104
  *         cdef char* c_word
  *         cdef bytes py_word
  *         self.data.read_handle(f)             # <<<<<<<<<<<<<<
@@ -7913,7 +8514,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray
  */
   ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->data->__pyx_vtab)->read_handle(__pyx_v_self->data, __pyx_v_f);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":93
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":105
  *         cdef bytes py_word
  *         self.data.read_handle(f)
  *         self.sent_index.read_handle(f)             # <<<<<<<<<<<<<<
@@ -7922,7 +8523,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray
  */
   ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sent_index->__pyx_vtab)->read_handle(__pyx_v_self->sent_index, __pyx_v_f);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":94
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":106
  *         self.data.read_handle(f)
  *         self.sent_index.read_handle(f)
  *         self.sent_id.read_handle(f)             # <<<<<<<<<<<<<<
@@ -7931,7 +8532,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray
  */
   ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sent_id->__pyx_vtab)->read_handle(__pyx_v_self->sent_id, __pyx_v_f);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":95
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":107
  *         self.sent_index.read_handle(f)
  *         self.sent_id.read_handle(f)
  *         fread(&(num_words), sizeof(int), 1, f)             # <<<<<<<<<<<<<<
@@ -7940,7 +8541,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray
  */
   fread((&__pyx_v_num_words), (sizeof(int)), 1, __pyx_v_f);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":96
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":108
  *         self.sent_id.read_handle(f)
  *         fread(&(num_words), sizeof(int), 1, f)
  *         for i in range(num_words):             # <<<<<<<<<<<<<<
@@ -7951,7 +8552,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray
   for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
     __pyx_v_i = __pyx_t_2;
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":97
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":109
  *         fread(&(num_words), sizeof(int), 1, f)
  *         for i in range(num_words):
  *             fread(&(word_len), sizeof(int), 1, f)             # <<<<<<<<<<<<<<
@@ -7960,7 +8561,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray
  */
     fread((&__pyx_v_word_len), (sizeof(int)), 1, __pyx_v_f);
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":98
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":110
  *         for i in range(num_words):
  *             fread(&(word_len), sizeof(int), 1, f)
  *             c_word = <char*> malloc (word_len * sizeof(char))             # <<<<<<<<<<<<<<
@@ -7969,7 +8570,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray
  */
     __pyx_v_c_word = ((char *)malloc((__pyx_v_word_len * (sizeof(char)))));
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":99
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":111
  *             fread(&(word_len), sizeof(int), 1, f)
  *             c_word = <char*> malloc (word_len * sizeof(char))
  *             fread(c_word, sizeof(char), word_len, f)             # <<<<<<<<<<<<<<
@@ -7978,20 +8579,20 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray
  */
     fread(__pyx_v_c_word, (sizeof(char)), __pyx_v_word_len, __pyx_v_f);
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":100
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":112
  *             c_word = <char*> malloc (word_len * sizeof(char))
  *             fread(c_word, sizeof(char), word_len, f)
  *             py_word = c_word             # <<<<<<<<<<<<<<
  *             free(c_word)
  *             self.word2id[py_word] = len(self.id2word)
  */
-    __pyx_t_3 = PyBytes_FromString(__pyx_v_c_word); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = PyBytes_FromString(__pyx_v_c_word); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 112; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(((PyObject *)__pyx_t_3));
     __Pyx_XDECREF(((PyObject *)__pyx_v_py_word));
     __pyx_v_py_word = __pyx_t_3;
     __pyx_t_3 = 0;
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":101
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":113
  *             fread(c_word, sizeof(char), word_len, f)
  *             py_word = c_word
  *             free(c_word)             # <<<<<<<<<<<<<<
@@ -8000,7 +8601,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray
  */
     free(__pyx_v_c_word);
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":102
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":114
  *             py_word = c_word
  *             free(c_word)
  *             self.word2id[py_word] = len(self.id2word)             # <<<<<<<<<<<<<<
@@ -8009,26 +8610,26 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray
  */
     __pyx_t_3 = __pyx_v_self->id2word;
     __Pyx_INCREF(__pyx_t_3);
-    __pyx_t_4 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_4 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 114; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-    __pyx_t_3 = PyInt_FromSsize_t(__pyx_t_4); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = PyInt_FromSsize_t(__pyx_t_4); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 114; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    if (PyObject_SetItem(__pyx_v_self->word2id, ((PyObject *)__pyx_v_py_word), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    if (PyObject_SetItem(__pyx_v_self->word2id, ((PyObject *)__pyx_v_py_word), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 114; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":103
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":115
  *             free(c_word)
  *             self.word2id[py_word] = len(self.id2word)
  *             self.id2word.append(py_word)             # <<<<<<<<<<<<<<
  *         if len(self.sent_id) == 0:
  *             self.use_sent_id = False
  */
-    __pyx_t_3 = __Pyx_PyObject_Append(__pyx_v_self->id2word, ((PyObject *)__pyx_v_py_word)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = __Pyx_PyObject_Append(__pyx_v_self->id2word, ((PyObject *)__pyx_v_py_word)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
     __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
   }
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":104
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":116
  *             self.word2id[py_word] = len(self.id2word)
  *             self.id2word.append(py_word)
  *         if len(self.sent_id) == 0:             # <<<<<<<<<<<<<<
@@ -8037,12 +8638,12 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray
  */
   __pyx_t_3 = ((PyObject *)__pyx_v_self->sent_id);
   __Pyx_INCREF(__pyx_t_3);
-  __pyx_t_4 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_4 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
   __pyx_t_5 = (__pyx_t_4 == 0);
   if (__pyx_t_5) {
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":105
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":117
  *             self.id2word.append(py_word)
  *         if len(self.sent_id) == 0:
  *             self.use_sent_id = False             # <<<<<<<<<<<<<<
@@ -8054,7 +8655,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray
   }
   /*else*/ {
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":107
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":119
  *             self.use_sent_id = False
  *         else:
  *             self.use_sent_id = True             # <<<<<<<<<<<<<<
@@ -8074,7 +8675,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray
   __Pyx_RefNannyFinishContext();
 }
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":109
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":121
  *             self.use_sent_id = True
  * 
  *     cdef void write_handle(self, FILE* f):             # <<<<<<<<<<<<<<
@@ -8098,7 +8699,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("write_handle", 0);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":113
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":125
  *         cdef int num_words
  *         cdef char* c_word
  *         self.data.write_handle(f)             # <<<<<<<<<<<<<<
@@ -8107,7 +8708,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray
  */
   ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->data->__pyx_vtab)->write_handle(__pyx_v_self->data, __pyx_v_f);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":114
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":126
  *         cdef char* c_word
  *         self.data.write_handle(f)
  *         self.sent_index.write_handle(f)             # <<<<<<<<<<<<<<
@@ -8116,7 +8717,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray
  */
   ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sent_index->__pyx_vtab)->write_handle(__pyx_v_self->sent_index, __pyx_v_f);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":115
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":127
  *         self.data.write_handle(f)
  *         self.sent_index.write_handle(f)
  *         self.sent_id.write_handle(f)             # <<<<<<<<<<<<<<
@@ -8125,7 +8726,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray
  */
   ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sent_id->__pyx_vtab)->write_handle(__pyx_v_self->sent_id, __pyx_v_f);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":116
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":128
  *         self.sent_index.write_handle(f)
  *         self.sent_id.write_handle(f)
  *         num_words = len(self.id2word) - 2             # <<<<<<<<<<<<<<
@@ -8134,11 +8735,11 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray
  */
   __pyx_t_1 = __pyx_v_self->id2word;
   __Pyx_INCREF(__pyx_t_1);
-  __pyx_t_2 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_2 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 128; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
   __pyx_v_num_words = (__pyx_t_2 - 2);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":117
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":129
  *         self.sent_id.write_handle(f)
  *         num_words = len(self.id2word) - 2
  *         fwrite(&(num_words), sizeof(int), 1, f)             # <<<<<<<<<<<<<<
@@ -8147,20 +8748,20 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray
  */
   fwrite((&__pyx_v_num_words), (sizeof(int)), 1, __pyx_v_f);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":118
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":130
  *         num_words = len(self.id2word) - 2
  *         fwrite(&(num_words), sizeof(int), 1, f)
  *         for word in self.id2word[2:]:             # <<<<<<<<<<<<<<
  *             c_word = word
  *             word_len = strlen(c_word) + 1
  */
-  __pyx_t_1 = __Pyx_PySequence_GetSlice(__pyx_v_self->id2word, 2, PY_SSIZE_T_MAX); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 118; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = __Pyx_PySequence_GetSlice(__pyx_v_self->id2word, 2, PY_SSIZE_T_MAX); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   if (PyList_CheckExact(__pyx_t_1) || PyTuple_CheckExact(__pyx_t_1)) {
     __pyx_t_3 = __pyx_t_1; __Pyx_INCREF(__pyx_t_3); __pyx_t_2 = 0;
     __pyx_t_4 = NULL;
   } else {
-    __pyx_t_2 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 118; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
     __pyx_t_4 = Py_TYPE(__pyx_t_3)->tp_iternext;
   }
@@ -8171,21 +8772,21 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray
       #if CYTHON_COMPILING_IN_CPYTHON
       __pyx_t_1 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++;
       #else
-      __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 118; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+      __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
       #endif
     } else if (!__pyx_t_4 && PyTuple_CheckExact(__pyx_t_3)) {
       if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_3)) break;
       #if CYTHON_COMPILING_IN_CPYTHON
       __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++;
       #else
-      __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 118; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+      __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
       #endif
     } else {
       __pyx_t_1 = __pyx_t_4(__pyx_t_3);
       if (unlikely(!__pyx_t_1)) {
         if (PyErr_Occurred()) {
           if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear();
-          else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 118; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+          else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         }
         break;
       }
@@ -8195,17 +8796,17 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray
     __pyx_v_word = __pyx_t_1;
     __pyx_t_1 = 0;
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":119
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":131
  *         fwrite(&(num_words), sizeof(int), 1, f)
  *         for word in self.id2word[2:]:
  *             c_word = word             # <<<<<<<<<<<<<<
  *             word_len = strlen(c_word) + 1
  *             fwrite(&(word_len), sizeof(int), 1, f)
  */
-    __pyx_t_5 = PyBytes_AsString(__pyx_v_word); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 119; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_5 = PyBytes_AsString(__pyx_v_word); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __pyx_v_c_word = __pyx_t_5;
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":120
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":132
  *         for word in self.id2word[2:]:
  *             c_word = word
  *             word_len = strlen(c_word) + 1             # <<<<<<<<<<<<<<
@@ -8214,7 +8815,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray
  */
     __pyx_v_word_len = (strlen(__pyx_v_c_word) + 1);
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":121
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":133
  *             c_word = word
  *             word_len = strlen(c_word) + 1
  *             fwrite(&(word_len), sizeof(int), 1, f)             # <<<<<<<<<<<<<<
@@ -8223,7 +8824,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray
  */
     fwrite((&__pyx_v_word_len), (sizeof(int)), 1, __pyx_v_f);
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":122
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":134
  *             word_len = strlen(c_word) + 1
  *             fwrite(&(word_len), sizeof(int), 1, f)
  *             fwrite(c_word, sizeof(char), word_len, f)             # <<<<<<<<<<<<<<
@@ -8245,14 +8846,14 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray
 }
 
 /* Python wrapper */
-static PyObject *__pyx_pw_3_sa_9DataArray_21write_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename); /*proto*/
-static PyObject *__pyx_pw_3_sa_9DataArray_21write_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename) {
+static PyObject *__pyx_pw_3_sa_9DataArray_25write_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename); /*proto*/
+static PyObject *__pyx_pw_3_sa_9DataArray_25write_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename) {
   char *__pyx_v_filename;
   PyObject *__pyx_r = 0;
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("write_binary (wrapper)", 0);
   assert(__pyx_arg_filename); {
-    __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 136; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
   }
   goto __pyx_L4_argument_unpacking_done;
   __pyx_L3_error:;
@@ -8260,12 +8861,12 @@ static PyObject *__pyx_pw_3_sa_9DataArray_21write_binary(PyObject *__pyx_v_self,
   __Pyx_RefNannyFinishContext();
   return NULL;
   __pyx_L4_argument_unpacking_done:;
-  __pyx_r = __pyx_pf_3_sa_9DataArray_20write_binary(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((char *)__pyx_v_filename));
+  __pyx_r = __pyx_pf_3_sa_9DataArray_24write_binary(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((char *)__pyx_v_filename));
   __Pyx_RefNannyFinishContext();
   return __pyx_r;
 }
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":124
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":136
  *             fwrite(c_word, sizeof(char), word_len, f)
  * 
  *     def write_binary(self, char* filename):             # <<<<<<<<<<<<<<
@@ -8273,13 +8874,13 @@ static PyObject *__pyx_pw_3_sa_9DataArray_21write_binary(PyObject *__pyx_v_self,
  *         f = fopen(filename, "w")
  */
 
-static PyObject *__pyx_pf_3_sa_9DataArray_20write_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename) {
+static PyObject *__pyx_pf_3_sa_9DataArray_24write_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename) {
   FILE *__pyx_v_f;
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("write_binary", 0);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":126
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":138
  *     def write_binary(self, char* filename):
  *         cdef FILE* f
  *         f = fopen(filename, "w")             # <<<<<<<<<<<<<<
@@ -8288,7 +8889,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20write_binary(struct __pyx_obj_3_sa_D
  */
   __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__w);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":127
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":139
  *         cdef FILE* f
  *         f = fopen(filename, "w")
  *         self.write_handle(f)             # <<<<<<<<<<<<<<
@@ -8297,7 +8898,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20write_binary(struct __pyx_obj_3_sa_D
  */
   ((struct __pyx_vtabstruct_3_sa_DataArray *)__pyx_v_self->__pyx_vtab)->write_handle(__pyx_v_self, __pyx_v_f);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":128
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":140
  *         f = fopen(filename, "w")
  *         self.write_handle(f)
  *         fclose(f)             # <<<<<<<<<<<<<<
@@ -8313,17 +8914,17 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20write_binary(struct __pyx_obj_3_sa_D
 }
 
 /* Python wrapper */
-static PyObject *__pyx_pw_3_sa_9DataArray_23write_enhanced_handle(PyObject *__pyx_v_self, PyObject *__pyx_v_f); /*proto*/
-static PyObject *__pyx_pw_3_sa_9DataArray_23write_enhanced_handle(PyObject *__pyx_v_self, PyObject *__pyx_v_f) {
+static PyObject *__pyx_pw_3_sa_9DataArray_27write_enhanced_handle(PyObject *__pyx_v_self, PyObject *__pyx_v_f); /*proto*/
+static PyObject *__pyx_pw_3_sa_9DataArray_27write_enhanced_handle(PyObject *__pyx_v_self, PyObject *__pyx_v_f) {
   PyObject *__pyx_r = 0;
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("write_enhanced_handle (wrapper)", 0);
-  __pyx_r = __pyx_pf_3_sa_9DataArray_22write_enhanced_handle(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((PyObject *)__pyx_v_f));
+  __pyx_r = __pyx_pf_3_sa_9DataArray_26write_enhanced_handle(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((PyObject *)__pyx_v_f));
   __Pyx_RefNannyFinishContext();
   return __pyx_r;
 }
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":130
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":142
  *         fclose(f)
  * 
  *     def write_enhanced_handle(self, f):             # <<<<<<<<<<<<<<
@@ -8331,7 +8932,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_23write_enhanced_handle(PyObject *__py
  *             f.write("%d " %i)
  */
 
-static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_f) {
+static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_f) {
   PyObject *__pyx_v_i = NULL;
   PyObject *__pyx_v_word = NULL;
   PyObject *__pyx_r = NULL;
@@ -8347,7 +8948,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("write_enhanced_handle", 0);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":131
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":143
  * 
  *     def write_enhanced_handle(self, f):
  *         for i in self.data:             # <<<<<<<<<<<<<<
@@ -8358,7 +8959,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
     __pyx_t_1 = ((PyObject *)__pyx_v_self->data); __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0;
     __pyx_t_3 = NULL;
   } else {
-    __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(((PyObject *)__pyx_v_self->data)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(((PyObject *)__pyx_v_self->data)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_1);
     __pyx_t_3 = Py_TYPE(__pyx_t_1)->tp_iternext;
   }
@@ -8368,21 +8969,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
       #if CYTHON_COMPILING_IN_CPYTHON
       __pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++;
       #else
-      __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+      __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
       #endif
     } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_1)) {
       if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break;
       #if CYTHON_COMPILING_IN_CPYTHON
       __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++;
       #else
-      __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+      __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
       #endif
     } else {
       __pyx_t_4 = __pyx_t_3(__pyx_t_1);
       if (unlikely(!__pyx_t_4)) {
         if (PyErr_Occurred()) {
           if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear();
-          else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+          else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         }
         break;
       }
@@ -8392,23 +8993,23 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
     __pyx_v_i = __pyx_t_4;
     __pyx_t_4 = 0;
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":132
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":144
  *     def write_enhanced_handle(self, f):
  *         for i in self.data:
  *             f.write("%d " %i)             # <<<<<<<<<<<<<<
  *         f.write("\n")
  *         for i in self.sent_index:
  */
-    __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 132; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_4);
-    __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_i); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 132; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_i); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(((PyObject *)__pyx_t_5));
-    __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 132; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_6);
     PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_t_5));
     __Pyx_GIVEREF(((PyObject *)__pyx_t_5));
     __pyx_t_5 = 0;
-    __pyx_t_5 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_6), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 132; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_5 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_6), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_5);
     __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
     __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
@@ -8416,21 +9017,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
   }
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":133
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":145
  *         for i in self.data:
  *             f.write("%d " %i)
  *         f.write("\n")             # <<<<<<<<<<<<<<
  *         for i in self.sent_index:
  *             f.write("%d " %i)
  */
-  __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 133; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
-  __pyx_t_5 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_19), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 133; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_5 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_22), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_5);
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
   __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":134
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":146
  *             f.write("%d " %i)
  *         f.write("\n")
  *         for i in self.sent_index:             # <<<<<<<<<<<<<<
@@ -8441,7 +9042,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
     __pyx_t_5 = ((PyObject *)__pyx_v_self->sent_index); __Pyx_INCREF(__pyx_t_5); __pyx_t_2 = 0;
     __pyx_t_3 = NULL;
   } else {
-    __pyx_t_2 = -1; __pyx_t_5 = PyObject_GetIter(((PyObject *)__pyx_v_self->sent_index)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = -1; __pyx_t_5 = PyObject_GetIter(((PyObject *)__pyx_v_self->sent_index)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 146; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_5);
     __pyx_t_3 = Py_TYPE(__pyx_t_5)->tp_iternext;
   }
@@ -8451,21 +9052,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
       #if CYTHON_COMPILING_IN_CPYTHON
       __pyx_t_1 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++;
       #else
-      __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+      __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 146; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
       #endif
     } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_5)) {
       if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_5)) break;
       #if CYTHON_COMPILING_IN_CPYTHON
       __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++;
       #else
-      __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+      __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 146; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
       #endif
     } else {
       __pyx_t_1 = __pyx_t_3(__pyx_t_5);
       if (unlikely(!__pyx_t_1)) {
         if (PyErr_Occurred()) {
           if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear();
-          else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+          else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 146; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         }
         break;
       }
@@ -8475,23 +9076,23 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
     __pyx_v_i = __pyx_t_1;
     __pyx_t_1 = 0;
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":135
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":147
  *         f.write("\n")
  *         for i in self.sent_index:
  *             f.write("%d " %i)             # <<<<<<<<<<<<<<
  *         f.write("\n")
  *         for i in self.sent_id:
  */
-    __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_1);
-    __pyx_t_6 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_i); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_6 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_i); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(((PyObject *)__pyx_t_6));
-    __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_4);
     PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_t_6));
     __Pyx_GIVEREF(((PyObject *)__pyx_t_6));
     __pyx_t_6 = 0;
-    __pyx_t_6 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_6 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_6);
     __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
     __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0;
@@ -8499,21 +9100,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
   }
   __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":136
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":148
  *         for i in self.sent_index:
  *             f.write("%d " %i)
  *         f.write("\n")             # <<<<<<<<<<<<<<
  *         for i in self.sent_id:
  *             f.write("%d " %i)
  */
-  __pyx_t_5 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 136; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_5 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_5);
-  __pyx_t_6 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_k_tuple_20), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 136; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_6 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_k_tuple_23), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_6);
   __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
   __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":137
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":149
  *             f.write("%d " %i)
  *         f.write("\n")
  *         for i in self.sent_id:             # <<<<<<<<<<<<<<
@@ -8524,7 +9125,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
     __pyx_t_6 = ((PyObject *)__pyx_v_self->sent_id); __Pyx_INCREF(__pyx_t_6); __pyx_t_2 = 0;
     __pyx_t_3 = NULL;
   } else {
-    __pyx_t_2 = -1; __pyx_t_6 = PyObject_GetIter(((PyObject *)__pyx_v_self->sent_id)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = -1; __pyx_t_6 = PyObject_GetIter(((PyObject *)__pyx_v_self->sent_id)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_6);
     __pyx_t_3 = Py_TYPE(__pyx_t_6)->tp_iternext;
   }
@@ -8534,21 +9135,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
       #if CYTHON_COMPILING_IN_CPYTHON
       __pyx_t_5 = PyList_GET_ITEM(__pyx_t_6, __pyx_t_2); __Pyx_INCREF(__pyx_t_5); __pyx_t_2++;
       #else
-      __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+      __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
       #endif
     } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_6)) {
       if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_6)) break;
       #if CYTHON_COMPILING_IN_CPYTHON
       __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_6, __pyx_t_2); __Pyx_INCREF(__pyx_t_5); __pyx_t_2++;
       #else
-      __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+      __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
       #endif
     } else {
       __pyx_t_5 = __pyx_t_3(__pyx_t_6);
       if (unlikely(!__pyx_t_5)) {
         if (PyErr_Occurred()) {
           if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear();
-          else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+          else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         }
         break;
       }
@@ -8558,23 +9159,23 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
     __pyx_v_i = __pyx_t_5;
     __pyx_t_5 = 0;
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":138
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":150
  *         f.write("\n")
  *         for i in self.sent_id:
  *             f.write("%d " %i)             # <<<<<<<<<<<<<<
  *         f.write("\n")
  *         for word in self.id2word:
  */
-    __pyx_t_5 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_5 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_5);
-    __pyx_t_4 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_i); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_4 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_i); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(((PyObject *)__pyx_t_4));
-    __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_1);
     PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_t_4));
     __Pyx_GIVEREF(((PyObject *)__pyx_t_4));
     __pyx_t_4 = 0;
-    __pyx_t_4 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_4 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_4);
     __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
     __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
@@ -8582,21 +9183,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
   }
   __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":139
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":151
  *         for i in self.sent_id:
  *             f.write("%d " %i)
  *         f.write("\n")             # <<<<<<<<<<<<<<
  *         for word in self.id2word:
  *             f.write("%s %d " % (word, self.word2id[word]))
  */
-  __pyx_t_6 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_6 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 151; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_6);
-  __pyx_t_4 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_k_tuple_21), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_4 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_k_tuple_24), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 151; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_4);
   __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
   __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":140
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":152
  *             f.write("%d " %i)
  *         f.write("\n")
  *         for word in self.id2word:             # <<<<<<<<<<<<<<
@@ -8607,7 +9208,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
     __pyx_t_4 = __pyx_v_self->id2word; __Pyx_INCREF(__pyx_t_4); __pyx_t_2 = 0;
     __pyx_t_3 = NULL;
   } else {
-    __pyx_t_2 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_v_self->id2word); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_v_self->id2word); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 152; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_4);
     __pyx_t_3 = Py_TYPE(__pyx_t_4)->tp_iternext;
   }
@@ -8617,21 +9218,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
       #if CYTHON_COMPILING_IN_CPYTHON
       __pyx_t_6 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_6); __pyx_t_2++;
       #else
-      __pyx_t_6 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+      __pyx_t_6 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 152; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
       #endif
     } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_4)) {
       if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_4)) break;
       #if CYTHON_COMPILING_IN_CPYTHON
       __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_6); __pyx_t_2++;
       #else
-      __pyx_t_6 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+      __pyx_t_6 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 152; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
       #endif
     } else {
       __pyx_t_6 = __pyx_t_3(__pyx_t_4);
       if (unlikely(!__pyx_t_6)) {
         if (PyErr_Occurred()) {
           if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear();
-          else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+          else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 152; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         }
         break;
       }
@@ -8641,18 +9242,18 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
     __pyx_v_word = __pyx_t_6;
     __pyx_t_6 = 0;
 
-    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":141
+    /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":153
  *         f.write("\n")
  *         for word in self.id2word:
  *             f.write("%s %d " % (word, self.word2id[word]))             # <<<<<<<<<<<<<<
  *         f.write("\n")
  * 
  */
-    __pyx_t_6 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_6 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_6);
-    __pyx_t_1 = PyObject_GetItem(__pyx_v_self->word2id, __pyx_v_word); if (!__pyx_t_1) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_1 = PyObject_GetItem(__pyx_v_self->word2id, __pyx_v_word); if (!__pyx_t_1) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_1);
-    __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_5);
     __Pyx_INCREF(__pyx_v_word);
     PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_v_word);
@@ -8660,15 +9261,15 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
     PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_1);
     __Pyx_GIVEREF(__pyx_t_1);
     __pyx_t_1 = 0;
-    __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_22), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_25), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(((PyObject *)__pyx_t_1));
     __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0;
-    __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_5);
     PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_t_1));
     __Pyx_GIVEREF(((PyObject *)__pyx_t_1));
     __pyx_t_1 = 0;
-    __pyx_t_1 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_1 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_1);
     __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
     __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0;
@@ -8676,16 +9277,16 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
   }
   __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":142
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":154
  *         for word in self.id2word:
  *             f.write("%s %d " % (word, self.word2id[word]))
  *         f.write("\n")             # <<<<<<<<<<<<<<
  * 
  *     def write_enhanced(self, char* filename):
  */
-  __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_4);
-  __pyx_t_1 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_23), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_26), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
@@ -8708,14 +9309,14 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o
 }
 
 /* Python wrapper */
-static PyObject *__pyx_pw_3_sa_9DataArray_25write_enhanced(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename); /*proto*/
-static PyObject *__pyx_pw_3_sa_9DataArray_25write_enhanced(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename) {
+static PyObject *__pyx_pw_3_sa_9DataArray_29write_enhanced(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename); /*proto*/
+static PyObject *__pyx_pw_3_sa_9DataArray_29write_enhanced(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename) {
   char *__pyx_v_filename;
   PyObject *__pyx_r = 0;
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("write_enhanced (wrapper)", 0);
   assert(__pyx_arg_filename); {
-    __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 156; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
   }
   goto __pyx_L4_argument_unpacking_done;
   __pyx_L3_error:;
@@ -8723,12 +9324,12 @@ static PyObject *__pyx_pw_3_sa_9DataArray_25write_enhanced(PyObject *__pyx_v_sel
   __Pyx_RefNannyFinishContext();
   return NULL;
   __pyx_L4_argument_unpacking_done:;
-  __pyx_r = __pyx_pf_3_sa_9DataArray_24write_enhanced(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((char *)__pyx_v_filename));
+  __pyx_r = __pyx_pf_3_sa_9DataArray_28write_enhanced(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((char *)__pyx_v_filename));
   __Pyx_RefNannyFinishContext();
   return __pyx_r;
 }
 
-/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":144
+/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":156
  *         f.write("\n")
  * 
  *     def write_enhanced(self, char* filename):             # <<<<<<<<<<<<<<
@@ -8736,7 +9337,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_25write_enhanced(PyObject *__pyx_v_sel
  *             self.write_enhanced_handle(self, f)
  */
 
-static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename) {
+static PyObject *__pyx_pf_3_sa_9DataArray_28write_enhanced(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename) {
   PyObject *__pyx_v_f = NULL;
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
@@ -8756,16 +9357,16 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("write_enhanced", 0);
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":145
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":157
  * 
  *     def write_enhanced(self, char* filename):
  *         with open(filename, "w") as f:             # <<<<<<<<<<<<<<
  *             self.write_enhanced_handle(self, f)
  */
   /*with:*/ {
-    __pyx_t_1 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_1 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(((PyObject *)__pyx_t_1));
-    __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_2);
     PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_t_1));
     __Pyx_GIVEREF(((PyObject *)__pyx_t_1));
@@ -8773,14 +9374,14 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa
     PyTuple_SET_ITEM(__pyx_t_2, 1, ((PyObject *)__pyx_n_s__w));
     __Pyx_GIVEREF(((PyObject *)__pyx_n_s__w));
     __pyx_t_1 = 0;
-    __pyx_t_1 = PyObject_Call(__pyx_builtin_open, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_1 = PyObject_Call(__pyx_builtin_open, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_1);
     __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
-    __pyx_t_3 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____exit__); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____exit__); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
-    __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____enter__); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____enter__); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
     __Pyx_GOTREF(__pyx_t_2);
-    __pyx_t_4 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    __pyx_t_4 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
     __Pyx_GOTREF(__pyx_t_4);
     __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
     __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
@@ -8795,14 +9396,14 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa
           __pyx_v_f = __pyx_t_4;
           __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
 
-          /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":146
+          /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":158
  *     def write_enhanced(self, char* filename):
  *         with open(filename, "w") as f:
  *             self.write_enhanced_handle(self, f)             # <<<<<<<<<<<<<<
  */
-          __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s_24); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 146; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s_27); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_4);
-          __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 146; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_1);
           __Pyx_INCREF(((PyObject *)__pyx_v_self));
           PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_self));
@@ -8810,7 +9411,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa
           __Pyx_INCREF(__pyx_v_f);
           PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_v_f);
           __Pyx_GIVEREF(__pyx_v_f);
-          __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 146; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_2);
           __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
           __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
@@ -8825,7 +9426,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa
         __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
         __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
 
-        /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":145
+        /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":157
  * 
  *     def write_enhanced(self, char* filename):
  *         with open(filename, "w") as f:             # <<<<<<<<<<<<<<
@@ -8833,11 +9434,11 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa
  */
         /*except:*/ {
           __Pyx_AddTraceback("_sa.DataArray.write_enhanced", __pyx_clineno, __pyx_lineno, __pyx_filename);
-          if (__Pyx_GetException(&__pyx_t_2, &__pyx_t_1, &__pyx_t_4) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          if (__Pyx_GetException(&__pyx_t_2, &__pyx_t_1, &__pyx_t_4) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
           __Pyx_GOTREF(__pyx_t_2);
           __Pyx_GOTREF(__pyx_t_1);
           __Pyx_GOTREF(__pyx_t_4);
-          __pyx_t_8 = PyTuple_New(3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          __pyx_t_8 = PyTuple_New(3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
           __Pyx_GOTREF(__pyx_t_8);
           __Pyx_INCREF(__pyx_t_2);
           PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_2);
@@ -8850,11 +9451,11 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa
           __Pyx_GIVEREF(__pyx_t_4);
           __pyx_t_10 = PyObject_Call(__pyx_t_3, __pyx_t_8, NULL);
           __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-          if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
           __Pyx_GOTREF(__pyx_t_10);
           __pyx_t_9 = __Pyx_PyObject_IsTrue(__pyx_t_10);
           __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
-          if (unlikely(__pyx_t_9 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+          if (unlikely(__pyx_t_9 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
           __pyx_t_11 = (!__pyx_t_9);
           if (__pyx_t_11) {
             __Pyx_GIVEREF(__pyx_t_2);
@@ -8862,7 +9463,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa
             __Pyx_GIVEREF(__pyx_t_4);
             __Pyx_ErrRestore(__pyx_t_2, __pyx_t_1, __pyx_t_4);
             __pyx_t_2 = 0; __pyx_t_1 = 0; __pyx_t_4 = 0; 
-            {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
+            {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;}
             goto __pyx_L18;
           }
           __pyx_L18:;
@@ -8888,13 +9489,13 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa
     }
     /*finally:*/ {
       if (__pyx_t_3) {
-        __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_25, NULL);
+        __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_28, NULL);
         __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-        if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_7);
         __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_7);
         __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
-        if (unlikely(__pyx_t_11 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        if (unlikely(__pyx_t_11 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       }
     }
     goto __pyx_L19;
@@ -9350,7 +9951,7 @@ static int __pyx_pf_3_sa_9Alignment_4__cinit__(struct __pyx_obj_3_sa_Alignment *
  *         self.sent_index = IntList(1000,1000)
  *         if from_binary:
  */
-  __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_k_tuple_26), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_k_tuple_29), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __Pyx_GIVEREF(__pyx_t_1);
   __Pyx_GOTREF(__pyx_v_self->links);
@@ -9365,7 +9966,7 @@ static int __pyx_pf_3_sa_9Alignment_4__cinit__(struct __pyx_obj_3_sa_Alignment *
  *         if from_binary:
  *             self.read_binary(from_binary)
  */
-  __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_k_tuple_27), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_k_tuple_30), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __Pyx_GIVEREF(__pyx_t_1);
   __Pyx_GOTREF(__pyx_v_self->sent_index);
@@ -9687,7 +10288,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_6read_text(struct __pyx_obj_3_sa_Align
  */
               __pyx_t_3 = PyObject_GetAttr(__pyx_v_pair, __pyx_n_s__split); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
               __Pyx_GOTREF(__pyx_t_3);
-              __pyx_t_12 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_k_tuple_29), NULL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+              __pyx_t_12 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_k_tuple_32), NULL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
               __Pyx_GOTREF(__pyx_t_12);
               __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
               __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
@@ -9871,7 +10472,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_6read_text(struct __pyx_obj_3_sa_Align
     }
     /*finally:*/ {
       if (__pyx_t_4) {
-        __pyx_t_7 = PyObject_Call(__pyx_t_4, __pyx_k_tuple_30, NULL);
+        __pyx_t_7 = PyObject_Call(__pyx_t_4, __pyx_k_tuple_33, NULL);
         __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
         if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_7);
@@ -10174,7 +10775,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali
  */
               __pyx_t_10 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
               __Pyx_GOTREF(__pyx_t_10);
-              __pyx_t_2 = PyObject_Call(__pyx_t_10, ((PyObject *)__pyx_k_tuple_31), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+              __pyx_t_2 = PyObject_Call(__pyx_t_10, ((PyObject *)__pyx_k_tuple_34), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
               __Pyx_GOTREF(__pyx_t_2);
               __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
               __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
@@ -10213,7 +10814,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali
             __Pyx_GOTREF(__pyx_t_13);
             __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
             __Pyx_DECREF(((PyObject *)__pyx_t_12)); __pyx_t_12 = 0;
-            __pyx_t_12 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_32), __pyx_t_13); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+            __pyx_t_12 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_35), __pyx_t_13); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(((PyObject *)__pyx_t_12));
             __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
             __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
@@ -10239,7 +10840,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali
  */
           __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_4);
-          __pyx_t_1 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_33), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_1 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_36), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_1);
           __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
           __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
@@ -10320,7 +10921,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali
     }
     /*finally:*/ {
       if (__pyx_t_3) {
-        __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_34, NULL);
+        __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_37, NULL);
         __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
         if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_7);
@@ -10591,7 +11192,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa
  */
             __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(__pyx_t_1);
-            __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_link); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+            __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_link); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(((PyObject *)__pyx_t_2));
             __pyx_t_10 = PyTuple_New(1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(__pyx_t_10);
@@ -10615,7 +11216,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa
  */
           __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_4);
-          __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_35), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_38), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_2);
           __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
           __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
@@ -10674,7 +11275,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa
  */
             __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(__pyx_t_4);
-            __pyx_t_10 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_i); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+            __pyx_t_10 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_i); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(((PyObject *)__pyx_t_10));
             __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(__pyx_t_1);
@@ -10698,7 +11299,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa
  */
           __pyx_t_2 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_2);
-          __pyx_t_10 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_36), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_10 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_39), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_10);
           __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
           __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
@@ -10777,7 +11378,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa
     }
     /*finally:*/ {
       if (__pyx_t_3) {
-        __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_37, NULL);
+        __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_40, NULL);
         __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
         if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_7);
@@ -11269,7 +11870,7 @@ static int __pyx_pw_3_sa_5BiLex_1__cinit__(PyObject *__pyx_v_self, PyObject *__p
  *         self.id2eword = []
  */
     values[0] = ((PyObject *)Py_None);
-    values[1] = __pyx_k_38;
+    values[1] = __pyx_k_41;
     values[2] = ((PyObject *)Py_None);
 
     /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":55
@@ -12220,7 +12821,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL
         __pyx_t_5 = 0;
         __pyx_t_11 = 0;
         __pyx_t_12 = 0;
-        __pyx_t_12 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_39), ((PyObject *)__pyx_t_13)); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __pyx_t_12 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_42), ((PyObject *)__pyx_t_13)); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(((PyObject *)__pyx_t_12));
         __Pyx_DECREF(((PyObject *)__pyx_t_13)); __pyx_t_13 = 0;
         __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -14311,7 +14912,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_
  */
           __pyx_t_12 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__seek); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_12);
-          __pyx_t_1 = PyObject_Call(__pyx_t_12, ((PyObject *)__pyx_k_tuple_40), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_1 = PyObject_Call(__pyx_t_12, ((PyObject *)__pyx_k_tuple_43), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_1);
           __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0;
           __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
@@ -14642,7 +15243,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_
     }
     /*finally:*/ {
       if (__pyx_t_4) {
-        __pyx_t_7 = PyObject_Call(__pyx_t_4, __pyx_k_tuple_41, NULL);
+        __pyx_t_7 = PyObject_Call(__pyx_t_4, __pyx_k_tuple_44, NULL);
         __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
         if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_7);
@@ -14714,7 +15315,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_
  */
     __pyx_t_20 = __Pyx_PyInt_AsInt(__pyx_v_i); if (unlikely((__pyx_t_20 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 317; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __pyx_t_24 = __Pyx_PyInt_AsInt(__pyx_v_j); if (unlikely((__pyx_t_24 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 317; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-    __pyx_t_3 = ((PyObject *)__pyx_kp_s_42);
+    __pyx_t_3 = ((PyObject *)__pyx_kp_s_45);
     __Pyx_INCREF(__pyx_t_3);
     __pyx_t_2 = ((struct __pyx_vtabstruct_3_sa_BiLex *)__pyx_v_self->__pyx_vtab)->qsort(__pyx_v_self, __pyx_t_20, __pyx_t_24, __pyx_t_3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 317; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_2);
@@ -14937,7 +15538,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_
  *         if i == j: #empty interval
  *             return
  */
-    __pyx_t_2 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_k_tuple_44), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 344; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_k_tuple_47), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 344; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_2);
     __Pyx_Raise(__pyx_t_2, 0, 0, 0);
     __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
@@ -15094,7 +15695,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_
  *         self.qsort(p+1,j, pad+"    ")
  * 
  */
-  __pyx_t_2 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_45)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_2 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_48)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
   __pyx_t_4 = ((struct __pyx_vtabstruct_3_sa_BiLex *)__pyx_v_self->__pyx_vtab)->qsort(__pyx_v_self, __pyx_v_i, __pyx_v_p, __pyx_t_2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_4);
@@ -15108,7 +15709,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_
  * 
  * 
  */
-  __pyx_t_4 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_45)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 360; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_4 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_48)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 360; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_4);
   __pyx_t_2 = ((struct __pyx_vtabstruct_3_sa_BiLex *)__pyx_v_self->__pyx_vtab)->qsort(__pyx_v_self, (__pyx_v_p + 1), __pyx_v_j, __pyx_t_4); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 360; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
@@ -15280,7 +15881,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL
  */
             __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(__pyx_t_1);
-            __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_i); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+            __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_i); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(((PyObject *)__pyx_t_2));
             __pyx_t_10 = PyTuple_New(1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(__pyx_t_10);
@@ -15304,7 +15905,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL
  */
           __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_4);
-          __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_46), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_49), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_2);
           __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
           __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
@@ -15450,7 +16051,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL
             __Pyx_INCREF(__pyx_v_s2);
             PyTuple_SET_ITEM(__pyx_t_11, 2, __pyx_v_s2);
             __Pyx_GIVEREF(__pyx_v_s2);
-            __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_47), ((PyObject *)__pyx_t_11)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+            __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_50), ((PyObject *)__pyx_t_11)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(((PyObject *)__pyx_t_1));
             __Pyx_DECREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0;
             __pyx_t_11 = PyTuple_New(1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
@@ -15475,7 +16076,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL
  */
           __pyx_t_2 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 370; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_2);
-          __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_48), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 370; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_51), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 370; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_1);
           __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
           __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
@@ -15552,7 +16153,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL
             __Pyx_INCREF(__pyx_v_w);
             PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_v_w);
             __Pyx_GIVEREF(__pyx_v_w);
-            __pyx_t_10 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_49), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+            __pyx_t_10 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_52), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(((PyObject *)__pyx_t_10));
             __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0;
             __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
@@ -15578,7 +16179,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL
  */
           __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_50), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_53), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_2);
           __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
           __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
@@ -15655,7 +16256,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL
             __Pyx_INCREF(__pyx_v_w);
             PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_v_w);
             __Pyx_GIVEREF(__pyx_v_w);
-            __pyx_t_11 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_49), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+            __pyx_t_11 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_52), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(((PyObject *)__pyx_t_11));
             __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0;
             __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
@@ -15681,7 +16282,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL
  */
           __pyx_t_2 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_2);
-          __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_51), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_54), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_1);
           __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
           __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
@@ -15762,7 +16363,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL
     }
     /*finally:*/ {
       if (__pyx_t_3) {
-        __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_52, NULL);
+        __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_55, NULL);
         __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
         if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_7);
@@ -16448,7 +17049,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex *
             __Pyx_GIVEREF(__pyx_v_score2);
             __pyx_t_4 = 0;
             __pyx_t_2 = 0;
-            __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_53), ((PyObject *)__pyx_t_12)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+            __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_56), ((PyObject *)__pyx_t_12)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(((PyObject *)__pyx_t_2));
             __Pyx_DECREF(((PyObject *)__pyx_t_12)); __pyx_t_12 = 0;
             __pyx_t_12 = PyTuple_New(1); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
@@ -16551,7 +17152,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex *
     }
     /*finally:*/ {
       if (__pyx_t_3) {
-        __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_54, NULL);
+        __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_57, NULL);
         __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
         if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_7);
@@ -17543,7 +18144,7 @@ static PyObject *__pyx_pf_3_sa_6BitSet_10__str__(struct __pyx_obj_3_sa_BitSet *_
   __Pyx_XDECREF(__pyx_r);
   __pyx_t_1 = ((PyObject *)__pyx_f_3_sa_dec2bin(__pyx_v_self->b->bitset)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
-  __pyx_t_2 = PyNumber_Add(__pyx_t_1, ((PyObject *)__pyx_kp_s_55)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_2 = PyNumber_Add(__pyx_t_1, ((PyObject *)__pyx_kp_s_58)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(((PyObject *)__pyx_t_2));
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
   __pyx_t_1 = PyInt_FromLong(__pyx_v_self->b->size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -17594,7 +18195,7 @@ static PyObject *__pyx_pf_3_sa_6BitSet_10__str__(struct __pyx_obj_3_sa_BitSet *_
   __Pyx_GOTREF(__pyx_t_1);
   __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
   __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
-  __pyx_t_2 = PyNumber_Add(__pyx_t_1, ((PyObject *)__pyx_kp_s_56)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_2 = PyNumber_Add(__pyx_t_1, ((PyObject *)__pyx_kp_s_59)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
   __pyx_r = __pyx_t_2;
@@ -17848,8 +18449,8 @@ static PyObject *__pyx_f_3_sa_dec2bin(long __pyx_v_i) {
  *     cdef unsigned d
  *     for d in range(MIN_BOTTOM_SIZE):
  */
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_42));
-  __pyx_v_result = __pyx_kp_s_42;
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_45));
+  __pyx_v_result = __pyx_kp_s_45;
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":160
  *     cdef str result = ""
@@ -19908,7 +20509,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self,
   __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__info); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_58), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_61), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
@@ -20134,7 +20735,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self,
   __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__info); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_60), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_63), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
@@ -20184,14 +20785,14 @@ static PyObject *__pyx_pw_3_sa_3LCP_3compute_stats(PyObject *__pyx_v_self, PyObj
  */
 
 static PyObject *__pyx_pf_3_sa_3LCP_2compute_stats(struct __pyx_obj_3_sa_LCP *__pyx_v_self, int __pyx_v_max_n) {
-  struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *__pyx_cur_scope;
+  struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *__pyx_cur_scope;
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("compute_stats", 0);
-  __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *)__pyx_ptype_3_sa___pyx_scope_struct__compute_stats->tp_new(__pyx_ptype_3_sa___pyx_scope_struct__compute_stats, __pyx_empty_tuple, NULL);
+  __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *)__pyx_ptype_3_sa___pyx_scope_struct_2_compute_stats->tp_new(__pyx_ptype_3_sa___pyx_scope_struct_2_compute_stats, __pyx_empty_tuple, NULL);
   if (unlikely(!__pyx_cur_scope)) {
     __Pyx_RefNannyFinishContext();
     return NULL;
@@ -20222,7 +20823,7 @@ static PyObject *__pyx_pf_3_sa_3LCP_2compute_stats(struct __pyx_obj_3_sa_LCP *__
 
 static PyObject *__pyx_gb_3_sa_3LCP_4generator(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */
 {
-  struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *__pyx_cur_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *)__pyx_generator->closure);
+  struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *__pyx_cur_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *)__pyx_generator->closure);
   PyObject *__pyx_r = NULL;
   PyObject *__pyx_t_1 = NULL;
   int __pyx_t_2;
@@ -21263,7 +21864,7 @@ static char *__pyx_f_3_sa_8Alphabet_tostring(struct __pyx_obj_3_sa_Alphabet *__p
       __Pyx_GIVEREF(__pyx_t_5);
       __pyx_t_2 = 0;
       __pyx_t_5 = 0;
-      __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_61), ((PyObject *)__pyx_t_6)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_64), ((PyObject *)__pyx_t_6)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(((PyObject *)__pyx_t_5));
       __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
       if (unlikely(((PyObject *)__pyx_v_self->id2sym) == Py_None)) {
@@ -21285,7 +21886,7 @@ static char *__pyx_f_3_sa_8Alphabet_tostring(struct __pyx_obj_3_sa_Alphabet *__p
  */
       __pyx_t_5 = PyBytes_FromString(((struct __pyx_vtabstruct_3_sa_Alphabet *)__pyx_v_self->__pyx_vtab)->tocat(__pyx_v_self, __pyx_v_sym)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(((PyObject *)__pyx_t_5));
-      __pyx_t_6 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_62), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_6 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_65), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(((PyObject *)__pyx_t_6));
       __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0;
       if (unlikely(((PyObject *)__pyx_v_self->id2sym) == Py_None)) {
@@ -21435,7 +22036,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p
  */
       __pyx_t_6 = PyBytes_FromString(__pyx_v_s); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(((PyObject *)__pyx_t_6));
-      __pyx_t_7 = PyNumber_Add(((PyObject *)__pyx_kp_s_63), ((PyObject *)__pyx_t_6)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_7 = PyNumber_Add(((PyObject *)__pyx_kp_s_66), ((PyObject *)__pyx_t_6)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_7);
       __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
       __pyx_v_s1 = __pyx_t_7;
@@ -22221,7 +22822,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_4__str__(struct __pyx_obj_3_sa_Phrase *__
  *     def handle(self):
  */
   __Pyx_XDECREF(__pyx_r);
-  __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_64), __pyx_n_s__join); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_67), __pyx_n_s__join); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_4);
@@ -22558,7 +23159,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_8strhandle(struct __pyx_obj_3_sa_Phrase *
  *     def arity(self):
  */
   __Pyx_XDECREF(__pyx_r);
-  __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_64), __pyx_n_s__join); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_67), __pyx_n_s__join); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_5);
@@ -23551,14 +24152,14 @@ static PyObject *__pyx_pw_3_sa_6Phrase_29__iter__(PyObject *__pyx_v_self) {
  */
 
 static PyObject *__pyx_pf_3_sa_6Phrase_28__iter__(struct __pyx_obj_3_sa_Phrase *__pyx_v_self) {
-  struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *__pyx_cur_scope;
+  struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *__pyx_cur_scope;
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("__iter__", 0);
-  __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *)__pyx_ptype_3_sa___pyx_scope_struct_1___iter__->tp_new(__pyx_ptype_3_sa___pyx_scope_struct_1___iter__, __pyx_empty_tuple, NULL);
+  __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *)__pyx_ptype_3_sa___pyx_scope_struct_3___iter__->tp_new(__pyx_ptype_3_sa___pyx_scope_struct_3___iter__, __pyx_empty_tuple, NULL);
   if (unlikely(!__pyx_cur_scope)) {
     __Pyx_RefNannyFinishContext();
     return NULL;
@@ -23588,7 +24189,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_28__iter__(struct __pyx_obj_3_sa_Phrase *
 
 static PyObject *__pyx_gb_3_sa_6Phrase_30generator1(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */
 {
-  struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *__pyx_cur_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *)__pyx_generator->closure);
+  struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *__pyx_cur_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *)__pyx_generator->closure);
   PyObject *__pyx_r = NULL;
   int __pyx_t_1;
   PyObject *__pyx_t_2 = NULL;
@@ -24076,7 +24677,7 @@ static int __pyx_pf_3_sa_4Rule___cinit__(struct __pyx_obj_3_sa_Rule *__pyx_v_sel
  */
     __pyx_t_2 = PyInt_FromLong(__pyx_v_lhs); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_2);
-    __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_65), __pyx_t_2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_68), __pyx_t_2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(((PyObject *)__pyx_t_3));
     __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
     __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -24806,7 +25407,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_14__str__(struct __pyx_obj_3_sa_Rule *__pyx
   __pyx_t_6 = PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 212; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_6);
   __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
-  __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_64), __pyx_n_s__join); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 212; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_67), __pyx_n_s__join); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 212; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_3);
   __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 212; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_7);
@@ -24894,7 +25495,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_14__str__(struct __pyx_obj_3_sa_Rule *__pyx
       __Pyx_GIVEREF(__pyx_t_6);
       __pyx_t_8 = 0;
       __pyx_t_6 = 0;
-      __pyx_t_6 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_66), ((PyObject *)__pyx_t_7)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 216; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_6 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_69), ((PyObject *)__pyx_t_7)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 216; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(((PyObject *)__pyx_t_6));
       __Pyx_DECREF(((PyObject *)__pyx_t_7)); __pyx_t_7 = 0;
       __pyx_t_4 = PyList_Append(__pyx_v_alignstr, ((PyObject *)__pyx_t_6)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 216; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -24908,7 +25509,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_14__str__(struct __pyx_obj_3_sa_Rule *__pyx
  * 
  *         return " ||| ".join(fields)
  */
-    __pyx_t_6 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_64), __pyx_n_s__join); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 219; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_6 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_67), __pyx_n_s__join); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 219; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_6);
     __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 219; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_7);
@@ -24933,7 +25534,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_14__str__(struct __pyx_obj_3_sa_Rule *__pyx
  *     property scores:
  */
   __Pyx_XDECREF(__pyx_r);
-  __pyx_t_8 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_67), __pyx_n_s__join); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 221; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_8 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_18), __pyx_n_s__join); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 221; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_8);
   __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 221; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_7);
@@ -27041,7 +27642,7 @@ static int __pyx_pw_3_sa_14Precomputation_1__cinit__(PyObject *__pyx_v_self, PyO
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0);
   {
-    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__fsarray,&__pyx_n_s__from_stats,&__pyx_n_s__from_binary,&__pyx_n_s__precompute_rank,&__pyx_n_s_68,&__pyx_n_s__max_length,&__pyx_n_s__max_nonterminals,&__pyx_n_s_69,&__pyx_n_s__train_min_gap_size,0};
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__fsarray,&__pyx_n_s__from_stats,&__pyx_n_s__from_binary,&__pyx_n_s__precompute_rank,&__pyx_n_s_70,&__pyx_n_s__max_length,&__pyx_n_s__max_nonterminals,&__pyx_n_s_71,&__pyx_n_s__train_min_gap_size,0};
     PyObject* values[9] = {0,0,0,0,0,0,0,0,0};
 
     /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":200
@@ -27100,7 +27701,7 @@ static int __pyx_pw_3_sa_14Precomputation_1__cinit__(PyObject *__pyx_v_self, PyO
         }
         case  4:
         if (kw_args > 0) {
-          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_68);
+          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_70);
           if (value) { values[4] = value; kw_args--; }
         }
         case  5:
@@ -27115,7 +27716,7 @@ static int __pyx_pw_3_sa_14Precomputation_1__cinit__(PyObject *__pyx_v_self, PyO
         }
         case  7:
         if (kw_args > 0) {
-          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_69);
+          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_71);
           if (value) { values[7] = value; kw_args--; }
         }
         case  8:
@@ -28321,7 +28922,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
   __pyx_t_3 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__info); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_3);
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  __pyx_t_1 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_k_tuple_71), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_k_tuple_73), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
@@ -28622,7 +29223,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
   __pyx_t_1 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__info); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-  __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_73), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_75), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_3);
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
   __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
@@ -28765,7 +29366,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
   __pyx_t_1 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__info); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-  __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_75), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_77), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_3);
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
   __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
@@ -29344,9 +29945,9 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
         __Pyx_GOTREF(__pyx_t_3);
         __pyx_t_8 = PyTuple_New(2); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_8);
-        __Pyx_INCREF(((PyObject *)__pyx_kp_s_76));
-        PyTuple_SET_ITEM(__pyx_t_8, 0, ((PyObject *)__pyx_kp_s_76));
-        __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_76));
+        __Pyx_INCREF(((PyObject *)__pyx_kp_s_78));
+        PyTuple_SET_ITEM(__pyx_t_8, 0, ((PyObject *)__pyx_kp_s_78));
+        __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_78));
         PyTuple_SET_ITEM(__pyx_t_8, 1, __pyx_t_3);
         __Pyx_GIVEREF(__pyx_t_3);
         __pyx_t_3 = 0;
@@ -29504,7 +30105,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
  *                     J2_set.add(combined_pattern)
  * 
  */
-        __pyx_t_8 = PyNumber_Add(__pyx_v_pattern1, ((PyObject *)__pyx_k_tuple_77)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 393; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __pyx_t_8 = PyNumber_Add(__pyx_v_pattern1, ((PyObject *)__pyx_k_tuple_79)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 393; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_8);
         __pyx_t_7 = PyNumber_Add(__pyx_t_8, __pyx_v_pattern2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 393; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_7);
@@ -29613,7 +30214,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
  *                     IJ_set.add(combined_pattern)
  * 
  */
-        __pyx_t_7 = PyNumber_Add(__pyx_v_pattern1, ((PyObject *)__pyx_k_tuple_78)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __pyx_t_7 = PyNumber_Add(__pyx_v_pattern1, ((PyObject *)__pyx_k_tuple_80)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_7);
         __pyx_t_8 = PyNumber_Add(__pyx_t_7, __pyx_v_pattern2); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_8);
@@ -29722,7 +30323,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
  *                     IJ_set.add(combined_pattern)
  *                     combined_pattern = pattern2 + (-1,) + pattern1
  */
-        __pyx_t_8 = PyNumber_Add(__pyx_v_pattern1, ((PyObject *)__pyx_k_tuple_79)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __pyx_t_8 = PyNumber_Add(__pyx_v_pattern1, ((PyObject *)__pyx_k_tuple_81)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_8);
         __pyx_t_7 = PyNumber_Add(__pyx_t_8, __pyx_v_pattern2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_7);
@@ -29747,7 +30348,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
  *                     IJ_set.add(combined_pattern)
  * 
  */
-        __pyx_t_7 = PyNumber_Add(__pyx_v_pattern2, ((PyObject *)__pyx_k_tuple_80)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __pyx_t_7 = PyNumber_Add(__pyx_v_pattern2, ((PyObject *)__pyx_k_tuple_82)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_7);
         __pyx_t_8 = PyNumber_Add(__pyx_t_7, __pyx_v_pattern1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_8);
@@ -29867,9 +30468,9 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
  *                 for word_id in pattern:
  *                     if word_id == -1:
  */
-      __Pyx_INCREF(((PyObject *)__pyx_kp_s_42));
+      __Pyx_INCREF(((PyObject *)__pyx_kp_s_45));
       __Pyx_XDECREF(__pyx_v_s);
-      __pyx_v_s = ((PyObject *)__pyx_kp_s_42);
+      __pyx_v_s = ((PyObject *)__pyx_kp_s_45);
 
       /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":418
  *             if pattern not in IJ_set:
@@ -29936,7 +30537,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
  *                     else:
  *                         s = s + darray.id2word[word_id] + " "
  */
-          __pyx_t_3 = PyNumber_Add(__pyx_v_s, ((PyObject *)__pyx_kp_s_81)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 420; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+          __pyx_t_3 = PyNumber_Add(__pyx_v_s, ((PyObject *)__pyx_kp_s_83)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 420; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
           __Pyx_GOTREF(__pyx_t_3);
           __Pyx_DECREF(__pyx_v_s);
           __pyx_v_s = __pyx_t_3;
@@ -29957,7 +30558,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
           __pyx_t_7 = PyNumber_Add(__pyx_v_s, __pyx_t_3); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
           __Pyx_GOTREF(__pyx_t_7);
           __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
-          __pyx_t_3 = PyNumber_Add(__pyx_t_7, ((PyObject *)__pyx_kp_s_64)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+          __pyx_t_3 = PyNumber_Add(__pyx_t_7, ((PyObject *)__pyx_kp_s_67)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
           __Pyx_GOTREF(__pyx_t_3);
           __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
           __Pyx_DECREF(__pyx_v_s);
@@ -29982,9 +30583,9 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
       __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
       __pyx_t_8 = PyTuple_New(2); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_8);
-      __Pyx_INCREF(((PyObject *)__pyx_kp_s_82));
-      PyTuple_SET_ITEM(__pyx_t_8, 0, ((PyObject *)__pyx_kp_s_82));
-      __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_82));
+      __Pyx_INCREF(((PyObject *)__pyx_kp_s_84));
+      PyTuple_SET_ITEM(__pyx_t_8, 0, ((PyObject *)__pyx_kp_s_84));
+      __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_84));
       __Pyx_INCREF(__pyx_v_s);
       PyTuple_SET_ITEM(__pyx_t_8, 1, __pyx_v_s);
       __Pyx_GIVEREF(__pyx_v_s);
@@ -30320,9 +30921,9 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
     __Pyx_GOTREF(__pyx_t_8);
     __pyx_t_7 = PyTuple_New(6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_7);
-    __Pyx_INCREF(((PyObject *)__pyx_kp_s_83));
-    PyTuple_SET_ITEM(__pyx_t_7, 0, ((PyObject *)__pyx_kp_s_83));
-    __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_83));
+    __Pyx_INCREF(((PyObject *)__pyx_kp_s_85));
+    PyTuple_SET_ITEM(__pyx_t_7, 0, ((PyObject *)__pyx_kp_s_85));
+    __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_85));
     PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_1);
     __Pyx_GIVEREF(__pyx_t_1);
     PyTuple_SET_ITEM(__pyx_t_7, 2, __pyx_t_3);
@@ -30443,9 +31044,9 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
   __Pyx_GOTREF(__pyx_t_8);
   __pyx_t_6 = PyTuple_New(4); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_6);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_84));
-  PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_kp_s_84));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_84));
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_86));
+  PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_kp_s_86));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_86));
   __Pyx_INCREF(__pyx_v_num_found_patterns);
   PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_v_num_found_patterns);
   __Pyx_GIVEREF(__pyx_v_num_found_patterns);
@@ -30480,9 +31081,9 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
   __Pyx_GOTREF(__pyx_t_8);
   __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_7);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_85));
-  PyTuple_SET_ITEM(__pyx_t_7, 0, ((PyObject *)__pyx_kp_s_85));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_85));
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_87));
+  PyTuple_SET_ITEM(__pyx_t_7, 0, ((PyObject *)__pyx_kp_s_87));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_87));
   PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_8);
   __Pyx_GIVEREF(__pyx_t_8);
   __pyx_t_8 = 0;
@@ -30506,9 +31107,9 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s
   __Pyx_GOTREF(__pyx_t_8);
   __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_6);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_86));
-  PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_kp_s_86));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_86));
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_88));
+  PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_kp_s_88));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_88));
   PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_t_8);
   __Pyx_GIVEREF(__pyx_t_8);
   __pyx_t_8 = 0;
@@ -30570,26 +31171,29 @@ static int __pyx_pw_3_sa_11SuffixArray_1__cinit__(PyObject *__pyx_v_self, PyObje
 static int __pyx_pw_3_sa_11SuffixArray_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
   PyObject *__pyx_v_from_binary = 0;
   PyObject *__pyx_v_from_text = 0;
+  PyObject *__pyx_v_side = 0;
   int __pyx_r;
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0);
   {
-    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__from_binary,&__pyx_n_s__from_text,0};
-    PyObject* values[2] = {0,0};
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__from_binary,&__pyx_n_s__from_text,&__pyx_n_s__side,0};
+    PyObject* values[3] = {0,0,0};
 
     /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":11
  *     cdef IntList ha
  * 
- *     def __cinit__(self, from_binary=None, from_text=None):             # <<<<<<<<<<<<<<
+ *     def __cinit__(self, from_binary=None, from_text=None, side=None):             # <<<<<<<<<<<<<<
  *         self.darray = DataArray()
  *         self.sa = IntList()
  */
     values[0] = ((PyObject *)Py_None);
     values[1] = ((PyObject *)Py_None);
+    values[2] = ((PyObject *)Py_None);
     if (unlikely(__pyx_kwds)) {
       Py_ssize_t kw_args;
       const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
       switch (pos_args) {
+        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
         case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
         case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
         case  0: break;
@@ -30607,12 +31211,18 @@ static int __pyx_pw_3_sa_11SuffixArray_1__cinit__(PyObject *__pyx_v_self, PyObje
           PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__from_text);
           if (value) { values[1] = value; kw_args--; }
         }
+        case  2:
+        if (kw_args > 0) {
+          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__side);
+          if (value) { values[2] = value; kw_args--; }
+        }
       }
       if (unlikely(kw_args > 0)) {
         if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
       }
     } else {
       switch (PyTuple_GET_SIZE(__pyx_args)) {
+        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
         case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
         case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
         case  0: break;
@@ -30621,21 +31231,22 @@ static int __pyx_pw_3_sa_11SuffixArray_1__cinit__(PyObject *__pyx_v_self, PyObje
     }
     __pyx_v_from_binary = values[0];
     __pyx_v_from_text = values[1];
+    __pyx_v_side = values[2];
   }
   goto __pyx_L4_argument_unpacking_done;
   __pyx_L5_argtuple_error:;
-  __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 0, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[13]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+  __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 0, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[13]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
   __pyx_L3_error:;
   __Pyx_AddTraceback("_sa.SuffixArray.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __Pyx_RefNannyFinishContext();
   return -1;
   __pyx_L4_argument_unpacking_done:;
-  __pyx_r = __pyx_pf_3_sa_11SuffixArray___cinit__(((struct __pyx_obj_3_sa_SuffixArray *)__pyx_v_self), __pyx_v_from_binary, __pyx_v_from_text);
+  __pyx_r = __pyx_pf_3_sa_11SuffixArray___cinit__(((struct __pyx_obj_3_sa_SuffixArray *)__pyx_v_self), __pyx_v_from_binary, __pyx_v_from_text, __pyx_v_side);
   __Pyx_RefNannyFinishContext();
   return __pyx_r;
 }
 
-static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text) {
+static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text, PyObject *__pyx_v_side) {
   int __pyx_r;
   __Pyx_RefNannyDeclarations
   PyObject *__pyx_t_1 = NULL;
@@ -30649,7 +31260,7 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":12
  * 
- *     def __cinit__(self, from_binary=None, from_text=None):
+ *     def __cinit__(self, from_binary=None, from_text=None, side=None):
  *         self.darray = DataArray()             # <<<<<<<<<<<<<<
  *         self.sa = IntList()
  *         self.ha = IntList()
@@ -30663,7 +31274,7 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr
   __pyx_t_1 = 0;
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":13
- *     def __cinit__(self, from_binary=None, from_text=None):
+ *     def __cinit__(self, from_binary=None, from_text=None, side=None):
  *         self.darray = DataArray()
  *         self.sa = IntList()             # <<<<<<<<<<<<<<
  *         self.ha = IntList()
@@ -30707,7 +31318,7 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr
  *         if from_binary:
  *             self.read_binary(from_binary)             # <<<<<<<<<<<<<<
  *         elif from_text:
- *             self.read_text(from_text)
+ *             self.read_text(from_text, side)
  */
     __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_binary); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_1);
@@ -30728,7 +31339,7 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr
  *         if from_binary:
  *             self.read_binary(from_binary)
  *         elif from_text:             # <<<<<<<<<<<<<<
- *             self.read_text(from_text)
+ *             self.read_text(from_text, side)
  * 
  */
   __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_from_text); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -30737,17 +31348,20 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr
     /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":18
  *             self.read_binary(from_binary)
  *         elif from_text:
- *             self.read_text(from_text)             # <<<<<<<<<<<<<<
+ *             self.read_text(from_text, side)             # <<<<<<<<<<<<<<
  * 
  *     def __getitem__(self, i):
  */
     __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_text); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_4);
-    __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_3);
     __Pyx_INCREF(__pyx_v_from_text);
     PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_from_text);
     __Pyx_GIVEREF(__pyx_v_from_text);
+    __Pyx_INCREF(__pyx_v_side);
+    PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_v_side);
+    __Pyx_GIVEREF(__pyx_v_side);
     __pyx_t_1 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_1);
     __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
@@ -30782,7 +31396,7 @@ static PyObject *__pyx_pw_3_sa_11SuffixArray_3__getitem__(PyObject *__pyx_v_self
 }
 
 /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":20
- *             self.read_text(from_text)
+ *             self.read_text(from_text, side)
  * 
  *     def __getitem__(self, i):             # <<<<<<<<<<<<<<
  *         return self.sa.arr[i]
@@ -30995,7 +31609,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_8getSentPos(struct __pyx_obj_3_sa_S
  *     def getSentPos(self, loc):
  *         return self.darray.getSentPos(loc)             # <<<<<<<<<<<<<<
  * 
- *     def read_text(self, char* filename):
+ *     def read_text(self, filename, side):
  */
   __Pyx_XDECREF(__pyx_r);
   __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self->darray), __pyx_n_s__getSentPos); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -31028,23 +31642,58 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_8getSentPos(struct __pyx_obj_3_sa_S
 }
 
 /* Python wrapper */
-static PyObject *__pyx_pw_3_sa_11SuffixArray_11read_text(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename); /*proto*/
+static PyObject *__pyx_pw_3_sa_11SuffixArray_11read_text(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
 static char __pyx_doc_3_sa_11SuffixArray_10read_text[] = "Constructs suffix array using the algorithm\n        of Larsson & Sadahkane (1999)";
-static PyObject *__pyx_pw_3_sa_11SuffixArray_11read_text(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename) {
-  char *__pyx_v_filename;
+static PyObject *__pyx_pw_3_sa_11SuffixArray_11read_text(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+  PyObject *__pyx_v_filename = 0;
+  PyObject *__pyx_v_side = 0;
   PyObject *__pyx_r = 0;
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("read_text (wrapper)", 0);
-  assert(__pyx_arg_filename); {
-    __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+  {
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__filename,&__pyx_n_s__side,0};
+    PyObject* values[2] = {0,0};
+    if (unlikely(__pyx_kwds)) {
+      Py_ssize_t kw_args;
+      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+      switch (pos_args) {
+        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+        case  0: break;
+        default: goto __pyx_L5_argtuple_error;
+      }
+      kw_args = PyDict_Size(__pyx_kwds);
+      switch (pos_args) {
+        case  0:
+        if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__filename)) != 0)) kw_args--;
+        else goto __pyx_L5_argtuple_error;
+        case  1:
+        if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__side)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("read_text", 1, 2, 2, 1); {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+        }
+      }
+      if (unlikely(kw_args > 0)) {
+        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "read_text") < 0)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+      }
+    } else if (PyTuple_GET_SIZE(__pyx_args) != 2) {
+      goto __pyx_L5_argtuple_error;
+    } else {
+      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+    }
+    __pyx_v_filename = values[0];
+    __pyx_v_side = values[1];
   }
   goto __pyx_L4_argument_unpacking_done;
+  __pyx_L5_argtuple_error:;
+  __Pyx_RaiseArgtupleInvalid("read_text", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
   __pyx_L3_error:;
   __Pyx_AddTraceback("_sa.SuffixArray.read_text", __pyx_clineno, __pyx_lineno, __pyx_filename);
   __Pyx_RefNannyFinishContext();
   return NULL;
   __pyx_L4_argument_unpacking_done:;
-  __pyx_r = __pyx_pf_3_sa_11SuffixArray_10read_text(((struct __pyx_obj_3_sa_SuffixArray *)__pyx_v_self), ((char *)__pyx_v_filename));
+  __pyx_r = __pyx_pf_3_sa_11SuffixArray_10read_text(((struct __pyx_obj_3_sa_SuffixArray *)__pyx_v_self), __pyx_v_filename, __pyx_v_side);
   __Pyx_RefNannyFinishContext();
   return __pyx_r;
 }
@@ -31052,12 +31701,12 @@ static PyObject *__pyx_pw_3_sa_11SuffixArray_11read_text(PyObject *__pyx_v_self,
 /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":32
  *         return self.darray.getSentPos(loc)
  * 
- *     def read_text(self, char* filename):             # <<<<<<<<<<<<<<
+ *     def read_text(self, filename, side):             # <<<<<<<<<<<<<<
  *         '''Constructs suffix array using the algorithm
  *         of Larsson & Sadahkane (1999)'''
  */
 
-static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, char *__pyx_v_filename) {
+static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_filename, PyObject *__pyx_v_side) {
   int __pyx_v_V;
   int __pyx_v_N;
   int __pyx_v_i;
@@ -31092,16 +31741,14 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":38
  *         cdef IntList isa, word_count
  * 
- *         self.darray = DataArray(from_text=filename, use_sent_id=True)             # <<<<<<<<<<<<<<
+ *         self.darray = DataArray(from_text=filename, side=side, use_sent_id=True)             # <<<<<<<<<<<<<<
  *         N = len(self.darray)
  *         V = len(self.darray.id2word)
  */
   __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(((PyObject *)__pyx_t_1));
-  __pyx_t_2 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(((PyObject *)__pyx_t_2));
-  if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__from_text), ((PyObject *)__pyx_t_2)) < 0) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+  if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__from_text), __pyx_v_filename) < 0) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__side), __pyx_v_side) < 0) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_t_2 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
   if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__use_sent_id), __pyx_t_2) < 0) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -31117,7 +31764,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":39
  * 
- *         self.darray = DataArray(from_text=filename, use_sent_id=True)
+ *         self.darray = DataArray(from_text=filename, side=side, use_sent_id=True)
  *         N = len(self.darray)             # <<<<<<<<<<<<<<
  *         V = len(self.darray.id2word)
  * 
@@ -31129,7 +31776,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S
   __pyx_v_N = __pyx_t_3;
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":40
- *         self.darray = DataArray(from_text=filename, use_sent_id=True)
+ *         self.darray = DataArray(from_text=filename, side=side, use_sent_id=True)
  *         N = len(self.darray)
  *         V = len(self.darray.id2word)             # <<<<<<<<<<<<<<
  * 
@@ -31461,9 +32108,9 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S
   __Pyx_GOTREF(__pyx_t_2);
   __pyx_t_9 = PyTuple_New(2); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_9);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_87));
-  PyTuple_SET_ITEM(__pyx_t_9, 0, ((PyObject *)__pyx_kp_s_87));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_87));
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_89));
+  PyTuple_SET_ITEM(__pyx_t_9, 0, ((PyObject *)__pyx_kp_s_89));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_89));
   PyTuple_SET_ITEM(__pyx_t_9, 1, __pyx_t_2);
   __Pyx_GIVEREF(__pyx_t_2);
   __pyx_t_2 = 0;
@@ -31518,9 +32165,9 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S
     __Pyx_GOTREF(__pyx_t_2);
     __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_1);
-    __Pyx_INCREF(((PyObject *)__pyx_kp_s_88));
-    PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_kp_s_88));
-    __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_88));
+    __Pyx_INCREF(((PyObject *)__pyx_kp_s_90));
+    PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_kp_s_90));
+    __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_90));
     PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_t_2);
     __Pyx_GIVEREF(__pyx_t_2);
     __pyx_t_2 = 0;
@@ -31724,9 +32371,9 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S
     __Pyx_GOTREF(__pyx_t_10);
     __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_2);
-    __Pyx_INCREF(((PyObject *)__pyx_kp_s_89));
-    PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_89));
-    __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_89));
+    __Pyx_INCREF(((PyObject *)__pyx_kp_s_91));
+    PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_91));
+    __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_91));
     PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_10);
     __Pyx_GIVEREF(__pyx_t_10);
     __pyx_t_10 = 0;
@@ -31749,7 +32396,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S
   __pyx_t_2 = PyObject_GetAttr(__pyx_t_10, __pyx_n_s__info); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
   __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
-  __pyx_t_10 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_91), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_10 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_93), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_10);
   __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
   __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
@@ -31799,9 +32446,9 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S
   __Pyx_GOTREF(__pyx_t_10);
   __pyx_t_11 = PyTuple_New(2); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_11);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_92));
-  PyTuple_SET_ITEM(__pyx_t_11, 0, ((PyObject *)__pyx_kp_s_92));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_92));
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_94));
+  PyTuple_SET_ITEM(__pyx_t_11, 0, ((PyObject *)__pyx_kp_s_94));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_94));
   PyTuple_SET_ITEM(__pyx_t_11, 1, __pyx_t_10);
   __Pyx_GIVEREF(__pyx_t_10);
   __pyx_t_10 = 0;
@@ -31844,7 +32491,7 @@ static PyObject *__pyx_pw_3_sa_11SuffixArray_13q3sort(PyObject *__pyx_v_self, Py
   {
     static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__i,&__pyx_n_s__j,&__pyx_n_s__h,&__pyx_n_s__isa,&__pyx_n_s__pad,0};
     PyObject* values[5] = {0,0,0,0,0};
-    values[4] = ((PyObject *)__pyx_kp_s_42);
+    values[4] = ((PyObject *)__pyx_kp_s_45);
     if (unlikely(__pyx_kwds)) {
       Py_ssize_t kw_args;
       const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
@@ -31980,7 +32627,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_12q3sort(struct __pyx_obj_3_sa_Suff
     __Pyx_GIVEREF(__pyx_t_3);
     __pyx_t_2 = 0;
     __pyx_t_3 = 0;
-    __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_93), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_95), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(((PyObject *)__pyx_t_3));
     __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0;
     __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -32339,7 +32986,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_12q3sort(struct __pyx_obj_3_sa_Suff
   __Pyx_GOTREF(__pyx_t_2);
   __pyx_t_6 = PyInt_FromLong(__pyx_v_h); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_6);
-  __pyx_t_7 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_45)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_7 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_48)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_7);
   __pyx_t_8 = PyTuple_New(5); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_8);
@@ -32421,7 +33068,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_12q3sort(struct __pyx_obj_3_sa_Suff
   __Pyx_GOTREF(__pyx_t_3);
   __pyx_t_6 = PyInt_FromLong(__pyx_v_h); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_6);
-  __pyx_t_2 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_45)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_2 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_48)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
   __pyx_t_4 = PyTuple_New(5); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_4);
@@ -32811,7 +33458,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_20write_enhanced(struct __pyx_obj_3
  *             for a_i in self.sa:
  *                 f.write("%d " % a_i)
  */
-          __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self->darray), __pyx_n_s_24); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 199; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self->darray), __pyx_n_s_27); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 199; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_4);
           __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 199; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_1);
@@ -32878,7 +33525,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_20write_enhanced(struct __pyx_obj_3
  */
             __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(__pyx_t_1);
-            __pyx_t_4 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_a_i); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+            __pyx_t_4 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_a_i); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(((PyObject *)__pyx_t_4));
             __pyx_t_10 = PyTuple_New(1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(__pyx_t_10);
@@ -32902,7 +33549,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_20write_enhanced(struct __pyx_obj_3
  */
           __pyx_t_2 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_2);
-          __pyx_t_4 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_94), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_4 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_96), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_4);
           __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
           __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
@@ -32961,7 +33608,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_20write_enhanced(struct __pyx_obj_3
  */
             __pyx_t_2 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 204; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(__pyx_t_2);
-            __pyx_t_10 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_w_i); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 204; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+            __pyx_t_10 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_w_i); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 204; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(((PyObject *)__pyx_t_10));
             __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 204; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
             __Pyx_GOTREF(__pyx_t_1);
@@ -32985,7 +33632,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_20write_enhanced(struct __pyx_obj_3
  */
           __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 205; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_4);
-          __pyx_t_10 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_95), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 205; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
+          __pyx_t_10 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_97), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 205; __pyx_clineno = __LINE__; goto __pyx_L7_error;}
           __Pyx_GOTREF(__pyx_t_10);
           __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
           __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
@@ -33064,7 +33711,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_20write_enhanced(struct __pyx_obj_3
     }
     /*finally:*/ {
       if (__pyx_t_3) {
-        __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_96, NULL);
+        __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_98, NULL);
         __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
         if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
         __Pyx_GOTREF(__pyx_t_7);
@@ -34267,7 +34914,7 @@ static int __pyx_pw_3_sa_9TrieTable_1__cinit__(PyObject *__pyx_v_self, PyObject
   {
     static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__extended,0};
     PyObject* values[1] = {0};
-    values[0] = __pyx_k_97;
+    values[0] = __pyx_k_99;
     if (unlikely(__pyx_kwds)) {
       Py_ssize_t kw_args;
       const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
@@ -35014,9 +35661,9 @@ static int __pyx_pf_3_sa_7Sampler___cinit__(struct __pyx_obj_3_sa_Sampler *__pyx
     __Pyx_GOTREF(__pyx_t_2);
     __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_4);
-    __Pyx_INCREF(((PyObject *)__pyx_kp_s_98));
-    PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_kp_s_98));
-    __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_98));
+    __Pyx_INCREF(((PyObject *)__pyx_kp_s_100));
+    PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_kp_s_100));
+    __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_100));
     PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_2);
     __Pyx_GIVEREF(__pyx_t_2);
     __pyx_t_2 = 0;
@@ -35041,7 +35688,7 @@ static int __pyx_pf_3_sa_7Sampler___cinit__(struct __pyx_obj_3_sa_Sampler *__pyx
     __pyx_t_4 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__info); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_4);
     __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
-    __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_100), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_102), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_2);
     __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
     __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
@@ -35829,7 +36476,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_
   __Pyx_RefNannyDeclarations
   __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0);
   {
-    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__alignment,&__pyx_n_s__by_slack_factor,&__pyx_n_s__category,&__pyx_n_s__max_chunks,&__pyx_n_s__max_initial_size,&__pyx_n_s__max_length,&__pyx_n_s__max_nonterminals,&__pyx_n_s__max_target_chunks,&__pyx_n_s__max_target_length,&__pyx_n_s__min_gap_size,&__pyx_n_s__precompute_file,&__pyx_n_s_68,&__pyx_n_s__precompute_rank,&__pyx_n_s_101,&__pyx_n_s_102,&__pyx_n_s_69,&__pyx_n_s__train_min_gap_size,&__pyx_n_s__tight_phrases,&__pyx_n_s__use_baeza_yates,&__pyx_n_s__use_collocations,&__pyx_n_s__use_index,0};
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__alignment,&__pyx_n_s__by_slack_factor,&__pyx_n_s__category,&__pyx_n_s__max_chunks,&__pyx_n_s__max_initial_size,&__pyx_n_s__max_length,&__pyx_n_s__max_nonterminals,&__pyx_n_s__max_target_chunks,&__pyx_n_s__max_target_length,&__pyx_n_s__min_gap_size,&__pyx_n_s__precompute_file,&__pyx_n_s_70,&__pyx_n_s__precompute_rank,&__pyx_n_s_103,&__pyx_n_s_104,&__pyx_n_s_71,&__pyx_n_s__train_min_gap_size,&__pyx_n_s__tight_phrases,&__pyx_n_s__use_baeza_yates,&__pyx_n_s__use_collocations,&__pyx_n_s__use_index,0};
     PyObject* values[21] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 
     /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":257
@@ -35952,7 +36599,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_
         }
         case 11:
         if (kw_args > 0) {
-          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_68);
+          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_70);
           if (value) { values[11] = value; kw_args--; }
         }
         case 12:
@@ -35962,17 +36609,17 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_
         }
         case 13:
         if (kw_args > 0) {
-          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_101);
+          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_103);
           if (value) { values[13] = value; kw_args--; }
         }
         case 14:
         if (kw_args > 0) {
-          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_102);
+          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_104);
           if (value) { values[14] = value; kw_args--; }
         }
         case 15:
         if (kw_args > 0) {
-          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_69);
+          PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_71);
           if (value) { values[15] = value; kw_args--; }
         }
         case 16:
@@ -36048,7 +36695,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_
     if (values[2]) {
       __pyx_v_category = PyBytes_AsString(values[2]); if (unlikely((!__pyx_v_category) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
     } else {
-      __pyx_v_category = ((char *)__pyx_k_103);
+      __pyx_v_category = ((char *)__pyx_k_105);
     }
     __pyx_v_max_chunks = values[3];
     if (values[4]) {
@@ -36275,7 +36922,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_
  *         self.alignment = alignment
  * 
  */
-    __pyx_t_2 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_k_tuple_105), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_k_tuple_107), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_2);
     __Pyx_Raise(__pyx_t_2, 0, 0, 0);
     __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
@@ -37756,9 +38403,9 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py
     __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
     __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_2);
-    __Pyx_INCREF(((PyObject *)__pyx_kp_s_106));
-    PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_106));
-    __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_106));
+    __Pyx_INCREF(((PyObject *)__pyx_kp_s_108));
+    PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_108));
+    __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_108));
     __Pyx_INCREF(__pyx_v_self->precompute_file);
     PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_v_self->precompute_file);
     __Pyx_GIVEREF(__pyx_v_self->precompute_file);
@@ -37812,9 +38459,9 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py
       __Pyx_GOTREF(__pyx_t_3);
       __pyx_t_5 = PyTuple_New(3); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 426; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_5);
-      __Pyx_INCREF(((PyObject *)__pyx_kp_s_107));
-      PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_kp_s_107));
-      __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_107));
+      __Pyx_INCREF(((PyObject *)__pyx_kp_s_109));
+      PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_kp_s_109));
+      __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_109));
       PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_2);
       __Pyx_GIVEREF(__pyx_t_2);
       PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_t_3);
@@ -37858,9 +38505,9 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py
       __Pyx_GOTREF(__pyx_t_4);
       __pyx_t_2 = PyTuple_New(3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 428; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_2);
-      __Pyx_INCREF(((PyObject *)__pyx_kp_s_108));
-      PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_108));
-      __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_108));
+      __Pyx_INCREF(((PyObject *)__pyx_kp_s_110));
+      PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_110));
+      __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_110));
       PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_3);
       __Pyx_GIVEREF(__pyx_t_3);
       PyTuple_SET_ITEM(__pyx_t_2, 2, __pyx_t_4);
@@ -37905,7 +38552,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py
       __Pyx_GIVEREF(__pyx_t_2);
       __pyx_t_4 = 0;
       __pyx_t_2 = 0;
-      __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_109), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_111), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(((PyObject *)__pyx_t_2));
       __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0;
       __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -37952,7 +38599,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py
       __Pyx_GIVEREF(__pyx_t_5);
       __pyx_t_2 = 0;
       __pyx_t_5 = 0;
-      __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_110), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_112), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(((PyObject *)__pyx_t_5));
       __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0;
       __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -37999,9 +38646,9 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py
       __Pyx_GOTREF(__pyx_t_5);
       __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 434; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_2);
-      __Pyx_INCREF(((PyObject *)__pyx_kp_s_111));
-      PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_111));
-      __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_111));
+      __Pyx_INCREF(((PyObject *)__pyx_kp_s_113));
+      PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_113));
+      __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_113));
       PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_5);
       __Pyx_GIVEREF(__pyx_t_5);
       __pyx_t_5 = 0;
@@ -38153,9 +38800,9 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py
       __Pyx_GOTREF(__pyx_t_5);
       __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 440; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_2);
-      __Pyx_INCREF(((PyObject *)__pyx_kp_s_112));
-      PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_112));
-      __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_112));
+      __Pyx_INCREF(((PyObject *)__pyx_kp_s_114));
+      PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_114));
+      __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_114));
       PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_5);
       __Pyx_GIVEREF(__pyx_t_5);
       __pyx_t_5 = 0;
@@ -38259,9 +38906,9 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py
     __Pyx_GOTREF(__pyx_t_5);
     __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_2);
-    __Pyx_INCREF(((PyObject *)__pyx_kp_s_113));
-    PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_113));
-    __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_113));
+    __Pyx_INCREF(((PyObject *)__pyx_kp_s_115));
+    PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_115));
+    __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_115));
     PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_5);
     __Pyx_GIVEREF(__pyx_t_5);
     __pyx_t_5 = 0;
@@ -40981,8 +41628,8 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st
  *         i = 0
  *         while i < loc.arr_high:
  */
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_114));
-  __pyx_v_result = ((PyObject *)__pyx_kp_s_114);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_116));
+  __pyx_v_result = ((PyObject *)__pyx_kp_s_116);
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":793
  *         cdef int i, j
@@ -41011,7 +41658,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st
  *             for j from i <= j < i + loc.num_subpatterns:
  *                 result = result + ("%d " %loc.arr[j])
  */
-    __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_115)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 795; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_117)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 795; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_2);
     __Pyx_DECREF(__pyx_v_result);
     __pyx_v_result = __pyx_t_2;
@@ -41036,7 +41683,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st
  */
       __pyx_t_2 = __Pyx_GetItemInt(((PyObject *)__pyx_v_loc->arr), __pyx_v_j, sizeof(int), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(__pyx_t_2);
-      __pyx_t_4 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_t_2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_t_4 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_t_2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
       __Pyx_GOTREF(((PyObject *)__pyx_t_4));
       __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
       __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -41054,7 +41701,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st
  *             i = i + loc.num_subpatterns
  *         result = result + "}"
  */
-    __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_56)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_59)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
     __Pyx_GOTREF(__pyx_t_2);
     __Pyx_DECREF(__pyx_v_result);
     __pyx_v_result = __pyx_t_2;
@@ -41077,7 +41724,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st
  *         return result
  * 
  */
-  __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_116)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 800; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_118)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 800; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
   __Pyx_DECREF(__pyx_v_result);
   __pyx_v_result = __pyx_t_2;
@@ -41194,7 +41841,7 @@ static struct __pyx_obj_3_sa_PhraseLocation *__pyx_f_3_sa_23HieroCachingRuleFact
  *         if result is not None:
  *             intersect_method = "precomputed"
  */
-  __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s_117); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s_119); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
@@ -41272,9 +41919,9 @@ static struct __pyx_obj_3_sa_PhraseLocation *__pyx_f_3_sa_23HieroCachingRuleFact
  *             else:
  *                 result = self.intersect_helper(prefix, suffix, prefix_loc, suffix_loc, MERGE)
  */
-      __Pyx_INCREF(((PyObject *)__pyx_kp_s_118));
+      __Pyx_INCREF(((PyObject *)__pyx_kp_s_120));
       __Pyx_XDECREF(__pyx_v_intersect_method);
-      __pyx_v_intersect_method = ((PyObject *)__pyx_kp_s_118);
+      __pyx_v_intersect_method = ((PyObject *)__pyx_kp_s_120);
       goto __pyx_L5;
     }
     /*else*/ {
@@ -43798,14 +44445,14 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_23input(PyObject *__pyx
  */
 
 static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_22input(struct __pyx_obj_3_sa_HieroCachingRuleFactory *__pyx_v_self, PyObject *__pyx_v_fwords, PyObject *__pyx_v_models) {
-  struct __pyx_obj_3_sa___pyx_scope_struct_2_input *__pyx_cur_scope;
+  struct __pyx_obj_3_sa___pyx_scope_struct_4_input *__pyx_cur_scope;
   PyObject *__pyx_r = NULL;
   __Pyx_RefNannyDeclarations
   int __pyx_lineno = 0;
   const char *__pyx_filename = NULL;
   int __pyx_clineno = 0;
   __Pyx_RefNannySetupContext("input", 0);
-  __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct_2_input *)__pyx_ptype_3_sa___pyx_scope_struct_2_input->tp_new(__pyx_ptype_3_sa___pyx_scope_struct_2_input, __pyx_empty_tuple, NULL);
+  __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct_4_input *)__pyx_ptype_3_sa___pyx_scope_struct_4_input->tp_new(__pyx_ptype_3_sa___pyx_scope_struct_4_input, __pyx_empty_tuple, NULL);
   if (unlikely(!__pyx_cur_scope)) {
     __Pyx_RefNannyFinishContext();
     return NULL;
@@ -43841,7 +44488,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_22input(struct __pyx_ob
 
 static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator2(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */
 {
-  struct __pyx_obj_3_sa___pyx_scope_struct_2_input *__pyx_cur_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct_2_input *)__pyx_generator->closure);
+  struct __pyx_obj_3_sa___pyx_scope_struct_4_input *__pyx_cur_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct_4_input *)__pyx_generator->closure);
   PyObject *__pyx_r = NULL;
   Py_ssize_t __pyx_t_1;
   PyObject *__pyx_t_2 = NULL;
@@ -44879,7 +45526,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator2(__pyx_Gene
  *                 # checking whether lookup_required
  *                 if lookup_required:
  */
-            __pyx_t_3 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_k_tuple_120), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1004; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+            __pyx_t_3 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_k_tuple_122), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1004; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
             __Pyx_GOTREF(__pyx_t_3);
             __Pyx_Raise(__pyx_t_3, 0, 0, 0);
             __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
@@ -46643,7 +47290,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator2(__pyx_Gene
  *                             nodes_isteps_away_buffer[key] = frontier_nodes
  * 
  */
-            __pyx_t_10 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_self), __pyx_n_s_121); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1117; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+            __pyx_t_10 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_self), __pyx_n_s_123); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1117; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
             __Pyx_GOTREF(__pyx_t_10);
             __pyx_t_9 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_self->min_gap_size); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1117; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
             __Pyx_GOTREF(__pyx_t_9);
@@ -46907,9 +47554,9 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator2(__pyx_Gene
   __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0;
   __pyx_t_12 = PyTuple_New(2); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_12);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_122));
-  PyTuple_SET_ITEM(__pyx_t_12, 0, ((PyObject *)__pyx_kp_s_122));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_122));
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_124));
+  PyTuple_SET_ITEM(__pyx_t_12, 0, ((PyObject *)__pyx_kp_s_124));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_124));
   PyTuple_SET_ITEM(__pyx_t_12, 1, __pyx_t_14);
   __Pyx_GIVEREF(__pyx_t_14);
   __pyx_t_14 = 0;
@@ -46952,9 +47599,9 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator2(__pyx_Gene
   __Pyx_GOTREF(__pyx_t_14);
   __pyx_t_15 = PyTuple_New(2); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_15);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_123));
-  PyTuple_SET_ITEM(__pyx_t_15, 0, ((PyObject *)__pyx_kp_s_123));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_123));
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_125));
+  PyTuple_SET_ITEM(__pyx_t_15, 0, ((PyObject *)__pyx_kp_s_125));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_125));
   PyTuple_SET_ITEM(__pyx_t_15, 1, __pyx_t_14);
   __Pyx_GIVEREF(__pyx_t_14);
   __pyx_t_14 = 0;
@@ -49754,8 +50401,8 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj
  * 
  *         for i from 0 <= i < e_sent_len:
  */
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_42));
-  __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_42);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_45));
+  __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_45);
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1446
  *         reason_for_failure = ""
@@ -50129,9 +50776,9 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj
  *                 met_constraints = 0
  *             if self.require_aligned_chunks and num_aligned_chunks < num_chunks:
  */
-      __Pyx_INCREF(((PyObject *)__pyx_kp_s_124));
+      __Pyx_INCREF(((PyObject *)__pyx_kp_s_126));
       __Pyx_DECREF(__pyx_v_reason_for_failure);
-      __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_124);
+      __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_126);
 
       /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1486
  *             if num_aligned_chunks == 0:
@@ -50167,9 +50814,9 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj
  *                 met_constraints = 0
  * 
  */
-      __Pyx_INCREF(((PyObject *)__pyx_kp_s_125));
+      __Pyx_INCREF(((PyObject *)__pyx_kp_s_127));
       __Pyx_DECREF(__pyx_v_reason_for_failure);
-      __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_125);
+      __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_127);
 
       /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1489
  *             if self.require_aligned_chunks and num_aligned_chunks < num_chunks:
@@ -50227,9 +50874,9 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj
  *                     met_constraints = 0
  *                     break
  */
-        __Pyx_INCREF(((PyObject *)__pyx_kp_s_126));
+        __Pyx_INCREF(((PyObject *)__pyx_kp_s_128));
         __Pyx_DECREF(__pyx_v_reason_for_failure);
-        __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_126);
+        __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_128);
 
         /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1496
  *                 if f_links_low[matching.arr[matching.start+i]+chunklen[i]-f_sent_start] == -1:
@@ -50269,9 +50916,9 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj
  *                     met_constraints = 0
  *                     break
  */
-        __Pyx_INCREF(((PyObject *)__pyx_kp_s_126));
+        __Pyx_INCREF(((PyObject *)__pyx_kp_s_128));
         __Pyx_DECREF(__pyx_v_reason_for_failure);
-        __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_126);
+        __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_128);
 
         /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1500
  *                 if f_links_low[matching.arr[matching.start+i+1]-1-f_sent_start] == -1:
@@ -50483,9 +51130,9 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj
  *                 else:
  *                     gap_start = 1
  */
-            __Pyx_INCREF(((PyObject *)__pyx_kp_s_127));
+            __Pyx_INCREF(((PyObject *)__pyx_kp_s_129));
             __Pyx_DECREF(__pyx_v_reason_for_failure);
-            __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_127);
+            __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_129);
             goto __pyx_L38;
           }
           __pyx_L38:;
@@ -50681,9 +51328,9 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj
  *                 else:
  *                     if self.tight_phrases and f_links_low[f_high-1] == -1:
  */
-            __Pyx_INCREF(((PyObject *)__pyx_kp_s_128));
+            __Pyx_INCREF(((PyObject *)__pyx_kp_s_130));
             __Pyx_DECREF(__pyx_v_reason_for_failure);
-            __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_128);
+            __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_130);
             goto __pyx_L45;
           }
           __pyx_L45:;
@@ -50801,7 +51448,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj
             __Pyx_GIVEREF(__pyx_t_1);
             __pyx_t_10 = 0;
             __pyx_t_1 = 0;
-            __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_129), ((PyObject *)__pyx_t_2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1565; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+            __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_131), ((PyObject *)__pyx_t_2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1565; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
             __Pyx_GOTREF(((PyObject *)__pyx_t_1));
             __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
             __Pyx_DECREF(__pyx_v_reason_for_failure);
@@ -51117,7 +51764,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj
             __pyx_t_2 = 0;
             __pyx_t_1 = 0;
             __pyx_t_14 = 0;
-            __pyx_t_14 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_130), ((PyObject *)__pyx_t_15)); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1595; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+            __pyx_t_14 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_132), ((PyObject *)__pyx_t_15)); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1595; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
             __Pyx_GOTREF(((PyObject *)__pyx_t_14));
             __Pyx_DECREF(((PyObject *)__pyx_t_15)); __pyx_t_15 = 0;
             __Pyx_DECREF(__pyx_v_reason_for_failure);
@@ -53335,9 +53982,9 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj
  * 
  *         free(sent_links)
  */
-      __Pyx_INCREF(((PyObject *)__pyx_kp_s_131));
+      __Pyx_INCREF(((PyObject *)__pyx_kp_s_133));
       __Pyx_DECREF(__pyx_v_reason_for_failure);
-      __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_131);
+      __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_133);
     }
     __pyx_L34:;
     goto __pyx_L33;
@@ -54631,10 +55278,12 @@ static PyMethodDef __pyx_methods_3_sa_DataArray[] = {
   {__Pyx_NAMESTR("get_word"), (PyCFunction)__pyx_pw_3_sa_9DataArray_13get_word, METH_O, __Pyx_DOCSTR(0)},
   {__Pyx_NAMESTR("write_text"), (PyCFunction)__pyx_pw_3_sa_9DataArray_15write_text, METH_O, __Pyx_DOCSTR(0)},
   {__Pyx_NAMESTR("read_text"), (PyCFunction)__pyx_pw_3_sa_9DataArray_17read_text, METH_O, __Pyx_DOCSTR(0)},
-  {__Pyx_NAMESTR("read_binary"), (PyCFunction)__pyx_pw_3_sa_9DataArray_19read_binary, METH_O, __Pyx_DOCSTR(0)},
-  {__Pyx_NAMESTR("write_binary"), (PyCFunction)__pyx_pw_3_sa_9DataArray_21write_binary, METH_O, __Pyx_DOCSTR(0)},
-  {__Pyx_NAMESTR("write_enhanced_handle"), (PyCFunction)__pyx_pw_3_sa_9DataArray_23write_enhanced_handle, METH_O, __Pyx_DOCSTR(0)},
-  {__Pyx_NAMESTR("write_enhanced"), (PyCFunction)__pyx_pw_3_sa_9DataArray_25write_enhanced, METH_O, __Pyx_DOCSTR(0)},
+  {__Pyx_NAMESTR("read_bitext"), (PyCFunction)__pyx_pw_3_sa_9DataArray_19read_bitext, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)},
+  {__Pyx_NAMESTR("read_text_data"), (PyCFunction)__pyx_pw_3_sa_9DataArray_21read_text_data, METH_O, __Pyx_DOCSTR(0)},
+  {__Pyx_NAMESTR("read_binary"), (PyCFunction)__pyx_pw_3_sa_9DataArray_23read_binary, METH_O, __Pyx_DOCSTR(0)},
+  {__Pyx_NAMESTR("write_binary"), (PyCFunction)__pyx_pw_3_sa_9DataArray_25write_binary, METH_O, __Pyx_DOCSTR(0)},
+  {__Pyx_NAMESTR("write_enhanced_handle"), (PyCFunction)__pyx_pw_3_sa_9DataArray_27write_enhanced_handle, METH_O, __Pyx_DOCSTR(0)},
+  {__Pyx_NAMESTR("write_enhanced"), (PyCFunction)__pyx_pw_3_sa_9DataArray_29write_enhanced, METH_O, __Pyx_DOCSTR(0)},
   {0, 0, 0, 0}
 };
 
@@ -56869,7 +57518,7 @@ static PyMethodDef __pyx_methods_3_sa_SuffixArray[] = {
   {__Pyx_NAMESTR("getSentId"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_5getSentId, METH_O, __Pyx_DOCSTR(0)},
   {__Pyx_NAMESTR("getSent"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_7getSent, METH_O, __Pyx_DOCSTR(0)},
   {__Pyx_NAMESTR("getSentPos"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_9getSentPos, METH_O, __Pyx_DOCSTR(0)},
-  {__Pyx_NAMESTR("read_text"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_11read_text, METH_O, __Pyx_DOCSTR(__pyx_doc_3_sa_11SuffixArray_10read_text)},
+  {__Pyx_NAMESTR("read_text"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_11read_text, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3_sa_11SuffixArray_10read_text)},
   {__Pyx_NAMESTR("q3sort"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_13q3sort, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3_sa_11SuffixArray_12q3sort)},
   {__Pyx_NAMESTR("write_text"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_15write_text, METH_O, __Pyx_DOCSTR(0)},
   {__Pyx_NAMESTR("read_binary"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_17read_binary, METH_O, __Pyx_DOCSTR(0)},
@@ -58040,7 +58689,517 @@ static PyNumberMethods __pyx_tp_as_number_Sampler = {
   #endif
 };
 
-static PySequenceMethods __pyx_tp_as_sequence_Sampler = {
+static PySequenceMethods __pyx_tp_as_sequence_Sampler = {
+  0, /*sq_length*/
+  0, /*sq_concat*/
+  0, /*sq_repeat*/
+  0, /*sq_item*/
+  0, /*sq_slice*/
+  0, /*sq_ass_item*/
+  0, /*sq_ass_slice*/
+  0, /*sq_contains*/
+  0, /*sq_inplace_concat*/
+  0, /*sq_inplace_repeat*/
+};
+
+static PyMappingMethods __pyx_tp_as_mapping_Sampler = {
+  0, /*mp_length*/
+  0, /*mp_subscript*/
+  0, /*mp_ass_subscript*/
+};
+
+static PyBufferProcs __pyx_tp_as_buffer_Sampler = {
+  #if PY_MAJOR_VERSION < 3
+  0, /*bf_getreadbuffer*/
+  #endif
+  #if PY_MAJOR_VERSION < 3
+  0, /*bf_getwritebuffer*/
+  #endif
+  #if PY_MAJOR_VERSION < 3
+  0, /*bf_getsegcount*/
+  #endif
+  #if PY_MAJOR_VERSION < 3
+  0, /*bf_getcharbuffer*/
+  #endif
+  #if PY_VERSION_HEX >= 0x02060000
+  0, /*bf_getbuffer*/
+  #endif
+  #if PY_VERSION_HEX >= 0x02060000
+  0, /*bf_releasebuffer*/
+  #endif
+};
+
+static PyTypeObject __pyx_type_3_sa_Sampler = {
+  PyVarObject_HEAD_INIT(0, 0)
+  __Pyx_NAMESTR("_sa.Sampler"), /*tp_name*/
+  sizeof(struct __pyx_obj_3_sa_Sampler), /*tp_basicsize*/
+  0, /*tp_itemsize*/
+  __pyx_tp_dealloc_3_sa_Sampler, /*tp_dealloc*/
+  0, /*tp_print*/
+  0, /*tp_getattr*/
+  0, /*tp_setattr*/
+  #if PY_MAJOR_VERSION < 3
+  0, /*tp_compare*/
+  #else
+  0, /*reserved*/
+  #endif
+  0, /*tp_repr*/
+  &__pyx_tp_as_number_Sampler, /*tp_as_number*/
+  &__pyx_tp_as_sequence_Sampler, /*tp_as_sequence*/
+  &__pyx_tp_as_mapping_Sampler, /*tp_as_mapping*/
+  0, /*tp_hash*/
+  0, /*tp_call*/
+  0, /*tp_str*/
+  0, /*tp_getattro*/
+  0, /*tp_setattro*/
+  &__pyx_tp_as_buffer_Sampler, /*tp_as_buffer*/
+  Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+  __Pyx_DOCSTR("A Sampler implements a logic for choosing\n    samples from a population range"), /*tp_doc*/
+  __pyx_tp_traverse_3_sa_Sampler, /*tp_traverse*/
+  __pyx_tp_clear_3_sa_Sampler, /*tp_clear*/
+  0, /*tp_richcompare*/
+  0, /*tp_weaklistoffset*/
+  0, /*tp_iter*/
+  0, /*tp_iternext*/
+  __pyx_methods_3_sa_Sampler, /*tp_methods*/
+  0, /*tp_members*/
+  0, /*tp_getset*/
+  0, /*tp_base*/
+  0, /*tp_dict*/
+  0, /*tp_descr_get*/
+  0, /*tp_descr_set*/
+  0, /*tp_dictoffset*/
+  0, /*tp_init*/
+  0, /*tp_alloc*/
+  __pyx_tp_new_3_sa_Sampler, /*tp_new*/
+  0, /*tp_free*/
+  0, /*tp_is_gc*/
+  0, /*tp_bases*/
+  0, /*tp_mro*/
+  0, /*tp_cache*/
+  0, /*tp_subclasses*/
+  0, /*tp_weaklist*/
+  0, /*tp_del*/
+  #if PY_VERSION_HEX >= 0x02060000
+  0, /*tp_version_tag*/
+  #endif
+};
+static struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory __pyx_vtable_3_sa_HieroCachingRuleFactory;
+
+static PyObject *__pyx_tp_new_3_sa_HieroCachingRuleFactory(PyTypeObject *t, PyObject *a, PyObject *k) {
+  struct __pyx_obj_3_sa_HieroCachingRuleFactory *p;
+  PyObject *o = (*t->tp_alloc)(t, 0);
+  if (!o) return 0;
+  p = ((struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o);
+  p->__pyx_vtab = __pyx_vtabptr_3_sa_HieroCachingRuleFactory;
+  p->rules = ((struct __pyx_obj_3_sa_TrieTable *)Py_None); Py_INCREF(Py_None);
+  p->sampler = ((struct __pyx_obj_3_sa_Sampler *)Py_None); Py_INCREF(Py_None);
+  p->precomputed_index = Py_None; Py_INCREF(Py_None);
+  p->precomputed_collocations = Py_None; Py_INCREF(Py_None);
+  p->precompute_file = Py_None; Py_INCREF(Py_None);
+  p->max_rank = Py_None; Py_INCREF(Py_None);
+  p->prev_norm_prefix = Py_None; Py_INCREF(Py_None);
+  p->fsa = ((struct __pyx_obj_3_sa_SuffixArray *)Py_None); Py_INCREF(Py_None);
+  p->fda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None);
+  p->eda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None);
+  p->alignment = ((struct __pyx_obj_3_sa_Alignment *)Py_None); Py_INCREF(Py_None);
+  p->eid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
+  p->fid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
+  p->findexes = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
+  p->findexes1 = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
+  if (__pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(o, a, k) < 0) {
+    Py_DECREF(o); o = 0;
+  }
+  return o;
+}
+
+static void __pyx_tp_dealloc_3_sa_HieroCachingRuleFactory(PyObject *o) {
+  struct __pyx_obj_3_sa_HieroCachingRuleFactory *p = (struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o;
+  Py_XDECREF(((PyObject *)p->rules));
+  Py_XDECREF(((PyObject *)p->sampler));
+  Py_XDECREF(p->precomputed_index);
+  Py_XDECREF(p->precomputed_collocations);
+  Py_XDECREF(p->precompute_file);
+  Py_XDECREF(p->max_rank);
+  Py_XDECREF(p->prev_norm_prefix);
+  Py_XDECREF(((PyObject *)p->fsa));
+  Py_XDECREF(((PyObject *)p->fda));
+  Py_XDECREF(((PyObject *)p->eda));
+  Py_XDECREF(((PyObject *)p->alignment));
+  Py_XDECREF(((PyObject *)p->eid2symid));
+  Py_XDECREF(((PyObject *)p->fid2symid));
+  Py_XDECREF(((PyObject *)p->findexes));
+  Py_XDECREF(((PyObject *)p->findexes1));
+  (*Py_TYPE(o)->tp_free)(o);
+}
+
+static int __pyx_tp_traverse_3_sa_HieroCachingRuleFactory(PyObject *o, visitproc v, void *a) {
+  int e;
+  struct __pyx_obj_3_sa_HieroCachingRuleFactory *p = (struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o;
+  if (p->rules) {
+    e = (*v)(((PyObject*)p->rules), a); if (e) return e;
+  }
+  if (p->sampler) {
+    e = (*v)(((PyObject*)p->sampler), a); if (e) return e;
+  }
+  if (p->precomputed_index) {
+    e = (*v)(p->precomputed_index, a); if (e) return e;
+  }
+  if (p->precomputed_collocations) {
+    e = (*v)(p->precomputed_collocations, a); if (e) return e;
+  }
+  if (p->precompute_file) {
+    e = (*v)(p->precompute_file, a); if (e) return e;
+  }
+  if (p->max_rank) {
+    e = (*v)(p->max_rank, a); if (e) return e;
+  }
+  if (p->prev_norm_prefix) {
+    e = (*v)(p->prev_norm_prefix, a); if (e) return e;
+  }
+  if (p->fsa) {
+    e = (*v)(((PyObject*)p->fsa), a); if (e) return e;
+  }
+  if (p->fda) {
+    e = (*v)(((PyObject*)p->fda), a); if (e) return e;
+  }
+  if (p->eda) {
+    e = (*v)(((PyObject*)p->eda), a); if (e) return e;
+  }
+  if (p->alignment) {
+    e = (*v)(((PyObject*)p->alignment), a); if (e) return e;
+  }
+  if (p->eid2symid) {
+    e = (*v)(((PyObject*)p->eid2symid), a); if (e) return e;
+  }
+  if (p->fid2symid) {
+    e = (*v)(((PyObject*)p->fid2symid), a); if (e) return e;
+  }
+  if (p->findexes) {
+    e = (*v)(((PyObject*)p->findexes), a); if (e) return e;
+  }
+  if (p->findexes1) {
+    e = (*v)(((PyObject*)p->findexes1), a); if (e) return e;
+  }
+  return 0;
+}
+
+static int __pyx_tp_clear_3_sa_HieroCachingRuleFactory(PyObject *o) {
+  struct __pyx_obj_3_sa_HieroCachingRuleFactory *p = (struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o;
+  PyObject* tmp;
+  tmp = ((PyObject*)p->rules);
+  p->rules = ((struct __pyx_obj_3_sa_TrieTable *)Py_None); Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->sampler);
+  p->sampler = ((struct __pyx_obj_3_sa_Sampler *)Py_None); Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->precomputed_index);
+  p->precomputed_index = Py_None; Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->precomputed_collocations);
+  p->precomputed_collocations = Py_None; Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->precompute_file);
+  p->precompute_file = Py_None; Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->max_rank);
+  p->max_rank = Py_None; Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->prev_norm_prefix);
+  p->prev_norm_prefix = Py_None; Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->fsa);
+  p->fsa = ((struct __pyx_obj_3_sa_SuffixArray *)Py_None); Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->fda);
+  p->fda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->eda);
+  p->eda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->alignment);
+  p->alignment = ((struct __pyx_obj_3_sa_Alignment *)Py_None); Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->eid2symid);
+  p->eid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->fid2symid);
+  p->fid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->findexes);
+  p->findexes = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->findexes1);
+  p->findexes1 = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  return 0;
+}
+
+static PyMethodDef __pyx_methods_3_sa_HieroCachingRuleFactory[] = {
+  {__Pyx_NAMESTR("configure"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_3configure, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3_sa_23HieroCachingRuleFactory_2configure)},
+  {__Pyx_NAMESTR("pattern2phrase"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_5pattern2phrase, METH_O, __Pyx_DOCSTR(0)},
+  {__Pyx_NAMESTR("pattern2phrase_plus"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_7pattern2phrase_plus, METH_O, __Pyx_DOCSTR(0)},
+  {__Pyx_NAMESTR("precompute"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_9precompute, METH_NOARGS, __Pyx_DOCSTR(0)},
+  {__Pyx_NAMESTR("get_precomputed_collocation"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_11get_precomputed_collocation, METH_O, __Pyx_DOCSTR(0)},
+  {__Pyx_NAMESTR("advance"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_13advance, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)},
+  {__Pyx_NAMESTR("get_all_nodes_isteps_away"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_15get_all_nodes_isteps_away, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)},
+  {__Pyx_NAMESTR("reachable"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_17reachable, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)},
+  {__Pyx_NAMESTR("shortest"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_19shortest, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)},
+  {__Pyx_NAMESTR("get_next_states"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_21get_next_states, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)},
+  {__Pyx_NAMESTR("input"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_23input, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3_sa_23HieroCachingRuleFactory_22input)},
+  {0, 0, 0, 0}
+};
+
+static PyNumberMethods __pyx_tp_as_number_HieroCachingRuleFactory = {
+  0, /*nb_add*/
+  0, /*nb_subtract*/
+  0, /*nb_multiply*/
+  #if PY_MAJOR_VERSION < 3
+  0, /*nb_divide*/
+  #endif
+  0, /*nb_remainder*/
+  0, /*nb_divmod*/
+  0, /*nb_power*/
+  0, /*nb_negative*/
+  0, /*nb_positive*/
+  0, /*nb_absolute*/
+  0, /*nb_nonzero*/
+  0, /*nb_invert*/
+  0, /*nb_lshift*/
+  0, /*nb_rshift*/
+  0, /*nb_and*/
+  0, /*nb_xor*/
+  0, /*nb_or*/
+  #if PY_MAJOR_VERSION < 3
+  0, /*nb_coerce*/
+  #endif
+  0, /*nb_int*/
+  #if PY_MAJOR_VERSION < 3
+  0, /*nb_long*/
+  #else
+  0, /*reserved*/
+  #endif
+  0, /*nb_float*/
+  #if PY_MAJOR_VERSION < 3
+  0, /*nb_oct*/
+  #endif
+  #if PY_MAJOR_VERSION < 3
+  0, /*nb_hex*/
+  #endif
+  0, /*nb_inplace_add*/
+  0, /*nb_inplace_subtract*/
+  0, /*nb_inplace_multiply*/
+  #if PY_MAJOR_VERSION < 3
+  0, /*nb_inplace_divide*/
+  #endif
+  0, /*nb_inplace_remainder*/
+  0, /*nb_inplace_power*/
+  0, /*nb_inplace_lshift*/
+  0, /*nb_inplace_rshift*/
+  0, /*nb_inplace_and*/
+  0, /*nb_inplace_xor*/
+  0, /*nb_inplace_or*/
+  0, /*nb_floor_divide*/
+  0, /*nb_true_divide*/
+  0, /*nb_inplace_floor_divide*/
+  0, /*nb_inplace_true_divide*/
+  #if PY_VERSION_HEX >= 0x02050000
+  0, /*nb_index*/
+  #endif
+};
+
+static PySequenceMethods __pyx_tp_as_sequence_HieroCachingRuleFactory = {
+  0, /*sq_length*/
+  0, /*sq_concat*/
+  0, /*sq_repeat*/
+  0, /*sq_item*/
+  0, /*sq_slice*/
+  0, /*sq_ass_item*/
+  0, /*sq_ass_slice*/
+  0, /*sq_contains*/
+  0, /*sq_inplace_concat*/
+  0, /*sq_inplace_repeat*/
+};
+
+static PyMappingMethods __pyx_tp_as_mapping_HieroCachingRuleFactory = {
+  0, /*mp_length*/
+  0, /*mp_subscript*/
+  0, /*mp_ass_subscript*/
+};
+
+static PyBufferProcs __pyx_tp_as_buffer_HieroCachingRuleFactory = {
+  #if PY_MAJOR_VERSION < 3
+  0, /*bf_getreadbuffer*/
+  #endif
+  #if PY_MAJOR_VERSION < 3
+  0, /*bf_getwritebuffer*/
+  #endif
+  #if PY_MAJOR_VERSION < 3
+  0, /*bf_getsegcount*/
+  #endif
+  #if PY_MAJOR_VERSION < 3
+  0, /*bf_getcharbuffer*/
+  #endif
+  #if PY_VERSION_HEX >= 0x02060000
+  0, /*bf_getbuffer*/
+  #endif
+  #if PY_VERSION_HEX >= 0x02060000
+  0, /*bf_releasebuffer*/
+  #endif
+};
+
+static PyTypeObject __pyx_type_3_sa_HieroCachingRuleFactory = {
+  PyVarObject_HEAD_INIT(0, 0)
+  __Pyx_NAMESTR("_sa.HieroCachingRuleFactory"), /*tp_name*/
+  sizeof(struct __pyx_obj_3_sa_HieroCachingRuleFactory), /*tp_basicsize*/
+  0, /*tp_itemsize*/
+  __pyx_tp_dealloc_3_sa_HieroCachingRuleFactory, /*tp_dealloc*/
+  0, /*tp_print*/
+  0, /*tp_getattr*/
+  0, /*tp_setattr*/
+  #if PY_MAJOR_VERSION < 3
+  0, /*tp_compare*/
+  #else
+  0, /*reserved*/
+  #endif
+  0, /*tp_repr*/
+  &__pyx_tp_as_number_HieroCachingRuleFactory, /*tp_as_number*/
+  &__pyx_tp_as_sequence_HieroCachingRuleFactory, /*tp_as_sequence*/
+  &__pyx_tp_as_mapping_HieroCachingRuleFactory, /*tp_as_mapping*/
+  0, /*tp_hash*/
+  0, /*tp_call*/
+  0, /*tp_str*/
+  0, /*tp_getattro*/
+  0, /*tp_setattro*/
+  &__pyx_tp_as_buffer_HieroCachingRuleFactory, /*tp_as_buffer*/
+  Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+  __Pyx_DOCSTR("This RuleFactory implements a caching \n    method using TrieTable, which makes phrase\n    generation somewhat speedier -- phrases only\n    need to be extracted once (however, it is\n    quite possible they need to be scored \n    for each input sentence, for contextual models)"), /*tp_doc*/
+  __pyx_tp_traverse_3_sa_HieroCachingRuleFactory, /*tp_traverse*/
+  __pyx_tp_clear_3_sa_HieroCachingRuleFactory, /*tp_clear*/
+  0, /*tp_richcompare*/
+  0, /*tp_weaklistoffset*/
+  0, /*tp_iter*/
+  0, /*tp_iternext*/
+  __pyx_methods_3_sa_HieroCachingRuleFactory, /*tp_methods*/
+  0, /*tp_members*/
+  0, /*tp_getset*/
+  0, /*tp_base*/
+  0, /*tp_dict*/
+  0, /*tp_descr_get*/
+  0, /*tp_descr_set*/
+  0, /*tp_dictoffset*/
+  0, /*tp_init*/
+  0, /*tp_alloc*/
+  __pyx_tp_new_3_sa_HieroCachingRuleFactory, /*tp_new*/
+  0, /*tp_free*/
+  0, /*tp_is_gc*/
+  0, /*tp_bases*/
+  0, /*tp_mro*/
+  0, /*tp_cache*/
+  0, /*tp_subclasses*/
+  0, /*tp_weaklist*/
+  0, /*tp_del*/
+  #if PY_VERSION_HEX >= 0x02060000
+  0, /*tp_version_tag*/
+  #endif
+};
+
+static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct__read_bitext(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) {
+  struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *p;
+  PyObject *o = (*t->tp_alloc)(t, 0);
+  if (!o) return 0;
+  p = ((struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *)o);
+  p->__pyx_v_fp = 0;
+  return o;
+}
+
+static void __pyx_tp_dealloc_3_sa___pyx_scope_struct__read_bitext(PyObject *o) {
+  struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *p = (struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *)o;
+  Py_XDECREF(p->__pyx_v_fp);
+  (*Py_TYPE(o)->tp_free)(o);
+}
+
+static int __pyx_tp_traverse_3_sa___pyx_scope_struct__read_bitext(PyObject *o, visitproc v, void *a) {
+  int e;
+  struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *p = (struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *)o;
+  if (p->__pyx_v_fp) {
+    e = (*v)(p->__pyx_v_fp, a); if (e) return e;
+  }
+  return 0;
+}
+
+static int __pyx_tp_clear_3_sa___pyx_scope_struct__read_bitext(PyObject *o) {
+  struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *p = (struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *)o;
+  PyObject* tmp;
+  tmp = ((PyObject*)p->__pyx_v_fp);
+  p->__pyx_v_fp = Py_None; Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  return 0;
+}
+
+static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct__read_bitext[] = {
+  {0, 0, 0, 0}
+};
+
+static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct__read_bitext = {
+  0, /*nb_add*/
+  0, /*nb_subtract*/
+  0, /*nb_multiply*/
+  #if PY_MAJOR_VERSION < 3
+  0, /*nb_divide*/
+  #endif
+  0, /*nb_remainder*/
+  0, /*nb_divmod*/
+  0, /*nb_power*/
+  0, /*nb_negative*/
+  0, /*nb_positive*/
+  0, /*nb_absolute*/
+  0, /*nb_nonzero*/
+  0, /*nb_invert*/
+  0, /*nb_lshift*/
+  0, /*nb_rshift*/
+  0, /*nb_and*/
+  0, /*nb_xor*/
+  0, /*nb_or*/
+  #if PY_MAJOR_VERSION < 3
+  0, /*nb_coerce*/
+  #endif
+  0, /*nb_int*/
+  #if PY_MAJOR_VERSION < 3
+  0, /*nb_long*/
+  #else
+  0, /*reserved*/
+  #endif
+  0, /*nb_float*/
+  #if PY_MAJOR_VERSION < 3
+  0, /*nb_oct*/
+  #endif
+  #if PY_MAJOR_VERSION < 3
+  0, /*nb_hex*/
+  #endif
+  0, /*nb_inplace_add*/
+  0, /*nb_inplace_subtract*/
+  0, /*nb_inplace_multiply*/
+  #if PY_MAJOR_VERSION < 3
+  0, /*nb_inplace_divide*/
+  #endif
+  0, /*nb_inplace_remainder*/
+  0, /*nb_inplace_power*/
+  0, /*nb_inplace_lshift*/
+  0, /*nb_inplace_rshift*/
+  0, /*nb_inplace_and*/
+  0, /*nb_inplace_xor*/
+  0, /*nb_inplace_or*/
+  0, /*nb_floor_divide*/
+  0, /*nb_true_divide*/
+  0, /*nb_inplace_floor_divide*/
+  0, /*nb_inplace_true_divide*/
+  #if PY_VERSION_HEX >= 0x02050000
+  0, /*nb_index*/
+  #endif
+};
+
+static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct__read_bitext = {
   0, /*sq_length*/
   0, /*sq_concat*/
   0, /*sq_repeat*/
@@ -58053,13 +59212,13 @@ static PySequenceMethods __pyx_tp_as_sequence_Sampler = {
   0, /*sq_inplace_repeat*/
 };
 
-static PyMappingMethods __pyx_tp_as_mapping_Sampler = {
+static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct__read_bitext = {
   0, /*mp_length*/
   0, /*mp_subscript*/
   0, /*mp_ass_subscript*/
 };
 
-static PyBufferProcs __pyx_tp_as_buffer_Sampler = {
+static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct__read_bitext = {
   #if PY_MAJOR_VERSION < 3
   0, /*bf_getreadbuffer*/
   #endif
@@ -58080,12 +59239,12 @@ static PyBufferProcs __pyx_tp_as_buffer_Sampler = {
   #endif
 };
 
-static PyTypeObject __pyx_type_3_sa_Sampler = {
+static PyTypeObject __pyx_type_3_sa___pyx_scope_struct__read_bitext = {
   PyVarObject_HEAD_INIT(0, 0)
-  __Pyx_NAMESTR("_sa.Sampler"), /*tp_name*/
-  sizeof(struct __pyx_obj_3_sa_Sampler), /*tp_basicsize*/
+  __Pyx_NAMESTR("_sa.__pyx_scope_struct__read_bitext"), /*tp_name*/
+  sizeof(struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext), /*tp_basicsize*/
   0, /*tp_itemsize*/
-  __pyx_tp_dealloc_3_sa_Sampler, /*tp_dealloc*/
+  __pyx_tp_dealloc_3_sa___pyx_scope_struct__read_bitext, /*tp_dealloc*/
   0, /*tp_print*/
   0, /*tp_getattr*/
   0, /*tp_setattr*/
@@ -58095,24 +59254,24 @@ static PyTypeObject __pyx_type_3_sa_Sampler = {
   0, /*reserved*/
   #endif
   0, /*tp_repr*/
-  &__pyx_tp_as_number_Sampler, /*tp_as_number*/
-  &__pyx_tp_as_sequence_Sampler, /*tp_as_sequence*/
-  &__pyx_tp_as_mapping_Sampler, /*tp_as_mapping*/
+  &__pyx_tp_as_number___pyx_scope_struct__read_bitext, /*tp_as_number*/
+  &__pyx_tp_as_sequence___pyx_scope_struct__read_bitext, /*tp_as_sequence*/
+  &__pyx_tp_as_mapping___pyx_scope_struct__read_bitext, /*tp_as_mapping*/
   0, /*tp_hash*/
   0, /*tp_call*/
   0, /*tp_str*/
   0, /*tp_getattro*/
   0, /*tp_setattro*/
-  &__pyx_tp_as_buffer_Sampler, /*tp_as_buffer*/
-  Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
-  __Pyx_DOCSTR("A Sampler implements a logic for choosing\n    samples from a population range"), /*tp_doc*/
-  __pyx_tp_traverse_3_sa_Sampler, /*tp_traverse*/
-  __pyx_tp_clear_3_sa_Sampler, /*tp_clear*/
+  &__pyx_tp_as_buffer___pyx_scope_struct__read_bitext, /*tp_as_buffer*/
+  Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+  0, /*tp_doc*/
+  __pyx_tp_traverse_3_sa___pyx_scope_struct__read_bitext, /*tp_traverse*/
+  __pyx_tp_clear_3_sa___pyx_scope_struct__read_bitext, /*tp_clear*/
   0, /*tp_richcompare*/
   0, /*tp_weaklistoffset*/
   0, /*tp_iter*/
   0, /*tp_iternext*/
-  __pyx_methods_3_sa_Sampler, /*tp_methods*/
+  __pyx_methods_3_sa___pyx_scope_struct__read_bitext, /*tp_methods*/
   0, /*tp_members*/
   0, /*tp_getset*/
   0, /*tp_base*/
@@ -58122,7 +59281,7 @@ static PyTypeObject __pyx_type_3_sa_Sampler = {
   0, /*tp_dictoffset*/
   0, /*tp_init*/
   0, /*tp_alloc*/
-  __pyx_tp_new_3_sa_Sampler, /*tp_new*/
+  __pyx_tp_new_3_sa___pyx_scope_struct__read_bitext, /*tp_new*/
   0, /*tp_free*/
   0, /*tp_is_gc*/
   0, /*tp_bases*/
@@ -58135,173 +59294,61 @@ static PyTypeObject __pyx_type_3_sa_Sampler = {
   0, /*tp_version_tag*/
   #endif
 };
-static struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory __pyx_vtable_3_sa_HieroCachingRuleFactory;
 
-static PyObject *__pyx_tp_new_3_sa_HieroCachingRuleFactory(PyTypeObject *t, PyObject *a, PyObject *k) {
-  struct __pyx_obj_3_sa_HieroCachingRuleFactory *p;
+static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct_1_genexpr(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) {
+  struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *p;
   PyObject *o = (*t->tp_alloc)(t, 0);
   if (!o) return 0;
-  p = ((struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o);
-  p->__pyx_vtab = __pyx_vtabptr_3_sa_HieroCachingRuleFactory;
-  p->rules = ((struct __pyx_obj_3_sa_TrieTable *)Py_None); Py_INCREF(Py_None);
-  p->sampler = ((struct __pyx_obj_3_sa_Sampler *)Py_None); Py_INCREF(Py_None);
-  p->precomputed_index = Py_None; Py_INCREF(Py_None);
-  p->precomputed_collocations = Py_None; Py_INCREF(Py_None);
-  p->precompute_file = Py_None; Py_INCREF(Py_None);
-  p->max_rank = Py_None; Py_INCREF(Py_None);
-  p->prev_norm_prefix = Py_None; Py_INCREF(Py_None);
-  p->fsa = ((struct __pyx_obj_3_sa_SuffixArray *)Py_None); Py_INCREF(Py_None);
-  p->fda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None);
-  p->eda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None);
-  p->alignment = ((struct __pyx_obj_3_sa_Alignment *)Py_None); Py_INCREF(Py_None);
-  p->eid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
-  p->fid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
-  p->findexes = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
-  p->findexes1 = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
-  if (__pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(o, a, k) < 0) {
-    Py_DECREF(o); o = 0;
-  }
+  p = ((struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *)o);
+  p->__pyx_outer_scope = 0;
+  p->__pyx_v_line = 0;
+  p->__pyx_t_0 = 0;
   return o;
 }
 
-static void __pyx_tp_dealloc_3_sa_HieroCachingRuleFactory(PyObject *o) {
-  struct __pyx_obj_3_sa_HieroCachingRuleFactory *p = (struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o;
-  Py_XDECREF(((PyObject *)p->rules));
-  Py_XDECREF(((PyObject *)p->sampler));
-  Py_XDECREF(p->precomputed_index);
-  Py_XDECREF(p->precomputed_collocations);
-  Py_XDECREF(p->precompute_file);
-  Py_XDECREF(p->max_rank);
-  Py_XDECREF(p->prev_norm_prefix);
-  Py_XDECREF(((PyObject *)p->fsa));
-  Py_XDECREF(((PyObject *)p->fda));
-  Py_XDECREF(((PyObject *)p->eda));
-  Py_XDECREF(((PyObject *)p->alignment));
-  Py_XDECREF(((PyObject *)p->eid2symid));
-  Py_XDECREF(((PyObject *)p->fid2symid));
-  Py_XDECREF(((PyObject *)p->findexes));
-  Py_XDECREF(((PyObject *)p->findexes1));
+static void __pyx_tp_dealloc_3_sa___pyx_scope_struct_1_genexpr(PyObject *o) {
+  struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *p = (struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *)o;
+  Py_XDECREF(((PyObject *)p->__pyx_outer_scope));
+  Py_XDECREF(p->__pyx_v_line);
+  Py_XDECREF(p->__pyx_t_0);
   (*Py_TYPE(o)->tp_free)(o);
 }
 
-static int __pyx_tp_traverse_3_sa_HieroCachingRuleFactory(PyObject *o, visitproc v, void *a) {
+static int __pyx_tp_traverse_3_sa___pyx_scope_struct_1_genexpr(PyObject *o, visitproc v, void *a) {
   int e;
-  struct __pyx_obj_3_sa_HieroCachingRuleFactory *p = (struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o;
-  if (p->rules) {
-    e = (*v)(((PyObject*)p->rules), a); if (e) return e;
-  }
-  if (p->sampler) {
-    e = (*v)(((PyObject*)p->sampler), a); if (e) return e;
-  }
-  if (p->precomputed_index) {
-    e = (*v)(p->precomputed_index, a); if (e) return e;
-  }
-  if (p->precomputed_collocations) {
-    e = (*v)(p->precomputed_collocations, a); if (e) return e;
-  }
-  if (p->precompute_file) {
-    e = (*v)(p->precompute_file, a); if (e) return e;
-  }
-  if (p->max_rank) {
-    e = (*v)(p->max_rank, a); if (e) return e;
-  }
-  if (p->prev_norm_prefix) {
-    e = (*v)(p->prev_norm_prefix, a); if (e) return e;
-  }
-  if (p->fsa) {
-    e = (*v)(((PyObject*)p->fsa), a); if (e) return e;
-  }
-  if (p->fda) {
-    e = (*v)(((PyObject*)p->fda), a); if (e) return e;
-  }
-  if (p->eda) {
-    e = (*v)(((PyObject*)p->eda), a); if (e) return e;
-  }
-  if (p->alignment) {
-    e = (*v)(((PyObject*)p->alignment), a); if (e) return e;
-  }
-  if (p->eid2symid) {
-    e = (*v)(((PyObject*)p->eid2symid), a); if (e) return e;
+  struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *p = (struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *)o;
+  if (p->__pyx_outer_scope) {
+    e = (*v)(((PyObject*)p->__pyx_outer_scope), a); if (e) return e;
   }
-  if (p->fid2symid) {
-    e = (*v)(((PyObject*)p->fid2symid), a); if (e) return e;
+  if (p->__pyx_v_line) {
+    e = (*v)(p->__pyx_v_line, a); if (e) return e;
   }
-  if (p->findexes) {
-    e = (*v)(((PyObject*)p->findexes), a); if (e) return e;
-  }
-  if (p->findexes1) {
-    e = (*v)(((PyObject*)p->findexes1), a); if (e) return e;
+  if (p->__pyx_t_0) {
+    e = (*v)(p->__pyx_t_0, a); if (e) return e;
   }
   return 0;
 }
 
-static int __pyx_tp_clear_3_sa_HieroCachingRuleFactory(PyObject *o) {
-  struct __pyx_obj_3_sa_HieroCachingRuleFactory *p = (struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o;
+static int __pyx_tp_clear_3_sa___pyx_scope_struct_1_genexpr(PyObject *o) {
+  struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *p = (struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *)o;
   PyObject* tmp;
-  tmp = ((PyObject*)p->rules);
-  p->rules = ((struct __pyx_obj_3_sa_TrieTable *)Py_None); Py_INCREF(Py_None);
+  tmp = ((PyObject*)p->__pyx_outer_scope);
+  p->__pyx_outer_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *)Py_None); Py_INCREF(Py_None);
   Py_XDECREF(tmp);
-  tmp = ((PyObject*)p->sampler);
-  p->sampler = ((struct __pyx_obj_3_sa_Sampler *)Py_None); Py_INCREF(Py_None);
+  tmp = ((PyObject*)p->__pyx_v_line);
+  p->__pyx_v_line = Py_None; Py_INCREF(Py_None);
   Py_XDECREF(tmp);
-  tmp = ((PyObject*)p->precomputed_index);
-  p->precomputed_index = Py_None; Py_INCREF(Py_None);
-  Py_XDECREF(tmp);
-  tmp = ((PyObject*)p->precomputed_collocations);
-  p->precomputed_collocations = Py_None; Py_INCREF(Py_None);
-  Py_XDECREF(tmp);
-  tmp = ((PyObject*)p->precompute_file);
-  p->precompute_file = Py_None; Py_INCREF(Py_None);
-  Py_XDECREF(tmp);
-  tmp = ((PyObject*)p->max_rank);
-  p->max_rank = Py_None; Py_INCREF(Py_None);
-  Py_XDECREF(tmp);
-  tmp = ((PyObject*)p->prev_norm_prefix);
-  p->prev_norm_prefix = Py_None; Py_INCREF(Py_None);
-  Py_XDECREF(tmp);
-  tmp = ((PyObject*)p->fsa);
-  p->fsa = ((struct __pyx_obj_3_sa_SuffixArray *)Py_None); Py_INCREF(Py_None);
-  Py_XDECREF(tmp);
-  tmp = ((PyObject*)p->fda);
-  p->fda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None);
-  Py_XDECREF(tmp);
-  tmp = ((PyObject*)p->eda);
-  p->eda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None);
-  Py_XDECREF(tmp);
-  tmp = ((PyObject*)p->alignment);
-  p->alignment = ((struct __pyx_obj_3_sa_Alignment *)Py_None); Py_INCREF(Py_None);
-  Py_XDECREF(tmp);
-  tmp = ((PyObject*)p->eid2symid);
-  p->eid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
-  Py_XDECREF(tmp);
-  tmp = ((PyObject*)p->fid2symid);
-  p->fid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
-  Py_XDECREF(tmp);
-  tmp = ((PyObject*)p->findexes);
-  p->findexes = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
-  Py_XDECREF(tmp);
-  tmp = ((PyObject*)p->findexes1);
-  p->findexes1 = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None);
+  tmp = ((PyObject*)p->__pyx_t_0);
+  p->__pyx_t_0 = Py_None; Py_INCREF(Py_None);
   Py_XDECREF(tmp);
   return 0;
 }
 
-static PyMethodDef __pyx_methods_3_sa_HieroCachingRuleFactory[] = {
-  {__Pyx_NAMESTR("configure"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_3configure, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3_sa_23HieroCachingRuleFactory_2configure)},
-  {__Pyx_NAMESTR("pattern2phrase"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_5pattern2phrase, METH_O, __Pyx_DOCSTR(0)},
-  {__Pyx_NAMESTR("pattern2phrase_plus"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_7pattern2phrase_plus, METH_O, __Pyx_DOCSTR(0)},
-  {__Pyx_NAMESTR("precompute"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_9precompute, METH_NOARGS, __Pyx_DOCSTR(0)},
-  {__Pyx_NAMESTR("get_precomputed_collocation"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_11get_precomputed_collocation, METH_O, __Pyx_DOCSTR(0)},
-  {__Pyx_NAMESTR("advance"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_13advance, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)},
-  {__Pyx_NAMESTR("get_all_nodes_isteps_away"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_15get_all_nodes_isteps_away, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)},
-  {__Pyx_NAMESTR("reachable"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_17reachable, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)},
-  {__Pyx_NAMESTR("shortest"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_19shortest, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)},
-  {__Pyx_NAMESTR("get_next_states"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_21get_next_states, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)},
-  {__Pyx_NAMESTR("input"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_23input, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3_sa_23HieroCachingRuleFactory_22input)},
+static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct_1_genexpr[] = {
   {0, 0, 0, 0}
 };
 
-static PyNumberMethods __pyx_tp_as_number_HieroCachingRuleFactory = {
+static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_1_genexpr = {
   0, /*nb_add*/
   0, /*nb_subtract*/
   0, /*nb_multiply*/
@@ -58359,7 +59406,7 @@ static PyNumberMethods __pyx_tp_as_number_HieroCachingRuleFactory = {
   #endif
 };
 
-static PySequenceMethods __pyx_tp_as_sequence_HieroCachingRuleFactory = {
+static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_1_genexpr = {
   0, /*sq_length*/
   0, /*sq_concat*/
   0, /*sq_repeat*/
@@ -58372,13 +59419,13 @@ static PySequenceMethods __pyx_tp_as_sequence_HieroCachingRuleFactory = {
   0, /*sq_inplace_repeat*/
 };
 
-static PyMappingMethods __pyx_tp_as_mapping_HieroCachingRuleFactory = {
+static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct_1_genexpr = {
   0, /*mp_length*/
   0, /*mp_subscript*/
   0, /*mp_ass_subscript*/
 };
 
-static PyBufferProcs __pyx_tp_as_buffer_HieroCachingRuleFactory = {
+static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_1_genexpr = {
   #if PY_MAJOR_VERSION < 3
   0, /*bf_getreadbuffer*/
   #endif
@@ -58399,12 +59446,12 @@ static PyBufferProcs __pyx_tp_as_buffer_HieroCachingRuleFactory = {
   #endif
 };
 
-static PyTypeObject __pyx_type_3_sa_HieroCachingRuleFactory = {
+static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_1_genexpr = {
   PyVarObject_HEAD_INIT(0, 0)
-  __Pyx_NAMESTR("_sa.HieroCachingRuleFactory"), /*tp_name*/
-  sizeof(struct __pyx_obj_3_sa_HieroCachingRuleFactory), /*tp_basicsize*/
+  __Pyx_NAMESTR("_sa.__pyx_scope_struct_1_genexpr"), /*tp_name*/
+  sizeof(struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr), /*tp_basicsize*/
   0, /*tp_itemsize*/
-  __pyx_tp_dealloc_3_sa_HieroCachingRuleFactory, /*tp_dealloc*/
+  __pyx_tp_dealloc_3_sa___pyx_scope_struct_1_genexpr, /*tp_dealloc*/
   0, /*tp_print*/
   0, /*tp_getattr*/
   0, /*tp_setattr*/
@@ -58414,24 +59461,24 @@ static PyTypeObject __pyx_type_3_sa_HieroCachingRuleFactory = {
   0, /*reserved*/
   #endif
   0, /*tp_repr*/
-  &__pyx_tp_as_number_HieroCachingRuleFactory, /*tp_as_number*/
-  &__pyx_tp_as_sequence_HieroCachingRuleFactory, /*tp_as_sequence*/
-  &__pyx_tp_as_mapping_HieroCachingRuleFactory, /*tp_as_mapping*/
+  &__pyx_tp_as_number___pyx_scope_struct_1_genexpr, /*tp_as_number*/
+  &__pyx_tp_as_sequence___pyx_scope_struct_1_genexpr, /*tp_as_sequence*/
+  &__pyx_tp_as_mapping___pyx_scope_struct_1_genexpr, /*tp_as_mapping*/
   0, /*tp_hash*/
   0, /*tp_call*/
   0, /*tp_str*/
   0, /*tp_getattro*/
   0, /*tp_setattro*/
-  &__pyx_tp_as_buffer_HieroCachingRuleFactory, /*tp_as_buffer*/
-  Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
-  __Pyx_DOCSTR("This RuleFactory implements a caching \n    method using TrieTable, which makes phrase\n    generation somewhat speedier -- phrases only\n    need to be extracted once (however, it is\n    quite possible they need to be scored \n    for each input sentence, for contextual models)"), /*tp_doc*/
-  __pyx_tp_traverse_3_sa_HieroCachingRuleFactory, /*tp_traverse*/
-  __pyx_tp_clear_3_sa_HieroCachingRuleFactory, /*tp_clear*/
+  &__pyx_tp_as_buffer___pyx_scope_struct_1_genexpr, /*tp_as_buffer*/
+  Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+  0, /*tp_doc*/
+  __pyx_tp_traverse_3_sa___pyx_scope_struct_1_genexpr, /*tp_traverse*/
+  __pyx_tp_clear_3_sa___pyx_scope_struct_1_genexpr, /*tp_clear*/
   0, /*tp_richcompare*/
   0, /*tp_weaklistoffset*/
   0, /*tp_iter*/
   0, /*tp_iternext*/
-  __pyx_methods_3_sa_HieroCachingRuleFactory, /*tp_methods*/
+  __pyx_methods_3_sa___pyx_scope_struct_1_genexpr, /*tp_methods*/
   0, /*tp_members*/
   0, /*tp_getset*/
   0, /*tp_base*/
@@ -58441,7 +59488,7 @@ static PyTypeObject __pyx_type_3_sa_HieroCachingRuleFactory = {
   0, /*tp_dictoffset*/
   0, /*tp_init*/
   0, /*tp_alloc*/
-  __pyx_tp_new_3_sa_HieroCachingRuleFactory, /*tp_new*/
+  __pyx_tp_new_3_sa___pyx_scope_struct_1_genexpr, /*tp_new*/
   0, /*tp_free*/
   0, /*tp_is_gc*/
   0, /*tp_bases*/
@@ -58455,11 +59502,11 @@ static PyTypeObject __pyx_type_3_sa_HieroCachingRuleFactory = {
   #endif
 };
 
-static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct__compute_stats(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) {
-  struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *p;
+static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct_2_compute_stats(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) {
+  struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *p;
   PyObject *o = (*t->tp_alloc)(t, 0);
   if (!o) return 0;
-  p = ((struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *)o);
+  p = ((struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *)o);
   p->__pyx_v_ngram = 0;
   p->__pyx_v_ngram_start = 0;
   p->__pyx_v_ngram_starts = 0;
@@ -58469,8 +59516,8 @@ static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct__compute_stats(PyTypeObjec
   return o;
 }
 
-static void __pyx_tp_dealloc_3_sa___pyx_scope_struct__compute_stats(PyObject *o) {
-  struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *p = (struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *)o;
+static void __pyx_tp_dealloc_3_sa___pyx_scope_struct_2_compute_stats(PyObject *o) {
+  struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *p = (struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *)o;
   Py_XDECREF(((PyObject *)p->__pyx_v_ngram));
   Py_XDECREF(((PyObject *)p->__pyx_v_ngram_start));
   Py_XDECREF(((PyObject *)p->__pyx_v_ngram_starts));
@@ -58480,9 +59527,9 @@ static void __pyx_tp_dealloc_3_sa___pyx_scope_struct__compute_stats(PyObject *o)
   (*Py_TYPE(o)->tp_free)(o);
 }
 
-static int __pyx_tp_traverse_3_sa___pyx_scope_struct__compute_stats(PyObject *o, visitproc v, void *a) {
+static int __pyx_tp_traverse_3_sa___pyx_scope_struct_2_compute_stats(PyObject *o, visitproc v, void *a) {
   int e;
-  struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *p = (struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *)o;
+  struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *p = (struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *)o;
   if (p->__pyx_v_ngram) {
     e = (*v)(p->__pyx_v_ngram, a); if (e) return e;
   }
@@ -58504,8 +59551,8 @@ static int __pyx_tp_traverse_3_sa___pyx_scope_struct__compute_stats(PyObject *o,
   return 0;
 }
 
-static int __pyx_tp_clear_3_sa___pyx_scope_struct__compute_stats(PyObject *o) {
-  struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *p = (struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *)o;
+static int __pyx_tp_clear_3_sa___pyx_scope_struct_2_compute_stats(PyObject *o) {
+  struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *p = (struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *)o;
   PyObject* tmp;
   tmp = ((PyObject*)p->__pyx_v_ngram);
   p->__pyx_v_ngram = ((PyObject*)Py_None); Py_INCREF(Py_None);
@@ -58528,11 +59575,11 @@ static int __pyx_tp_clear_3_sa___pyx_scope_struct__compute_stats(PyObject *o) {
   return 0;
 }
 
-static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct__compute_stats[] = {
+static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct_2_compute_stats[] = {
   {0, 0, 0, 0}
 };
 
-static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct__compute_stats = {
+static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_2_compute_stats = {
   0, /*nb_add*/
   0, /*nb_subtract*/
   0, /*nb_multiply*/
@@ -58590,7 +59637,7 @@ static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct__compute_stats = {
   #endif
 };
 
-static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct__compute_stats = {
+static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_2_compute_stats = {
   0, /*sq_length*/
   0, /*sq_concat*/
   0, /*sq_repeat*/
@@ -58603,13 +59650,13 @@ static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct__compute_stats
   0, /*sq_inplace_repeat*/
 };
 
-static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct__compute_stats = {
+static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct_2_compute_stats = {
   0, /*mp_length*/
   0, /*mp_subscript*/
   0, /*mp_ass_subscript*/
 };
 
-static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct__compute_stats = {
+static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_2_compute_stats = {
   #if PY_MAJOR_VERSION < 3
   0, /*bf_getreadbuffer*/
   #endif
@@ -58630,12 +59677,12 @@ static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct__compute_stats = {
   #endif
 };
 
-static PyTypeObject __pyx_type_3_sa___pyx_scope_struct__compute_stats = {
+static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_2_compute_stats = {
   PyVarObject_HEAD_INIT(0, 0)
-  __Pyx_NAMESTR("_sa.__pyx_scope_struct__compute_stats"), /*tp_name*/
-  sizeof(struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats), /*tp_basicsize*/
+  __Pyx_NAMESTR("_sa.__pyx_scope_struct_2_compute_stats"), /*tp_name*/
+  sizeof(struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats), /*tp_basicsize*/
   0, /*tp_itemsize*/
-  __pyx_tp_dealloc_3_sa___pyx_scope_struct__compute_stats, /*tp_dealloc*/
+  __pyx_tp_dealloc_3_sa___pyx_scope_struct_2_compute_stats, /*tp_dealloc*/
   0, /*tp_print*/
   0, /*tp_getattr*/
   0, /*tp_setattr*/
@@ -58645,24 +59692,24 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct__compute_stats = {
   0, /*reserved*/
   #endif
   0, /*tp_repr*/
-  &__pyx_tp_as_number___pyx_scope_struct__compute_stats, /*tp_as_number*/
-  &__pyx_tp_as_sequence___pyx_scope_struct__compute_stats, /*tp_as_sequence*/
-  &__pyx_tp_as_mapping___pyx_scope_struct__compute_stats, /*tp_as_mapping*/
+  &__pyx_tp_as_number___pyx_scope_struct_2_compute_stats, /*tp_as_number*/
+  &__pyx_tp_as_sequence___pyx_scope_struct_2_compute_stats, /*tp_as_sequence*/
+  &__pyx_tp_as_mapping___pyx_scope_struct_2_compute_stats, /*tp_as_mapping*/
   0, /*tp_hash*/
   0, /*tp_call*/
   0, /*tp_str*/
   0, /*tp_getattro*/
   0, /*tp_setattro*/
-  &__pyx_tp_as_buffer___pyx_scope_struct__compute_stats, /*tp_as_buffer*/
+  &__pyx_tp_as_buffer___pyx_scope_struct_2_compute_stats, /*tp_as_buffer*/
   Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
   0, /*tp_doc*/
-  __pyx_tp_traverse_3_sa___pyx_scope_struct__compute_stats, /*tp_traverse*/
-  __pyx_tp_clear_3_sa___pyx_scope_struct__compute_stats, /*tp_clear*/
+  __pyx_tp_traverse_3_sa___pyx_scope_struct_2_compute_stats, /*tp_traverse*/
+  __pyx_tp_clear_3_sa___pyx_scope_struct_2_compute_stats, /*tp_clear*/
   0, /*tp_richcompare*/
   0, /*tp_weaklistoffset*/
   0, /*tp_iter*/
   0, /*tp_iternext*/
-  __pyx_methods_3_sa___pyx_scope_struct__compute_stats, /*tp_methods*/
+  __pyx_methods_3_sa___pyx_scope_struct_2_compute_stats, /*tp_methods*/
   0, /*tp_members*/
   0, /*tp_getset*/
   0, /*tp_base*/
@@ -58672,7 +59719,7 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct__compute_stats = {
   0, /*tp_dictoffset*/
   0, /*tp_init*/
   0, /*tp_alloc*/
-  __pyx_tp_new_3_sa___pyx_scope_struct__compute_stats, /*tp_new*/
+  __pyx_tp_new_3_sa___pyx_scope_struct_2_compute_stats, /*tp_new*/
   0, /*tp_free*/
   0, /*tp_is_gc*/
   0, /*tp_bases*/
@@ -58686,32 +59733,32 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct__compute_stats = {
   #endif
 };
 
-static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct_1___iter__(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) {
-  struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *p;
+static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct_3___iter__(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) {
+  struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *p;
   PyObject *o = (*t->tp_alloc)(t, 0);
   if (!o) return 0;
-  p = ((struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *)o);
+  p = ((struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *)o);
   p->__pyx_v_self = 0;
   return o;
 }
 
-static void __pyx_tp_dealloc_3_sa___pyx_scope_struct_1___iter__(PyObject *o) {
-  struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *p = (struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *)o;
+static void __pyx_tp_dealloc_3_sa___pyx_scope_struct_3___iter__(PyObject *o) {
+  struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *p = (struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *)o;
   Py_XDECREF(((PyObject *)p->__pyx_v_self));
   (*Py_TYPE(o)->tp_free)(o);
 }
 
-static int __pyx_tp_traverse_3_sa___pyx_scope_struct_1___iter__(PyObject *o, visitproc v, void *a) {
+static int __pyx_tp_traverse_3_sa___pyx_scope_struct_3___iter__(PyObject *o, visitproc v, void *a) {
   int e;
-  struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *p = (struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *)o;
+  struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *p = (struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *)o;
   if (p->__pyx_v_self) {
     e = (*v)(((PyObject*)p->__pyx_v_self), a); if (e) return e;
   }
   return 0;
 }
 
-static int __pyx_tp_clear_3_sa___pyx_scope_struct_1___iter__(PyObject *o) {
-  struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *p = (struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *)o;
+static int __pyx_tp_clear_3_sa___pyx_scope_struct_3___iter__(PyObject *o) {
+  struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *p = (struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *)o;
   PyObject* tmp;
   tmp = ((PyObject*)p->__pyx_v_self);
   p->__pyx_v_self = ((struct __pyx_obj_3_sa_Phrase *)Py_None); Py_INCREF(Py_None);
@@ -58719,11 +59766,11 @@ static int __pyx_tp_clear_3_sa___pyx_scope_struct_1___iter__(PyObject *o) {
   return 0;
 }
 
-static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct_1___iter__[] = {
+static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct_3___iter__[] = {
   {0, 0, 0, 0}
 };
 
-static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_1___iter__ = {
+static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_3___iter__ = {
   0, /*nb_add*/
   0, /*nb_subtract*/
   0, /*nb_multiply*/
@@ -58781,7 +59828,7 @@ static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_1___iter__ = {
   #endif
 };
 
-static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_1___iter__ = {
+static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_3___iter__ = {
   0, /*sq_length*/
   0, /*sq_concat*/
   0, /*sq_repeat*/
@@ -58794,13 +59841,13 @@ static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_1___iter__ = {
   0, /*sq_inplace_repeat*/
 };
 
-static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct_1___iter__ = {
+static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct_3___iter__ = {
   0, /*mp_length*/
   0, /*mp_subscript*/
   0, /*mp_ass_subscript*/
 };
 
-static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_1___iter__ = {
+static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_3___iter__ = {
   #if PY_MAJOR_VERSION < 3
   0, /*bf_getreadbuffer*/
   #endif
@@ -58821,12 +59868,12 @@ static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_1___iter__ = {
   #endif
 };
 
-static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_1___iter__ = {
+static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_3___iter__ = {
   PyVarObject_HEAD_INIT(0, 0)
-  __Pyx_NAMESTR("_sa.__pyx_scope_struct_1___iter__"), /*tp_name*/
-  sizeof(struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__), /*tp_basicsize*/
+  __Pyx_NAMESTR("_sa.__pyx_scope_struct_3___iter__"), /*tp_name*/
+  sizeof(struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__), /*tp_basicsize*/
   0, /*tp_itemsize*/
-  __pyx_tp_dealloc_3_sa___pyx_scope_struct_1___iter__, /*tp_dealloc*/
+  __pyx_tp_dealloc_3_sa___pyx_scope_struct_3___iter__, /*tp_dealloc*/
   0, /*tp_print*/
   0, /*tp_getattr*/
   0, /*tp_setattr*/
@@ -58836,24 +59883,24 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_1___iter__ = {
   0, /*reserved*/
   #endif
   0, /*tp_repr*/
-  &__pyx_tp_as_number___pyx_scope_struct_1___iter__, /*tp_as_number*/
-  &__pyx_tp_as_sequence___pyx_scope_struct_1___iter__, /*tp_as_sequence*/
-  &__pyx_tp_as_mapping___pyx_scope_struct_1___iter__, /*tp_as_mapping*/
+  &__pyx_tp_as_number___pyx_scope_struct_3___iter__, /*tp_as_number*/
+  &__pyx_tp_as_sequence___pyx_scope_struct_3___iter__, /*tp_as_sequence*/
+  &__pyx_tp_as_mapping___pyx_scope_struct_3___iter__, /*tp_as_mapping*/
   0, /*tp_hash*/
   0, /*tp_call*/
   0, /*tp_str*/
   0, /*tp_getattro*/
   0, /*tp_setattro*/
-  &__pyx_tp_as_buffer___pyx_scope_struct_1___iter__, /*tp_as_buffer*/
+  &__pyx_tp_as_buffer___pyx_scope_struct_3___iter__, /*tp_as_buffer*/
   Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
   0, /*tp_doc*/
-  __pyx_tp_traverse_3_sa___pyx_scope_struct_1___iter__, /*tp_traverse*/
-  __pyx_tp_clear_3_sa___pyx_scope_struct_1___iter__, /*tp_clear*/
+  __pyx_tp_traverse_3_sa___pyx_scope_struct_3___iter__, /*tp_traverse*/
+  __pyx_tp_clear_3_sa___pyx_scope_struct_3___iter__, /*tp_clear*/
   0, /*tp_richcompare*/
   0, /*tp_weaklistoffset*/
   0, /*tp_iter*/
   0, /*tp_iternext*/
-  __pyx_methods_3_sa___pyx_scope_struct_1___iter__, /*tp_methods*/
+  __pyx_methods_3_sa___pyx_scope_struct_3___iter__, /*tp_methods*/
   0, /*tp_members*/
   0, /*tp_getset*/
   0, /*tp_base*/
@@ -58863,7 +59910,7 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_1___iter__ = {
   0, /*tp_dictoffset*/
   0, /*tp_init*/
   0, /*tp_alloc*/
-  __pyx_tp_new_3_sa___pyx_scope_struct_1___iter__, /*tp_new*/
+  __pyx_tp_new_3_sa___pyx_scope_struct_3___iter__, /*tp_new*/
   0, /*tp_free*/
   0, /*tp_is_gc*/
   0, /*tp_bases*/
@@ -58877,11 +59924,11 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_1___iter__ = {
   #endif
 };
 
-static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct_2_input(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) {
-  struct __pyx_obj_3_sa___pyx_scope_struct_2_input *p;
+static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct_4_input(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) {
+  struct __pyx_obj_3_sa___pyx_scope_struct_4_input *p;
   PyObject *o = (*t->tp_alloc)(t, 0);
   if (!o) return 0;
-  p = ((struct __pyx_obj_3_sa___pyx_scope_struct_2_input *)o);
+  p = ((struct __pyx_obj_3_sa___pyx_scope_struct_4_input *)o);
   p->__pyx_v_alignment = 0;
   p->__pyx_v_als = 0;
   p->__pyx_v_alslist = 0;
@@ -58935,8 +59982,8 @@ static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct_2_input(PyTypeObject *t, C
   return o;
 }
 
-static void __pyx_tp_dealloc_3_sa___pyx_scope_struct_2_input(PyObject *o) {
-  struct __pyx_obj_3_sa___pyx_scope_struct_2_input *p = (struct __pyx_obj_3_sa___pyx_scope_struct_2_input *)o;
+static void __pyx_tp_dealloc_3_sa___pyx_scope_struct_4_input(PyObject *o) {
+  struct __pyx_obj_3_sa___pyx_scope_struct_4_input *p = (struct __pyx_obj_3_sa___pyx_scope_struct_4_input *)o;
   Py_XDECREF(p->__pyx_v_alignment);
   Py_XDECREF(p->__pyx_v_als);
   Py_XDECREF(p->__pyx_v_alslist);
@@ -58990,9 +60037,9 @@ static void __pyx_tp_dealloc_3_sa___pyx_scope_struct_2_input(PyObject *o) {
   (*Py_TYPE(o)->tp_free)(o);
 }
 
-static int __pyx_tp_traverse_3_sa___pyx_scope_struct_2_input(PyObject *o, visitproc v, void *a) {
+static int __pyx_tp_traverse_3_sa___pyx_scope_struct_4_input(PyObject *o, visitproc v, void *a) {
   int e;
-  struct __pyx_obj_3_sa___pyx_scope_struct_2_input *p = (struct __pyx_obj_3_sa___pyx_scope_struct_2_input *)o;
+  struct __pyx_obj_3_sa___pyx_scope_struct_4_input *p = (struct __pyx_obj_3_sa___pyx_scope_struct_4_input *)o;
   if (p->__pyx_v_alignment) {
     e = (*v)(p->__pyx_v_alignment, a); if (e) return e;
   }
@@ -59146,8 +60193,8 @@ static int __pyx_tp_traverse_3_sa___pyx_scope_struct_2_input(PyObject *o, visitp
   return 0;
 }
 
-static int __pyx_tp_clear_3_sa___pyx_scope_struct_2_input(PyObject *o) {
-  struct __pyx_obj_3_sa___pyx_scope_struct_2_input *p = (struct __pyx_obj_3_sa___pyx_scope_struct_2_input *)o;
+static int __pyx_tp_clear_3_sa___pyx_scope_struct_4_input(PyObject *o) {
+  struct __pyx_obj_3_sa___pyx_scope_struct_4_input *p = (struct __pyx_obj_3_sa___pyx_scope_struct_4_input *)o;
   PyObject* tmp;
   tmp = ((PyObject*)p->__pyx_v_alignment);
   p->__pyx_v_alignment = Py_None; Py_INCREF(Py_None);
@@ -59302,11 +60349,11 @@ static int __pyx_tp_clear_3_sa___pyx_scope_struct_2_input(PyObject *o) {
   return 0;
 }
 
-static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct_2_input[] = {
+static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct_4_input[] = {
   {0, 0, 0, 0}
 };
 
-static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_2_input = {
+static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_4_input = {
   0, /*nb_add*/
   0, /*nb_subtract*/
   0, /*nb_multiply*/
@@ -59364,7 +60411,7 @@ static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_2_input = {
   #endif
 };
 
-static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_2_input = {
+static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_4_input = {
   0, /*sq_length*/
   0, /*sq_concat*/
   0, /*sq_repeat*/
@@ -59377,13 +60424,13 @@ static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_2_input = {
   0, /*sq_inplace_repeat*/
 };
 
-static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct_2_input = {
+static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct_4_input = {
   0, /*mp_length*/
   0, /*mp_subscript*/
   0, /*mp_ass_subscript*/
 };
 
-static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_2_input = {
+static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_4_input = {
   #if PY_MAJOR_VERSION < 3
   0, /*bf_getreadbuffer*/
   #endif
@@ -59404,12 +60451,12 @@ static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_2_input = {
   #endif
 };
 
-static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_2_input = {
+static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_4_input = {
   PyVarObject_HEAD_INIT(0, 0)
-  __Pyx_NAMESTR("_sa.__pyx_scope_struct_2_input"), /*tp_name*/
-  sizeof(struct __pyx_obj_3_sa___pyx_scope_struct_2_input), /*tp_basicsize*/
+  __Pyx_NAMESTR("_sa.__pyx_scope_struct_4_input"), /*tp_name*/
+  sizeof(struct __pyx_obj_3_sa___pyx_scope_struct_4_input), /*tp_basicsize*/
   0, /*tp_itemsize*/
-  __pyx_tp_dealloc_3_sa___pyx_scope_struct_2_input, /*tp_dealloc*/
+  __pyx_tp_dealloc_3_sa___pyx_scope_struct_4_input, /*tp_dealloc*/
   0, /*tp_print*/
   0, /*tp_getattr*/
   0, /*tp_setattr*/
@@ -59419,24 +60466,24 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_2_input = {
   0, /*reserved*/
   #endif
   0, /*tp_repr*/
-  &__pyx_tp_as_number___pyx_scope_struct_2_input, /*tp_as_number*/
-  &__pyx_tp_as_sequence___pyx_scope_struct_2_input, /*tp_as_sequence*/
-  &__pyx_tp_as_mapping___pyx_scope_struct_2_input, /*tp_as_mapping*/
+  &__pyx_tp_as_number___pyx_scope_struct_4_input, /*tp_as_number*/
+  &__pyx_tp_as_sequence___pyx_scope_struct_4_input, /*tp_as_sequence*/
+  &__pyx_tp_as_mapping___pyx_scope_struct_4_input, /*tp_as_mapping*/
   0, /*tp_hash*/
   0, /*tp_call*/
   0, /*tp_str*/
   0, /*tp_getattro*/
   0, /*tp_setattro*/
-  &__pyx_tp_as_buffer___pyx_scope_struct_2_input, /*tp_as_buffer*/
+  &__pyx_tp_as_buffer___pyx_scope_struct_4_input, /*tp_as_buffer*/
   Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
   0, /*tp_doc*/
-  __pyx_tp_traverse_3_sa___pyx_scope_struct_2_input, /*tp_traverse*/
-  __pyx_tp_clear_3_sa___pyx_scope_struct_2_input, /*tp_clear*/
+  __pyx_tp_traverse_3_sa___pyx_scope_struct_4_input, /*tp_traverse*/
+  __pyx_tp_clear_3_sa___pyx_scope_struct_4_input, /*tp_clear*/
   0, /*tp_richcompare*/
   0, /*tp_weaklistoffset*/
   0, /*tp_iter*/
   0, /*tp_iternext*/
-  __pyx_methods_3_sa___pyx_scope_struct_2_input, /*tp_methods*/
+  __pyx_methods_3_sa___pyx_scope_struct_4_input, /*tp_methods*/
   0, /*tp_members*/
   0, /*tp_getset*/
   0, /*tp_base*/
@@ -59446,7 +60493,7 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_2_input = {
   0, /*tp_dictoffset*/
   0, /*tp_init*/
   0, /*tp_alloc*/
-  __pyx_tp_new_3_sa___pyx_scope_struct_2_input, /*tp_new*/
+  __pyx_tp_new_3_sa___pyx_scope_struct_4_input, /*tp_new*/
   0, /*tp_free*/
   0, /*tp_is_gc*/
   0, /*tp_bases*/
@@ -59480,11 +60527,11 @@ static struct PyModuleDef __pyx_moduledef = {
 
 static __Pyx_StringTabEntry __pyx_string_tab[] = {
   {&__pyx_kp_s_1, __pyx_k_1, sizeof(__pyx_k_1), 0, 0, 1, 0},
-  {&__pyx_n_s_101, __pyx_k_101, sizeof(__pyx_k_101), 0, 0, 1, 1},
-  {&__pyx_n_s_102, __pyx_k_102, sizeof(__pyx_k_102), 0, 0, 1, 1},
-  {&__pyx_kp_s_104, __pyx_k_104, sizeof(__pyx_k_104), 0, 0, 1, 0},
+  {&__pyx_kp_s_100, __pyx_k_100, sizeof(__pyx_k_100), 0, 0, 1, 0},
+  {&__pyx_kp_s_101, __pyx_k_101, sizeof(__pyx_k_101), 0, 0, 1, 0},
+  {&__pyx_n_s_103, __pyx_k_103, sizeof(__pyx_k_103), 0, 0, 1, 1},
+  {&__pyx_n_s_104, __pyx_k_104, sizeof(__pyx_k_104), 0, 0, 1, 1},
   {&__pyx_kp_s_106, __pyx_k_106, sizeof(__pyx_k_106), 0, 0, 1, 0},
-  {&__pyx_kp_s_107, __pyx_k_107, sizeof(__pyx_k_107), 0, 0, 1, 0},
   {&__pyx_kp_s_108, __pyx_k_108, sizeof(__pyx_k_108), 0, 0, 1, 0},
   {&__pyx_kp_s_109, __pyx_k_109, sizeof(__pyx_k_109), 0, 0, 1, 0},
   {&__pyx_kp_s_110, __pyx_k_110, sizeof(__pyx_k_110), 0, 0, 1, 0},
@@ -59494,12 +60541,12 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = {
   {&__pyx_kp_s_114, __pyx_k_114, sizeof(__pyx_k_114), 0, 0, 1, 0},
   {&__pyx_kp_s_115, __pyx_k_115, sizeof(__pyx_k_115), 0, 0, 1, 0},
   {&__pyx_kp_s_116, __pyx_k_116, sizeof(__pyx_k_116), 0, 0, 1, 0},
-  {&__pyx_n_s_117, __pyx_k_117, sizeof(__pyx_k_117), 0, 0, 1, 1},
+  {&__pyx_kp_s_117, __pyx_k_117, sizeof(__pyx_k_117), 0, 0, 1, 0},
   {&__pyx_kp_s_118, __pyx_k_118, sizeof(__pyx_k_118), 0, 0, 1, 0},
-  {&__pyx_kp_s_119, __pyx_k_119, sizeof(__pyx_k_119), 0, 0, 1, 0},
-  {&__pyx_n_s_121, __pyx_k_121, sizeof(__pyx_k_121), 0, 0, 1, 1},
-  {&__pyx_kp_s_122, __pyx_k_122, sizeof(__pyx_k_122), 0, 0, 1, 0},
-  {&__pyx_kp_s_123, __pyx_k_123, sizeof(__pyx_k_123), 0, 0, 1, 0},
+  {&__pyx_n_s_119, __pyx_k_119, sizeof(__pyx_k_119), 0, 0, 1, 1},
+  {&__pyx_kp_s_120, __pyx_k_120, sizeof(__pyx_k_120), 0, 0, 1, 0},
+  {&__pyx_kp_s_121, __pyx_k_121, sizeof(__pyx_k_121), 0, 0, 1, 0},
+  {&__pyx_n_s_123, __pyx_k_123, sizeof(__pyx_k_123), 0, 0, 1, 1},
   {&__pyx_kp_s_124, __pyx_k_124, sizeof(__pyx_k_124), 0, 0, 1, 0},
   {&__pyx_kp_s_125, __pyx_k_125, sizeof(__pyx_k_125), 0, 0, 1, 0},
   {&__pyx_kp_s_126, __pyx_k_126, sizeof(__pyx_k_126), 0, 0, 1, 0},
@@ -59509,49 +60556,49 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = {
   {&__pyx_kp_s_13, __pyx_k_13, sizeof(__pyx_k_13), 0, 0, 1, 0},
   {&__pyx_kp_s_130, __pyx_k_130, sizeof(__pyx_k_130), 0, 0, 1, 0},
   {&__pyx_kp_s_131, __pyx_k_131, sizeof(__pyx_k_131), 0, 0, 1, 0},
-  {&__pyx_kp_s_134, __pyx_k_134, sizeof(__pyx_k_134), 0, 0, 1, 0},
-  {&__pyx_kp_s_135, __pyx_k_135, sizeof(__pyx_k_135), 0, 0, 1, 0},
-  {&__pyx_kp_s_139, __pyx_k_139, sizeof(__pyx_k_139), 0, 0, 1, 0},
+  {&__pyx_kp_s_132, __pyx_k_132, sizeof(__pyx_k_132), 0, 0, 1, 0},
+  {&__pyx_kp_s_133, __pyx_k_133, sizeof(__pyx_k_133), 0, 0, 1, 0},
+  {&__pyx_kp_s_136, __pyx_k_136, sizeof(__pyx_k_136), 0, 0, 1, 0},
+  {&__pyx_kp_s_137, __pyx_k_137, sizeof(__pyx_k_137), 0, 0, 1, 0},
   {&__pyx_kp_s_14, __pyx_k_14, sizeof(__pyx_k_14), 0, 0, 1, 0},
-  {&__pyx_kp_s_140, __pyx_k_140, sizeof(__pyx_k_140), 0, 0, 1, 0},
+  {&__pyx_kp_s_141, __pyx_k_141, sizeof(__pyx_k_141), 0, 0, 1, 0},
+  {&__pyx_kp_s_142, __pyx_k_142, sizeof(__pyx_k_142), 0, 0, 1, 0},
   {&__pyx_kp_s_18, __pyx_k_18, sizeof(__pyx_k_18), 0, 0, 1, 0},
   {&__pyx_kp_s_2, __pyx_k_2, sizeof(__pyx_k_2), 0, 0, 1, 0},
-  {&__pyx_kp_s_22, __pyx_k_22, sizeof(__pyx_k_22), 0, 0, 1, 0},
-  {&__pyx_n_s_24, __pyx_k_24, sizeof(__pyx_k_24), 0, 0, 1, 1},
-  {&__pyx_kp_s_28, __pyx_k_28, sizeof(__pyx_k_28), 0, 0, 1, 0},
+  {&__pyx_kp_s_21, __pyx_k_21, sizeof(__pyx_k_21), 0, 0, 1, 0},
+  {&__pyx_kp_s_25, __pyx_k_25, sizeof(__pyx_k_25), 0, 0, 1, 0},
+  {&__pyx_n_s_27, __pyx_k_27, sizeof(__pyx_k_27), 0, 0, 1, 1},
   {&__pyx_kp_s_3, __pyx_k_3, sizeof(__pyx_k_3), 0, 0, 1, 0},
-  {&__pyx_kp_s_32, __pyx_k_32, sizeof(__pyx_k_32), 0, 0, 1, 0},
-  {&__pyx_kp_s_39, __pyx_k_39, sizeof(__pyx_k_39), 0, 0, 1, 0},
+  {&__pyx_kp_s_31, __pyx_k_31, sizeof(__pyx_k_31), 0, 0, 1, 0},
+  {&__pyx_kp_s_35, __pyx_k_35, sizeof(__pyx_k_35), 0, 0, 1, 0},
   {&__pyx_kp_s_4, __pyx_k_4, sizeof(__pyx_k_4), 0, 0, 1, 0},
   {&__pyx_kp_s_42, __pyx_k_42, sizeof(__pyx_k_42), 0, 0, 1, 0},
-  {&__pyx_kp_s_43, __pyx_k_43, sizeof(__pyx_k_43), 0, 0, 1, 0},
   {&__pyx_kp_s_45, __pyx_k_45, sizeof(__pyx_k_45), 0, 0, 1, 0},
-  {&__pyx_kp_s_47, __pyx_k_47, sizeof(__pyx_k_47), 0, 0, 1, 0},
-  {&__pyx_kp_s_49, __pyx_k_49, sizeof(__pyx_k_49), 0, 0, 1, 0},
+  {&__pyx_kp_s_46, __pyx_k_46, sizeof(__pyx_k_46), 0, 0, 1, 0},
+  {&__pyx_kp_s_48, __pyx_k_48, sizeof(__pyx_k_48), 0, 0, 1, 0},
   {&__pyx_kp_s_5, __pyx_k_5, sizeof(__pyx_k_5), 0, 0, 1, 0},
-  {&__pyx_kp_s_53, __pyx_k_53, sizeof(__pyx_k_53), 0, 0, 1, 0},
-  {&__pyx_kp_s_55, __pyx_k_55, sizeof(__pyx_k_55), 0, 0, 1, 0},
+  {&__pyx_kp_s_50, __pyx_k_50, sizeof(__pyx_k_50), 0, 0, 1, 0},
+  {&__pyx_kp_s_52, __pyx_k_52, sizeof(__pyx_k_52), 0, 0, 1, 0},
   {&__pyx_kp_s_56, __pyx_k_56, sizeof(__pyx_k_56), 0, 0, 1, 0},
-  {&__pyx_kp_s_57, __pyx_k_57, sizeof(__pyx_k_57), 0, 0, 1, 0},
+  {&__pyx_kp_s_58, __pyx_k_58, sizeof(__pyx_k_58), 0, 0, 1, 0},
   {&__pyx_kp_s_59, __pyx_k_59, sizeof(__pyx_k_59), 0, 0, 1, 0},
   {&__pyx_kp_s_6, __pyx_k_6, sizeof(__pyx_k_6), 0, 0, 1, 0},
-  {&__pyx_kp_s_61, __pyx_k_61, sizeof(__pyx_k_61), 0, 0, 1, 0},
+  {&__pyx_kp_s_60, __pyx_k_60, sizeof(__pyx_k_60), 0, 0, 1, 0},
   {&__pyx_kp_s_62, __pyx_k_62, sizeof(__pyx_k_62), 0, 0, 1, 0},
-  {&__pyx_kp_s_63, __pyx_k_63, sizeof(__pyx_k_63), 0, 0, 1, 0},
   {&__pyx_kp_s_64, __pyx_k_64, sizeof(__pyx_k_64), 0, 0, 1, 0},
   {&__pyx_kp_s_65, __pyx_k_65, sizeof(__pyx_k_65), 0, 0, 1, 0},
   {&__pyx_kp_s_66, __pyx_k_66, sizeof(__pyx_k_66), 0, 0, 1, 0},
   {&__pyx_kp_s_67, __pyx_k_67, sizeof(__pyx_k_67), 0, 0, 1, 0},
-  {&__pyx_n_s_68, __pyx_k_68, sizeof(__pyx_k_68), 0, 0, 1, 1},
-  {&__pyx_n_s_69, __pyx_k_69, sizeof(__pyx_k_69), 0, 0, 1, 1},
+  {&__pyx_kp_s_68, __pyx_k_68, sizeof(__pyx_k_68), 0, 0, 1, 0},
+  {&__pyx_kp_s_69, __pyx_k_69, sizeof(__pyx_k_69), 0, 0, 1, 0},
   {&__pyx_kp_s_7, __pyx_k_7, sizeof(__pyx_k_7), 0, 0, 1, 0},
-  {&__pyx_kp_s_70, __pyx_k_70, sizeof(__pyx_k_70), 0, 0, 1, 0},
+  {&__pyx_n_s_70, __pyx_k_70, sizeof(__pyx_k_70), 0, 0, 1, 1},
+  {&__pyx_n_s_71, __pyx_k_71, sizeof(__pyx_k_71), 0, 0, 1, 1},
   {&__pyx_kp_s_72, __pyx_k_72, sizeof(__pyx_k_72), 0, 0, 1, 0},
   {&__pyx_kp_s_74, __pyx_k_74, sizeof(__pyx_k_74), 0, 0, 1, 0},
   {&__pyx_kp_s_76, __pyx_k_76, sizeof(__pyx_k_76), 0, 0, 1, 0},
+  {&__pyx_kp_s_78, __pyx_k_78, sizeof(__pyx_k_78), 0, 0, 1, 0},
   {&__pyx_kp_s_8, __pyx_k_8, sizeof(__pyx_k_8), 0, 0, 1, 0},
-  {&__pyx_kp_s_81, __pyx_k_81, sizeof(__pyx_k_81), 0, 0, 1, 0},
-  {&__pyx_kp_s_82, __pyx_k_82, sizeof(__pyx_k_82), 0, 0, 1, 0},
   {&__pyx_kp_s_83, __pyx_k_83, sizeof(__pyx_k_83), 0, 0, 1, 0},
   {&__pyx_kp_s_84, __pyx_k_84, sizeof(__pyx_k_84), 0, 0, 1, 0},
   {&__pyx_kp_s_85, __pyx_k_85, sizeof(__pyx_k_85), 0, 0, 1, 0},
@@ -59561,10 +60608,10 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = {
   {&__pyx_kp_s_89, __pyx_k_89, sizeof(__pyx_k_89), 0, 0, 1, 0},
   {&__pyx_kp_s_9, __pyx_k_9, sizeof(__pyx_k_9), 0, 0, 1, 0},
   {&__pyx_kp_s_90, __pyx_k_90, sizeof(__pyx_k_90), 0, 0, 1, 0},
+  {&__pyx_kp_s_91, __pyx_k_91, sizeof(__pyx_k_91), 0, 0, 1, 0},
   {&__pyx_kp_s_92, __pyx_k_92, sizeof(__pyx_k_92), 0, 0, 1, 0},
-  {&__pyx_kp_s_93, __pyx_k_93, sizeof(__pyx_k_93), 0, 0, 1, 0},
-  {&__pyx_kp_s_98, __pyx_k_98, sizeof(__pyx_k_98), 0, 0, 1, 0},
-  {&__pyx_kp_s_99, __pyx_k_99, sizeof(__pyx_k_99), 0, 0, 1, 0},
+  {&__pyx_kp_s_94, __pyx_k_94, sizeof(__pyx_k_94), 0, 0, 1, 0},
+  {&__pyx_kp_s_95, __pyx_k_95, sizeof(__pyx_k_95), 0, 0, 1, 0},
   {&__pyx_kp_s__0, __pyx_k__0, sizeof(__pyx_k__0), 0, 0, 1, 0},
   {&__pyx_kp_s__1, __pyx_k__1, sizeof(__pyx_k__1), 0, 0, 1, 0},
   {&__pyx_n_s__END_OF_FILE, __pyx_k__END_OF_FILE, sizeof(__pyx_k__END_OF_FILE), 0, 0, 1, 1},
@@ -59682,7 +60729,9 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = {
   {&__pyx_n_s__reachable, __pyx_k__reachable, sizeof(__pyx_k__reachable), 0, 0, 1, 1},
   {&__pyx_n_s__reachable_buffer, __pyx_k__reachable_buffer, sizeof(__pyx_k__reachable_buffer), 0, 0, 1, 1},
   {&__pyx_n_s__read_binary, __pyx_k__read_binary, sizeof(__pyx_k__read_binary), 0, 0, 1, 1},
+  {&__pyx_n_s__read_bitext, __pyx_k__read_bitext, sizeof(__pyx_k__read_bitext), 0, 0, 1, 1},
   {&__pyx_n_s__read_text, __pyx_k__read_text, sizeof(__pyx_k__read_text), 0, 0, 1, 1},
+  {&__pyx_n_s__read_text_data, __pyx_k__read_text_data, sizeof(__pyx_k__read_text_data), 0, 0, 1, 1},
   {&__pyx_n_s__res, __pyx_k__res, sizeof(__pyx_k__res), 0, 0, 1, 1},
   {&__pyx_n_s__reset, __pyx_k__reset, sizeof(__pyx_k__reset), 0, 0, 1, 1},
   {&__pyx_n_s__resource, __pyx_k__resource, sizeof(__pyx_k__resource), 0, 0, 1, 1},
@@ -59699,9 +60748,11 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = {
   {&__pyx_n_s__seek, __pyx_k__seek, sizeof(__pyx_k__seek), 0, 0, 1, 1},
   {&__pyx_n_s__setdefault, __pyx_k__setdefault, sizeof(__pyx_k__setdefault), 0, 0, 1, 1},
   {&__pyx_n_s__shortest, __pyx_k__shortest, sizeof(__pyx_k__shortest), 0, 0, 1, 1},
+  {&__pyx_n_s__side, __pyx_k__side, sizeof(__pyx_k__side), 0, 0, 1, 1},
   {&__pyx_n_s__size, __pyx_k__size, sizeof(__pyx_k__size), 0, 0, 1, 1},
   {&__pyx_n_s__skip, __pyx_k__skip, sizeof(__pyx_k__skip), 0, 0, 1, 1},
   {&__pyx_n_s__sorted, __pyx_k__sorted, sizeof(__pyx_k__sorted), 0, 0, 1, 1},
+  {&__pyx_n_s__source, __pyx_k__source, sizeof(__pyx_k__source), 0, 0, 1, 1},
   {&__pyx_n_s__spanlen, __pyx_k__spanlen, sizeof(__pyx_k__spanlen), 0, 0, 1, 1},
   {&__pyx_n_s__split, __pyx_k__split, sizeof(__pyx_k__split), 0, 0, 1, 1},
   {&__pyx_n_s__start, __pyx_k__start, sizeof(__pyx_k__start), 0, 0, 1, 1},
@@ -59734,7 +60785,7 @@ static int __Pyx_InitCachedBuiltins(void) {
   __pyx_builtin_IndexError = __Pyx_GetName(__pyx_b, __pyx_n_s__IndexError); if (!__pyx_builtin_IndexError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_builtin_range = __Pyx_GetName(__pyx_b, __pyx_n_s__range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_builtin_TypeError = __Pyx_GetName(__pyx_b, __pyx_n_s__TypeError); if (!__pyx_builtin_TypeError) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __pyx_builtin_enumerate = __Pyx_GetName(__pyx_b, __pyx_n_s__enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_builtin_enumerate = __Pyx_GetName(__pyx_b, __pyx_n_s__enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_builtin_map = __Pyx_GetName(__pyx_b, __pyx_n_s__map); if (!__pyx_builtin_map) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_builtin_Exception = __Pyx_GetName(__pyx_b, __pyx_n_s__Exception); if (!__pyx_builtin_Exception) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_builtin_zip = __Pyx_GetName(__pyx_b, __pyx_n_s__zip); if (!__pyx_builtin_zip) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -59802,28 +60853,28 @@ static int __Pyx_InitCachedConstants(void) {
   __Pyx_GIVEREF(__pyx_int_1000);
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_12));
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":62
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":65
  *                     f.write("%s " % self.get_word(w_id))
  *                 if w_id == 1:
  *                     f.write("\n")             # <<<<<<<<<<<<<<
  * 
  *     def read_text(self, char* filename):
  */
-  __pyx_k_tuple_15 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_15)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_15 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_15)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_15);
   __Pyx_INCREF(((PyObject *)__pyx_kp_s_14));
   PyTuple_SET_ITEM(__pyx_k_tuple_15, 0, ((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_15));
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":57
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":60
  * 
  *     def write_text(self, char* filename):
  *         with open(filename, "w") as f:             # <<<<<<<<<<<<<<
  *             for w_id in self.data:
  *                 if w_id > 1:
  */
-  __pyx_k_tuple_16 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_16)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_16 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_16)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_16);
   __Pyx_INCREF(Py_None);
   PyTuple_SET_ITEM(__pyx_k_tuple_16, 0, Py_None);
@@ -59836,14 +60887,14 @@ static int __Pyx_InitCachedConstants(void) {
   __Pyx_GIVEREF(Py_None);
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_16));
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":66
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":68
+ * 
  *     def read_text(self, char* filename):
- *         cdef int word_count = 0
  *         with gzip_or_text(filename) as fp:             # <<<<<<<<<<<<<<
- *             for line_num, line in enumerate(fp):
- *                 self.sent_index.append(word_count)
+ *             self.read_text_data(fp)
+ * 
  */
-  __pyx_k_tuple_17 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_17)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_k_tuple_17 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_17)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_k_tuple_17);
   __Pyx_INCREF(Py_None);
   PyTuple_SET_ITEM(__pyx_k_tuple_17, 0, Py_None);
@@ -59856,80 +60907,114 @@ static int __Pyx_InitCachedConstants(void) {
   __Pyx_GIVEREF(Py_None);
   __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_17));
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":133
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":73
+ *     def read_bitext(self, char* filename, int side):
+ *         with gzip_or_text(filename) as fp:
+ *             data = (line.split(' ||| ')[side] for line in fp)             # <<<<<<<<<<<<<<
+ *             self.read_text_data(data)
+ * 
+ */
+  __pyx_k_tuple_19 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_19)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_19);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_18));
+  PyTuple_SET_ITEM(__pyx_k_tuple_19, 0, ((PyObject *)__pyx_kp_s_18));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_18));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_19));
+
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":72
+ * 
+ *     def read_bitext(self, char* filename, int side):
+ *         with gzip_or_text(filename) as fp:             # <<<<<<<<<<<<<<
+ *             data = (line.split(' ||| ')[side] for line in fp)
+ *             self.read_text_data(data)
+ */
+  __pyx_k_tuple_20 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_20)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_20);
+  __Pyx_INCREF(Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_20, 0, Py_None);
+  __Pyx_GIVEREF(Py_None);
+  __Pyx_INCREF(Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_20, 1, Py_None);
+  __Pyx_GIVEREF(Py_None);
+  __Pyx_INCREF(Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_20, 2, Py_None);
+  __Pyx_GIVEREF(Py_None);
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_20));
+
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":145
  *         for i in self.data:
  *             f.write("%d " %i)
  *         f.write("\n")             # <<<<<<<<<<<<<<
  *         for i in self.sent_index:
  *             f.write("%d " %i)
  */
-  __pyx_k_tuple_19 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_19)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 133; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_19);
+  __pyx_k_tuple_22 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_22)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_22);
   __Pyx_INCREF(((PyObject *)__pyx_kp_s_14));
-  PyTuple_SET_ITEM(__pyx_k_tuple_19, 0, ((PyObject *)__pyx_kp_s_14));
+  PyTuple_SET_ITEM(__pyx_k_tuple_22, 0, ((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_19));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_22));
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":136
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":148
  *         for i in self.sent_index:
  *             f.write("%d " %i)
  *         f.write("\n")             # <<<<<<<<<<<<<<
  *         for i in self.sent_id:
  *             f.write("%d " %i)
  */
-  __pyx_k_tuple_20 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_20)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 136; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_20);
+  __pyx_k_tuple_23 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_23)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_23);
   __Pyx_INCREF(((PyObject *)__pyx_kp_s_14));
-  PyTuple_SET_ITEM(__pyx_k_tuple_20, 0, ((PyObject *)__pyx_kp_s_14));
+  PyTuple_SET_ITEM(__pyx_k_tuple_23, 0, ((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_20));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_23));
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":139
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":151
  *         for i in self.sent_id:
  *             f.write("%d " %i)
  *         f.write("\n")             # <<<<<<<<<<<<<<
  *         for word in self.id2word:
  *             f.write("%s %d " % (word, self.word2id[word]))
  */
-  __pyx_k_tuple_21 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_21)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_21);
+  __pyx_k_tuple_24 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_24)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 151; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_24);
   __Pyx_INCREF(((PyObject *)__pyx_kp_s_14));
-  PyTuple_SET_ITEM(__pyx_k_tuple_21, 0, ((PyObject *)__pyx_kp_s_14));
+  PyTuple_SET_ITEM(__pyx_k_tuple_24, 0, ((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_21));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_24));
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":142
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":154
  *         for word in self.id2word:
  *             f.write("%s %d " % (word, self.word2id[word]))
  *         f.write("\n")             # <<<<<<<<<<<<<<
  * 
  *     def write_enhanced(self, char* filename):
  */
-  __pyx_k_tuple_23 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_23)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_23);
+  __pyx_k_tuple_26 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_26)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_26);
   __Pyx_INCREF(((PyObject *)__pyx_kp_s_14));
-  PyTuple_SET_ITEM(__pyx_k_tuple_23, 0, ((PyObject *)__pyx_kp_s_14));
+  PyTuple_SET_ITEM(__pyx_k_tuple_26, 0, ((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_23));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_26));
 
-  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":145
+  /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":157
  * 
  *     def write_enhanced(self, char* filename):
  *         with open(filename, "w") as f:             # <<<<<<<<<<<<<<
  *             self.write_enhanced_handle(self, f)
  */
-  __pyx_k_tuple_25 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_25)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_25);
+  __pyx_k_tuple_28 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_28)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_28);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_25, 0, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_28, 0, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_25, 1, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_28, 1, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_25, 2, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_28, 2, Py_None);
   __Pyx_GIVEREF(Py_None);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_25));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_28));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":46
  * 
@@ -59938,15 +61023,15 @@ static int __Pyx_InitCachedConstants(void) {
  *         self.sent_index = IntList(1000,1000)
  *         if from_binary:
  */
-  __pyx_k_tuple_26 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_26)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_26);
+  __pyx_k_tuple_29 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_29)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_29);
   __Pyx_INCREF(__pyx_int_1000);
-  PyTuple_SET_ITEM(__pyx_k_tuple_26, 0, __pyx_int_1000);
+  PyTuple_SET_ITEM(__pyx_k_tuple_29, 0, __pyx_int_1000);
   __Pyx_GIVEREF(__pyx_int_1000);
   __Pyx_INCREF(__pyx_int_1000);
-  PyTuple_SET_ITEM(__pyx_k_tuple_26, 1, __pyx_int_1000);
+  PyTuple_SET_ITEM(__pyx_k_tuple_29, 1, __pyx_int_1000);
   __Pyx_GIVEREF(__pyx_int_1000);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_26));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_29));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":47
  *     def __cinit__(self, from_binary=None, from_text=None):
@@ -59955,15 +61040,15 @@ static int __Pyx_InitCachedConstants(void) {
  *         if from_binary:
  *             self.read_binary(from_binary)
  */
-  __pyx_k_tuple_27 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_27)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_27);
+  __pyx_k_tuple_30 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_30)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_30);
   __Pyx_INCREF(__pyx_int_1000);
-  PyTuple_SET_ITEM(__pyx_k_tuple_27, 0, __pyx_int_1000);
+  PyTuple_SET_ITEM(__pyx_k_tuple_30, 0, __pyx_int_1000);
   __Pyx_GIVEREF(__pyx_int_1000);
   __Pyx_INCREF(__pyx_int_1000);
-  PyTuple_SET_ITEM(__pyx_k_tuple_27, 1, __pyx_int_1000);
+  PyTuple_SET_ITEM(__pyx_k_tuple_30, 1, __pyx_int_1000);
   __Pyx_GIVEREF(__pyx_int_1000);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_27));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_30));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":59
  *                 pairs = line.split()
@@ -59972,12 +61057,12 @@ static int __Pyx_InitCachedConstants(void) {
  *                     self.links.append(self.link(i, j))
  *             self.sent_index.append(len(self.links))
  */
-  __pyx_k_tuple_29 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_29)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_29);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_28));
-  PyTuple_SET_ITEM(__pyx_k_tuple_29, 0, ((PyObject *)__pyx_kp_s_28));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_28));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_29));
+  __pyx_k_tuple_32 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_32)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_32);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_31));
+  PyTuple_SET_ITEM(__pyx_k_tuple_32, 0, ((PyObject *)__pyx_kp_s_31));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_31));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_32));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":54
  * 
@@ -59986,18 +61071,18 @@ static int __Pyx_InitCachedConstants(void) {
  *             for line in f:
  *                 self.sent_index.append(len(self.links))
  */
-  __pyx_k_tuple_30 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_30)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_30);
+  __pyx_k_tuple_33 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_33)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_33);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_30, 0, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_33, 0, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_30, 1, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_33, 1, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_30, 2, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_33, 2, Py_None);
   __Pyx_GIVEREF(Py_None);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_30));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_33));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":75
  *             for i, link in enumerate(self.links):
@@ -60006,12 +61091,12 @@ static int __Pyx_InitCachedConstants(void) {
  *                     sent_num = sent_num + 1
  *                 f.write("%d-%d " % self.unlink(link))
  */
-  __pyx_k_tuple_31 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_31)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_31);
+  __pyx_k_tuple_34 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_34)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_34);
   __Pyx_INCREF(((PyObject *)__pyx_kp_s_14));
-  PyTuple_SET_ITEM(__pyx_k_tuple_31, 0, ((PyObject *)__pyx_kp_s_14));
+  PyTuple_SET_ITEM(__pyx_k_tuple_34, 0, ((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_31));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_34));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":78
  *                     sent_num = sent_num + 1
@@ -60020,12 +61105,12 @@ static int __Pyx_InitCachedConstants(void) {
  * 
  *     def write_binary(self, char* filename):
  */
-  __pyx_k_tuple_33 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_33)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_33);
+  __pyx_k_tuple_36 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_36)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_36);
   __Pyx_INCREF(((PyObject *)__pyx_kp_s_14));
-  PyTuple_SET_ITEM(__pyx_k_tuple_33, 0, ((PyObject *)__pyx_kp_s_14));
+  PyTuple_SET_ITEM(__pyx_k_tuple_36, 0, ((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_33));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_36));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":71
  * 
@@ -60034,18 +61119,18 @@ static int __Pyx_InitCachedConstants(void) {
  *             sent_num = 0
  *             for i, link in enumerate(self.links):
  */
-  __pyx_k_tuple_34 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_34)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_34);
+  __pyx_k_tuple_37 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_37)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_37);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_34, 0, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_37, 0, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_34, 1, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_37, 1, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_34, 2, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_37, 2, Py_None);
   __Pyx_GIVEREF(Py_None);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_34));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_37));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":92
  *             for link in self.links:
@@ -60054,12 +61139,12 @@ static int __Pyx_InitCachedConstants(void) {
  *             for i in self.sent_index:
  *                 f.write("%d " % i)
  */
-  __pyx_k_tuple_35 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_35)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_35);
+  __pyx_k_tuple_38 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_38)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_38);
   __Pyx_INCREF(((PyObject *)__pyx_kp_s_14));
-  PyTuple_SET_ITEM(__pyx_k_tuple_35, 0, ((PyObject *)__pyx_kp_s_14));
+  PyTuple_SET_ITEM(__pyx_k_tuple_38, 0, ((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_35));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_38));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":95
  *             for i in self.sent_index:
@@ -60068,12 +61153,12 @@ static int __Pyx_InitCachedConstants(void) {
  * 
  *     def alignment(self, i):
  */
-  __pyx_k_tuple_36 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_36)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_36);
+  __pyx_k_tuple_39 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_39)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_39);
   __Pyx_INCREF(((PyObject *)__pyx_kp_s_14));
-  PyTuple_SET_ITEM(__pyx_k_tuple_36, 0, ((PyObject *)__pyx_kp_s_14));
+  PyTuple_SET_ITEM(__pyx_k_tuple_39, 0, ((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_36));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_39));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":88
  * 
@@ -60082,18 +61167,18 @@ static int __Pyx_InitCachedConstants(void) {
  *             sent_num = 1
  *             for link in self.links:
  */
-  __pyx_k_tuple_37 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_37)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_37);
+  __pyx_k_tuple_40 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_40)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_40);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_37, 0, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_40, 0, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_37, 1, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_40, 1, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_37, 2, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_40, 2, Py_None);
   __Pyx_GIVEREF(Py_None);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_37));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_40));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":302
  * 
@@ -60102,12 +61187,12 @@ static int __Pyx_InitCachedConstants(void) {
  *             for line in f:
  *                 (fword, eword, score1, score2) = line.split()
  */
-  __pyx_k_tuple_40 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_40)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_40);
+  __pyx_k_tuple_43 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_43)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_43);
   __Pyx_INCREF(__pyx_int_0);
-  PyTuple_SET_ITEM(__pyx_k_tuple_40, 0, __pyx_int_0);
+  PyTuple_SET_ITEM(__pyx_k_tuple_43, 0, __pyx_int_0);
   __Pyx_GIVEREF(__pyx_int_0);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_40));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_43));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":278
  * 
@@ -60116,18 +61201,18 @@ static int __Pyx_InitCachedConstants(void) {
  *             # first loop merely establishes size of array objects
  *             for line in f:
  */
-  __pyx_k_tuple_41 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_41)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_41);
+  __pyx_k_tuple_44 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_44)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_44);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_41, 0, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_44, 0, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_41, 1, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_44, 1, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_41, 2, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_44, 2, Py_None);
   __Pyx_GIVEREF(Py_None);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_41));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_44));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":344
  * 
@@ -60136,12 +61221,12 @@ static int __Pyx_InitCachedConstants(void) {
  *         if i == j: #empty interval
  *             return
  */
-  __pyx_k_tuple_44 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_44)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 344; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_44);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_43));
-  PyTuple_SET_ITEM(__pyx_k_tuple_44, 0, ((PyObject *)__pyx_kp_s_43));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_43));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_44));
+  __pyx_k_tuple_47 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_47)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 344; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_47);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_46));
+  PyTuple_SET_ITEM(__pyx_k_tuple_47, 0, ((PyObject *)__pyx_kp_s_46));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_46));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_47));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":367
  *             for i in self.f_index:
@@ -60150,12 +61235,12 @@ static int __Pyx_InitCachedConstants(void) {
  *             for i, s1, s2 in zip(self.e_index, self.col1, self.col2):
  *                 f.write("%d %f %f " % (i, s1, s2))
  */
-  __pyx_k_tuple_46 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_46)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_46);
+  __pyx_k_tuple_49 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_49)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_49);
   __Pyx_INCREF(((PyObject *)__pyx_kp_s_14));
-  PyTuple_SET_ITEM(__pyx_k_tuple_46, 0, ((PyObject *)__pyx_kp_s_14));
+  PyTuple_SET_ITEM(__pyx_k_tuple_49, 0, ((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_46));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_49));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":370
  *             for i, s1, s2 in zip(self.e_index, self.col1, self.col2):
@@ -60164,12 +61249,12 @@ static int __Pyx_InitCachedConstants(void) {
  *             for i, w in enumerate(self.id2fword):
  *                 f.write("%d %s " % (i, w))
  */
-  __pyx_k_tuple_48 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_48)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 370; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_48);
+  __pyx_k_tuple_51 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_51)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 370; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_51);
   __Pyx_INCREF(((PyObject *)__pyx_kp_s_14));
-  PyTuple_SET_ITEM(__pyx_k_tuple_48, 0, ((PyObject *)__pyx_kp_s_14));
+  PyTuple_SET_ITEM(__pyx_k_tuple_51, 0, ((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_48));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_51));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":373
  *             for i, w in enumerate(self.id2fword):
@@ -60178,12 +61263,12 @@ static int __Pyx_InitCachedConstants(void) {
  *             for i, w in enumerate(self.id2eword):
  *                 f.write("%d %s " % (i, w))
  */
-  __pyx_k_tuple_50 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_50)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_50);
+  __pyx_k_tuple_53 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_53)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_53);
   __Pyx_INCREF(((PyObject *)__pyx_kp_s_14));
-  PyTuple_SET_ITEM(__pyx_k_tuple_50, 0, ((PyObject *)__pyx_kp_s_14));
+  PyTuple_SET_ITEM(__pyx_k_tuple_53, 0, ((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_50));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_53));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":376
  *             for i, w in enumerate(self.id2eword):
@@ -60192,12 +61277,12 @@ static int __Pyx_InitCachedConstants(void) {
  * 
  * 
  */
-  __pyx_k_tuple_51 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_51)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_51);
+  __pyx_k_tuple_54 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_54)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_54);
   __Pyx_INCREF(((PyObject *)__pyx_kp_s_14));
-  PyTuple_SET_ITEM(__pyx_k_tuple_51, 0, ((PyObject *)__pyx_kp_s_14));
+  PyTuple_SET_ITEM(__pyx_k_tuple_54, 0, ((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_51));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_54));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":364
  * 
@@ -60206,18 +61291,18 @@ static int __Pyx_InitCachedConstants(void) {
  *             for i in self.f_index:
  *                 f.write("%d " % i)
  */
-  __pyx_k_tuple_52 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_52)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_52);
+  __pyx_k_tuple_55 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_55)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_55);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_52, 0, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_55, 0, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_52, 1, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_55, 1, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_52, 2, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_55, 2, Py_None);
   __Pyx_GIVEREF(Py_None);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_52));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_55));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":409
  *         cdef i, N, e_id, f_id
@@ -60226,18 +61311,18 @@ static int __Pyx_InitCachedConstants(void) {
  *             N = len(self.e_index)
  *             f_id = 0
  */
-  __pyx_k_tuple_54 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_54)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_54);
+  __pyx_k_tuple_57 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_57)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_57);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_54, 0, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_57, 0, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_54, 1, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_57, 1, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_54, 2, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_57, 2, Py_None);
   __Pyx_GIVEREF(Py_None);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_54));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_57));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":13
  *         cdef IntList rank
@@ -60246,12 +61331,12 @@ static int __Pyx_InitCachedConstants(void) {
  *         self.sa = sa
  *         n = self.sa.sa.len
  */
-  __pyx_k_tuple_58 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_58)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_58);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_57));
-  PyTuple_SET_ITEM(__pyx_k_tuple_58, 0, ((PyObject *)__pyx_kp_s_57));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_57));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_58));
+  __pyx_k_tuple_61 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_61)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_61);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_60));
+  PyTuple_SET_ITEM(__pyx_k_tuple_61, 0, ((PyObject *)__pyx_kp_s_60));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_60));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_61));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":34
  *             if h > 0:
@@ -60260,12 +61345,12 @@ static int __Pyx_InitCachedConstants(void) {
  * 
  *     def compute_stats(self, int max_n):
  */
-  __pyx_k_tuple_60 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_60)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_60);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_59));
-  PyTuple_SET_ITEM(__pyx_k_tuple_60, 0, ((PyObject *)__pyx_kp_s_59));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_59));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_60));
+  __pyx_k_tuple_63 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_63)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_63);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_62));
+  PyTuple_SET_ITEM(__pyx_k_tuple_63, 0, ((PyObject *)__pyx_kp_s_62));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_62));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_63));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":297
  *         pattern_rank = {}
@@ -60274,12 +61359,12 @@ static int __Pyx_InitCachedConstants(void) {
  *         cdef float start_time = monitor_cpu()
  * 
  */
-  __pyx_k_tuple_71 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_71)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_71);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_70));
-  PyTuple_SET_ITEM(__pyx_k_tuple_71, 0, ((PyObject *)__pyx_kp_s_70));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_70));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_71));
+  __pyx_k_tuple_73 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_73)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_73);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_72));
+  PyTuple_SET_ITEM(__pyx_k_tuple_73, 0, ((PyObject *)__pyx_kp_s_72));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_72));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_73));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":314
  *         queue = IntList(increment=1000)
@@ -60288,12 +61373,12 @@ static int __Pyx_InitCachedConstants(void) {
  *         N = len(data)
  *         for i from 0 <= i < N:
  */
-  __pyx_k_tuple_73 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_73)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_73);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_72));
-  PyTuple_SET_ITEM(__pyx_k_tuple_73, 0, ((PyObject *)__pyx_kp_s_72));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_72));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_73));
+  __pyx_k_tuple_75 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_75)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_75);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_74));
+  PyTuple_SET_ITEM(__pyx_k_tuple_75, 0, ((PyObject *)__pyx_kp_s_74));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_74));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_75));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":329
  *                     trie_node_data_append(node, i)
@@ -60302,12 +61387,12 @@ static int __Pyx_InitCachedConstants(void) {
  *         N = len(queue)
  *         ptr1 = 0
  */
-  __pyx_k_tuple_75 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_75)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_75);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_74));
-  PyTuple_SET_ITEM(__pyx_k_tuple_75, 0, ((PyObject *)__pyx_kp_s_74));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_74));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_75));
+  __pyx_k_tuple_77 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_77)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_77);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_76));
+  PyTuple_SET_ITEM(__pyx_k_tuple_77, 0, ((PyObject *)__pyx_kp_s_76));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_76));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_77));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":393
  *             for pattern2 in J_set:
@@ -60316,12 +61401,12 @@ static int __Pyx_InitCachedConstants(void) {
  *                     J2_set.add(combined_pattern)
  * 
  */
-  __pyx_k_tuple_77 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_77)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 393; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_77);
+  __pyx_k_tuple_79 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_79)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 393; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_79);
   __Pyx_INCREF(__pyx_int_neg_1);
-  PyTuple_SET_ITEM(__pyx_k_tuple_77, 0, __pyx_int_neg_1);
+  PyTuple_SET_ITEM(__pyx_k_tuple_79, 0, __pyx_int_neg_1);
   __Pyx_GIVEREF(__pyx_int_neg_1);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_77));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_79));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":400
  *                 x = x+1
@@ -60330,12 +61415,12 @@ static int __Pyx_InitCachedConstants(void) {
  *                     IJ_set.add(combined_pattern)
  * 
  */
-  __pyx_k_tuple_78 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_78)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_78);
+  __pyx_k_tuple_80 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_80)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_80);
   __Pyx_INCREF(__pyx_int_neg_1);
-  PyTuple_SET_ITEM(__pyx_k_tuple_78, 0, __pyx_int_neg_1);
+  PyTuple_SET_ITEM(__pyx_k_tuple_80, 0, __pyx_int_neg_1);
   __Pyx_GIVEREF(__pyx_int_neg_1);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_78));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_80));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":407
  *                 x = x+2
@@ -60344,12 +61429,12 @@ static int __Pyx_InitCachedConstants(void) {
  *                     IJ_set.add(combined_pattern)
  *                     combined_pattern = pattern2 + (-1,) + pattern1
  */
-  __pyx_k_tuple_79 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_79)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_79);
+  __pyx_k_tuple_81 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_81)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_81);
   __Pyx_INCREF(__pyx_int_neg_1);
-  PyTuple_SET_ITEM(__pyx_k_tuple_79, 0, __pyx_int_neg_1);
+  PyTuple_SET_ITEM(__pyx_k_tuple_81, 0, __pyx_int_neg_1);
   __Pyx_GIVEREF(__pyx_int_neg_1);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_79));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_81));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":409
  *                     combined_pattern = pattern1 + (-1,) + pattern2
@@ -60358,12 +61443,12 @@ static int __Pyx_InitCachedConstants(void) {
  *                     IJ_set.add(combined_pattern)
  * 
  */
-  __pyx_k_tuple_80 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_80)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_80);
+  __pyx_k_tuple_82 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_82)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_82);
   __Pyx_INCREF(__pyx_int_neg_1);
-  PyTuple_SET_ITEM(__pyx_k_tuple_80, 0, __pyx_int_neg_1);
+  PyTuple_SET_ITEM(__pyx_k_tuple_82, 0, __pyx_int_neg_1);
   __Pyx_GIVEREF(__pyx_int_neg_1);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_80));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_82));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":103
  * 
@@ -60372,12 +61457,12 @@ static int __Pyx_InitCachedConstants(void) {
  *         for i from 0 <= i < N:
  *             j = isa.arr[i]
  */
-  __pyx_k_tuple_91 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_91)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_91);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_90));
-  PyTuple_SET_ITEM(__pyx_k_tuple_91, 0, ((PyObject *)__pyx_kp_s_90));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_90));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_91));
+  __pyx_k_tuple_93 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_93)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_93);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_92));
+  PyTuple_SET_ITEM(__pyx_k_tuple_93, 0, ((PyObject *)__pyx_kp_s_92));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_92));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_93));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":202
  *             for a_i in self.sa:
@@ -60386,12 +61471,12 @@ static int __Pyx_InitCachedConstants(void) {
  *             for w_i in self.ha:
  *                 f.write("%d " % w_i)
  */
-  __pyx_k_tuple_94 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_94)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_94);
+  __pyx_k_tuple_96 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_96)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_96);
   __Pyx_INCREF(((PyObject *)__pyx_kp_s_14));
-  PyTuple_SET_ITEM(__pyx_k_tuple_94, 0, ((PyObject *)__pyx_kp_s_14));
+  PyTuple_SET_ITEM(__pyx_k_tuple_96, 0, ((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_94));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_96));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":205
  *             for w_i in self.ha:
@@ -60400,12 +61485,12 @@ static int __Pyx_InitCachedConstants(void) {
  * 
  *     cdef int __search_high(self, int word_id, int offset, int low, int high):
  */
-  __pyx_k_tuple_95 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_95)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 205; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_95);
+  __pyx_k_tuple_97 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_97)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 205; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_97);
   __Pyx_INCREF(((PyObject *)__pyx_kp_s_14));
-  PyTuple_SET_ITEM(__pyx_k_tuple_95, 0, ((PyObject *)__pyx_kp_s_14));
+  PyTuple_SET_ITEM(__pyx_k_tuple_97, 0, ((PyObject *)__pyx_kp_s_14));
   __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_95));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_97));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":198
  * 
@@ -60414,18 +61499,18 @@ static int __Pyx_InitCachedConstants(void) {
  *             self.darray.write_enhanced_handle(f)
  *             for a_i in self.sa:
  */
-  __pyx_k_tuple_96 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_96)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_96);
+  __pyx_k_tuple_98 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_98)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_98);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_96, 0, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_98, 0, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_96, 1, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_98, 1, Py_None);
   __Pyx_GIVEREF(Py_None);
   __Pyx_INCREF(Py_None);
-  PyTuple_SET_ITEM(__pyx_k_tuple_96, 2, Py_None);
+  PyTuple_SET_ITEM(__pyx_k_tuple_98, 2, Py_None);
   __Pyx_GIVEREF(Py_None);
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_96));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_98));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":92
  *             logger.info("Sampling strategy: uniform, max sample size = %d", sample_size)
@@ -60434,12 +61519,12 @@ static int __Pyx_InitCachedConstants(void) {
  * 
  *     def sample(self, PhraseLocation phrase_location):
  */
-  __pyx_k_tuple_100 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_100)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_100);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_99));
-  PyTuple_SET_ITEM(__pyx_k_tuple_100, 0, ((PyObject *)__pyx_kp_s_99));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_99));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_100));
+  __pyx_k_tuple_102 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_102)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_102);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_101));
+  PyTuple_SET_ITEM(__pyx_k_tuple_102, 0, ((PyObject *)__pyx_kp_s_101));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_101));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_102));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":300
  *         self.rules.root = ExtendedTrieNode(phrase_location=PhraseLocation())
@@ -60448,12 +61533,12 @@ static int __Pyx_InitCachedConstants(void) {
  *         self.alignment = alignment
  * 
  */
-  __pyx_k_tuple_105 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_105)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_105);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_104));
-  PyTuple_SET_ITEM(__pyx_k_tuple_105, 0, ((PyObject *)__pyx_kp_s_104));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_104));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_105));
+  __pyx_k_tuple_107 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_107)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_107);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_106));
+  PyTuple_SET_ITEM(__pyx_k_tuple_107, 0, ((PyObject *)__pyx_kp_s_106));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_106));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_107));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1004
  *                         else:
@@ -60462,12 +61547,12 @@ static int __Pyx_InitCachedConstants(void) {
  *                 # checking whether lookup_required
  *                 if lookup_required:
  */
-  __pyx_k_tuple_120 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_120)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1004; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_120);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_119));
-  PyTuple_SET_ITEM(__pyx_k_tuple_120, 0, ((PyObject *)__pyx_kp_s_119));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_119));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_120));
+  __pyx_k_tuple_122 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_122)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1004; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_122);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_121));
+  PyTuple_SET_ITEM(__pyx_k_tuple_122, 0, ((PyObject *)__pyx_kp_s_121));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_121));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_122));
 
   /* "_sa.pyx":9
  *             resource.getrusage(resource.RUSAGE_SELF).ru_stime)
@@ -60476,16 +61561,16 @@ static int __Pyx_InitCachedConstants(void) {
  *     if filename.endswith('.gz'):
  *         return gzip.GzipFile(filename)
  */
-  __pyx_k_tuple_132 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_132)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_132);
+  __pyx_k_tuple_134 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_134)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_134);
   __Pyx_INCREF(((PyObject *)__pyx_n_s__filename));
-  PyTuple_SET_ITEM(__pyx_k_tuple_132, 0, ((PyObject *)__pyx_n_s__filename));
+  PyTuple_SET_ITEM(__pyx_k_tuple_134, 0, ((PyObject *)__pyx_n_s__filename));
   __Pyx_GIVEREF(((PyObject *)__pyx_n_s__filename));
   __Pyx_INCREF(((PyObject *)__pyx_n_s__filename));
-  PyTuple_SET_ITEM(__pyx_k_tuple_132, 1, ((PyObject *)__pyx_n_s__filename));
+  PyTuple_SET_ITEM(__pyx_k_tuple_134, 1, ((PyObject *)__pyx_n_s__filename));
   __Pyx_GIVEREF(((PyObject *)__pyx_n_s__filename));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_132));
-  __pyx_k_codeobj_133 = (PyObject*)__Pyx_PyCode_New(1, 0, 2, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_132, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_134, __pyx_n_s__gzip_or_text, 9, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_133)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_134));
+  __pyx_k_codeobj_135 = (PyObject*)__Pyx_PyCode_New(1, 0, 2, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_134, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_136, __pyx_n_s__gzip_or_text, 9, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_135)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 
   /* "_sa.pyx":15
  *         return open(filename)
@@ -60494,12 +61579,12 @@ static int __Pyx_InitCachedConstants(void) {
  * 
  * include "float_list.pxi"
  */
-  __pyx_k_tuple_136 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_136)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_136);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_135));
-  PyTuple_SET_ITEM(__pyx_k_tuple_136, 0, ((PyObject *)__pyx_kp_s_135));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_135));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_136));
+  __pyx_k_tuple_138 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_138)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_138);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_137));
+  PyTuple_SET_ITEM(__pyx_k_tuple_138, 0, ((PyObject *)__pyx_kp_s_137));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_137));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_138));
 
   /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":104
  *     return ALPHABET.setindex(sym, id)
@@ -60507,16 +61592,16 @@ static int __Pyx_InitCachedConstants(void) {
  * def sym_fromstring(char* string, bint terminal):             # <<<<<<<<<<<<<<
  *     return ALPHABET.fromstring(string, terminal)
  */
-  __pyx_k_tuple_137 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_137)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __Pyx_GOTREF(__pyx_k_tuple_137);
+  __pyx_k_tuple_139 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_139)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_139);
   __Pyx_INCREF(((PyObject *)__pyx_n_s__string));
-  PyTuple_SET_ITEM(__pyx_k_tuple_137, 0, ((PyObject *)__pyx_n_s__string));
+  PyTuple_SET_ITEM(__pyx_k_tuple_139, 0, ((PyObject *)__pyx_n_s__string));
   __Pyx_GIVEREF(((PyObject *)__pyx_n_s__string));
   __Pyx_INCREF(((PyObject *)__pyx_n_s__terminal));
-  PyTuple_SET_ITEM(__pyx_k_tuple_137, 1, ((PyObject *)__pyx_n_s__terminal));
+  PyTuple_SET_ITEM(__pyx_k_tuple_139, 1, ((PyObject *)__pyx_n_s__terminal));
   __Pyx_GIVEREF(((PyObject *)__pyx_n_s__terminal));
-  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_137));
-  __pyx_k_codeobj_138 = (PyObject*)__Pyx_PyCode_New(2, 0, 2, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_137, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_139, __pyx_n_s__sym_fromstring, 104, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_138)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_139));
+  __pyx_k_codeobj_140 = (PyObject*)__Pyx_PyCode_New(2, 0, 2, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_139, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_141, __pyx_n_s__sym_fromstring, 104, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_140)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_RefNannyFinishContext();
   return 0;
   __pyx_L1_error:;
@@ -60776,12 +61861,16 @@ PyMODINIT_FUNC PyInit__sa(void)
   if (__Pyx_SetVtable(__pyx_type_3_sa_HieroCachingRuleFactory.tp_dict, __pyx_vtabptr_3_sa_HieroCachingRuleFactory) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   if (__Pyx_SetAttrString(__pyx_m, "HieroCachingRuleFactory", (PyObject *)&__pyx_type_3_sa_HieroCachingRuleFactory) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __pyx_ptype_3_sa_HieroCachingRuleFactory = &__pyx_type_3_sa_HieroCachingRuleFactory;
-  if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct__compute_stats) < 0) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __pyx_ptype_3_sa___pyx_scope_struct__compute_stats = &__pyx_type_3_sa___pyx_scope_struct__compute_stats;
-  if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_1___iter__) < 0) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __pyx_ptype_3_sa___pyx_scope_struct_1___iter__ = &__pyx_type_3_sa___pyx_scope_struct_1___iter__;
-  if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_2_input) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 919; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
-  __pyx_ptype_3_sa___pyx_scope_struct_2_input = &__pyx_type_3_sa___pyx_scope_struct_2_input;
+  if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct__read_bitext) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_ptype_3_sa___pyx_scope_struct__read_bitext = &__pyx_type_3_sa___pyx_scope_struct__read_bitext;
+  if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_1_genexpr) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_ptype_3_sa___pyx_scope_struct_1_genexpr = &__pyx_type_3_sa___pyx_scope_struct_1_genexpr;
+  if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_2_compute_stats) < 0) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_ptype_3_sa___pyx_scope_struct_2_compute_stats = &__pyx_type_3_sa___pyx_scope_struct_2_compute_stats;
+  if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_3___iter__) < 0) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_ptype_3_sa___pyx_scope_struct_3___iter__ = &__pyx_type_3_sa___pyx_scope_struct_3___iter__;
+  if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_4_input) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 919; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_ptype_3_sa___pyx_scope_struct_4_input = &__pyx_type_3_sa___pyx_scope_struct_4_input;
   /*--- Type import code ---*/
   /*--- Variable import code ---*/
   /*--- Function import code ---*/
@@ -60844,7 +61933,7 @@ PyMODINIT_FUNC PyInit__sa(void)
   __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__getLogger); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
   __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_136), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_138), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
   __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
   if (PyObject_SetAttr(__pyx_m, __pyx_n_s__logger, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -60859,7 +61948,7 @@ PyMODINIT_FUNC PyInit__sa(void)
  */
   __pyx_t_1 = __Pyx_PyBool_FromLong(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_1);
-  __pyx_k_38 = __pyx_t_1;
+  __pyx_k_41 = __pyx_t_1;
   __Pyx_GIVEREF(__pyx_t_1);
   __pyx_t_1 = 0;
 
@@ -60986,9 +62075,9 @@ PyMODINIT_FUNC PyInit__sa(void)
   __Pyx_GOTREF(__pyx_t_2);
   __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_3);
-  __Pyx_INCREF(((PyObject *)__pyx_kp_s_140));
-  PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_kp_s_140));
-  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_140));
+  __Pyx_INCREF(((PyObject *)__pyx_kp_s_142));
+  PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_kp_s_142));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_142));
   PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2);
   __Pyx_GIVEREF(__pyx_t_2);
   __pyx_t_2 = 0;
@@ -61009,7 +62098,7 @@ PyMODINIT_FUNC PyInit__sa(void)
  */
   __pyx_t_2 = __Pyx_PyBool_FromLong(0); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
   __Pyx_GOTREF(__pyx_t_2);
-  __pyx_k_97 = __pyx_t_2;
+  __pyx_k_99 = __pyx_t_2;
   __Pyx_GIVEREF(__pyx_t_2);
   __pyx_t_2 = 0;
 
@@ -61451,6 +62540,10 @@ bad:
     return -1;
 }
 
+static CYTHON_INLINE void __Pyx_RaiseClosureNameError(const char *varname) {
+    PyErr_Format(PyExc_NameError, "free variable '%s' referenced before assignment in enclosing scope", varname);
+}
+
 static CYTHON_INLINE long __Pyx_div_long(long a, long b) {
     long q = a / b;
     long r = a - q*b;
@@ -61836,6 +62929,86 @@ bad:
     return module;
 }
 
+static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) {
+#if CYTHON_COMPILING_IN_PYPY
+    return PyObject_RichCompareBool(s1, s2, equals);
+#else
+    if (s1 == s2) {
+        return (equals == Py_EQ);
+    } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) {
+        if (PyBytes_GET_SIZE(s1) != PyBytes_GET_SIZE(s2)) {
+            return (equals == Py_NE);
+        } else if (PyBytes_GET_SIZE(s1) == 1) {
+            if (equals == Py_EQ)
+                return (PyBytes_AS_STRING(s1)[0] == PyBytes_AS_STRING(s2)[0]);
+            else
+                return (PyBytes_AS_STRING(s1)[0] != PyBytes_AS_STRING(s2)[0]);
+        } else {
+            int result = memcmp(PyBytes_AS_STRING(s1), PyBytes_AS_STRING(s2), (size_t)PyBytes_GET_SIZE(s1));
+            return (equals == Py_EQ) ? (result == 0) : (result != 0);
+        }
+    } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) {
+        return (equals == Py_NE);
+    } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) {
+        return (equals == Py_NE);
+    } else {
+        int result;
+        PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
+        if (!py_result)
+            return -1;
+        result = __Pyx_PyObject_IsTrue(py_result);
+        Py_DECREF(py_result);
+        return result;
+    }
+#endif
+}
+
+static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) {
+#if CYTHON_COMPILING_IN_PYPY
+    return PyObject_RichCompareBool(s1, s2, equals);
+#else
+    if (s1 == s2) {
+        return (equals == Py_EQ);
+    } else if (PyUnicode_CheckExact(s1) & PyUnicode_CheckExact(s2)) {
+        #if CYTHON_PEP393_ENABLED
+        if ((PyUnicode_READY(s1) < 0) || (PyUnicode_READY(s2) < 0))
+            return -1;
+        if (PyUnicode_GET_LENGTH(s1) != PyUnicode_GET_LENGTH(s2)) {
+            return (equals == Py_NE);
+        } else if (PyUnicode_GET_LENGTH(s1) == 1) {
+            Py_UCS4 ch1 = PyUnicode_READ_CHAR(s1, 0);
+            Py_UCS4 ch2 = PyUnicode_READ_CHAR(s2, 0);
+            return (equals == Py_EQ) ? (ch1 == ch2) : (ch1 != ch2);
+        #else
+        if (PyUnicode_GET_SIZE(s1) != PyUnicode_GET_SIZE(s2)) {
+            return (equals == Py_NE);
+        } else if (PyUnicode_GET_SIZE(s1) == 1) {
+            Py_UNICODE ch1 = PyUnicode_AS_UNICODE(s1)[0];
+            Py_UNICODE ch2 = PyUnicode_AS_UNICODE(s2)[0];
+            return (equals == Py_EQ) ? (ch1 == ch2) : (ch1 != ch2);
+        #endif
+        } else {
+            int result = PyUnicode_Compare(s1, s2);
+            if ((result == -1) && unlikely(PyErr_Occurred()))
+                return -1;
+            return (equals == Py_EQ) ? (result == 0) : (result != 0);
+        }
+    } else if ((s1 == Py_None) & PyUnicode_CheckExact(s2)) {
+        return (equals == Py_NE);
+    } else if ((s2 == Py_None) & PyUnicode_CheckExact(s1)) {
+        return (equals == Py_NE);
+    } else {
+        int result;
+        PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
+        if (!py_result)
+            return -1;
+        result = __Pyx_PyObject_IsTrue(py_result);
+        Py_DECREF(py_result);
+        return result;
+    }
+#endif
+}
+
 static CYTHON_INLINE unsigned char __Pyx_PyInt_AsUnsignedChar(PyObject* x) {
     const unsigned char neg_one = (unsigned char)-1, const_zero = 0;
     const int is_unsigned = neg_one > const_zero;
diff --git a/python/src/sa/data_array.pxi b/python/src/sa/data_array.pxi
index 1c044694..7a102a7e 100644
--- a/python/src/sa/data_array.pxi
+++ b/python/src/sa/data_array.pxi
@@ -14,7 +14,7 @@ cdef class DataArray:
     cdef IntList sent_index
     cdef bint use_sent_id
 
-    def __cinit__(self, from_binary=None, from_text=None, bint use_sent_id=False):
+    def __cinit__(self, from_binary=None, from_text=None, side=None, bint use_sent_id=False):
         self.word2id = {"END_OF_FILE":0, "END_OF_LINE":1}
         self.id2word = ["END_OF_FILE", "END_OF_LINE"]
         self.data = IntList(1000,1000)
@@ -24,7 +24,10 @@ cdef class DataArray:
         if from_binary:
             self.read_binary(from_binary)
         elif from_text:
-            self.read_text(from_text)
+            if side:
+                self.read_bitext(from_text, (0 if side == 'source' else 1))
+            else:
+                self.read_text(from_text)
 
     def __len__(self):
         return len(self.data)
@@ -62,21 +65,30 @@ cdef class DataArray:
                     f.write("\n")
 
     def read_text(self, char* filename):
-        cdef int word_count = 0
         with gzip_or_text(filename) as fp:
-            for line_num, line in enumerate(fp):
-                self.sent_index.append(word_count)
-                for word in line.split():
-                    self.data.append(self.get_id(word))
-                    if self.use_sent_id:
-                        self.sent_id.append(line_num)
-                    word_count = word_count + 1
-                self.data.append(1)
+            self.read_text_data(fp)
+
+    def read_bitext(self, char* filename, int side):
+        with gzip_or_text(filename) as fp:
+            data = (line.split(' ||| ')[side] for line in fp)
+            self.read_text_data(data)
+
+    def read_text_data(self, data):
+        cdef int word_count = 0
+        for line_num, line in enumerate(data):
+            self.sent_index.append(word_count)
+            for word in line.split():
+                self.data.append(self.get_id(word))
                 if self.use_sent_id:
                     self.sent_id.append(line_num)
                 word_count = word_count + 1
-            self.data.append(0)
-            self.sent_index.append(word_count)
+            self.data.append(1)
+            if self.use_sent_id:
+                self.sent_id.append(line_num)
+            word_count = word_count + 1
+        self.data.append(0)
+        self.sent_index.append(word_count)
+
 
     def read_binary(self, char* filename):
         cdef FILE* f
diff --git a/python/src/sa/suffix_array.pxi b/python/src/sa/suffix_array.pxi
index 20e6261d..d86e8ea6 100644
--- a/python/src/sa/suffix_array.pxi
+++ b/python/src/sa/suffix_array.pxi
@@ -8,14 +8,14 @@ cdef class SuffixArray:
     cdef IntList sa
     cdef IntList ha
 
-    def __cinit__(self, from_binary=None, from_text=None):
+    def __cinit__(self, from_binary=None, from_text=None, side=None):
         self.darray = DataArray()
         self.sa = IntList()
         self.ha = IntList()
         if from_binary:
             self.read_binary(from_binary)
         elif from_text:
-            self.read_text(from_text)
+            self.read_text(from_text, side)
 
     def __getitem__(self, i):
         return self.sa.arr[i]
@@ -29,13 +29,13 @@ cdef class SuffixArray:
     def getSentPos(self, loc):
         return self.darray.getSentPos(loc)
 
-    def read_text(self, char* filename):
+    def read_text(self, filename, side):
         '''Constructs suffix array using the algorithm
         of Larsson & Sadahkane (1999)'''
         cdef int V, N, i, j, h, a_i, n, current_run, skip
         cdef IntList isa, word_count
 
-        self.darray = DataArray(from_text=filename, use_sent_id=True)
+        self.darray = DataArray(from_text=filename, side=side, use_sent_id=True)
         N = len(self.darray)
         V = len(self.darray.id2word)
 
-- 
cgit v1.2.3