From ee5e376e263d9aeabdeee6968b4457f53d3fc772 Mon Sep 17 00:00:00 2001 From: Victor Chahuneau Date: Fri, 27 Jul 2012 23:33:45 -0400 Subject: [python] Move python files to avoid pythonpath conflicts --- python/pkg/cdec/__init__.py | 1 + python/pkg/cdec/configobj.py | 2468 +++++++++++++++++++++++++++++++++++++++ python/pkg/cdec/sa/__init__.py | 4 + python/pkg/cdec/sa/compile.py | 94 ++ python/pkg/cdec/sa/extract.py | 31 + python/pkg/cdec/sa/extractor.py | 78 ++ python/pkg/cdec/sa/features.py | 57 + python/pkg/cdec/score.py | 1 + 8 files changed, 2734 insertions(+) create mode 100644 python/pkg/cdec/__init__.py create mode 100644 python/pkg/cdec/configobj.py create mode 100644 python/pkg/cdec/sa/__init__.py create mode 100644 python/pkg/cdec/sa/compile.py create mode 100644 python/pkg/cdec/sa/extract.py create mode 100644 python/pkg/cdec/sa/extractor.py create mode 100644 python/pkg/cdec/sa/features.py create mode 100644 python/pkg/cdec/score.py (limited to 'python/pkg/cdec') diff --git a/python/pkg/cdec/__init__.py b/python/pkg/cdec/__init__.py new file mode 100644 index 00000000..503ac787 --- /dev/null +++ b/python/pkg/cdec/__init__.py @@ -0,0 +1 @@ +from cdec._cdec import Decoder, Lattice, TRule, NT, NTRef, ParseFailed, InvalidConfig diff --git a/python/pkg/cdec/configobj.py b/python/pkg/cdec/configobj.py new file mode 100644 index 00000000..c1f6e6df --- /dev/null +++ b/python/pkg/cdec/configobj.py @@ -0,0 +1,2468 @@ +# configobj.py +# A config file reader/writer that supports nested sections in config files. +# Copyright (C) 2005-2010 Michael Foord, Nicola Larosa +# E-mail: fuzzyman AT voidspace DOT org DOT uk +# nico AT tekNico DOT net + +# ConfigObj 4 +# http://www.voidspace.org.uk/python/configobj.html + +# Released subject to the BSD License +# Please see http://www.voidspace.org.uk/python/license.shtml + +# Scripts maintained at http://www.voidspace.org.uk/python/index.shtml +# For information about bugfixes, updates and support, please join the +# ConfigObj mailing list: +# http://lists.sourceforge.net/lists/listinfo/configobj-develop +# Comments, suggestions and bug reports welcome. + +from __future__ import generators + +import os +import re +import sys + +from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF16_BE, BOM_UTF16_LE + + +# imported lazily to avoid startup performance hit if it isn't used +compiler = None + +# A dictionary mapping BOM to +# the encoding to decode with, and what to set the +# encoding attribute to. +BOMS = { + BOM_UTF8: ('utf_8', None), + BOM_UTF16_BE: ('utf16_be', 'utf_16'), + BOM_UTF16_LE: ('utf16_le', 'utf_16'), + BOM_UTF16: ('utf_16', 'utf_16'), + } +# All legal variants of the BOM codecs. +# TODO: the list of aliases is not meant to be exhaustive, is there a +# better way ? +BOM_LIST = { + 'utf_16': 'utf_16', + 'u16': 'utf_16', + 'utf16': 'utf_16', + 'utf-16': 'utf_16', + 'utf16_be': 'utf16_be', + 'utf_16_be': 'utf16_be', + 'utf-16be': 'utf16_be', + 'utf16_le': 'utf16_le', + 'utf_16_le': 'utf16_le', + 'utf-16le': 'utf16_le', + 'utf_8': 'utf_8', + 'u8': 'utf_8', + 'utf': 'utf_8', + 'utf8': 'utf_8', + 'utf-8': 'utf_8', + } + +# Map of encodings to the BOM to write. +BOM_SET = { + 'utf_8': BOM_UTF8, + 'utf_16': BOM_UTF16, + 'utf16_be': BOM_UTF16_BE, + 'utf16_le': BOM_UTF16_LE, + None: BOM_UTF8 + } + + +def match_utf8(encoding): + return BOM_LIST.get(encoding.lower()) == 'utf_8' + + +# Quote strings used for writing values +squot = "'%s'" +dquot = '"%s"' +noquot = "%s" +wspace_plus = ' \r\n\v\t\'"' +tsquot = '"""%s"""' +tdquot = "'''%s'''" + +# Sentinel for use in getattr calls to replace hasattr +MISSING = object() + +__version__ = '4.7.2' + +try: + any +except NameError: + def any(iterable): + for entry in iterable: + if entry: + return True + return False + + +__all__ = ( + '__version__', + 'DEFAULT_INDENT_TYPE', + 'DEFAULT_INTERPOLATION', + 'ConfigObjError', + 'NestingError', + 'ParseError', + 'DuplicateError', + 'ConfigspecError', + 'ConfigObj', + 'SimpleVal', + 'InterpolationError', + 'InterpolationLoopError', + 'MissingInterpolationOption', + 'RepeatSectionError', + 'ReloadError', + 'UnreprError', + 'UnknownType', + 'flatten_errors', + 'get_extra_values' +) + +DEFAULT_INTERPOLATION = 'configparser' +DEFAULT_INDENT_TYPE = ' ' +MAX_INTERPOL_DEPTH = 10 + +OPTION_DEFAULTS = { + 'interpolation': True, + 'raise_errors': False, + 'list_values': True, + 'create_empty': False, + 'file_error': False, + 'configspec': None, + 'stringify': True, + # option may be set to one of ('', ' ', '\t') + 'indent_type': None, + 'encoding': None, + 'default_encoding': None, + 'unrepr': False, + 'write_empty_values': False, +} + + + +def getObj(s): + global compiler + if compiler is None: + import compiler + s = "a=" + s + p = compiler.parse(s) + return p.getChildren()[1].getChildren()[0].getChildren()[1] + + +class UnknownType(Exception): + pass + + +class Builder(object): + + def build(self, o): + m = getattr(self, 'build_' + o.__class__.__name__, None) + if m is None: + raise UnknownType(o.__class__.__name__) + return m(o) + + def build_List(self, o): + return map(self.build, o.getChildren()) + + def build_Const(self, o): + return o.value + + def build_Dict(self, o): + d = {} + i = iter(map(self.build, o.getChildren())) + for el in i: + d[el] = i.next() + return d + + def build_Tuple(self, o): + return tuple(self.build_List(o)) + + def build_Name(self, o): + if o.name == 'None': + return None + if o.name == 'True': + return True + if o.name == 'False': + return False + + # An undefined Name + raise UnknownType('Undefined Name') + + def build_Add(self, o): + real, imag = map(self.build_Const, o.getChildren()) + try: + real = float(real) + except TypeError: + raise UnknownType('Add') + if not isinstance(imag, complex) or imag.real != 0.0: + raise UnknownType('Add') + return real+imag + + def build_Getattr(self, o): + parent = self.build(o.expr) + return getattr(parent, o.attrname) + + def build_UnarySub(self, o): + return -self.build_Const(o.getChildren()[0]) + + def build_UnaryAdd(self, o): + return self.build_Const(o.getChildren()[0]) + + +_builder = Builder() + + +def unrepr(s): + if not s: + return s + return _builder.build(getObj(s)) + + + +class ConfigObjError(SyntaxError): + """ + This is the base class for all errors that ConfigObj raises. + It is a subclass of SyntaxError. + """ + def __init__(self, message='', line_number=None, line=''): + self.line = line + self.line_number = line_number + SyntaxError.__init__(self, message) + + +class NestingError(ConfigObjError): + """ + This error indicates a level of nesting that doesn't match. + """ + + +class ParseError(ConfigObjError): + """ + This error indicates that a line is badly written. + It is neither a valid ``key = value`` line, + nor a valid section marker line. + """ + + +class ReloadError(IOError): + """ + A 'reload' operation failed. + This exception is a subclass of ``IOError``. + """ + def __init__(self): + IOError.__init__(self, 'reload failed, filename is not set.') + + +class DuplicateError(ConfigObjError): + """ + The keyword or section specified already exists. + """ + + +class ConfigspecError(ConfigObjError): + """ + An error occured whilst parsing a configspec. + """ + + +class InterpolationError(ConfigObjError): + """Base class for the two interpolation errors.""" + + +class InterpolationLoopError(InterpolationError): + """Maximum interpolation depth exceeded in string interpolation.""" + + def __init__(self, option): + InterpolationError.__init__( + self, + 'interpolation loop detected in value "%s".' % option) + + +class RepeatSectionError(ConfigObjError): + """ + This error indicates additional sections in a section with a + ``__many__`` (repeated) section. + """ + + +class MissingInterpolationOption(InterpolationError): + """A value specified for interpolation was missing.""" + def __init__(self, option): + msg = 'missing option "%s" in interpolation.' % option + InterpolationError.__init__(self, msg) + + +class UnreprError(ConfigObjError): + """An error parsing in unrepr mode.""" + + + +class InterpolationEngine(object): + """ + A helper class to help perform string interpolation. + + This class is an abstract base class; its descendants perform + the actual work. + """ + + # compiled regexp to use in self.interpolate() + _KEYCRE = re.compile(r"%\(([^)]*)\)s") + _cookie = '%' + + def __init__(self, section): + # the Section instance that "owns" this engine + self.section = section + + + def interpolate(self, key, value): + # short-cut + if not self._cookie in value: + return value + + def recursive_interpolate(key, value, section, backtrail): + """The function that does the actual work. + + ``value``: the string we're trying to interpolate. + ``section``: the section in which that string was found + ``backtrail``: a dict to keep track of where we've been, + to detect and prevent infinite recursion loops + + This is similar to a depth-first-search algorithm. + """ + # Have we been here already? + if (key, section.name) in backtrail: + # Yes - infinite loop detected + raise InterpolationLoopError(key) + # Place a marker on our backtrail so we won't come back here again + backtrail[(key, section.name)] = 1 + + # Now start the actual work + match = self._KEYCRE.search(value) + while match: + # The actual parsing of the match is implementation-dependent, + # so delegate to our helper function + k, v, s = self._parse_match(match) + if k is None: + # That's the signal that no further interpolation is needed + replacement = v + else: + # Further interpolation may be needed to obtain final value + replacement = recursive_interpolate(k, v, s, backtrail) + # Replace the matched string with its final value + start, end = match.span() + value = ''.join((value[:start], replacement, value[end:])) + new_search_start = start + len(replacement) + # Pick up the next interpolation key, if any, for next time + # through the while loop + match = self._KEYCRE.search(value, new_search_start) + + # Now safe to come back here again; remove marker from backtrail + del backtrail[(key, section.name)] + + return value + + # Back in interpolate(), all we have to do is kick off the recursive + # function with appropriate starting values + value = recursive_interpolate(key, value, self.section, {}) + return value + + + def _fetch(self, key): + """Helper function to fetch values from owning section. + + Returns a 2-tuple: the value, and the section where it was found. + """ + # switch off interpolation before we try and fetch anything ! + save_interp = self.section.main.interpolation + self.section.main.interpolation = False + + # Start at section that "owns" this InterpolationEngine + current_section = self.section + while True: + # try the current section first + val = current_section.get(key) + if val is not None and not isinstance(val, Section): + break + # try "DEFAULT" next + val = current_section.get('DEFAULT', {}).get(key) + if val is not None and not isinstance(val, Section): + break + # move up to parent and try again + # top-level's parent is itself + if current_section.parent is current_section: + # reached top level, time to give up + break + current_section = current_section.parent + + # restore interpolation to previous value before returning + self.section.main.interpolation = save_interp + if val is None: + raise MissingInterpolationOption(key) + return val, current_section + + + def _parse_match(self, match): + """Implementation-dependent helper function. + + Will be passed a match object corresponding to the interpolation + key we just found (e.g., "%(foo)s" or "$foo"). Should look up that + key in the appropriate config file section (using the ``_fetch()`` + helper function) and return a 3-tuple: (key, value, section) + + ``key`` is the name of the key we're looking for + ``value`` is the value found for that key + ``section`` is a reference to the section where it was found + + ``key`` and ``section`` should be None if no further + interpolation should be performed on the resulting value + (e.g., if we interpolated "$$" and returned "$"). + """ + raise NotImplementedError() + + + +class ConfigParserInterpolation(InterpolationEngine): + """Behaves like ConfigParser.""" + _cookie = '%' + _KEYCRE = re.compile(r"%\(([^)]*)\)s") + + def _parse_match(self, match): + key = match.group(1) + value, section = self._fetch(key) + return key, value, section + + + +class TemplateInterpolation(InterpolationEngine): + """Behaves like string.Template.""" + _cookie = '$' + _delimiter = '$' + _KEYCRE = re.compile(r""" + \$(?: + (?P\$) | # Two $ signs + (?P[_a-z][_a-z0-9]*) | # $name format + {(?P[^}]*)} # ${name} format + ) + """, re.IGNORECASE | re.VERBOSE) + + def _parse_match(self, match): + # Valid name (in or out of braces): fetch value from section + key = match.group('named') or match.group('braced') + if key is not None: + value, section = self._fetch(key) + return key, value, section + # Escaped delimiter (e.g., $$): return single delimiter + if match.group('escaped') is not None: + # Return None for key and section to indicate it's time to stop + return None, self._delimiter, None + # Anything else: ignore completely, just return it unchanged + return None, match.group(), None + + +interpolation_engines = { + 'configparser': ConfigParserInterpolation, + 'template': TemplateInterpolation, +} + + +def __newobj__(cls, *args): + # Hack for pickle + return cls.__new__(cls, *args) + +class Section(dict): + """ + A dictionary-like object that represents a section in a config file. + + It does string interpolation if the 'interpolation' attribute + of the 'main' object is set to True. + + Interpolation is tried first from this object, then from the 'DEFAULT' + section of this object, next from the parent and its 'DEFAULT' section, + and so on until the main object is reached. + + A Section will behave like an ordered dictionary - following the + order of the ``scalars`` and ``sections`` attributes. + You can use this to change the order of members. + + Iteration follows the order: scalars, then sections. + """ + + + def __setstate__(self, state): + dict.update(self, state[0]) + self.__dict__.update(state[1]) + + def __reduce__(self): + state = (dict(self), self.__dict__) + return (__newobj__, (self.__class__,), state) + + + def __init__(self, parent, depth, main, indict=None, name=None): + """ + * parent is the section above + * depth is the depth level of this section + * main is the main ConfigObj + * indict is a dictionary to initialise the section with + """ + if indict is None: + indict = {} + dict.__init__(self) + # used for nesting level *and* interpolation + self.parent = parent + # used for the interpolation attribute + self.main = main + # level of nesting depth of this Section + self.depth = depth + # purely for information + self.name = name + # + self._initialise() + # we do this explicitly so that __setitem__ is used properly + # (rather than just passing to ``dict.__init__``) + for entry, value in indict.iteritems(): + self[entry] = value + + + def _initialise(self): + # the sequence of scalar values in this Section + self.scalars = [] + # the sequence of sections in this Section + self.sections = [] + # for comments :-) + self.comments = {} + self.inline_comments = {} + # the configspec + self.configspec = None + # for defaults + self.defaults = [] + self.default_values = {} + self.extra_values = [] + self._created = False + + + def _interpolate(self, key, value): + try: + # do we already have an interpolation engine? + engine = self._interpolation_engine + except AttributeError: + # not yet: first time running _interpolate(), so pick the engine + name = self.main.interpolation + if name == True: # note that "if name:" would be incorrect here + # backwards-compatibility: interpolation=True means use default + name = DEFAULT_INTERPOLATION + name = name.lower() # so that "Template", "template", etc. all work + class_ = interpolation_engines.get(name, None) + if class_ is None: + # invalid value for self.main.interpolation + self.main.interpolation = False + return value + else: + # save reference to engine so we don't have to do this again + engine = self._interpolation_engine = class_(self) + # let the engine do the actual work + return engine.interpolate(key, value) + + + def __getitem__(self, key): + """Fetch the item and do string interpolation.""" + val = dict.__getitem__(self, key) + if self.main.interpolation: + if isinstance(val, basestring): + return self._interpolate(key, val) + if isinstance(val, list): + def _check(entry): + if isinstance(entry, basestring): + return self._interpolate(key, entry) + return entry + new = [_check(entry) for entry in val] + if new != val: + return new + return val + + + def __setitem__(self, key, value, unrepr=False): + """ + Correctly set a value. + + Making dictionary values Section instances. + (We have to special case 'Section' instances - which are also dicts) + + Keys must be strings. + Values need only be strings (or lists of strings) if + ``main.stringify`` is set. + + ``unrepr`` must be set when setting a value to a dictionary, without + creating a new sub-section. + """ + if not isinstance(key, basestring): + raise ValueError('The key "%s" is not a string.' % key) + + # add the comment + if key not in self.comments: + self.comments[key] = [] + self.inline_comments[key] = '' + # remove the entry from defaults + if key in self.defaults: + self.defaults.remove(key) + # + if isinstance(value, Section): + if key not in self: + self.sections.append(key) + dict.__setitem__(self, key, value) + elif isinstance(value, dict) and not unrepr: + # First create the new depth level, + # then create the section + if key not in self: + self.sections.append(key) + new_depth = self.depth + 1 + dict.__setitem__( + self, + key, + Section( + self, + new_depth, + self.main, + indict=value, + name=key)) + else: + if key not in self: + self.scalars.append(key) + if not self.main.stringify: + if isinstance(value, basestring): + pass + elif isinstance(value, (list, tuple)): + for entry in value: + if not isinstance(entry, basestring): + raise TypeError('Value is not a string "%s".' % entry) + else: + raise TypeError('Value is not a string "%s".' % value) + dict.__setitem__(self, key, value) + + + def __delitem__(self, key): + """Remove items from the sequence when deleting.""" + dict. __delitem__(self, key) + if key in self.scalars: + self.scalars.remove(key) + else: + self.sections.remove(key) + del self.comments[key] + del self.inline_comments[key] + + + def get(self, key, default=None): + """A version of ``get`` that doesn't bypass string interpolation.""" + try: + return self[key] + except KeyError: + return default + + + def update(self, indict): + """ + A version of update that uses our ``__setitem__``. + """ + for entry in indict: + self[entry] = indict[entry] + + + def pop(self, key, default=MISSING): + """ + 'D.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised' + """ + try: + val = self[key] + except KeyError: + if default is MISSING: + raise + val = default + else: + del self[key] + return val + + + def popitem(self): + """Pops the first (key,val)""" + sequence = (self.scalars + self.sections) + if not sequence: + raise KeyError(": 'popitem(): dictionary is empty'") + key = sequence[0] + val = self[key] + del self[key] + return key, val + + + def clear(self): + """ + A version of clear that also affects scalars/sections + Also clears comments and configspec. + + Leaves other attributes alone : + depth/main/parent are not affected + """ + dict.clear(self) + self.scalars = [] + self.sections = [] + self.comments = {} + self.inline_comments = {} + self.configspec = None + self.defaults = [] + self.extra_values = [] + + + def setdefault(self, key, default=None): + """A version of setdefault that sets sequence if appropriate.""" + try: + return self[key] + except KeyError: + self[key] = default + return self[key] + + + def items(self): + """D.items() -> list of D's (key, value) pairs, as 2-tuples""" + return zip((self.scalars + self.sections), self.values()) + + + def keys(self): + """D.keys() -> list of D's keys""" + return (self.scalars + self.sections) + + + def values(self): + """D.values() -> list of D's values""" + return [self[key] for key in (self.scalars + self.sections)] + + + def iteritems(self): + """D.iteritems() -> an iterator over the (key, value) items of D""" + return iter(self.items()) + + + def iterkeys(self): + """D.iterkeys() -> an iterator over the keys of D""" + return iter((self.scalars + self.sections)) + + __iter__ = iterkeys + + + def itervalues(self): + """D.itervalues() -> an iterator over the values of D""" + return iter(self.values()) + + + def __repr__(self): + """x.__repr__() <==> repr(x)""" + def _getval(key): + try: + return self[key] + except MissingInterpolationOption: + return dict.__getitem__(self, key) + return '{%s}' % ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) + for key in (self.scalars + self.sections)]) + + __str__ = __repr__ + __str__.__doc__ = "x.__str__() <==> str(x)" + + + # Extra methods - not in a normal dictionary + + def dict(self): + """ + Return a deepcopy of self as a dictionary. + + All members that are ``Section`` instances are recursively turned to + ordinary dictionaries - by calling their ``dict`` method. + + >>> n = a.dict() + >>> n == a + 1 + >>> n is a + 0 + """ + newdict = {} + for entry in self: + this_entry = self[entry] + if isinstance(this_entry, Section): + this_entry = this_entry.dict() + elif isinstance(this_entry, list): + # create a copy rather than a reference + this_entry = list(this_entry) + elif isinstance(this_entry, tuple): + # create a copy rather than a reference + this_entry = tuple(this_entry) + newdict[entry] = this_entry + return newdict + + + def merge(self, indict): + """ + A recursive update - useful for merging config files. + + >>> a = '''[section1] + ... option1 = True + ... [[subsection]] + ... more_options = False + ... # end of file'''.splitlines() + >>> b = '''# File is user.ini + ... [section1] + ... option1 = False + ... # end of file'''.splitlines() + >>> c1 = ConfigObj(b) + >>> c2 = ConfigObj(a) + >>> c2.merge(c1) + >>> c2 + ConfigObj({'section1': {'option1': 'False', 'subsection': {'more_options': 'False'}}}) + """ + for key, val in indict.items(): + if (key in self and isinstance(self[key], dict) and + isinstance(val, dict)): + self[key].merge(val) + else: + self[key] = val + + + def rename(self, oldkey, newkey): + """ + Change a keyname to another, without changing position in sequence. + + Implemented so that transformations can be made on keys, + as well as on values. (used by encode and decode) + + Also renames comments. + """ + if oldkey in self.scalars: + the_list = self.scalars + elif oldkey in self.sections: + the_list = self.sections + else: + raise KeyError('Key "%s" not found.' % oldkey) + pos = the_list.index(oldkey) + # + val = self[oldkey] + dict.__delitem__(self, oldkey) + dict.__setitem__(self, newkey, val) + the_list.remove(oldkey) + the_list.insert(pos, newkey) + comm = self.comments[oldkey] + inline_comment = self.inline_comments[oldkey] + del self.comments[oldkey] + del self.inline_comments[oldkey] + self.comments[newkey] = comm + self.inline_comments[newkey] = inline_comment + + + def walk(self, function, raise_errors=True, + call_on_sections=False, **keywargs): + """ + Walk every member and call a function on the keyword and value. + + Return a dictionary of the return values + + If the function raises an exception, raise the errror + unless ``raise_errors=False``, in which case set the return value to + ``False``. + + Any unrecognised keyword arguments you pass to walk, will be pased on + to the function you pass in. + + Note: if ``call_on_sections`` is ``True`` then - on encountering a + subsection, *first* the function is called for the *whole* subsection, + and then recurses into it's members. This means your function must be + able to handle strings, dictionaries and lists. This allows you + to change the key of subsections as well as for ordinary members. The + return value when called on the whole subsection has to be discarded. + + See the encode and decode methods for examples, including functions. + + .. admonition:: caution + + You can use ``walk`` to transform the names of members of a section + but you mustn't add or delete members. + + >>> config = '''[XXXXsection] + ... XXXXkey = XXXXvalue'''.splitlines() + >>> cfg = ConfigObj(config) + >>> cfg + ConfigObj({'XXXXsection': {'XXXXkey': 'XXXXvalue'}}) + >>> def transform(section, key): + ... val = section[key] + ... newkey = key.replace('XXXX', 'CLIENT1') + ... section.rename(key, newkey) + ... if isinstance(val, (tuple, list, dict)): + ... pass + ... else: + ... val = val.replace('XXXX', 'CLIENT1') + ... section[newkey] = val + >>> cfg.walk(transform, call_on_sections=True) + {'CLIENT1section': {'CLIENT1key': None}} + >>> cfg + ConfigObj({'CLIENT1section': {'CLIENT1key': 'CLIENT1value'}}) + """ + out = {} + # scalars first + for i in range(len(self.scalars)): + entry = self.scalars[i] + try: + val = function(self, entry, **keywargs) + # bound again in case name has changed + entry = self.scalars[i] + out[entry] = val + except Exception: + if raise_errors: + raise + else: + entry = self.scalars[i] + out[entry] = False + # then sections + for i in range(len(self.sections)): + entry = self.sections[i] + if call_on_sections: + try: + function(self, entry, **keywargs) + except Exception: + if raise_errors: + raise + else: + entry = self.sections[i] + out[entry] = False + # bound again in case name has changed + entry = self.sections[i] + # previous result is discarded + out[entry] = self[entry].walk( + function, + raise_errors=raise_errors, + call_on_sections=call_on_sections, + **keywargs) + return out + + + def as_bool(self, key): + """ + Accepts a key as input. The corresponding value must be a string or + the objects (``True`` or 1) or (``False`` or 0). We allow 0 and 1 to + retain compatibility with Python 2.2. + + If the string is one of ``True``, ``On``, ``Yes``, or ``1`` it returns + ``True``. + + If the string is one of ``False``, ``Off``, ``No``, or ``0`` it returns + ``False``. + + ``as_bool`` is not case sensitive. + + Any other input will raise a ``ValueError``. + + >>> a = ConfigObj() + >>> a['a'] = 'fish' + >>> a.as_bool('a') + Traceback (most recent call last): + ValueError: Value "fish" is neither True nor False + >>> a['b'] = 'True' + >>> a.as_bool('b') + 1 + >>> a['b'] = 'off' + >>> a.as_bool('b') + 0 + """ + val = self[key] + if val == True: + return True + elif val == False: + return False + else: + try: + if not isinstance(val, basestring): + # TODO: Why do we raise a KeyError here? + raise KeyError() + else: + return self.main._bools[val.lower()] + except KeyError: + raise ValueError('Value "%s" is neither True nor False' % val) + + + def as_int(self, key): + """ + A convenience method which coerces the specified value to an integer. + + If the value is an invalid literal for ``int``, a ``ValueError`` will + be raised. + + >>> a = ConfigObj() + >>> a['a'] = 'fish' + >>> a.as_int('a') + Traceback (most recent call last): + ValueError: invalid literal for int() with base 10: 'fish' + >>> a['b'] = '1' + >>> a.as_int('b') + 1 + >>> a['b'] = '3.2' + >>> a.as_int('b') + Traceback (most recent call last): + ValueError: invalid literal for int() with base 10: '3.2' + """ + return int(self[key]) + + + def as_float(self, key): + """ + A convenience method which coerces the specified value to a float. + + If the value is an invalid literal for ``float``, a ``ValueError`` will + be raised. + + >>> a = ConfigObj() + >>> a['a'] = 'fish' + >>> a.as_float('a') + Traceback (most recent call last): + ValueError: invalid literal for float(): fish + >>> a['b'] = '1' + >>> a.as_float('b') + 1.0 + >>> a['b'] = '3.2' + >>> a.as_float('b') + 3.2000000000000002 + """ + return float(self[key]) + + + def as_list(self, key): + """ + A convenience method which fetches the specified value, guaranteeing + that it is a list. + + >>> a = ConfigObj() + >>> a['a'] = 1 + >>> a.as_list('a') + [1] + >>> a['a'] = (1,) + >>> a.as_list('a') + [1] + >>> a['a'] = [1] + >>> a.as_list('a') + [1] + """ + result = self[key] + if isinstance(result, (tuple, list)): + return list(result) + return [result] + + + def restore_default(self, key): + """ + Restore (and return) default value for the specified key. + + This method will only work for a ConfigObj that was created + with a configspec and has been validated. + + If there is no default value for this key, ``KeyError`` is raised. + """ + default = self.default_values[key] + dict.__setitem__(self, key, default) + if key not in self.defaults: + self.defaults.append(key) + return default + + + def restore_defaults(self): + """ + Recursively restore default values to all members + that have them. + + This method will only work for a ConfigObj that was created + with a configspec and has been validated. + + It doesn't delete or modify entries without default values. + """ + for key in self.default_values: + self.restore_default(key) + + for section in self.sections: + self[section].restore_defaults() + + +class ConfigObj(Section): + """An object to read, create, and write config files.""" + + _keyword = re.compile(r'''^ # line start + (\s*) # indentation + ( # keyword + (?:".*?")| # double quotes + (?:'.*?')| # single quotes + (?:[^'"=].*?) # no quotes + ) + \s*=\s* # divider + (.*) # value (including list values and comments) + $ # line end + ''', + re.VERBOSE) + + _sectionmarker = re.compile(r'''^ + (\s*) # 1: indentation + ((?:\[\s*)+) # 2: section marker open + ( # 3: section name open + (?:"\s*\S.*?\s*")| # at least one non-space with double quotes + (?:'\s*\S.*?\s*')| # at least one non-space with single quotes + (?:[^'"\s].*?) # at least one non-space unquoted + ) # section name close + ((?:\s*\])+) # 4: section marker close + \s*(\#.*)? # 5: optional comment + $''', + re.VERBOSE) + + # this regexp pulls list values out as a single string + # or single values and comments + # FIXME: this regex adds a '' to the end of comma terminated lists + # workaround in ``_handle_value`` + _valueexp = re.compile(r'''^ + (?: + (?: + ( + (?: + (?: + (?:".*?")| # double quotes + (?:'.*?')| # single quotes + (?:[^'",\#][^,\#]*?) # unquoted + ) + \s*,\s* # comma + )* # match all list items ending in a comma (if any) + ) + ( + (?:".*?")| # double quotes + (?:'.*?')| # single quotes + (?:[^'",\#\s][^,]*?)| # unquoted + (?:(? 1: + msg = "Parsing failed with several errors.\nFirst error %s" % info + error = ConfigObjError(msg) + else: + error = self._errors[0] + # set the errors attribute; it's a list of tuples: + # (error_type, message, line_number) + error.errors = self._errors + # set the config attribute + error.config = self + raise error + # delete private attributes + del self._errors + + if configspec is None: + self.configspec = None + else: + self._handle_configspec(configspec) + + + def _initialise(self, options=None): + if options is None: + options = OPTION_DEFAULTS + + # initialise a few variables + self.filename = None + self._errors = [] + self.raise_errors = options['raise_errors'] + self.interpolation = options['interpolation'] + self.list_values = options['list_values'] + self.create_empty = options['create_empty'] + self.file_error = options['file_error'] + self.stringify = options['stringify'] + self.indent_type = options['indent_type'] + self.encoding = options['encoding'] + self.default_encoding = options['default_encoding'] + self.BOM = False + self.newlines = None + self.write_empty_values = options['write_empty_values'] + self.unrepr = options['unrepr'] + + self.initial_comment = [] + self.final_comment = [] + self.configspec = None + + if self._inspec: + self.list_values = False + + # Clear section attributes as well + Section._initialise(self) + + + def __repr__(self): + def _getval(key): + try: + return self[key] + except MissingInterpolationOption: + return dict.__getitem__(self, key) + return ('ConfigObj({%s})' % + ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) + for key in (self.scalars + self.sections)])) + + + def _handle_bom(self, infile): + """ + Handle any BOM, and decode if necessary. + + If an encoding is specified, that *must* be used - but the BOM should + still be removed (and the BOM attribute set). + + (If the encoding is wrongly specified, then a BOM for an alternative + encoding won't be discovered or removed.) + + If an encoding is not specified, UTF8 or UTF16 BOM will be detected and + removed. The BOM attribute will be set. UTF16 will be decoded to + unicode. + + NOTE: This method must not be called with an empty ``infile``. + + Specifying the *wrong* encoding is likely to cause a + ``UnicodeDecodeError``. + + ``infile`` must always be returned as a list of lines, but may be + passed in as a single string. + """ + if ((self.encoding is not None) and + (self.encoding.lower() not in BOM_LIST)): + # No need to check for a BOM + # the encoding specified doesn't have one + # just decode + return self._decode(infile, self.encoding) + + if isinstance(infile, (list, tuple)): + line = infile[0] + else: + line = infile + if self.encoding is not None: + # encoding explicitly supplied + # And it could have an associated BOM + # TODO: if encoding is just UTF16 - we ought to check for both + # TODO: big endian and little endian versions. + enc = BOM_LIST[self.encoding.lower()] + if enc == 'utf_16': + # For UTF16 we try big endian and little endian + for BOM, (encoding, final_encoding) in BOMS.items(): + if not final_encoding: + # skip UTF8 + continue + if infile.startswith(BOM): + ### BOM discovered + ##self.BOM = True + # Don't need to remove BOM + return self._decode(infile, encoding) + + # If we get this far, will *probably* raise a DecodeError + # As it doesn't appear to start with a BOM + return self._decode(infile, self.encoding) + + # Must be UTF8 + BOM = BOM_SET[enc] + if not line.startswith(BOM): + return self._decode(infile, self.encoding) + + newline = line[len(BOM):] + + # BOM removed + if isinstance(infile, (list, tuple)): + infile[0] = newline + else: + infile = newline + self.BOM = True + return self._decode(infile, self.encoding) + + # No encoding specified - so we need to check for UTF8/UTF16 + for BOM, (encoding, final_encoding) in BOMS.items(): + if not line.startswith(BOM): + continue + else: + # BOM discovered + self.encoding = final_encoding + if not final_encoding: + self.BOM = True + # UTF8 + # remove BOM + newline = line[len(BOM):] + if isinstance(infile, (list, tuple)): + infile[0] = newline + else: + infile = newline + # UTF8 - don't decode + if isinstance(infile, basestring): + return infile.splitlines(True) + else: + return infile + # UTF16 - have to decode + return self._decode(infile, encoding) + + # No BOM discovered and no encoding specified, just return + if isinstance(infile, basestring): + # infile read from a file will be a single string + return infile.splitlines(True) + return infile + + + def _a_to_u(self, aString): + """Decode ASCII strings to unicode if a self.encoding is specified.""" + if self.encoding: + return aString.decode('ascii') + else: + return aString + + + def _decode(self, infile, encoding): + """ + Decode infile to unicode. Using the specified encoding. + + if is a string, it also needs converting to a list. + """ + if isinstance(infile, basestring): + # can't be unicode + # NOTE: Could raise a ``UnicodeDecodeError`` + return infile.decode(encoding).splitlines(True) + for i, line in enumerate(infile): + if not isinstance(line, unicode): + # NOTE: The isinstance test here handles mixed lists of unicode/string + # NOTE: But the decode will break on any non-string values + # NOTE: Or could raise a ``UnicodeDecodeError`` + infile[i] = line.decode(encoding) + return infile + + + def _decode_element(self, line): + """Decode element to unicode if necessary.""" + if not self.encoding: + return line + if isinstance(line, str) and self.default_encoding: + return line.decode(self.default_encoding) + return line + + + def _str(self, value): + """ + Used by ``stringify`` within validate, to turn non-string values + into strings. + """ + if not isinstance(value, basestring): + return str(value) + else: + return value + + + def _parse(self, infile): + """Actually parse the config file.""" + temp_list_values = self.list_values + if self.unrepr: + self.list_values = False + + comment_list = [] + done_start = False + this_section = self + maxline = len(infile) - 1 + cur_index = -1 + reset_comment = False + + while cur_index < maxline: + if reset_comment: + comment_list = [] + cur_index += 1 + line = infile[cur_index] + sline = line.strip() + # do we have anything on the line ? + if not sline or sline.startswith('#'): + reset_comment = False + comment_list.append(line) + continue + + if not done_start: + # preserve initial comment + self.initial_comment = comment_list + comment_list = [] + done_start = True + + reset_comment = True + # first we check if it's a section marker + mat = self._sectionmarker.match(line) + if mat is not None: + # is a section line + (indent, sect_open, sect_name, sect_close, comment) = mat.groups() + if indent and (self.indent_type is None): + self.indent_type = indent + cur_depth = sect_open.count('[') + if cur_depth != sect_close.count(']'): + self._handle_error("Cannot compute the section depth at line %s.", + NestingError, infile, cur_index) + continue + + if cur_depth < this_section.depth: + # the new section is dropping back to a previous level + try: + parent = self._match_depth(this_section, + cur_depth).parent + except SyntaxError: + self._handle_error("Cannot compute nesting level at line %s.", + NestingError, infile, cur_index) + continue + elif cur_depth == this_section.depth: + # the new section is a sibling of the current section + parent = this_section.parent + elif cur_depth == this_section.depth + 1: + # the new section is a child the current section + parent = this_section + else: + self._handle_error("Section too nested at line %s.", + NestingError, infile, cur_index) + + sect_name = self._unquote(sect_name) + if sect_name in parent: + self._handle_error('Duplicate section name at line %s.', + DuplicateError, infile, cur_index) + continue + + # create the new section + this_section = Section( + parent, + cur_depth, + self, + name=sect_name) + parent[sect_name] = this_section + parent.inline_comments[sect_name] = comment + parent.comments[sect_name] = comment_list + continue + # + # it's not a section marker, + # so it should be a valid ``key = value`` line + mat = self._keyword.match(line) + if mat is None: + # it neither matched as a keyword + # or a section marker + self._handle_error( + 'Invalid line at line "%s".', + ParseError, infile, cur_index) + else: + # is a keyword value + # value will include any inline comment + (indent, key, value) = mat.groups() + if indent and (self.indent_type is None): + self.indent_type = indent + # check for a multiline value + if value[:3] in ['"""', "'''"]: + try: + value, comment, cur_index = self._multiline( + value, infile, cur_index, maxline) + except SyntaxError: + self._handle_error( + 'Parse error in value at line %s.', + ParseError, infile, cur_index) + continue + else: + if self.unrepr: + comment = '' + try: + value = unrepr(value) + except Exception, e: + if type(e) == UnknownType: + msg = 'Unknown name or type in value at line %s.' + else: + msg = 'Parse error in value at line %s.' + self._handle_error(msg, UnreprError, infile, + cur_index) + continue + else: + if self.unrepr: + comment = '' + try: + value = unrepr(value) + except Exception, e: + if isinstance(e, UnknownType): + msg = 'Unknown name or type in value at line %s.' + else: + msg = 'Parse error in value at line %s.' + self._handle_error(msg, UnreprError, infile, + cur_index) + continue + else: + # extract comment and lists + try: + (value, comment) = self._handle_value(value) + except SyntaxError: + self._handle_error( + 'Parse error in value at line %s.', + ParseError, infile, cur_index) + continue + # + key = self._unquote(key) + if key in this_section: + self._handle_error( + 'Duplicate keyword name at line %s.', + DuplicateError, infile, cur_index) + continue + # add the key. + # we set unrepr because if we have got this far we will never + # be creating a new section + this_section.__setitem__(key, value, unrepr=True) + this_section.inline_comments[key] = comment + this_section.comments[key] = comment_list + continue + # + if self.indent_type is None: + # no indentation used, set the type accordingly + self.indent_type = '' + + # preserve the final comment + if not self and not self.initial_comment: + self.initial_comment = comment_list + elif not reset_comment: + self.final_comment = comment_list + self.list_values = temp_list_values + + + def _match_depth(self, sect, depth): + """ + Given a section and a depth level, walk back through the sections + parents to see if the depth level matches a previous section. + + Return a reference to the right section, + or raise a SyntaxError. + """ + while depth < sect.depth: + if sect is sect.parent: + # we've reached the top level already + raise SyntaxError() + sect = sect.parent + if sect.depth == depth: + return sect + # shouldn't get here + raise SyntaxError() + + + def _handle_error(self, text, ErrorClass, infile, cur_index): + """ + Handle an error according to the error settings. + + Either raise the error or store it. + The error will have occured at ``cur_index`` + """ + line = infile[cur_index] + cur_index += 1 + message = text % cur_index + error = ErrorClass(message, cur_index, line) + if self.raise_errors: + # raise the error - parsing stops here + raise error + # store the error + # reraise when parsing has finished + self._errors.append(error) + + + def _unquote(self, value): + """Return an unquoted version of a value""" + if not value: + # should only happen during parsing of lists + raise SyntaxError + if (value[0] == value[-1]) and (value[0] in ('"', "'")): + value = value[1:-1] + return value + + + def _quote(self, value, multiline=True): + """ + Return a safely quoted version of a value. + + Raise a ConfigObjError if the value cannot be safely quoted. + If multiline is ``True`` (default) then use triple quotes + if necessary. + + * Don't quote values that don't need it. + * Recursively quote members of a list and return a comma joined list. + * Multiline is ``False`` for lists. + * Obey list syntax for empty and single member lists. + + If ``list_values=False`` then the value is only quoted if it contains + a ``\\n`` (is multiline) or '#'. + + If ``write_empty_values`` is set, and the value is an empty string, it + won't be quoted. + """ + if multiline and self.write_empty_values and value == '': + # Only if multiline is set, so that it is used for values not + # keys, and not values that are part of a list + return '' + + if multiline and isinstance(value, (list, tuple)): + if not value: + return ',' + elif len(value) == 1: + return self._quote(value[0], multiline=False) + ',' + return ', '.join([self._quote(val, multiline=False) + for val in value]) + if not isinstance(value, basestring): + if self.stringify: + value = str(value) + else: + raise TypeError('Value "%s" is not a string.' % value) + + if not value: + return '""' + + no_lists_no_quotes = not self.list_values and '\n' not in value and '#' not in value + need_triple = multiline and ((("'" in value) and ('"' in value)) or ('\n' in value )) + hash_triple_quote = multiline and not need_triple and ("'" in value) and ('"' in value) and ('#' in value) + check_for_single = (no_lists_no_quotes or not need_triple) and not hash_triple_quote + + if check_for_single: + if not self.list_values: + # we don't quote if ``list_values=False`` + quot = noquot + # for normal values either single or double quotes will do + elif '\n' in value: + # will only happen if multiline is off - e.g. '\n' in key + raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) + elif ((value[0] not in wspace_plus) and + (value[-1] not in wspace_plus) and + (',' not in value)): + quot = noquot + else: + quot = self._get_single_quote(value) + else: + # if value has '\n' or "'" *and* '"', it will need triple quotes + quot = self._get_triple_quote(value) + + if quot == noquot and '#' in value and self.list_values: + quot = self._get_single_quote(value) + + return quot % value + + + def _get_single_quote(self, value): + if ("'" in value) and ('"' in value): + raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) + elif '"' in value: + quot = squot + else: + quot = dquot + return quot + + + def _get_triple_quote(self, value): + if (value.find('"""') != -1) and (value.find("'''") != -1): + raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) + if value.find('"""') == -1: + quot = tdquot + else: + quot = tsquot + return quot + + + def _handle_value(self, value): + """ + Given a value string, unquote, remove comment, + handle lists. (including empty and single member lists) + """ + if self._inspec: + # Parsing a configspec so don't handle comments + return (value, '') + # do we look for lists in values ? + if not self.list_values: + mat = self._nolistvalue.match(value) + if mat is None: + raise SyntaxError() + # NOTE: we don't unquote here + return mat.groups() + # + mat = self._valueexp.match(value) + if mat is None: + # the value is badly constructed, probably badly quoted, + # or an invalid list + raise SyntaxError() + (list_values, single, empty_list, comment) = mat.groups() + if (list_values == '') and (single is None): + # change this if you want to accept empty values + raise SyntaxError() + # NOTE: note there is no error handling from here if the regex + # is wrong: then incorrect values will slip through + if empty_list is not None: + # the single comma - meaning an empty list + return ([], comment) + if single is not None: + # handle empty values + if list_values and not single: + # FIXME: the '' is a workaround because our regex now matches + # '' at the end of a list if it has a trailing comma + single = None + else: + single = single or '""' + single = self._unquote(single) + if list_values == '': + # not a list value + return (single, comment) + the_list = self._listvalueexp.findall(list_values) + the_list = [self._unquote(val) for val in the_list] + if single is not None: + the_list += [single] + return (the_list, comment) + + + def _multiline(self, value, infile, cur_index, maxline): + """Extract the value, where we are in a multiline situation.""" + quot = value[:3] + newvalue = value[3:] + single_line = self._triple_quote[quot][0] + multi_line = self._triple_quote[quot][1] + mat = single_line.match(value) + if mat is not None: + retval = list(mat.groups()) + retval.append(cur_index) + return retval + elif newvalue.find(quot) != -1: + # somehow the triple quote is missing + raise SyntaxError() + # + while cur_index < maxline: + cur_index += 1 + newvalue += '\n' + line = infile[cur_index] + if line.find(quot) == -1: + newvalue += line + else: + # end of multiline, process it + break + else: + # we've got to the end of the config, oops... + raise SyntaxError() + mat = multi_line.match(line) + if mat is None: + # a badly formed line + raise SyntaxError() + (value, comment) = mat.groups() + return (newvalue + value, comment, cur_index) + + + def _handle_configspec(self, configspec): + """Parse the configspec.""" + # FIXME: Should we check that the configspec was created with the + # correct settings ? (i.e. ``list_values=False``) + if not isinstance(configspec, ConfigObj): + try: + configspec = ConfigObj(configspec, + raise_errors=True, + file_error=True, + _inspec=True) + except ConfigObjError, e: + # FIXME: Should these errors have a reference + # to the already parsed ConfigObj ? + raise ConfigspecError('Parsing configspec failed: %s' % e) + except IOError, e: + raise IOError('Reading configspec failed: %s' % e) + + self.configspec = configspec + + + + def _set_configspec(self, section, copy): + """ + Called by validate. Handles setting the configspec on subsections + including sections to be validated by __many__ + """ + configspec = section.configspec + many = configspec.get('__many__') + if isinstance(many, dict): + for entry in section.sections: + if entry not in configspec: + section[entry].configspec = many + + for entry in configspec.sections: + if entry == '__many__': + continue + if entry not in section: + section[entry] = {} + section[entry]._created = True + if copy: + # copy comments + section.comments[entry] = configspec.comments.get(entry, []) + section.inline_comments[entry] = configspec.inline_comments.get(entry, '') + + # Could be a scalar when we expect a section + if isinstance(section[entry], Section): + section[entry].configspec = configspec[entry] + + + def _write_line(self, indent_string, entry, this_entry, comment): + """Write an individual line, for the write method""" + # NOTE: the calls to self._quote here handles non-StringType values. + if not self.unrepr: + val = self._decode_element(self._quote(this_entry)) + else: + val = repr(this_entry) + return '%s%s%s%s%s' % (indent_string, + self._decode_element(self._quote(entry, multiline=False)), + self._a_to_u(' = '), + val, + self._decode_element(comment)) + + + def _write_marker(self, indent_string, depth, entry, comment): + """Write a section marker line""" + return '%s%s%s%s%s' % (indent_string, + self._a_to_u('[' * depth), + self._quote(self._decode_element(entry), multiline=False), + self._a_to_u(']' * depth), + self._decode_element(comment)) + + + def _handle_comment(self, comment): + """Deal with a comment.""" + if not comment: + return '' + start = self.indent_type + if not comment.startswith('#'): + start += self._a_to_u(' # ') + return (start + comment) + + + # Public methods + + def write(self, outfile=None, section=None): + """ + Write the current ConfigObj as a file + + tekNico: FIXME: use StringIO instead of real files + + >>> filename = a.filename + >>> a.filename = 'test.ini' + >>> a.write() + >>> a.filename = filename + >>> a == ConfigObj('test.ini', raise_errors=True) + 1 + >>> import os + >>> os.remove('test.ini') + """ + if self.indent_type is None: + # this can be true if initialised from a dictionary + self.indent_type = DEFAULT_INDENT_TYPE + + out = [] + cs = self._a_to_u('#') + csp = self._a_to_u('# ') + if section is None: + int_val = self.interpolation + self.interpolation = False + section = self + for line in self.initial_comment: + line = self._decode_element(line) + stripped_line = line.strip() + if stripped_line and not stripped_line.startswith(cs): + line = csp + line + out.append(line) + + indent_string = self.indent_type * section.depth + for entry in (section.scalars + section.sections): + if entry in section.defaults: + # don't write out default values + continue + for comment_line in section.comments[entry]: + comment_line = self._decode_element(comment_line.lstrip()) + if comment_line and not comment_line.startswith(cs): + comment_line = csp + comment_line + out.append(indent_string + comment_line) + this_entry = section[entry] + comment = self._handle_comment(section.inline_comments[entry]) + + if isinstance(this_entry, dict): + # a section + out.append(self._write_marker( + indent_string, + this_entry.depth, + entry, + comment)) + out.extend(self.write(section=this_entry)) + else: + out.append(self._write_line( + indent_string, + entry, + this_entry, + comment)) + + if section is self: + for line in self.final_comment: + line = self._decode_element(line) + stripped_line = line.strip() + if stripped_line and not stripped_line.startswith(cs): + line = csp + line + out.append(line) + self.interpolation = int_val + + if section is not self: + return out + + if (self.filename is None) and (outfile is None): + # output a list of lines + # might need to encode + # NOTE: This will *screw* UTF16, each line will start with the BOM + if self.encoding: + out = [l.encode(self.encoding) for l in out] + if (self.BOM and ((self.encoding is None) or + (BOM_LIST.get(self.encoding.lower()) == 'utf_8'))): + # Add the UTF8 BOM + if not out: + out.append('') + out[0] = BOM_UTF8 + out[0] + return out + + # Turn the list to a string, joined with correct newlines + newline = self.newlines or os.linesep + if (getattr(outfile, 'mode', None) is not None and outfile.mode == 'w' + and sys.platform == 'win32' and newline == '\r\n'): + # Windows specific hack to avoid writing '\r\r\n' + newline = '\n' + output = self._a_to_u(newline).join(out) + if self.encoding: + output = output.encode(self.encoding) + if self.BOM and ((self.encoding is None) or match_utf8(self.encoding)): + # Add the UTF8 BOM + output = BOM_UTF8 + output + + if not output.endswith(newline): + output += newline + if outfile is not None: + outfile.write(output) + else: + h = open(self.filename, 'wb') + h.write(output) + h.close() + + + def validate(self, validator, preserve_errors=False, copy=False, + section=None): + """ + Test the ConfigObj against a configspec. + + It uses the ``validator`` object from *validate.py*. + + To run ``validate`` on the current ConfigObj, call: :: + + test = config.validate(validator) + + (Normally having previously passed in the configspec when the ConfigObj + was created - you can dynamically assign a dictionary of checks to the + ``configspec`` attribute of a section though). + + It returns ``True`` if everything passes, or a dictionary of + pass/fails (True/False). If every member of a subsection passes, it + will just have the value ``True``. (It also returns ``False`` if all + members fail). + + In addition, it converts the values from strings to their native + types if their checks pass (and ``stringify`` is set). + + If ``preserve_errors`` is ``True`` (``False`` is default) then instead + of a marking a fail with a ``False``, it will preserve the actual + exception object. This can contain info about the reason for failure. + For example the ``VdtValueTooSmallError`` indicates that the value + supplied was too small. If a value (or section) is missing it will + still be marked as ``False``. + + You must have the validate module to use ``preserve_errors=True``. + + You can then use the ``flatten_errors`` function to turn your nested + results dictionary into a flattened list of failures - useful for + displaying meaningful error messages. + """ + if section is None: + if self.configspec is None: + raise ValueError('No configspec supplied.') + if preserve_errors: + # We do this once to remove a top level dependency on the validate module + # Which makes importing configobj faster + from validate import VdtMissingValue + self._vdtMissingValue = VdtMissingValue + + section = self + + if copy: + section.initial_comment = section.configspec.initial_comment + section.final_comment = section.configspec.final_comment + section.encoding = section.configspec.encoding + section.BOM = section.configspec.BOM + section.newlines = section.configspec.newlines + section.indent_type = section.configspec.indent_type + + # + # section.default_values.clear() #?? + configspec = section.configspec + self._set_configspec(section, copy) + + + def validate_entry(entry, spec, val, missing, ret_true, ret_false): + section.default_values.pop(entry, None) + + try: + section.default_values[entry] = validator.get_default_value(configspec[entry]) + except (KeyError, AttributeError, validator.baseErrorClass): + # No default, bad default or validator has no 'get_default_value' + # (e.g. SimpleVal) + pass + + try: + check = validator.check(spec, + val, + missing=missing + ) + except validator.baseErrorClass, e: + if not preserve_errors or isinstance(e, self._vdtMissingValue): + out[entry] = False + else: + # preserve the error + out[entry] = e + ret_false = False + ret_true = False + else: + ret_false = False + out[entry] = True + if self.stringify or missing: + # if we are doing type conversion + # or the value is a supplied default + if not self.stringify: + if isinstance(check, (list, tuple)): + # preserve lists + check = [self._str(item) for item in check] + elif missing and check is None: + # convert the None from a default to a '' + check = '' + else: + check = self._str(check) + if (check != val) or missing: + section[entry] = check + if not copy and missing and entry not in section.defaults: + section.defaults.append(entry) + return ret_true, ret_false + + # + out = {} + ret_true = True + ret_false = True + + unvalidated = [k for k in section.scalars if k not in configspec] + incorrect_sections = [k for k in configspec.sections if k in section.scalars] + incorrect_scalars = [k for k in configspec.scalars if k in section.sections] + + for entry in configspec.scalars: + if entry in ('__many__', '___many___'): + # reserved names + continue + if (not entry in section.scalars) or (entry in section.defaults): + # missing entries + # or entries from defaults + missing = True + val = None + if copy and entry not in section.scalars: + # copy comments + section.comments[entry] = ( + configspec.comments.get(entry, [])) + section.inline_comments[entry] = ( + configspec.inline_comments.get(entry, '')) + # + else: + missing = False + val = section[entry] + + ret_true, ret_false = validate_entry(entry, configspec[entry], val, + missing, ret_true, ret_false) + + many = None + if '__many__' in configspec.scalars: + many = configspec['__many__'] + elif '___many___' in configspec.scalars: + many = configspec['___many___'] + + if many is not None: + for entry in unvalidated: + val = section[entry] + ret_true, ret_false = validate_entry(entry, many, val, False, + ret_true, ret_false) + unvalidated = [] + + for entry in incorrect_scalars: + ret_true = False + if not preserve_errors: + out[entry] = False + else: + ret_false = False + msg = 'Value %r was provided as a section' % entry + out[entry] = validator.baseErrorClass(msg) + for entry in incorrect_sections: + ret_true = False + if not preserve_errors: + out[entry] = False + else: + ret_false = False + msg = 'Section %r was provided as a single value' % entry + out[entry] = validator.baseErrorClass(msg) + + # Missing sections will have been created as empty ones when the + # configspec was read. + for entry in section.sections: + # FIXME: this means DEFAULT is not copied in copy mode + if section is self and entry == 'DEFAULT': + continue + if section[entry].configspec is None: + unvalidated.append(entry) + continue + if copy: + section.comments[entry] = configspec.comments.get(entry, []) + section.inline_comments[entry] = configspec.inline_comments.get(entry, '') + check = self.validate(validator, preserve_errors=preserve_errors, copy=copy, section=section[entry]) + out[entry] = check + if check == False: + ret_true = False + elif check == True: + ret_false = False + else: + ret_true = False + + section.extra_values = unvalidated + if preserve_errors and not section._created: + # If the section wasn't created (i.e. it wasn't missing) + # then we can't return False, we need to preserve errors + ret_false = False + # + if ret_false and preserve_errors and out: + # If we are preserving errors, but all + # the failures are from missing sections / values + # then we can return False. Otherwise there is a + # real failure that we need to preserve. + ret_false = not any(out.values()) + if ret_true: + return True + elif ret_false: + return False + return out + + + def reset(self): + """Clear ConfigObj instance and restore to 'freshly created' state.""" + self.clear() + self._initialise() + # FIXME: Should be done by '_initialise', but ConfigObj constructor (and reload) + # requires an empty dictionary + self.configspec = None + # Just to be sure ;-) + self._original_configspec = None + + + def reload(self): + """ + Reload a ConfigObj from file. + + This method raises a ``ReloadError`` if the ConfigObj doesn't have + a filename attribute pointing to a file. + """ + if not isinstance(self.filename, basestring): + raise ReloadError() + + filename = self.filename + current_options = {} + for entry in OPTION_DEFAULTS: + if entry == 'configspec': + continue + current_options[entry] = getattr(self, entry) + + configspec = self._original_configspec + current_options['configspec'] = configspec + + self.clear() + self._initialise(current_options) + self._load(filename, configspec) + + + +class SimpleVal(object): + """ + A simple validator. + Can be used to check that all members expected are present. + + To use it, provide a configspec with all your members in (the value given + will be ignored). Pass an instance of ``SimpleVal`` to the ``validate`` + method of your ``ConfigObj``. ``validate`` will return ``True`` if all + members are present, or a dictionary with True/False meaning + present/missing. (Whole missing sections will be replaced with ``False``) + """ + + def __init__(self): + self.baseErrorClass = ConfigObjError + + def check(self, check, member, missing=False): + """A dummy check method, always returns the value unchanged.""" + if missing: + raise self.baseErrorClass() + return member + + +def flatten_errors(cfg, res, levels=None, results=None): + """ + An example function that will turn a nested dictionary of results + (as returned by ``ConfigObj.validate``) into a flat list. + + ``cfg`` is the ConfigObj instance being checked, ``res`` is the results + dictionary returned by ``validate``. + + (This is a recursive function, so you shouldn't use the ``levels`` or + ``results`` arguments - they are used by the function.) + + Returns a list of keys that failed. Each member of the list is a tuple:: + + ([list of sections...], key, result) + + If ``validate`` was called with ``preserve_errors=False`` (the default) + then ``result`` will always be ``False``. + + *list of sections* is a flattened list of sections that the key was found + in. + + If the section was missing (or a section was expected and a scalar provided + - or vice-versa) then key will be ``None``. + + If the value (or section) was missing then ``result`` will be ``False``. + + If ``validate`` was called with ``preserve_errors=True`` and a value + was present, but failed the check, then ``result`` will be the exception + object returned. You can use this as a string that describes the failure. + + For example *The value "3" is of the wrong type*. + """ + if levels is None: + # first time called + levels = [] + results = [] + if res == True: + return results + if res == False or isinstance(res, Exception): + results.append((levels[:], None, res)) + if levels: + levels.pop() + return results + for (key, val) in res.items(): + if val == True: + continue + if isinstance(cfg.get(key), dict): + # Go down one level + levels.append(key) + flatten_errors(cfg[key], val, levels, results) + continue + results.append((levels[:], key, val)) + # + # Go up one level + if levels: + levels.pop() + # + return results + + +def get_extra_values(conf, _prepend=()): + """ + Find all the values and sections not in the configspec from a validated + ConfigObj. + + ``get_extra_values`` returns a list of tuples where each tuple represents + either an extra section, or an extra value. + + The tuples contain two values, a tuple representing the section the value + is in and the name of the extra values. For extra values in the top level + section the first member will be an empty tuple. For values in the 'foo' + section the first member will be ``('foo',)``. For members in the 'bar' + subsection of the 'foo' section the first member will be ``('foo', 'bar')``. + + NOTE: If you call ``get_extra_values`` on a ConfigObj instance that hasn't + been validated it will return an empty list. + """ + out = [] + + out.extend([(_prepend, name) for name in conf.extra_values]) + for name in conf.sections: + if name not in conf.extra_values: + out.extend(get_extra_values(conf[name], _prepend + (name,))) + return out + + +"""*A programming language is a medium of expression.* - Paul Graham""" diff --git a/python/pkg/cdec/sa/__init__.py b/python/pkg/cdec/sa/__init__.py new file mode 100644 index 00000000..fd4a4148 --- /dev/null +++ b/python/pkg/cdec/sa/__init__.py @@ -0,0 +1,4 @@ +from cdec.sa._sa import sym_fromstring,\ + SuffixArray, DataArray, LCP, Precomputation, Alignment, BiLex,\ + HieroCachingRuleFactory, Sampler +from cdec.sa.extractor import GrammarExtractor diff --git a/python/pkg/cdec/sa/compile.py b/python/pkg/cdec/sa/compile.py new file mode 100644 index 00000000..30e605a6 --- /dev/null +++ b/python/pkg/cdec/sa/compile.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python +import argparse +import os +import logging +import cdec.configobj +import cdec.sa + +MAX_PHRASE_LENGTH = 4 +def precompute(f_sa, max_len, max_nt, max_size, min_gap, rank1, rank2): + lcp = cdec.sa.LCP(f_sa) + stats = sorted(lcp.compute_stats(MAX_PHRASE_LENGTH), reverse=True) + precomp = cdec.sa.Precomputation(from_stats=stats, + fsarray=f_sa, + precompute_rank=rank1, + precompute_secondary_rank=rank2, + max_length=max_len, + max_nonterminals=max_nt, + train_max_initial_size=max_size, + train_min_gap_size=min_gap) + return precomp + +def main(): + logging.basicConfig(level=logging.INFO) + logger = logging.getLogger('cdec.sa.compile') + parser = argparse.ArgumentParser(description='Compile a corpus into a suffix array.') + parser.add_argument('--maxnt', '-n', type=int, default=2, + help='Maximum number of non-terminal symbols') + parser.add_argument('--maxlen', '-l', type=int, default=5, + help='Maximum number of terminals') + parser.add_argument('--maxsize', '-s', type=int, default=15, + help='Maximum rule span') + parser.add_argument('--mingap', '-g', type=int, default=1, + help='Minimum gap size') + parser.add_argument('--rank1', '-r1', type=int, default=100, + help='Number of pre-computed frequent patterns') + parser.add_argument('--rank2', '-r2', type=int, default=10, + help='Number of pre-computed super-frequent patterns)') + parser.add_argument('-c', '--config', default='/dev/stdout', + help='Output configuration') + parser.add_argument('-o', '--output', required=True, + help='Output path') + parser.add_argument('-f', '--source', required=True, + help='Source language corpus') + parser.add_argument('-e', '--target', required=True, + help='Target language corpus') + parser.add_argument('-a', '--alignment', required=True, + help='Bitext word alignment') + args = parser.parse_args() + + param_names = ("max_len", "max_nt", "max_size", "min_gap", "rank1", "rank2") + params = (args.maxlen, args.maxnt, args.maxsize, args.mingap, args.rank1, args.rank2) + + if not os.path.exists(args.output): + os.mkdir(args.output) + + f_sa_bin = os.path.join(args.output, 'f.sa.bin') + e_bin = os.path.join(args.output, 'e.bin') + precomp_file = 'precomp.{0}.{1}.{2}.{3}.{4}.{5}.bin'.format(*params) + precomp_bin = os.path.join(args.output, precomp_file) + a_bin = os.path.join(args.output, 'a.bin') + lex_bin = os.path.join(args.output, 'lex.bin') + + logger.info('Compiling source suffix array') + f_sa = cdec.sa.SuffixArray(from_text=args.source) + f_sa.write_binary(f_sa_bin) + + logger.info('Compiling target data array') + e = cdec.sa.DataArray(from_text=args.target) + e.write_binary(e_bin) + + logger.info('Precomputing frequent phrases') + precompute(f_sa, *params).write_binary(precomp_bin) + + logger.info('Compiling alignment') + a = cdec.sa.Alignment(from_text=args.alignment) + a.write_binary(a_bin) + + logger.info('Compiling bilexical dictionary') + lex = cdec.sa.BiLex(from_data=True, alignment=a, earray=e, fsarray=f_sa) + lex.write_binary(lex_bin) + + # Write configuration + config = cdec.configobj.ConfigObj(args.config, unrepr=True) + config['f_sa_file'] = f_sa_bin + config['e_file'] = e_bin + config['a_file'] = a_bin + config['lex_file'] = lex_bin + config['precompute_file'] = precomp_bin + for name, value in zip(param_names, params): + config[name] = value + config.write() + +if __name__ == '__main__': + main() diff --git a/python/pkg/cdec/sa/extract.py b/python/pkg/cdec/sa/extract.py new file mode 100644 index 00000000..918aa3bb --- /dev/null +++ b/python/pkg/cdec/sa/extract.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python +import sys +import os +import argparse +import logging +import cdec.sa + +def main(): + logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser(description='Extract grammars from a compiled corpus.') + parser.add_argument('-c', '--config', required=True, + help='Extractor configuration') + parser.add_argument('-g', '--grammars', required=True, + help='Grammar output path') + args = parser.parse_args() + + if not os.path.exists(args.grammars): + os.mkdir(args.grammars) + + extractor = cdec.sa.GrammarExtractor(args.config) + for i, sentence in enumerate(sys.stdin): + sentence = sentence[:-1] + grammar_file = os.path.join(args.grammars, 'grammar.{0}'.format(i)) + with open(grammar_file, 'w') as output: + for rule in extractor.grammar(sentence): + output.write(str(rule)+'\n') + grammar_file = os.path.abspath(grammar_file) + print('{1}'.format(grammar_file, sentence)) + +if __name__ == '__main__': + main() diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py new file mode 100644 index 00000000..bb912e16 --- /dev/null +++ b/python/pkg/cdec/sa/extractor.py @@ -0,0 +1,78 @@ +from itertools import chain +import os +import cdec.configobj +from cdec.sa.features import EgivenFCoherent, SampleCountF, CountEF,\ + MaxLexEgivenF, MaxLexFgivenE, IsSingletonF, IsSingletonFE +import cdec.sa + +# maximum span of a grammar rule in TEST DATA +MAX_INITIAL_SIZE = 15 + +class GrammarExtractor: + def __init__(self, config): + if isinstance(config, str) or isinstance(config, unicode): + if not os.path.exists(config): + raise IOError('cannot read configuration from {0}'.format(config)) + config = cdec.configobj.ConfigObj(config, unrepr=True) + alignment = cdec.sa.Alignment(from_binary=config['a_file']) + self.factory = cdec.sa.HieroCachingRuleFactory( + # compiled alignment object (REQUIRED) + alignment, + # name of generic nonterminal used by Hiero + category="[X]", + # maximum number of contiguous chunks of terminal symbols in RHS of a rule + max_chunks=config['max_nt']+1, + # maximum span of a grammar rule in TEST DATA + max_initial_size=MAX_INITIAL_SIZE, + # maximum number of symbols (both T and NT) allowed in a rule + max_length=config['max_len'], + # maximum number of nonterminals allowed in a rule (set >2 at your own risk) + max_nonterminals=config['max_nt'], + # maximum number of contiguous chunks of terminal symbols + # in target-side RHS of a rule. + max_target_chunks=config['max_nt']+1, + # maximum number of target side symbols (both T and NT) allowed in a rule. + max_target_length=MAX_INITIAL_SIZE, + # minimum span of a nonterminal in the RHS of a rule in TEST DATA + min_gap_size=1, + # filename of file containing precomputed collocations + precompute_file=config['precompute_file'], + # maximum frequency rank of patterns used to compute triples (< 20) + precompute_secondary_rank=config['rank2'], + # maximum frequency rank of patterns used to compute collocations (< 300) + precompute_rank=config['rank1'], + # require extracted rules to have at least one aligned word + require_aligned_terminal=True, + # require each contiguous chunk of extracted rules + # to have at least one aligned word + require_aligned_chunks=False, + # maximum span of a grammar rule extracted from TRAINING DATA + train_max_initial_size=config['max_size'], + # minimum span of an RHS nonterminal in a rule extracted from TRAINING DATA + train_min_gap_size=config['min_gap'], + # True if phrases should be tight, False otherwise (better but slower) + tight_phrases=True, + ) + + # lexical weighting tables + tt = cdec.sa.BiLex(from_binary=config['lex_file']) + + self.models = (EgivenFCoherent, SampleCountF, CountEF, + MaxLexFgivenE(tt), MaxLexEgivenF(tt), IsSingletonF, IsSingletonFE) + + fsarray = cdec.sa.SuffixArray(from_binary=config['f_sa_file']) + edarray = cdec.sa.DataArray(from_binary=config['e_file']) + + # lower=faster, higher=better; improvements level off above 200-300 range, + # -1 = don't sample, use all data (VERY SLOW!) + sampler = cdec.sa.Sampler(300, fsarray) + + self.factory.configure(fsarray, edarray, sampler) + + def grammar(self, sentence): + if isinstance(sentence, unicode): + sentence = sentence.encode('utf8') + cnet = chain(('',), sentence.split(), ('',)) + cnet = (cdec.sa.sym_fromstring(word, terminal=True) for word in cnet) + cnet = tuple(((word, None, 1), ) for word in cnet) + return self.factory.input(cnet, self.models) diff --git a/python/pkg/cdec/sa/features.py b/python/pkg/cdec/sa/features.py new file mode 100644 index 00000000..325b9e13 --- /dev/null +++ b/python/pkg/cdec/sa/features.py @@ -0,0 +1,57 @@ +from __future__ import division +import math + +MAXSCORE = 99 + +def EgivenF(fphrase, ephrase, paircount, fcount, fsample_count): # p(e|f) + return -math.log10(paircount/fcount) + +def CountEF(fphrase, ephrase, paircount, fcount, fsample_count): + return math.log10(1 + paircount) + +def SampleCountF(fphrase, ephrase, paircount, fcount, fsample_count): + return math.log10(1 + fsample_count) + +def EgivenFCoherent(fphrase, ephrase, paircount, fcount, fsample_count): + prob = paircount/fsample_count + return -math.log10(prob) if prob > 0 else MAXSCORE + +def CoherenceProb(fphrase, ephrase, paircount, fcount, fsample_count): + return -math.log10(fcount/fsample_count) + +def MaxLexEgivenF(ttable): + def feature(fphrase, ephrase, paircount, fcount, fsample_count): + fwords = fphrase.words + fwords.append('NULL') + def score(): + for e in ephrase.words: + maxScore = max(ttable.get_score(f, e, 0) for f in fwords) + yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE + return sum(score()) + return feature + +def MaxLexFgivenE(ttable): + def feature(fphrase, ephrase, paircount, fcount, fsample_count): + ewords = ephrase.words + ewords.append('NULL') + def score(): + for f in fphrase.words: + maxScore = max(ttable.get_score(f, e, 1) for e in ewords) + yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE + return sum(score()) + return feature + +def IsSingletonF(fphrase, ephrase, paircount, fcount, fsample_count): + return (fcount == 1) + +def IsSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count): + return (paircount == 1) + +def IsNotSingletonF(fphrase, ephrase, paircount, fcount, fsample_count): + return (fcount > 1) + +def IsNotSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count): + return (paircount > 1) + +def IsFEGreaterThanZero(fphrase, ephrase, paircount, fcount, fsample_count): + return (paircount > 0.01) diff --git a/python/pkg/cdec/score.py b/python/pkg/cdec/score.py new file mode 100644 index 00000000..22257774 --- /dev/null +++ b/python/pkg/cdec/score.py @@ -0,0 +1 @@ +from _cdec import BLEU, TER, CER, Metric -- cgit v1.2.3 From 0b0616c6f7400ce52d07350f7a7054a2513d9813 Mon Sep 17 00:00:00 2001 From: Victor Chahuneau Date: Sat, 28 Jul 2012 17:12:40 -0400 Subject: [python] Suffix array compiler can read bitext (-b) --- python/pkg/cdec/sa/compile.py | 24 +- python/src/cdec.sa._sa.pxd | 1 + python/src/sa/_sa.c | 4291 +++++++++++++++++++++++++--------------- python/src/sa/data_array.pxi | 38 +- python/src/sa/suffix_array.pxi | 8 +- 5 files changed, 2780 insertions(+), 1582 deletions(-) create mode 120000 python/src/cdec.sa._sa.pxd (limited to 'python/pkg/cdec') diff --git a/python/pkg/cdec/sa/compile.py b/python/pkg/cdec/sa/compile.py index 30e605a6..2a89243b 100644 --- a/python/pkg/cdec/sa/compile.py +++ b/python/pkg/cdec/sa/compile.py @@ -37,16 +37,22 @@ def main(): help='Number of pre-computed super-frequent patterns)') parser.add_argument('-c', '--config', default='/dev/stdout', help='Output configuration') - parser.add_argument('-o', '--output', required=True, - help='Output path') - parser.add_argument('-f', '--source', required=True, + parser.add_argument('-f', '--source', help='Source language corpus') - parser.add_argument('-e', '--target', required=True, + parser.add_argument('-e', '--target', help='Target language corpus') + parser.add_argument('-b', '--bitext', + help='Parallel text (source ||| target)') parser.add_argument('-a', '--alignment', required=True, help='Bitext word alignment') + parser.add_argument('-o', '--output', required=True, + help='Output path') args = parser.parse_args() + if not ((args.source and args.target) or args.bitext): + parser.error('a parallel corpus is required\n' + '\tuse -f (source) with -e (target) or -b (bitext)') + param_names = ("max_len", "max_nt", "max_size", "min_gap", "rank1", "rank2") params = (args.maxlen, args.maxnt, args.maxsize, args.mingap, args.rank1, args.rank2) @@ -61,11 +67,17 @@ def main(): lex_bin = os.path.join(args.output, 'lex.bin') logger.info('Compiling source suffix array') - f_sa = cdec.sa.SuffixArray(from_text=args.source) + if args.bitext: + f_sa = cdec.sa.SuffixArray(from_text=args.bitext, side='source') + else: + f_sa = cdec.sa.SuffixArray(from_text=args.source) f_sa.write_binary(f_sa_bin) logger.info('Compiling target data array') - e = cdec.sa.DataArray(from_text=args.target) + if args.bitext: + e = cdec.sa.DataArray(from_text=args.bitext, side='target') + else: + e = cdec.sa.DataArray(from_text=args.target) e.write_binary(e_bin) logger.info('Precomputing frequent phrases') diff --git a/python/src/cdec.sa._sa.pxd b/python/src/cdec.sa._sa.pxd new file mode 120000 index 00000000..3613f643 --- /dev/null +++ b/python/src/cdec.sa._sa.pxd @@ -0,0 +1 @@ +sa/_sa.pxd \ No newline at end of file diff --git a/python/src/sa/_sa.c b/python/src/sa/_sa.c index b7f3627a..0f9b0d22 100644 --- a/python/src/sa/_sa.c +++ b/python/src/sa/_sa.c @@ -1,4 +1,4 @@ -/* Generated by Cython 0.17.beta1 on Fri Jul 27 23:31:04 2012 */ +/* Generated by Cython 0.17.beta1 on Sat Jul 28 17:07:03 2012 */ #define PY_SSIZE_T_CLEAN #include "Python.h" @@ -383,6 +383,7 @@ static const char *__pyx_f[] = { /*--- Type declarations ---*/ struct __pyx_obj_3_sa_HieroCachingRuleFactory; +struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats; struct __pyx_obj_3_sa_IntList; struct __pyx_obj_3_sa_VEBIterator; struct __pyx_obj_3_sa_BiLex; @@ -390,23 +391,24 @@ struct __pyx_obj_3_sa_VEB; struct __pyx_obj_3_sa_LCP; struct __pyx_obj_3_sa_DataArray; struct __pyx_obj_3_sa_BitSetIterator; +struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext; struct __pyx_obj_3_sa_Precomputation; struct __pyx_obj_3_sa_SuffixArray; +struct __pyx_obj_3_sa___pyx_scope_struct_4_input; struct __pyx_obj_3_sa_Alphabet; struct __pyx_obj_3_sa_Rule; struct __pyx_obj_3_sa_PhraseLocation; -struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats; -struct __pyx_obj_3_sa___pyx_scope_struct_2_input; +struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__; struct __pyx_obj_3_sa_Alignment; struct __pyx_obj_3_sa_BitSet; struct __pyx_obj_3_sa_Sampler; struct __pyx_obj_3_sa_StringMap; -struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__; struct __pyx_obj_3_sa_TrieNode; struct __pyx_obj_3_sa_ExtendedTrieNode; struct __pyx_obj_3_sa_TrieMap; struct __pyx_obj_3_sa_Phrase; struct __pyx_obj_3_sa_TrieTable; +struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr; struct __pyx_obj_3_sa_FloatList; struct __pyx_t_3_sa__node; struct __pyx_t_3_sa__BitSet; @@ -563,6 +565,37 @@ struct __pyx_obj_3_sa_HieroCachingRuleFactory { }; +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":36 + * logger.info("LCP array completed") + * + * def compute_stats(self, int max_n): # <<<<<<<<<<<<<< + * """Note: the output of this function is not exact. In + * particular, the frequency associated with each word is + */ +struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats { + PyObject_HEAD + int __pyx_v_N; + int __pyx_v_freq; + int __pyx_v_h; + int __pyx_v_i; + int __pyx_v_ii; + int __pyx_v_iii; + int __pyx_v_j; + int __pyx_v_k; + int __pyx_v_max_n; + int __pyx_v_n; + PyObject *__pyx_v_ngram; + struct __pyx_obj_3_sa_IntList *__pyx_v_ngram_start; + PyObject *__pyx_v_ngram_starts; + int __pyx_v_rs; + struct __pyx_obj_3_sa_IntList *__pyx_v_run_start; + struct __pyx_obj_3_sa_LCP *__pyx_v_self; + int __pyx_v_valid; + struct __pyx_obj_3_sa_VEB *__pyx_v_veb; + int __pyx_t_0; +}; + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":9 * from libc.string cimport memset, memcpy * @@ -676,6 +709,20 @@ struct __pyx_obj_3_sa_BitSetIterator { }; +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":71 + * self.read_text_data(fp) + * + * def read_bitext(self, char* filename, int side): # <<<<<<<<<<<<<< + * with gzip_or_text(filename) as fp: + * data = (line.split(' ||| ')[side] for line in fp) + */ +struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext { + PyObject_HEAD + PyObject *__pyx_v_fp; + int __pyx_v_side; +}; + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":188 * * @@ -713,84 +760,6 @@ struct __pyx_obj_3_sa_SuffixArray { }; -/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":7 - * cdef int INDEX_MASK = (1< + +static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/ + +static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/ + +#if PY_MAJOR_VERSION >= 3 +#define __Pyx_PyString_Equals __Pyx_PyUnicode_Equals +#else +#define __Pyx_PyString_Equals __Pyx_PyBytes_Equals +#endif + static CYTHON_INLINE unsigned char __Pyx_PyInt_AsUnsignedChar(PyObject *); static CYTHON_INLINE unsigned short __Pyx_PyInt_AsUnsignedShort(PyObject *); @@ -1868,9 +1915,11 @@ static PyTypeObject *__pyx_ptype_3_sa_TrieTable = 0; static PyTypeObject *__pyx_ptype_3_sa_PhraseLocation = 0; static PyTypeObject *__pyx_ptype_3_sa_Sampler = 0; static PyTypeObject *__pyx_ptype_3_sa_HieroCachingRuleFactory = 0; -static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct__compute_stats = 0; -static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct_1___iter__ = 0; -static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct_2_input = 0; +static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct__read_bitext = 0; +static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct_1_genexpr = 0; +static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct_2_compute_stats = 0; +static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct_3___iter__ = 0; +static PyTypeObject *__pyx_ptype_3_sa___pyx_scope_struct_4_input = 0; static int __pyx_v_3_sa_MIN_BOTTOM_SIZE; static int __pyx_v_3_sa_MIN_BOTTOM_BITS; static int __pyx_v_3_sa_LOWER_MASK[32]; @@ -1957,7 +2006,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_28write(struct __pyx_obj_3_sa_IntList *_ static PyObject *__pyx_pf_3_sa_7IntList_30read(struct __pyx_obj_3_sa_IntList *__pyx_v_self, char *__pyx_v_filename); /* proto */ static int __pyx_pf_3_sa_9StringMap___cinit__(struct __pyx_obj_3_sa_StringMap *__pyx_v_self); /* proto */ static void __pyx_pf_3_sa_9StringMap_2__dealloc__(struct __pyx_obj_3_sa_StringMap *__pyx_v_self); /* proto */ -static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text, int __pyx_v_use_sent_id); /* proto */ +static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text, PyObject *__pyx_v_side, int __pyx_v_use_sent_id); /* proto */ static Py_ssize_t __pyx_pf_3_sa_9DataArray_2__len__(struct __pyx_obj_3_sa_DataArray *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_3_sa_9DataArray_4getSentId(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_i); /* proto */ static PyObject *__pyx_pf_3_sa_9DataArray_6getSent(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_i); /* proto */ @@ -1966,10 +2015,13 @@ static PyObject *__pyx_pf_3_sa_9DataArray_10get_id(struct __pyx_obj_3_sa_DataArr static PyObject *__pyx_pf_3_sa_9DataArray_12get_word(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_id); /* proto */ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */ static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */ -static PyObject *__pyx_pf_3_sa_9DataArray_18read_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */ -static PyObject *__pyx_pf_3_sa_9DataArray_20write_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */ -static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_f); /* proto */ -static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */ +static PyObject *__pyx_pf_3_sa_9DataArray_11read_bitext_genexpr(PyObject *__pyx_self); /* proto */ +static PyObject *__pyx_pf_3_sa_9DataArray_18read_bitext(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename, int __pyx_v_side); /* proto */ +static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_data); /* proto */ +static PyObject *__pyx_pf_3_sa_9DataArray_22read_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */ +static PyObject *__pyx_pf_3_sa_9DataArray_24write_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */ +static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_f); /* proto */ +static PyObject *__pyx_pf_3_sa_9DataArray_28write_enhanced(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename); /* proto */ static PyObject *__pyx_pf_3_sa_9Alignment_unlink(CYTHON_UNUSED struct __pyx_obj_3_sa_Alignment *__pyx_v_self, PyObject *__pyx_v_link); /* proto */ static PyObject *__pyx_pf_3_sa_9Alignment_2get_sent_links(struct __pyx_obj_3_sa_Alignment *__pyx_v_self, int __pyx_v_sent_id); /* proto */ static int __pyx_pf_3_sa_9Alignment_4__cinit__(struct __pyx_obj_3_sa_Alignment *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text); /* proto */ @@ -2061,12 +2113,12 @@ static int __pyx_pf_3_sa_14Precomputation___cinit__(struct __pyx_obj_3_sa_Precom static PyObject *__pyx_pf_3_sa_14Precomputation_2read_binary(struct __pyx_obj_3_sa_Precomputation *__pyx_v_self, char *__pyx_v_filename); /* proto */ static PyObject *__pyx_pf_3_sa_14Precomputation_4write_binary(struct __pyx_obj_3_sa_Precomputation *__pyx_v_self, char *__pyx_v_filename); /* proto */ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_sa_Precomputation *__pyx_v_self, PyObject *__pyx_v_stats, struct __pyx_obj_3_sa_SuffixArray *__pyx_v_sarray); /* proto */ -static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text); /* proto */ +static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text, PyObject *__pyx_v_side); /* proto */ static PyObject *__pyx_pf_3_sa_11SuffixArray_2__getitem__(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_i); /* proto */ static PyObject *__pyx_pf_3_sa_11SuffixArray_4getSentId(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_i); /* proto */ static PyObject *__pyx_pf_3_sa_11SuffixArray_6getSent(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_i); /* proto */ static PyObject *__pyx_pf_3_sa_11SuffixArray_8getSentPos(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_loc); /* proto */ -static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, char *__pyx_v_filename); /* proto */ +static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_filename, PyObject *__pyx_v_side); /* proto */ static PyObject *__pyx_pf_3_sa_11SuffixArray_12q3sort(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, int __pyx_v_i, int __pyx_v_j, int __pyx_v_h, struct __pyx_obj_3_sa_IntList *__pyx_v_isa, PyObject *__pyx_v_pad); /* proto */ static PyObject *__pyx_pf_3_sa_11SuffixArray_14write_text(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, char *__pyx_v_filename); /* proto */ static PyObject *__pyx_pf_3_sa_11SuffixArray_16read_binary(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, char *__pyx_v_filename); /* proto */ @@ -2121,49 +2173,47 @@ static char __pyx_k_8[] = "Requested index %d:%d of %d-length IntList"; static char __pyx_k_9[] = "Illegal key type %s for IntList"; static char __pyx_k_13[] = "%s "; static char __pyx_k_14[] = "\n"; -static char __pyx_k_18[] = "%d "; -static char __pyx_k_22[] = "%s %d "; -static char __pyx_k_24[] = "write_enhanced_handle"; -static char __pyx_k_28[] = "-"; -static char __pyx_k_32[] = "%d-%d "; -static char __pyx_k_39[] = "%d-%d out of bounds (I=%d,J=%d) in line %d\n"; -static char __pyx_k_42[] = ""; -static char __pyx_k_43[] = "Sort error in CLex"; -static char __pyx_k_45[] = " "; -static char __pyx_k_47[] = "%d %f %f "; -static char __pyx_k_49[] = "%d %s "; -static char __pyx_k_53[] = "%s %s %.6f %.6f\n"; -static char __pyx_k_55[] = " ("; -static char __pyx_k_56[] = ")"; -static char __pyx_k_57[] = "Constructing LCP array"; -static char __pyx_k_59[] = "LCP array completed"; -static char __pyx_k_61[] = "[%s,%d]"; -static char __pyx_k_62[] = "[%s]"; -static char __pyx_k_63[] = "\\"; -static char __pyx_k_64[] = " "; -static char __pyx_k_65[] = "Invalid LHS symbol: %d"; -static char __pyx_k_66[] = "%d-%d"; -static char __pyx_k_67[] = " ||| "; -static char __pyx_k_68[] = "precompute_secondary_rank"; -static char __pyx_k_69[] = "train_max_initial_size"; -static char __pyx_k_70[] = "Precomputing frequent intersections"; -static char __pyx_k_72[] = " Computing inverted indexes..."; -static char __pyx_k_74[] = " Computing collocations..."; -static char __pyx_k_76[] = " %d sentences"; -static char __pyx_k_81[] = "X "; -static char __pyx_k_82[] = "ERROR: unexpected pattern %s in set of precomputed collocations"; -static char __pyx_k_83[] = "RANK %d\tCOUNT, COST: %d %d\tCUMUL: %d, %d"; -static char __pyx_k_84[] = "Precomputed collocations for %d patterns out of %d possible (upper bound %d)"; -static char __pyx_k_85[] = "Precomputed inverted index for %d patterns "; -static char __pyx_k_86[] = "Precomputation took %f seconds"; -static char __pyx_k_87[] = " Bucket sort took %f seconds"; -static char __pyx_k_88[] = " Refining, sort depth = %d"; -static char __pyx_k_89[] = " Refinement took %f seconds"; -static char __pyx_k_90[] = " Finalizing sort..."; -static char __pyx_k_92[] = "Suffix array construction took %f seconds"; -static char __pyx_k_93[] = "Unexpected condition found in q3sort: sort from %d to %d"; -static char __pyx_k_98[] = "Sampling strategy: uniform, max sample size = %d"; -static char __pyx_k_99[] = "Sampling strategy: no sampling"; +static char __pyx_k_18[] = " ||| "; +static char __pyx_k_21[] = "%d "; +static char __pyx_k_25[] = "%s %d "; +static char __pyx_k_27[] = "write_enhanced_handle"; +static char __pyx_k_31[] = "-"; +static char __pyx_k_35[] = "%d-%d "; +static char __pyx_k_42[] = "%d-%d out of bounds (I=%d,J=%d) in line %d\n"; +static char __pyx_k_45[] = ""; +static char __pyx_k_46[] = "Sort error in CLex"; +static char __pyx_k_48[] = " "; +static char __pyx_k_50[] = "%d %f %f "; +static char __pyx_k_52[] = "%d %s "; +static char __pyx_k_56[] = "%s %s %.6f %.6f\n"; +static char __pyx_k_58[] = " ("; +static char __pyx_k_59[] = ")"; +static char __pyx_k_60[] = "Constructing LCP array"; +static char __pyx_k_62[] = "LCP array completed"; +static char __pyx_k_64[] = "[%s,%d]"; +static char __pyx_k_65[] = "[%s]"; +static char __pyx_k_66[] = "\\"; +static char __pyx_k_67[] = " "; +static char __pyx_k_68[] = "Invalid LHS symbol: %d"; +static char __pyx_k_69[] = "%d-%d"; +static char __pyx_k_70[] = "precompute_secondary_rank"; +static char __pyx_k_71[] = "train_max_initial_size"; +static char __pyx_k_72[] = "Precomputing frequent intersections"; +static char __pyx_k_74[] = " Computing inverted indexes..."; +static char __pyx_k_76[] = " Computing collocations..."; +static char __pyx_k_78[] = " %d sentences"; +static char __pyx_k_83[] = "X "; +static char __pyx_k_84[] = "ERROR: unexpected pattern %s in set of precomputed collocations"; +static char __pyx_k_85[] = "RANK %d\tCOUNT, COST: %d %d\tCUMUL: %d, %d"; +static char __pyx_k_86[] = "Precomputed collocations for %d patterns out of %d possible (upper bound %d)"; +static char __pyx_k_87[] = "Precomputed inverted index for %d patterns "; +static char __pyx_k_88[] = "Precomputation took %f seconds"; +static char __pyx_k_89[] = " Bucket sort took %f seconds"; +static char __pyx_k_90[] = " Refining, sort depth = %d"; +static char __pyx_k_91[] = " Refinement took %f seconds"; +static char __pyx_k_92[] = " Finalizing sort..."; +static char __pyx_k_94[] = "Suffix array construction took %f seconds"; +static char __pyx_k_95[] = "Unexpected condition found in q3sort: sort from %d to %d"; static char __pyx_k__0[] = "0"; static char __pyx_k__1[] = "1"; static char __pyx_k__e[] = "e"; @@ -2173,39 +2223,41 @@ static char __pyx_k__i[] = "i"; static char __pyx_k__j[] = "j"; static char __pyx_k__r[] = "r"; static char __pyx_k__w[] = "w"; -static char __pyx_k_101[] = "require_aligned_terminal"; -static char __pyx_k_102[] = "require_aligned_chunks"; -static char __pyx_k_103[] = "[X]"; -static char __pyx_k_104[] = "Must specify an alignment object"; -static char __pyx_k_106[] = "Reading precomputed data from file %s... "; -static char __pyx_k_107[] = "Precomputation done with max nonterminals %d, decoder uses %d"; -static char __pyx_k_108[] = "Precomputation done with max terminals %d, decoder uses %d"; -static char __pyx_k_109[] = "Precomputation done with max initial size %d, decoder uses %d"; -static char __pyx_k_110[] = "Precomputation done with min gap size %d, decoder uses %d"; -static char __pyx_k_111[] = "Converting %d hash keys on precomputed inverted index... "; -static char __pyx_k_112[] = "Converting %d hash keys on precomputed collocations... "; -static char __pyx_k_113[] = "Processing precomputations took %f seconds"; -static char __pyx_k_114[] = "{"; - static char __pyx_k_115[] = "("; -static char __pyx_k_116[] = "}"; -static char __pyx_k_117[] = "get_precomputed_collocation"; -static char __pyx_k_118[] = "double binary"; -static char __pyx_k_119[] = "Keyword trie error"; -static char __pyx_k_121[] = "get_all_nodes_isteps_away"; -static char __pyx_k_122[] = "Total time for rule lookup, extraction, and scoring = %f seconds"; -static char __pyx_k_123[] = " Extract time = %f seconds"; -static char __pyx_k_124[] = "No aligned terminals"; -static char __pyx_k_125[] = "Unaligned chunk"; -static char __pyx_k_126[] = "Gaps are not tight phrases"; -static char __pyx_k_127[] = "Inside edges of preceding subphrase are not tight"; -static char __pyx_k_128[] = "Inside edges of following subphrase are not tight"; -static char __pyx_k_129[] = "Subphrase [%d, %d] failed integrity check"; -static char __pyx_k_130[] = "Didn't extract anything from [%d, %d] -> [%d, %d]"; -static char __pyx_k_131[] = "Unable to extract basic phrase"; -static char __pyx_k_134[] = "/Users/vchahun/Sandbox/cdec/python/src/sa/_sa.pyx"; -static char __pyx_k_135[] = "cdec.sa"; -static char __pyx_k_139[] = "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi"; -static char __pyx_k_140[] = "*EPS*"; +static char __pyx_k_100[] = "Sampling strategy: uniform, max sample size = %d"; +static char __pyx_k_101[] = "Sampling strategy: no sampling"; +static char __pyx_k_103[] = "require_aligned_terminal"; +static char __pyx_k_104[] = "require_aligned_chunks"; +static char __pyx_k_105[] = "[X]"; +static char __pyx_k_106[] = "Must specify an alignment object"; +static char __pyx_k_108[] = "Reading precomputed data from file %s... "; +static char __pyx_k_109[] = "Precomputation done with max nonterminals %d, decoder uses %d"; +static char __pyx_k_110[] = "Precomputation done with max terminals %d, decoder uses %d"; +static char __pyx_k_111[] = "Precomputation done with max initial size %d, decoder uses %d"; +static char __pyx_k_112[] = "Precomputation done with min gap size %d, decoder uses %d"; +static char __pyx_k_113[] = "Converting %d hash keys on precomputed inverted index... "; +static char __pyx_k_114[] = "Converting %d hash keys on precomputed collocations... "; +static char __pyx_k_115[] = "Processing precomputations took %f seconds"; +static char __pyx_k_116[] = "{"; + static char __pyx_k_117[] = "("; +static char __pyx_k_118[] = "}"; +static char __pyx_k_119[] = "get_precomputed_collocation"; +static char __pyx_k_120[] = "double binary"; +static char __pyx_k_121[] = "Keyword trie error"; +static char __pyx_k_123[] = "get_all_nodes_isteps_away"; +static char __pyx_k_124[] = "Total time for rule lookup, extraction, and scoring = %f seconds"; +static char __pyx_k_125[] = " Extract time = %f seconds"; +static char __pyx_k_126[] = "No aligned terminals"; +static char __pyx_k_127[] = "Unaligned chunk"; +static char __pyx_k_128[] = "Gaps are not tight phrases"; +static char __pyx_k_129[] = "Inside edges of preceding subphrase are not tight"; +static char __pyx_k_130[] = "Inside edges of following subphrase are not tight"; +static char __pyx_k_131[] = "Subphrase [%d, %d] failed integrity check"; +static char __pyx_k_132[] = "Didn't extract anything from [%d, %d] -> [%d, %d]"; +static char __pyx_k_133[] = "Unable to extract basic phrase"; +static char __pyx_k_136[] = "/Users/vchahun/Sandbox/cdec/python/src/sa/_sa.pyx"; +static char __pyx_k_137[] = "cdec.sa"; +static char __pyx_k_141[] = "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi"; +static char __pyx_k_142[] = "*EPS*"; static char __pyx_k__gc[] = "gc"; static char __pyx_k__sa[] = "sa"; static char __pyx_k___sa[] = "_sa"; @@ -2229,6 +2281,7 @@ static char __pyx_k__info[] = "info"; static char __pyx_k__join[] = "join"; static char __pyx_k__open[] = "open"; static char __pyx_k__seek[] = "seek"; +static char __pyx_k__side[] = "side"; static char __pyx_k__size[] = "size"; static char __pyx_k__skip[] = "skip"; static char __pyx_k__stop[] = "stop"; @@ -2266,6 +2319,7 @@ static char __pyx_k__sample[] = "sample"; static char __pyx_k__sarray[] = "sarray"; static char __pyx_k__scores[] = "scores"; static char __pyx_k__sorted[] = "sorted"; +static char __pyx_k__source[] = "source"; static char __pyx_k__string[] = "string"; static char __pyx_k__unlink[] = "unlink"; static char __pyx_k__advance[] = "advance"; @@ -2334,6 +2388,7 @@ static char __pyx_k__initial_len[] = "initial_len"; static char __pyx_k__next_states[] = "next_states"; static char __pyx_k__precomputed[] = "precomputed"; static char __pyx_k__read_binary[] = "read_binary"; +static char __pyx_k__read_bitext[] = "read_bitext"; static char __pyx_k__sample_size[] = "sample_size"; static char __pyx_k__suffix_link[] = "suffix_link"; static char __pyx_k__use_sent_id[] = "use_sent_id"; @@ -2344,6 +2399,7 @@ static char __pyx_k__StopIteration[] = "StopIteration"; static char __pyx_k__alphabet_size[] = "alphabet_size"; static char __pyx_k__tight_phrases[] = "tight_phrases"; static char __pyx_k__pattern2phrase[] = "pattern2phrase"; +static char __pyx_k__read_text_data[] = "read_text_data"; static char __pyx_k__sym_fromstring[] = "sym_fromstring"; static char __pyx_k__by_slack_factor[] = "by_slack_factor"; static char __pyx_k__get_next_states[] = "get_next_states"; @@ -2362,11 +2418,11 @@ static char __pyx_k__max_target_length[] = "max_target_length"; static char __pyx_k__train_min_gap_size[] = "train_min_gap_size"; static char __pyx_k__pattern2phrase_plus[] = "pattern2phrase_plus"; static PyObject *__pyx_kp_s_1; -static PyObject *__pyx_n_s_101; -static PyObject *__pyx_n_s_102; -static PyObject *__pyx_kp_s_104; +static PyObject *__pyx_kp_s_100; +static PyObject *__pyx_kp_s_101; +static PyObject *__pyx_n_s_103; +static PyObject *__pyx_n_s_104; static PyObject *__pyx_kp_s_106; -static PyObject *__pyx_kp_s_107; static PyObject *__pyx_kp_s_108; static PyObject *__pyx_kp_s_109; static PyObject *__pyx_kp_s_110; @@ -2376,12 +2432,12 @@ static PyObject *__pyx_kp_s_113; static PyObject *__pyx_kp_s_114; static PyObject *__pyx_kp_s_115; static PyObject *__pyx_kp_s_116; -static PyObject *__pyx_n_s_117; +static PyObject *__pyx_kp_s_117; static PyObject *__pyx_kp_s_118; -static PyObject *__pyx_kp_s_119; -static PyObject *__pyx_n_s_121; -static PyObject *__pyx_kp_s_122; -static PyObject *__pyx_kp_s_123; +static PyObject *__pyx_n_s_119; +static PyObject *__pyx_kp_s_120; +static PyObject *__pyx_kp_s_121; +static PyObject *__pyx_n_s_123; static PyObject *__pyx_kp_s_124; static PyObject *__pyx_kp_s_125; static PyObject *__pyx_kp_s_126; @@ -2391,49 +2447,49 @@ static PyObject *__pyx_kp_s_129; static PyObject *__pyx_kp_s_13; static PyObject *__pyx_kp_s_130; static PyObject *__pyx_kp_s_131; -static PyObject *__pyx_kp_s_134; -static PyObject *__pyx_kp_s_135; -static PyObject *__pyx_kp_s_139; +static PyObject *__pyx_kp_s_132; +static PyObject *__pyx_kp_s_133; +static PyObject *__pyx_kp_s_136; +static PyObject *__pyx_kp_s_137; static PyObject *__pyx_kp_s_14; -static PyObject *__pyx_kp_s_140; +static PyObject *__pyx_kp_s_141; +static PyObject *__pyx_kp_s_142; static PyObject *__pyx_kp_s_18; static PyObject *__pyx_kp_s_2; -static PyObject *__pyx_kp_s_22; -static PyObject *__pyx_n_s_24; -static PyObject *__pyx_kp_s_28; +static PyObject *__pyx_kp_s_21; +static PyObject *__pyx_kp_s_25; +static PyObject *__pyx_n_s_27; static PyObject *__pyx_kp_s_3; -static PyObject *__pyx_kp_s_32; -static PyObject *__pyx_kp_s_39; +static PyObject *__pyx_kp_s_31; +static PyObject *__pyx_kp_s_35; static PyObject *__pyx_kp_s_4; static PyObject *__pyx_kp_s_42; -static PyObject *__pyx_kp_s_43; static PyObject *__pyx_kp_s_45; -static PyObject *__pyx_kp_s_47; -static PyObject *__pyx_kp_s_49; +static PyObject *__pyx_kp_s_46; +static PyObject *__pyx_kp_s_48; static PyObject *__pyx_kp_s_5; -static PyObject *__pyx_kp_s_53; -static PyObject *__pyx_kp_s_55; +static PyObject *__pyx_kp_s_50; +static PyObject *__pyx_kp_s_52; static PyObject *__pyx_kp_s_56; -static PyObject *__pyx_kp_s_57; +static PyObject *__pyx_kp_s_58; static PyObject *__pyx_kp_s_59; static PyObject *__pyx_kp_s_6; -static PyObject *__pyx_kp_s_61; +static PyObject *__pyx_kp_s_60; static PyObject *__pyx_kp_s_62; -static PyObject *__pyx_kp_s_63; static PyObject *__pyx_kp_s_64; static PyObject *__pyx_kp_s_65; static PyObject *__pyx_kp_s_66; static PyObject *__pyx_kp_s_67; -static PyObject *__pyx_n_s_68; -static PyObject *__pyx_n_s_69; +static PyObject *__pyx_kp_s_68; +static PyObject *__pyx_kp_s_69; static PyObject *__pyx_kp_s_7; -static PyObject *__pyx_kp_s_70; +static PyObject *__pyx_n_s_70; +static PyObject *__pyx_n_s_71; static PyObject *__pyx_kp_s_72; static PyObject *__pyx_kp_s_74; static PyObject *__pyx_kp_s_76; +static PyObject *__pyx_kp_s_78; static PyObject *__pyx_kp_s_8; -static PyObject *__pyx_kp_s_81; -static PyObject *__pyx_kp_s_82; static PyObject *__pyx_kp_s_83; static PyObject *__pyx_kp_s_84; static PyObject *__pyx_kp_s_85; @@ -2443,10 +2499,10 @@ static PyObject *__pyx_kp_s_88; static PyObject *__pyx_kp_s_89; static PyObject *__pyx_kp_s_9; static PyObject *__pyx_kp_s_90; +static PyObject *__pyx_kp_s_91; static PyObject *__pyx_kp_s_92; -static PyObject *__pyx_kp_s_93; -static PyObject *__pyx_kp_s_98; -static PyObject *__pyx_kp_s_99; +static PyObject *__pyx_kp_s_94; +static PyObject *__pyx_kp_s_95; static PyObject *__pyx_kp_s__0; static PyObject *__pyx_kp_s__1; static PyObject *__pyx_n_s__END_OF_FILE; @@ -2564,7 +2620,9 @@ static PyObject *__pyx_n_s__range; static PyObject *__pyx_n_s__reachable; static PyObject *__pyx_n_s__reachable_buffer; static PyObject *__pyx_n_s__read_binary; +static PyObject *__pyx_n_s__read_bitext; static PyObject *__pyx_n_s__read_text; +static PyObject *__pyx_n_s__read_text_data; static PyObject *__pyx_n_s__res; static PyObject *__pyx_n_s__reset; static PyObject *__pyx_n_s__resource; @@ -2581,9 +2639,11 @@ static PyObject *__pyx_n_s__scores; static PyObject *__pyx_n_s__seek; static PyObject *__pyx_n_s__setdefault; static PyObject *__pyx_n_s__shortest; +static PyObject *__pyx_n_s__side; static PyObject *__pyx_n_s__size; static PyObject *__pyx_n_s__skip; static PyObject *__pyx_n_s__sorted; +static PyObject *__pyx_n_s__source; static PyObject *__pyx_n_s__spanlen; static PyObject *__pyx_n_s__split; static PyObject *__pyx_n_s__start; @@ -2618,8 +2678,8 @@ static PyObject *__pyx_int_10; static PyObject *__pyx_int_20; static PyObject *__pyx_int_1000; static PyObject *__pyx_int_65536; -static PyObject *__pyx_k_38; -static PyObject *__pyx_k_97; +static PyObject *__pyx_k_41; +static PyObject *__pyx_k_99; static PyObject *__pyx_k_tuple_10; static PyObject *__pyx_k_tuple_11; static PyObject *__pyx_k_tuple_12; @@ -2628,49 +2688,51 @@ static PyObject *__pyx_k_tuple_16; static PyObject *__pyx_k_tuple_17; static PyObject *__pyx_k_tuple_19; static PyObject *__pyx_k_tuple_20; -static PyObject *__pyx_k_tuple_21; +static PyObject *__pyx_k_tuple_22; static PyObject *__pyx_k_tuple_23; -static PyObject *__pyx_k_tuple_25; +static PyObject *__pyx_k_tuple_24; static PyObject *__pyx_k_tuple_26; -static PyObject *__pyx_k_tuple_27; +static PyObject *__pyx_k_tuple_28; static PyObject *__pyx_k_tuple_29; static PyObject *__pyx_k_tuple_30; -static PyObject *__pyx_k_tuple_31; +static PyObject *__pyx_k_tuple_32; static PyObject *__pyx_k_tuple_33; static PyObject *__pyx_k_tuple_34; -static PyObject *__pyx_k_tuple_35; static PyObject *__pyx_k_tuple_36; static PyObject *__pyx_k_tuple_37; +static PyObject *__pyx_k_tuple_38; +static PyObject *__pyx_k_tuple_39; static PyObject *__pyx_k_tuple_40; -static PyObject *__pyx_k_tuple_41; +static PyObject *__pyx_k_tuple_43; static PyObject *__pyx_k_tuple_44; -static PyObject *__pyx_k_tuple_46; -static PyObject *__pyx_k_tuple_48; -static PyObject *__pyx_k_tuple_50; +static PyObject *__pyx_k_tuple_47; +static PyObject *__pyx_k_tuple_49; static PyObject *__pyx_k_tuple_51; -static PyObject *__pyx_k_tuple_52; +static PyObject *__pyx_k_tuple_53; static PyObject *__pyx_k_tuple_54; -static PyObject *__pyx_k_tuple_58; -static PyObject *__pyx_k_tuple_60; -static PyObject *__pyx_k_tuple_71; +static PyObject *__pyx_k_tuple_55; +static PyObject *__pyx_k_tuple_57; +static PyObject *__pyx_k_tuple_61; +static PyObject *__pyx_k_tuple_63; static PyObject *__pyx_k_tuple_73; static PyObject *__pyx_k_tuple_75; static PyObject *__pyx_k_tuple_77; -static PyObject *__pyx_k_tuple_78; static PyObject *__pyx_k_tuple_79; static PyObject *__pyx_k_tuple_80; -static PyObject *__pyx_k_tuple_91; -static PyObject *__pyx_k_tuple_94; -static PyObject *__pyx_k_tuple_95; +static PyObject *__pyx_k_tuple_81; +static PyObject *__pyx_k_tuple_82; +static PyObject *__pyx_k_tuple_93; static PyObject *__pyx_k_tuple_96; -static PyObject *__pyx_k_tuple_100; -static PyObject *__pyx_k_tuple_105; -static PyObject *__pyx_k_tuple_120; -static PyObject *__pyx_k_tuple_132; -static PyObject *__pyx_k_tuple_136; -static PyObject *__pyx_k_tuple_137; -static PyObject *__pyx_k_codeobj_133; -static PyObject *__pyx_k_codeobj_138; +static PyObject *__pyx_k_tuple_97; +static PyObject *__pyx_k_tuple_98; +static PyObject *__pyx_k_tuple_102; +static PyObject *__pyx_k_tuple_107; +static PyObject *__pyx_k_tuple_122; +static PyObject *__pyx_k_tuple_134; +static PyObject *__pyx_k_tuple_138; +static PyObject *__pyx_k_tuple_139; +static PyObject *__pyx_k_codeobj_135; +static PyObject *__pyx_k_codeobj_140; /* "_sa.pyx":5 * import gzip @@ -6290,27 +6352,30 @@ static int __pyx_pw_3_sa_9DataArray_1__cinit__(PyObject *__pyx_v_self, PyObject static int __pyx_pw_3_sa_9DataArray_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_from_binary = 0; PyObject *__pyx_v_from_text = 0; + PyObject *__pyx_v_side = 0; int __pyx_v_use_sent_id; int __pyx_r; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__from_binary,&__pyx_n_s__from_text,&__pyx_n_s__use_sent_id,0}; - PyObject* values[3] = {0,0,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__from_binary,&__pyx_n_s__from_text,&__pyx_n_s__side,&__pyx_n_s__use_sent_id,0}; + PyObject* values[4] = {0,0,0,0}; /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":17 * cdef bint use_sent_id * - * def __cinit__(self, from_binary=None, from_text=None, bint use_sent_id=False): # <<<<<<<<<<<<<< + * def __cinit__(self, from_binary=None, from_text=None, side=None, bint use_sent_id=False): # <<<<<<<<<<<<<< * self.word2id = {"END_OF_FILE":0, "END_OF_LINE":1} * self.id2word = ["END_OF_FILE", "END_OF_LINE"] */ values[0] = ((PyObject *)Py_None); values[1] = ((PyObject *)Py_None); + values[2] = ((PyObject *)Py_None); if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); switch (pos_args) { + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); @@ -6331,15 +6396,21 @@ static int __pyx_pw_3_sa_9DataArray_1__cinit__(PyObject *__pyx_v_self, PyObject } case 2: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__use_sent_id); + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__side); if (value) { values[2] = value; kw_args--; } } + case 3: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__use_sent_id); + if (value) { values[3] = value; kw_args--; } + } } if (unlikely(kw_args > 0)) { if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); @@ -6349,32 +6420,34 @@ static int __pyx_pw_3_sa_9DataArray_1__cinit__(PyObject *__pyx_v_self, PyObject } __pyx_v_from_binary = values[0]; __pyx_v_from_text = values[1]; - if (values[2]) { - __pyx_v_use_sent_id = __Pyx_PyObject_IsTrue(values[2]); if (unlikely((__pyx_v_use_sent_id == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_side = values[2]; + if (values[3]) { + __pyx_v_use_sent_id = __Pyx_PyObject_IsTrue(values[3]); if (unlikely((__pyx_v_use_sent_id == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } else { __pyx_v_use_sent_id = ((int)0); } } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 0, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 0, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_sa.DataArray.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return -1; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_3_sa_9DataArray___cinit__(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), __pyx_v_from_binary, __pyx_v_from_text, __pyx_v_use_sent_id); + __pyx_r = __pyx_pf_3_sa_9DataArray___cinit__(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), __pyx_v_from_binary, __pyx_v_from_text, __pyx_v_side, __pyx_v_use_sent_id); __Pyx_RefNannyFinishContext(); return __pyx_r; } -static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text, int __pyx_v_use_sent_id) { +static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text, PyObject *__pyx_v_side, int __pyx_v_use_sent_id) { int __pyx_r; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; int __pyx_t_2; PyObject *__pyx_t_3 = NULL; PyObject *__pyx_t_4 = NULL; + long __pyx_t_5; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; @@ -6382,7 +6455,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":18 * - * def __cinit__(self, from_binary=None, from_text=None, bint use_sent_id=False): + * def __cinit__(self, from_binary=None, from_text=None, side=None, bint use_sent_id=False): * self.word2id = {"END_OF_FILE":0, "END_OF_LINE":1} # <<<<<<<<<<<<<< * self.id2word = ["END_OF_FILE", "END_OF_LINE"] * self.data = IntList(1000,1000) @@ -6398,7 +6471,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ __pyx_t_1 = 0; /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":19 - * def __cinit__(self, from_binary=None, from_text=None, bint use_sent_id=False): + * def __cinit__(self, from_binary=None, from_text=None, side=None, bint use_sent_id=False): * self.word2id = {"END_OF_FILE":0, "END_OF_LINE":1} * self.id2word = ["END_OF_FILE", "END_OF_LINE"] # <<<<<<<<<<<<<< * self.data = IntList(1000,1000) @@ -6487,7 +6560,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ * if from_binary: * self.read_binary(from_binary) # <<<<<<<<<<<<<< * elif from_text: - * self.read_text(from_text) + * if side: */ __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_binary); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -6508,8 +6581,8 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ * if from_binary: * self.read_binary(from_binary) * elif from_text: # <<<<<<<<<<<<<< - * self.read_text(from_text) - * + * if side: + * self.read_bitext(from_text, (0 if side == 'source' else 1)) */ __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_from_text); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { @@ -6517,22 +6590,68 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":27 * self.read_binary(from_binary) * elif from_text: - * self.read_text(from_text) # <<<<<<<<<<<<<< + * if side: # <<<<<<<<<<<<<< + * self.read_bitext(from_text, (0 if side == 'source' else 1)) + * else: + */ + __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_side); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__pyx_t_2) { + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":28 + * elif from_text: + * if side: + * self.read_bitext(from_text, (0 if side == 'source' else 1)) # <<<<<<<<<<<<<< + * else: + * self.read_text(from_text) + */ + __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_bitext); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_2 = __Pyx_PyString_Equals(__pyx_v_side, ((PyObject *)__pyx_n_s__source), Py_EQ); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__pyx_t_2) { + __pyx_t_5 = 0; + } else { + __pyx_t_5 = 1; + } + __pyx_t_3 = PyInt_FromLong(__pyx_t_5); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_INCREF(__pyx_v_from_text); + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_from_text); + __Pyx_GIVEREF(__pyx_v_from_text); + PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L4; + } + /*else*/ { + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":30 + * self.read_bitext(from_text, (0 if side == 'source' else 1)) + * else: + * self.read_text(from_text) # <<<<<<<<<<<<<< * * def __len__(self): */ - __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_text); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(__pyx_v_from_text); - PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_from_text); - __Pyx_GIVEREF(__pyx_v_from_text); - __pyx_t_1 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_text); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_INCREF(__pyx_v_from_text); + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_from_text); + __Pyx_GIVEREF(__pyx_v_from_text); + __pyx_t_4 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + } + __pyx_L4:; goto __pyx_L3; } __pyx_L3:; @@ -6561,8 +6680,8 @@ static Py_ssize_t __pyx_pw_3_sa_9DataArray_3__len__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":29 - * self.read_text(from_text) +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":32 + * self.read_text(from_text) * * def __len__(self): # <<<<<<<<<<<<<< * return len(self.data) @@ -6579,7 +6698,7 @@ static Py_ssize_t __pyx_pf_3_sa_9DataArray_2__len__(struct __pyx_obj_3_sa_DataAr int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__len__", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":30 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":33 * * def __len__(self): * return len(self.data) # <<<<<<<<<<<<<< @@ -6588,7 +6707,7 @@ static Py_ssize_t __pyx_pf_3_sa_9DataArray_2__len__(struct __pyx_obj_3_sa_DataAr */ __pyx_t_1 = ((PyObject *)__pyx_v_self->data); __Pyx_INCREF(__pyx_t_1); - __pyx_t_2 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_r = __pyx_t_2; goto __pyx_L0; @@ -6615,7 +6734,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_5getSentId(PyObject *__pyx_v_self, PyO return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":32 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":35 * return len(self.data) * * def getSentId(self, i): # <<<<<<<<<<<<<< @@ -6633,7 +6752,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_4getSentId(struct __pyx_obj_3_sa_DataA int __pyx_clineno = 0; __Pyx_RefNannySetupContext("getSentId", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":33 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":36 * * def getSentId(self, i): * return self.sent_id.arr[i] # <<<<<<<<<<<<<< @@ -6641,8 +6760,8 @@ static PyObject *__pyx_pf_3_sa_9DataArray_4getSentId(struct __pyx_obj_3_sa_DataA * def getSent(self, i): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_1 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = PyInt_FromLong((__pyx_v_self->sent_id->arr[__pyx_t_1])); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_1 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong((__pyx_v_self->sent_id->arr[__pyx_t_1])); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_r = __pyx_t_2; __pyx_t_2 = 0; @@ -6671,7 +6790,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_7getSent(PyObject *__pyx_v_self, PyObj return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":35 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":38 * return self.sent_id.arr[i] * * def getSent(self, i): # <<<<<<<<<<<<<< @@ -6696,42 +6815,42 @@ static PyObject *__pyx_pf_3_sa_9DataArray_6getSent(struct __pyx_obj_3_sa_DataArr __Pyx_RefNannySetupContext("getSent", 0); __Pyx_INCREF(__pyx_v_i); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":37 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":40 * def getSent(self, i): * cdef int j, start, stop * sent = [] # <<<<<<<<<<<<<< * start = self.sent_index.arr[i] * stop = self.sent_index.arr[i+1] */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_sent = __pyx_t_1; __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":38 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":41 * cdef int j, start, stop * sent = [] * start = self.sent_index.arr[i] # <<<<<<<<<<<<<< * stop = self.sent_index.arr[i+1] * for i from start <= i < stop: */ - __pyx_t_2 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_2 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_2 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_start = (__pyx_v_self->sent_index->arr[__pyx_t_2]); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":39 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":42 * sent = [] * start = self.sent_index.arr[i] * stop = self.sent_index.arr[i+1] # <<<<<<<<<<<<<< * for i from start <= i < stop: * sent.append(self.id2word[self.data.arr[i]]) */ - __pyx_t_1 = PyNumber_Add(__pyx_v_i, __pyx_int_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Add(__pyx_v_i, __pyx_int_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyIndex_AsSsize_t(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyIndex_AsSsize_t(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_stop = (__pyx_v_self->sent_index->arr[__pyx_t_2]); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":40 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":43 * start = self.sent_index.arr[i] * stop = self.sent_index.arr[i+1] * for i from start <= i < stop: # <<<<<<<<<<<<<< @@ -6740,41 +6859,41 @@ static PyObject *__pyx_pf_3_sa_9DataArray_6getSent(struct __pyx_obj_3_sa_DataArr */ __pyx_t_3 = __pyx_v_stop; for (__pyx_t_4 = __pyx_v_start; __pyx_t_4 < __pyx_t_3; __pyx_t_4++) { - __pyx_t_1 = PyInt_FromLong(__pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromLong(__pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_v_i); __pyx_v_i = __pyx_t_1; __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":41 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":44 * stop = self.sent_index.arr[i+1] * for i from start <= i < stop: * sent.append(self.id2word[self.data.arr[i]]) # <<<<<<<<<<<<<< * return sent * */ - __pyx_t_2 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_2 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_self->id2word, (__pyx_v_self->data->arr[__pyx_t_2]), sizeof(int), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_2 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_self->id2word, (__pyx_v_self->data->arr[__pyx_t_2]), sizeof(int), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyList_Append(__pyx_v_sent, __pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyList_Append(__pyx_v_sent, __pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_v_i); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_v_i); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":40 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":43 * start = self.sent_index.arr[i] * stop = self.sent_index.arr[i+1] * for i from start <= i < stop: # <<<<<<<<<<<<<< * sent.append(self.id2word[self.data.arr[i]]) * return sent */ - __pyx_t_1 = PyInt_FromLong(__pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromLong(__pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_v_i); __pyx_v_i = __pyx_t_1; __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":42 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":45 * for i from start <= i < stop: * sent.append(self.id2word[self.data.arr[i]]) * return sent # <<<<<<<<<<<<<< @@ -6811,7 +6930,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_9getSentPos(PyObject *__pyx_v_self, Py return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":44 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":47 * return sent * * def getSentPos(self, loc): # <<<<<<<<<<<<<< @@ -6830,7 +6949,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_8getSentPos(struct __pyx_obj_3_sa_Data int __pyx_clineno = 0; __Pyx_RefNannySetupContext("getSentPos", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":45 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":48 * * def getSentPos(self, loc): * return loc - self.sent_index.arr[self.sent_id.arr[loc]] # <<<<<<<<<<<<<< @@ -6838,10 +6957,10 @@ static PyObject *__pyx_pf_3_sa_9DataArray_8getSentPos(struct __pyx_obj_3_sa_Data * def get_id(self, word): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyIndex_AsSsize_t(__pyx_v_loc); if (unlikely((__pyx_t_1 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = PyInt_FromLong((__pyx_v_self->sent_index->arr[(__pyx_v_self->sent_id->arr[__pyx_t_1])])); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyIndex_AsSsize_t(__pyx_v_loc); if (unlikely((__pyx_t_1 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong((__pyx_v_self->sent_index->arr[(__pyx_v_self->sent_id->arr[__pyx_t_1])])); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyNumber_Subtract(__pyx_v_loc, __pyx_t_2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyNumber_Subtract(__pyx_v_loc, __pyx_t_2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_r = __pyx_t_3; @@ -6872,7 +6991,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_11get_id(PyObject *__pyx_v_self, PyObj return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":47 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":50 * return loc - self.sent_index.arr[self.sent_id.arr[loc]] * * def get_id(self, word): # <<<<<<<<<<<<<< @@ -6892,18 +7011,18 @@ static PyObject *__pyx_pf_3_sa_9DataArray_10get_id(struct __pyx_obj_3_sa_DataArr int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_id", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":48 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":51 * * def get_id(self, word): * if not word in self.word2id: # <<<<<<<<<<<<<< * self.word2id[word] = len(self.id2word) * self.id2word.append(word) */ - __pyx_t_1 = ((PySequence_Contains(__pyx_v_self->word2id, __pyx_v_word))); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = ((PySequence_Contains(__pyx_v_self->word2id, __pyx_v_word))); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_2 = (!__pyx_t_1); if (__pyx_t_2) { - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":49 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":52 * def get_id(self, word): * if not word in self.word2id: * self.word2id[word] = len(self.id2word) # <<<<<<<<<<<<<< @@ -6912,28 +7031,28 @@ static PyObject *__pyx_pf_3_sa_9DataArray_10get_id(struct __pyx_obj_3_sa_DataArr */ __pyx_t_3 = __pyx_v_self->id2word; __Pyx_INCREF(__pyx_t_3); - __pyx_t_4 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyInt_FromSsize_t(__pyx_t_4); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromSsize_t(__pyx_t_4); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - if (PyObject_SetItem(__pyx_v_self->word2id, __pyx_v_word, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyObject_SetItem(__pyx_v_self->word2id, __pyx_v_word, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":50 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":53 * if not word in self.word2id: * self.word2id[word] = len(self.id2word) * self.id2word.append(word) # <<<<<<<<<<<<<< * return self.word2id[word] * */ - __pyx_t_3 = __Pyx_PyObject_Append(__pyx_v_self->id2word, __pyx_v_word); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_Append(__pyx_v_self->id2word, __pyx_v_word); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L3; } __pyx_L3:; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":51 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":54 * self.word2id[word] = len(self.id2word) * self.id2word.append(word) * return self.word2id[word] # <<<<<<<<<<<<<< @@ -6941,7 +7060,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_10get_id(struct __pyx_obj_3_sa_DataArr * def get_word(self, id): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = PyObject_GetItem(__pyx_v_self->word2id, __pyx_v_word); if (!__pyx_t_3) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetItem(__pyx_v_self->word2id, __pyx_v_word); if (!__pyx_t_3) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __pyx_r = __pyx_t_3; __pyx_t_3 = 0; @@ -6970,7 +7089,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_13get_word(PyObject *__pyx_v_self, PyO return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":53 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":56 * return self.word2id[word] * * def get_word(self, id): # <<<<<<<<<<<<<< @@ -6987,7 +7106,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_12get_word(struct __pyx_obj_3_sa_DataA int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_word", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":54 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":57 * * def get_word(self, id): * return self.id2word[id] # <<<<<<<<<<<<<< @@ -6995,7 +7114,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_12get_word(struct __pyx_obj_3_sa_DataA * def write_text(self, char* filename): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyObject_GetItem(__pyx_v_self->id2word, __pyx_v_id); if (!__pyx_t_1) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetItem(__pyx_v_self->id2word, __pyx_v_id); if (!__pyx_t_1) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; @@ -7021,7 +7140,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_15write_text(PyObject *__pyx_v_self, P __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("write_text (wrapper)", 0); assert(__pyx_arg_filename); { - __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L3_error:; @@ -7034,7 +7153,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_15write_text(PyObject *__pyx_v_self, P return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":56 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":59 * return self.id2word[id] * * def write_text(self, char* filename): # <<<<<<<<<<<<<< @@ -7066,7 +7185,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_text", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":57 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":60 * * def write_text(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -7074,9 +7193,9 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat * if w_id > 1: */ /*with:*/ { - __pyx_t_1 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_t_1)); __Pyx_GIVEREF(((PyObject *)__pyx_t_1)); @@ -7084,14 +7203,14 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat PyTuple_SET_ITEM(__pyx_t_2, 1, ((PyObject *)__pyx_n_s__w)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__w)); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_builtin_open, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_builtin_open, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____exit__); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____exit__); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____enter__); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____enter__); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_t_4 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -7106,7 +7225,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat __pyx_v_f = __pyx_t_4; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":58 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":61 * def write_text(self, char* filename): * with open(filename, "w") as f: * for w_id in self.data: # <<<<<<<<<<<<<< @@ -7117,7 +7236,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat __pyx_t_4 = ((PyObject *)__pyx_v_self->data); __Pyx_INCREF(__pyx_t_4); __pyx_t_8 = 0; __pyx_t_9 = NULL; } else { - __pyx_t_8 = -1; __pyx_t_4 = PyObject_GetIter(((PyObject *)__pyx_v_self->data)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_8 = -1; __pyx_t_4 = PyObject_GetIter(((PyObject *)__pyx_v_self->data)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_4); __pyx_t_9 = Py_TYPE(__pyx_t_4)->tp_iternext; } @@ -7127,21 +7246,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_1 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_8); __Pyx_INCREF(__pyx_t_1); __pyx_t_8++; #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L7_error;}; + __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L7_error;}; #endif } else if (!__pyx_t_9 && PyTuple_CheckExact(__pyx_t_4)) { if (__pyx_t_8 >= PyTuple_GET_SIZE(__pyx_t_4)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_8); __Pyx_INCREF(__pyx_t_1); __pyx_t_8++; #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L7_error;}; + __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L7_error;}; #endif } else { __pyx_t_1 = __pyx_t_9(__pyx_t_4); if (unlikely(!__pyx_t_1)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L7_error;} } break; } @@ -7151,48 +7270,48 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat __pyx_v_w_id = __pyx_t_1; __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":59 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":62 * with open(filename, "w") as f: * for w_id in self.data: * if w_id > 1: # <<<<<<<<<<<<<< * f.write("%s " % self.get_word(w_id)) * if w_id == 1: */ - __pyx_t_1 = PyObject_RichCompare(__pyx_v_w_id, __pyx_int_1, Py_GT); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_1 = PyObject_RichCompare(__pyx_v_w_id, __pyx_int_1, Py_GT); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_10 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_10 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_10) { - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":60 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":63 * for w_id in self.data: * if w_id > 1: * f.write("%s " % self.get_word(w_id)) # <<<<<<<<<<<<<< * if w_id == 1: * f.write("\n") */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__get_word); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_2 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__get_word); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_11 = PyTuple_New(1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_11 = PyTuple_New(1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_INCREF(__pyx_v_w_id); PyTuple_SET_ITEM(__pyx_t_11, 0, __pyx_v_w_id); __Pyx_GIVEREF(__pyx_v_w_id); - __pyx_t_12 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_t_11), NULL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_12 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_t_11), NULL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_12); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0; - __pyx_t_11 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_13), __pyx_t_12); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_11 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_13), __pyx_t_12); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_11)); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; - __pyx_t_12 = PyTuple_New(1); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_12 = PyTuple_New(1); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_12); PyTuple_SET_ITEM(__pyx_t_12, 0, ((PyObject *)__pyx_t_11)); __Pyx_GIVEREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0; - __pyx_t_11 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_12), NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_11 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_12), NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_12)); __pyx_t_12 = 0; @@ -7201,29 +7320,29 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat } __pyx_L18:; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":61 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":64 * if w_id > 1: * f.write("%s " % self.get_word(w_id)) * if w_id == 1: # <<<<<<<<<<<<<< * f.write("\n") * */ - __pyx_t_11 = PyObject_RichCompare(__pyx_v_w_id, __pyx_int_1, Py_EQ); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_11 = PyObject_RichCompare(__pyx_v_w_id, __pyx_int_1, Py_EQ); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_11); if (unlikely(__pyx_t_10 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_11); if (unlikely(__pyx_t_10 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; if (__pyx_t_10) { - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":62 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":65 * f.write("%s " % self.get_word(w_id)) * if w_id == 1: * f.write("\n") # <<<<<<<<<<<<<< * * def read_text(self, char* filename): */ - __pyx_t_11 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_11 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_12 = PyObject_Call(__pyx_t_11, ((PyObject *)__pyx_k_tuple_15), NULL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_12 = PyObject_Call(__pyx_t_11, ((PyObject *)__pyx_k_tuple_15), NULL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_12); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; @@ -7244,7 +7363,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat __Pyx_XDECREF(__pyx_t_12); __pyx_t_12 = 0; __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":57 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":60 * * def write_text(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -7253,11 +7372,11 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat */ /*except:*/ { __Pyx_AddTraceback("_sa.DataArray.write_text", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_4, &__pyx_t_12, &__pyx_t_11) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + if (__Pyx_GetException(&__pyx_t_4, &__pyx_t_12, &__pyx_t_11) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_GOTREF(__pyx_t_12); __Pyx_GOTREF(__pyx_t_11); - __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_4); @@ -7270,11 +7389,11 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat __Pyx_GIVEREF(__pyx_t_11); __pyx_t_13 = PyObject_Call(__pyx_t_3, __pyx_t_1, NULL); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} __Pyx_GOTREF(__pyx_t_13); __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_13); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - if (unlikely(__pyx_t_10 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + if (unlikely(__pyx_t_10 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} __pyx_t_14 = (!__pyx_t_10); if (__pyx_t_14) { __Pyx_GIVEREF(__pyx_t_4); @@ -7282,7 +7401,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat __Pyx_GIVEREF(__pyx_t_11); __Pyx_ErrRestore(__pyx_t_4, __pyx_t_12, __pyx_t_11); __pyx_t_4 = 0; __pyx_t_12 = 0; __pyx_t_11 = 0; - {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} goto __pyx_L22; } __pyx_L22:; @@ -7310,11 +7429,11 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat if (__pyx_t_3) { __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_16, NULL); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __pyx_t_14 = __Pyx_PyObject_IsTrue(__pyx_t_7); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - if (unlikely(__pyx_t_14 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__pyx_t_14 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } } goto __pyx_L23; @@ -7350,7 +7469,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_17read_text(PyObject *__pyx_v_self, Py __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("read_text (wrapper)", 0); assert(__pyx_arg_filename); { - __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L3_error:; @@ -7363,20 +7482,16 @@ static PyObject *__pyx_pw_3_sa_9DataArray_17read_text(PyObject *__pyx_v_self, Py return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":64 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":67 * f.write("\n") * * def read_text(self, char* filename): # <<<<<<<<<<<<<< - * cdef int word_count = 0 * with gzip_or_text(filename) as fp: + * self.read_text_data(fp) */ static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename) { - int __pyx_v_word_count; PyObject *__pyx_v_fp = NULL; - PyObject *__pyx_v_line_num = NULL; - PyObject *__pyx_v_line = NULL; - PyObject *__pyx_v_word = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; @@ -7386,56 +7501,446 @@ static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_Data PyObject *__pyx_t_5 = NULL; PyObject *__pyx_t_6 = NULL; PyObject *__pyx_t_7 = NULL; - Py_ssize_t __pyx_t_8; - PyObject *(*__pyx_t_9)(PyObject *); + PyObject *__pyx_t_8 = NULL; + int __pyx_t_9; PyObject *__pyx_t_10 = NULL; - Py_ssize_t __pyx_t_11; - PyObject *(*__pyx_t_12)(PyObject *); - PyObject *__pyx_t_13 = NULL; - PyObject *__pyx_t_14 = NULL; - int __pyx_t_15; - PyObject *__pyx_t_16 = NULL; - int __pyx_t_17; + int __pyx_t_11; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("read_text", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":65 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":68 * * def read_text(self, char* filename): - * cdef int word_count = 0 # <<<<<<<<<<<<<< + * with gzip_or_text(filename) as fp: # <<<<<<<<<<<<<< + * self.read_text_data(fp) + * + */ + /*with:*/ { + __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__gzip_or_text); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(((PyObject *)__pyx_t_2)); + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_2)); + __Pyx_GIVEREF(((PyObject *)__pyx_t_2)); + __pyx_t_2 = 0; + __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; + __pyx_t_4 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s____exit__); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s____enter__); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + /*try:*/ { + { + __Pyx_ExceptionSave(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7); + __Pyx_XGOTREF(__pyx_t_5); + __Pyx_XGOTREF(__pyx_t_6); + __Pyx_XGOTREF(__pyx_t_7); + /*try:*/ { + __Pyx_INCREF(__pyx_t_1); + __pyx_v_fp = __pyx_t_1; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":69 + * def read_text(self, char* filename): * with gzip_or_text(filename) as fp: - * for line_num, line in enumerate(fp): + * self.read_text_data(fp) # <<<<<<<<<<<<<< + * + * def read_bitext(self, char* filename, int side): */ - __pyx_v_word_count = 0; + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_text_data); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __Pyx_GOTREF(__pyx_t_2); + __Pyx_INCREF(__pyx_v_fp); + PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_fp); + __Pyx_GIVEREF(__pyx_v_fp); + __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + } + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; + goto __pyx_L14_try_end; + __pyx_L7_error:; + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":66 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":68 + * * def read_text(self, char* filename): - * cdef int word_count = 0 * with gzip_or_text(filename) as fp: # <<<<<<<<<<<<<< - * for line_num, line in enumerate(fp): - * self.sent_index.append(word_count) + * self.read_text_data(fp) + * + */ + /*except:*/ { + __Pyx_AddTraceback("_sa.DataArray.read_text", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_3, &__pyx_t_2, &__pyx_t_1) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + __Pyx_GOTREF(__pyx_t_3); + __Pyx_GOTREF(__pyx_t_2); + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_8 = PyTuple_New(3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + __Pyx_GOTREF(__pyx_t_8); + __Pyx_INCREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + __Pyx_INCREF(__pyx_t_2); + PyTuple_SET_ITEM(__pyx_t_8, 1, __pyx_t_2); + __Pyx_GIVEREF(__pyx_t_2); + __Pyx_INCREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_8, 2, __pyx_t_1); + __Pyx_GIVEREF(__pyx_t_1); + __pyx_t_10 = PyObject_Call(__pyx_t_4, __pyx_t_8, NULL); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + __Pyx_GOTREF(__pyx_t_10); + __pyx_t_9 = __Pyx_PyObject_IsTrue(__pyx_t_10); + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + if (unlikely(__pyx_t_9 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + __pyx_t_11 = (!__pyx_t_9); + if (__pyx_t_11) { + __Pyx_GIVEREF(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_2); + __Pyx_GIVEREF(__pyx_t_1); + __Pyx_ErrRestore(__pyx_t_3, __pyx_t_2, __pyx_t_1); + __pyx_t_3 = 0; __pyx_t_2 = 0; __pyx_t_1 = 0; + {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + goto __pyx_L18; + } + __pyx_L18:; + __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + goto __pyx_L8_exception_handled; + } + __pyx_L9_except_error:; + __Pyx_XGIVEREF(__pyx_t_5); + __Pyx_XGIVEREF(__pyx_t_6); + __Pyx_XGIVEREF(__pyx_t_7); + __Pyx_ExceptionReset(__pyx_t_5, __pyx_t_6, __pyx_t_7); + goto __pyx_L1_error; + __pyx_L8_exception_handled:; + __Pyx_XGIVEREF(__pyx_t_5); + __Pyx_XGIVEREF(__pyx_t_6); + __Pyx_XGIVEREF(__pyx_t_7); + __Pyx_ExceptionReset(__pyx_t_5, __pyx_t_6, __pyx_t_7); + __pyx_L14_try_end:; + } + } + /*finally:*/ { + if (__pyx_t_4) { + __pyx_t_7 = PyObject_Call(__pyx_t_4, __pyx_k_tuple_17, NULL); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_7); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + if (unlikely(__pyx_t_11 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + } + goto __pyx_L19; + __pyx_L3_error:; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + goto __pyx_L1_error; + __pyx_L19:; + } + + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("_sa.DataArray.read_text", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_fp); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_3_sa_9DataArray_19read_bitext(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyObject *__pyx_pw_3_sa_9DataArray_19read_bitext(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + char *__pyx_v_filename; + int __pyx_v_side; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("read_bitext (wrapper)", 0); + { + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__filename,&__pyx_n_s__side,0}; + PyObject* values[2] = {0,0}; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args; + const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); + switch (pos_args) { + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = PyDict_Size(__pyx_kwds); + switch (pos_args) { + case 0: + if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__filename)) != 0)) kw_args--; + else goto __pyx_L5_argtuple_error; + case 1: + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__side)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("read_bitext", 1, 2, 2, 1); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "read_bitext") < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + } else if (PyTuple_GET_SIZE(__pyx_args) != 2) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + } + __pyx_v_filename = PyBytes_AsString(values[0]); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_side = __Pyx_PyInt_AsInt(values[1]); if (unlikely((__pyx_v_side == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("read_bitext", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_L3_error:; + __Pyx_AddTraceback("_sa.DataArray.read_bitext", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_3_sa_9DataArray_18read_bitext(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), __pyx_v_filename, __pyx_v_side); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} +static PyObject *__pyx_gb_3_sa_9DataArray_11read_bitext_2generator3(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ + +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":73 + * def read_bitext(self, char* filename, int side): + * with gzip_or_text(filename) as fp: + * data = (line.split(' ||| ')[side] for line in fp) # <<<<<<<<<<<<<< + * self.read_text_data(data) + * + */ + +static PyObject *__pyx_pf_3_sa_9DataArray_11read_bitext_genexpr(PyObject *__pyx_self) { + struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *__pyx_cur_scope; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("genexpr", 0); + __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *)__pyx_ptype_3_sa___pyx_scope_struct_1_genexpr->tp_new(__pyx_ptype_3_sa___pyx_scope_struct_1_genexpr, __pyx_empty_tuple, NULL); + if (unlikely(!__pyx_cur_scope)) { + __Pyx_RefNannyFinishContext(); + return NULL; + } + __Pyx_GOTREF(__pyx_cur_scope); + __pyx_cur_scope->__pyx_outer_scope = (struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *) __pyx_self; + __Pyx_INCREF(((PyObject *)__pyx_cur_scope->__pyx_outer_scope)); + __Pyx_GIVEREF(__pyx_cur_scope->__pyx_outer_scope); + { + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_3_sa_9DataArray_11read_bitext_2generator3, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_cur_scope); + __Pyx_RefNannyFinishContext(); + return (PyObject *) gen; + } + + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("_sa.DataArray.read_bitext.genexpr", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_DECREF(((PyObject *)__pyx_cur_scope)); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_gb_3_sa_9DataArray_11read_bitext_2generator3(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ +{ + struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *__pyx_cur_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *)__pyx_generator->closure); + PyObject *__pyx_r = NULL; + PyObject *__pyx_t_1 = NULL; + Py_ssize_t __pyx_t_2; + PyObject *(*__pyx_t_3)(PyObject *); + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("None", 0); + switch (__pyx_generator->resume_label) { + case 0: goto __pyx_L3_first_run; + case 1: goto __pyx_L6_resume_from_yield; + default: /* CPython raises the right error here */ + __Pyx_RefNannyFinishContext(); + return NULL; + } + __pyx_L3_first_run:; + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_cur_scope->__pyx_outer_scope->__pyx_v_fp)) { __Pyx_RaiseClosureNameError("fp"); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } + if (PyList_CheckExact(__pyx_cur_scope->__pyx_outer_scope->__pyx_v_fp) || PyTuple_CheckExact(__pyx_cur_scope->__pyx_outer_scope->__pyx_v_fp)) { + __pyx_t_1 = __pyx_cur_scope->__pyx_outer_scope->__pyx_v_fp; __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0; + __pyx_t_3 = NULL; + } else { + __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_cur_scope->__pyx_outer_scope->__pyx_v_fp); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = Py_TYPE(__pyx_t_1)->tp_iternext; + } + for (;;) { + if (!__pyx_t_3 && PyList_CheckExact(__pyx_t_1)) { + if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_1)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; + #else + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + #endif + } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_1)) { + if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; + #else + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + #endif + } else { + __pyx_t_4 = __pyx_t_3(__pyx_t_1); + if (unlikely(!__pyx_t_4)) { + if (PyErr_Occurred()) { + if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); + else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + break; + } + __Pyx_GOTREF(__pyx_t_4); + } + __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_line); + __Pyx_XDECREF(__pyx_cur_scope->__pyx_v_line); + __Pyx_GIVEREF(__pyx_t_4); + __pyx_cur_scope->__pyx_v_line = __pyx_t_4; + __pyx_t_4 = 0; + __pyx_t_4 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_line, __pyx_n_s__split); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_19), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = __Pyx_GetItemInt(__pyx_t_5, __pyx_cur_scope->__pyx_outer_scope->__pyx_v_side, sizeof(int), PyInt_FromLong); if (!__pyx_t_4) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_r = __pyx_t_4; + __pyx_t_4 = 0; + __Pyx_XGIVEREF(__pyx_t_1); + __pyx_cur_scope->__pyx_t_0 = __pyx_t_1; + __pyx_cur_scope->__pyx_t_1 = __pyx_t_2; + __pyx_cur_scope->__pyx_t_2 = __pyx_t_3; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + /* return from generator, yielding value */ + __pyx_generator->resume_label = 1; + return __pyx_r; + __pyx_L6_resume_from_yield:; + __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; + __pyx_cur_scope->__pyx_t_0 = 0; + __Pyx_XGOTREF(__pyx_t_1); + __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; + __pyx_t_3 = __pyx_cur_scope->__pyx_t_2; + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + PyErr_SetNone(PyExc_StopIteration); + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_AddTraceback("genexpr", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_L0:; + __Pyx_XDECREF(__pyx_r); + __pyx_generator->resume_label = -1; + __Pyx_Generator_clear((PyObject*)__pyx_generator); + __Pyx_RefNannyFinishContext(); + return NULL; +} + +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":71 + * self.read_text_data(fp) + * + * def read_bitext(self, char* filename, int side): # <<<<<<<<<<<<<< + * with gzip_or_text(filename) as fp: + * data = (line.split(' ||| ')[side] for line in fp) + */ + +static PyObject *__pyx_pf_3_sa_9DataArray_18read_bitext(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename, int __pyx_v_side) { + struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *__pyx_cur_scope; + PyObject *__pyx_v_data = NULL; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_t_9; + PyObject *__pyx_t_10 = NULL; + int __pyx_t_11; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("read_bitext", 0); + __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *)__pyx_ptype_3_sa___pyx_scope_struct__read_bitext->tp_new(__pyx_ptype_3_sa___pyx_scope_struct__read_bitext, __pyx_empty_tuple, NULL); + if (unlikely(!__pyx_cur_scope)) { + __Pyx_RefNannyFinishContext(); + return NULL; + } + __Pyx_GOTREF(__pyx_cur_scope); + __pyx_cur_scope->__pyx_v_side = __pyx_v_side; + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":72 + * + * def read_bitext(self, char* filename, int side): + * with gzip_or_text(filename) as fp: # <<<<<<<<<<<<<< + * data = (line.split(' ||| ')[side] for line in fp) + * self.read_text_data(data) */ /*with:*/ { - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__gzip_or_text); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__gzip_or_text); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_2)); __Pyx_GIVEREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - __pyx_t_4 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s____exit__); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s____exit__); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s____enter__); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s____enter__); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_t_1 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; @@ -7447,315 +7952,97 @@ static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_Data __Pyx_XGOTREF(__pyx_t_7); /*try:*/ { __Pyx_INCREF(__pyx_t_1); - __pyx_v_fp = __pyx_t_1; + __Pyx_GIVEREF(__pyx_t_1); + __pyx_cur_scope->__pyx_v_fp = __pyx_t_1; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":67 - * cdef int word_count = 0 - * with gzip_or_text(filename) as fp: - * for line_num, line in enumerate(fp): # <<<<<<<<<<<<<< - * self.sent_index.append(word_count) - * for word in line.split(): - */ - __Pyx_INCREF(__pyx_int_0); - __pyx_t_1 = __pyx_int_0; - if (PyList_CheckExact(__pyx_v_fp) || PyTuple_CheckExact(__pyx_v_fp)) { - __pyx_t_2 = __pyx_v_fp; __Pyx_INCREF(__pyx_t_2); __pyx_t_8 = 0; - __pyx_t_9 = NULL; - } else { - __pyx_t_8 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_fp); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_9 = Py_TYPE(__pyx_t_2)->tp_iternext; - } - for (;;) { - if (!__pyx_t_9 && PyList_CheckExact(__pyx_t_2)) { - if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_2)) break; - #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_8); __Pyx_INCREF(__pyx_t_3); __pyx_t_8++; - #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_2, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L7_error;}; - #endif - } else if (!__pyx_t_9 && PyTuple_CheckExact(__pyx_t_2)) { - if (__pyx_t_8 >= PyTuple_GET_SIZE(__pyx_t_2)) break; - #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_8); __Pyx_INCREF(__pyx_t_3); __pyx_t_8++; - #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_2, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L7_error;}; - #endif - } else { - __pyx_t_3 = __pyx_t_9(__pyx_t_2); - if (unlikely(!__pyx_t_3)) { - if (PyErr_Occurred()) { - if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - } - break; - } - __Pyx_GOTREF(__pyx_t_3); - } - __Pyx_XDECREF(__pyx_v_line); - __pyx_v_line = __pyx_t_3; - __pyx_t_3 = 0; - __Pyx_INCREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_v_line_num); - __pyx_v_line_num = __pyx_t_1; - __pyx_t_3 = PyNumber_Add(__pyx_t_1, __pyx_int_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); - __pyx_t_1 = __pyx_t_3; - __pyx_t_3 = 0; - - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":68 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":73 + * def read_bitext(self, char* filename, int side): * with gzip_or_text(filename) as fp: - * for line_num, line in enumerate(fp): - * self.sent_index.append(word_count) # <<<<<<<<<<<<<< - * for word in line.split(): - * self.data.append(self.get_id(word)) - */ - __pyx_t_3 = PyInt_FromLong(__pyx_v_word_count); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_index), __pyx_t_3); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_10); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":69 - * for line_num, line in enumerate(fp): - * self.sent_index.append(word_count) - * for word in line.split(): # <<<<<<<<<<<<<< - * self.data.append(self.get_id(word)) - * if self.use_sent_id: - */ - __pyx_t_10 = PyObject_GetAttr(__pyx_v_line, __pyx_n_s__split); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_10); - __pyx_t_3 = PyObject_Call(__pyx_t_10, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - if (PyList_CheckExact(__pyx_t_3) || PyTuple_CheckExact(__pyx_t_3)) { - __pyx_t_10 = __pyx_t_3; __Pyx_INCREF(__pyx_t_10); __pyx_t_11 = 0; - __pyx_t_12 = NULL; - } else { - __pyx_t_11 = -1; __pyx_t_10 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_10); - __pyx_t_12 = Py_TYPE(__pyx_t_10)->tp_iternext; - } - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - for (;;) { - if (!__pyx_t_12 && PyList_CheckExact(__pyx_t_10)) { - if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_10)) break; - #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_10, __pyx_t_11); __Pyx_INCREF(__pyx_t_3); __pyx_t_11++; - #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_10, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;}; - #endif - } else if (!__pyx_t_12 && PyTuple_CheckExact(__pyx_t_10)) { - if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_10)) break; - #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_10, __pyx_t_11); __Pyx_INCREF(__pyx_t_3); __pyx_t_11++; - #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_10, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;}; - #endif - } else { - __pyx_t_3 = __pyx_t_12(__pyx_t_10); - if (unlikely(!__pyx_t_3)) { - if (PyErr_Occurred()) { - if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - } - break; - } - __Pyx_GOTREF(__pyx_t_3); - } - __Pyx_XDECREF(__pyx_v_word); - __pyx_v_word = __pyx_t_3; - __pyx_t_3 = 0; - - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":70 - * self.sent_index.append(word_count) - * for word in line.split(): - * self.data.append(self.get_id(word)) # <<<<<<<<<<<<<< - * if self.use_sent_id: - * self.sent_id.append(line_num) - */ - __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__get_id); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_13); - __Pyx_INCREF(__pyx_v_word); - PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_v_word); - __Pyx_GIVEREF(__pyx_v_word); - __pyx_t_14 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_13), NULL); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_14); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(((PyObject *)__pyx_t_13)); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->data), __pyx_t_14); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":71 - * for word in line.split(): - * self.data.append(self.get_id(word)) - * if self.use_sent_id: # <<<<<<<<<<<<<< - * self.sent_id.append(line_num) - * word_count = word_count + 1 - */ - if (__pyx_v_self->use_sent_id) { - - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":72 - * self.data.append(self.get_id(word)) - * if self.use_sent_id: - * self.sent_id.append(line_num) # <<<<<<<<<<<<<< - * word_count = word_count + 1 - * self.data.append(1) - */ - __pyx_t_13 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_id), __pyx_v_line_num); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - goto __pyx_L20; - } - __pyx_L20:; - - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":73 - * if self.use_sent_id: - * self.sent_id.append(line_num) - * word_count = word_count + 1 # <<<<<<<<<<<<<< - * self.data.append(1) - * if self.use_sent_id: - */ - __pyx_v_word_count = (__pyx_v_word_count + 1); - } - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":74 - * self.sent_id.append(line_num) - * word_count = word_count + 1 - * self.data.append(1) # <<<<<<<<<<<<<< - * if self.use_sent_id: - * self.sent_id.append(line_num) - */ - __pyx_t_10 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->data), __pyx_int_1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_10); - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":75 - * word_count = word_count + 1 - * self.data.append(1) - * if self.use_sent_id: # <<<<<<<<<<<<<< - * self.sent_id.append(line_num) - * word_count = word_count + 1 - */ - if (__pyx_v_self->use_sent_id) { - - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":76 - * self.data.append(1) - * if self.use_sent_id: - * self.sent_id.append(line_num) # <<<<<<<<<<<<<< - * word_count = word_count + 1 - * self.data.append(0) - */ - __pyx_t_10 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_id), __pyx_v_line_num); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L7_error;} - __Pyx_GOTREF(__pyx_t_10); - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - goto __pyx_L21; - } - __pyx_L21:; - - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":77 - * if self.use_sent_id: - * self.sent_id.append(line_num) - * word_count = word_count + 1 # <<<<<<<<<<<<<< - * self.data.append(0) - * self.sent_index.append(word_count) - */ - __pyx_v_word_count = (__pyx_v_word_count + 1); - } - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":78 - * self.sent_id.append(line_num) - * word_count = word_count + 1 - * self.data.append(0) # <<<<<<<<<<<<<< - * self.sent_index.append(word_count) + * data = (line.split(' ||| ')[side] for line in fp) # <<<<<<<<<<<<<< + * self.read_text_data(data) * */ - __pyx_t_1 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->data), __pyx_int_0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_1 = __pyx_pf_3_sa_9DataArray_11read_bitext_genexpr(((PyObject*)__pyx_cur_scope)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_data = __pyx_t_1; + __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":79 - * word_count = word_count + 1 - * self.data.append(0) - * self.sent_index.append(word_count) # <<<<<<<<<<<<<< + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":74 + * with gzip_or_text(filename) as fp: + * data = (line.split(' ||| ')[side] for line in fp) + * self.read_text_data(data) # <<<<<<<<<<<<<< * - * def read_binary(self, char* filename): + * def read_text_data(self, data): */ - __pyx_t_1 = PyInt_FromLong(__pyx_v_word_count); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_text_data); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_index), __pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_2); + __Pyx_INCREF(__pyx_v_data); + PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_data); + __Pyx_GIVEREF(__pyx_v_data); + __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; goto __pyx_L14_try_end; __pyx_L7_error:; - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_XDECREF(__pyx_t_14); __pyx_t_14 = 0; - __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; - __Pyx_XDECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":66 - * def read_text(self, char* filename): - * cdef int word_count = 0 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":72 + * + * def read_bitext(self, char* filename, int side): * with gzip_or_text(filename) as fp: # <<<<<<<<<<<<<< - * for line_num, line in enumerate(fp): - * self.sent_index.append(word_count) + * data = (line.split(' ||| ')[side] for line in fp) + * self.read_text_data(data) */ /*except:*/ { - __Pyx_AddTraceback("_sa.DataArray.read_text", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_2, &__pyx_t_1, &__pyx_t_10) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + __Pyx_AddTraceback("_sa.DataArray.read_bitext", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_3, &__pyx_t_2, &__pyx_t_1) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + __Pyx_GOTREF(__pyx_t_3); __Pyx_GOTREF(__pyx_t_2); __Pyx_GOTREF(__pyx_t_1); - __Pyx_GOTREF(__pyx_t_10); - __pyx_t_13 = PyTuple_New(3); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_8 = PyTuple_New(3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + __Pyx_GOTREF(__pyx_t_8); + __Pyx_INCREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); __Pyx_INCREF(__pyx_t_2); - PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_2); + PyTuple_SET_ITEM(__pyx_t_8, 1, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); __Pyx_INCREF(__pyx_t_1); - PyTuple_SET_ITEM(__pyx_t_13, 1, __pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_8, 2, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); - __Pyx_INCREF(__pyx_t_10); - PyTuple_SET_ITEM(__pyx_t_13, 2, __pyx_t_10); - __Pyx_GIVEREF(__pyx_t_10); - __pyx_t_16 = PyObject_Call(__pyx_t_4, __pyx_t_13, NULL); + __pyx_t_10 = PyObject_Call(__pyx_t_4, __pyx_t_8, NULL); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} - __Pyx_GOTREF(__pyx_t_16); - __pyx_t_15 = __Pyx_PyObject_IsTrue(__pyx_t_16); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - if (unlikely(__pyx_t_15 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} - __pyx_t_17 = (!__pyx_t_15); - if (__pyx_t_17) { + if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + __Pyx_GOTREF(__pyx_t_10); + __pyx_t_9 = __Pyx_PyObject_IsTrue(__pyx_t_10); + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + if (unlikely(__pyx_t_9 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + __pyx_t_11 = (!__pyx_t_9); + if (__pyx_t_11) { + __Pyx_GIVEREF(__pyx_t_3); __Pyx_GIVEREF(__pyx_t_2); __Pyx_GIVEREF(__pyx_t_1); - __Pyx_GIVEREF(__pyx_t_10); - __Pyx_ErrRestore(__pyx_t_2, __pyx_t_1, __pyx_t_10); - __pyx_t_2 = 0; __pyx_t_1 = 0; __pyx_t_10 = 0; - {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} - goto __pyx_L24; + __Pyx_ErrRestore(__pyx_t_3, __pyx_t_2, __pyx_t_1); + __pyx_t_3 = 0; __pyx_t_2 = 0; __pyx_t_1 = 0; + {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + goto __pyx_L18; } - __pyx_L24:; - __Pyx_DECREF(((PyObject *)__pyx_t_13)); __pyx_t_13 = 0; + __pyx_L18:; + __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; goto __pyx_L8_exception_handled; } __pyx_L9_except_error:; @@ -7774,20 +8061,20 @@ static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_Data } /*finally:*/ { if (__pyx_t_4) { - __pyx_t_7 = PyObject_Call(__pyx_t_4, __pyx_k_tuple_17, NULL); + __pyx_t_7 = PyObject_Call(__pyx_t_4, __pyx_k_tuple_20, NULL); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_17 = __Pyx_PyObject_IsTrue(__pyx_t_7); + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_7); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - if (unlikely(__pyx_t_17 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__pyx_t_11 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } } - goto __pyx_L25; + goto __pyx_L19; __pyx_L3_error:; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; goto __pyx_L1_error; - __pyx_L25:; + __pyx_L19:; } __pyx_r = Py_None; __Pyx_INCREF(Py_None); @@ -7796,13 +8083,327 @@ static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_Data __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_2); __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("_sa.DataArray.read_bitext", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_data); + __Pyx_DECREF(((PyObject *)__pyx_cur_scope)); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_3_sa_9DataArray_21read_text_data(PyObject *__pyx_v_self, PyObject *__pyx_v_data); /*proto*/ +static PyObject *__pyx_pw_3_sa_9DataArray_21read_text_data(PyObject *__pyx_v_self, PyObject *__pyx_v_data) { + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("read_text_data (wrapper)", 0); + __pyx_r = __pyx_pf_3_sa_9DataArray_20read_text_data(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((PyObject *)__pyx_v_data)); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":76 + * self.read_text_data(data) + * + * def read_text_data(self, data): # <<<<<<<<<<<<<< + * cdef int word_count = 0 + * for line_num, line in enumerate(data): + */ + +static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_data) { + int __pyx_v_word_count; + PyObject *__pyx_v_line_num = NULL; + PyObject *__pyx_v_line = NULL; + PyObject *__pyx_v_word = NULL; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + Py_ssize_t __pyx_t_3; + PyObject *(*__pyx_t_4)(PyObject *); + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + Py_ssize_t __pyx_t_7; + PyObject *(*__pyx_t_8)(PyObject *); + PyObject *__pyx_t_9 = NULL; + PyObject *__pyx_t_10 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("read_text_data", 0); + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":77 + * + * def read_text_data(self, data): + * cdef int word_count = 0 # <<<<<<<<<<<<<< + * for line_num, line in enumerate(data): + * self.sent_index.append(word_count) + */ + __pyx_v_word_count = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":78 + * def read_text_data(self, data): + * cdef int word_count = 0 + * for line_num, line in enumerate(data): # <<<<<<<<<<<<<< + * self.sent_index.append(word_count) + * for word in line.split(): + */ + __Pyx_INCREF(__pyx_int_0); + __pyx_t_1 = __pyx_int_0; + if (PyList_CheckExact(__pyx_v_data) || PyTuple_CheckExact(__pyx_v_data)) { + __pyx_t_2 = __pyx_v_data; __Pyx_INCREF(__pyx_t_2); __pyx_t_3 = 0; + __pyx_t_4 = NULL; + } else { + __pyx_t_3 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_data); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_4 = Py_TYPE(__pyx_t_2)->tp_iternext; + } + for (;;) { + if (!__pyx_t_4 && PyList_CheckExact(__pyx_t_2)) { + if (__pyx_t_3 >= PyList_GET_SIZE(__pyx_t_2)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_5 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; + #else + __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + #endif + } else if (!__pyx_t_4 && PyTuple_CheckExact(__pyx_t_2)) { + if (__pyx_t_3 >= PyTuple_GET_SIZE(__pyx_t_2)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; + #else + __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + #endif + } else { + __pyx_t_5 = __pyx_t_4(__pyx_t_2); + if (unlikely(!__pyx_t_5)) { + if (PyErr_Occurred()) { + if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); + else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + break; + } + __Pyx_GOTREF(__pyx_t_5); + } + __Pyx_XDECREF(__pyx_v_line); + __pyx_v_line = __pyx_t_5; + __pyx_t_5 = 0; + __Pyx_INCREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_v_line_num); + __pyx_v_line_num = __pyx_t_1; + __pyx_t_5 = PyNumber_Add(__pyx_t_1, __pyx_int_1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_1); + __pyx_t_1 = __pyx_t_5; + __pyx_t_5 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":79 + * cdef int word_count = 0 + * for line_num, line in enumerate(data): + * self.sent_index.append(word_count) # <<<<<<<<<<<<<< + * for word in line.split(): + * self.data.append(self.get_id(word)) + */ + __pyx_t_5 = PyInt_FromLong(__pyx_v_word_count); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_6 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_index), __pyx_t_5); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":80 + * for line_num, line in enumerate(data): + * self.sent_index.append(word_count) + * for word in line.split(): # <<<<<<<<<<<<<< + * self.data.append(self.get_id(word)) + * if self.use_sent_id: + */ + __pyx_t_6 = PyObject_GetAttr(__pyx_v_line, __pyx_n_s__split); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_5 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + if (PyList_CheckExact(__pyx_t_5) || PyTuple_CheckExact(__pyx_t_5)) { + __pyx_t_6 = __pyx_t_5; __Pyx_INCREF(__pyx_t_6); __pyx_t_7 = 0; + __pyx_t_8 = NULL; + } else { + __pyx_t_7 = -1; __pyx_t_6 = PyObject_GetIter(__pyx_t_5); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_8 = Py_TYPE(__pyx_t_6)->tp_iternext; + } + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + for (;;) { + if (!__pyx_t_8 && PyList_CheckExact(__pyx_t_6)) { + if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_6)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_5 = PyList_GET_ITEM(__pyx_t_6, __pyx_t_7); __Pyx_INCREF(__pyx_t_5); __pyx_t_7++; + #else + __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + #endif + } else if (!__pyx_t_8 && PyTuple_CheckExact(__pyx_t_6)) { + if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_6)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_6, __pyx_t_7); __Pyx_INCREF(__pyx_t_5); __pyx_t_7++; + #else + __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + #endif + } else { + __pyx_t_5 = __pyx_t_8(__pyx_t_6); + if (unlikely(!__pyx_t_5)) { + if (PyErr_Occurred()) { + if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); + else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + break; + } + __Pyx_GOTREF(__pyx_t_5); + } + __Pyx_XDECREF(__pyx_v_word); + __pyx_v_word = __pyx_t_5; + __pyx_t_5 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":81 + * self.sent_index.append(word_count) + * for word in line.split(): + * self.data.append(self.get_id(word)) # <<<<<<<<<<<<<< + * if self.use_sent_id: + * self.sent_id.append(line_num) + */ + __pyx_t_5 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__get_id); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_9 = PyTuple_New(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_9); + __Pyx_INCREF(__pyx_v_word); + PyTuple_SET_ITEM(__pyx_t_9, 0, __pyx_v_word); + __Pyx_GIVEREF(__pyx_v_word); + __pyx_t_10 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_t_9), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __Pyx_DECREF(((PyObject *)__pyx_t_9)); __pyx_t_9 = 0; + __pyx_t_9 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->data), __pyx_t_10); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_9); + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":82 + * for word in line.split(): + * self.data.append(self.get_id(word)) + * if self.use_sent_id: # <<<<<<<<<<<<<< + * self.sent_id.append(line_num) + * word_count = word_count + 1 + */ + if (__pyx_v_self->use_sent_id) { + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":83 + * self.data.append(self.get_id(word)) + * if self.use_sent_id: + * self.sent_id.append(line_num) # <<<<<<<<<<<<<< + * word_count = word_count + 1 + * self.data.append(1) + */ + __pyx_t_9 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_id), __pyx_v_line_num); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_9); + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + goto __pyx_L7; + } + __pyx_L7:; + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":84 + * if self.use_sent_id: + * self.sent_id.append(line_num) + * word_count = word_count + 1 # <<<<<<<<<<<<<< + * self.data.append(1) + * if self.use_sent_id: + */ + __pyx_v_word_count = (__pyx_v_word_count + 1); + } + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":85 + * self.sent_id.append(line_num) + * word_count = word_count + 1 + * self.data.append(1) # <<<<<<<<<<<<<< + * if self.use_sent_id: + * self.sent_id.append(line_num) + */ + __pyx_t_6 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->data), __pyx_int_1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 85; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":86 + * word_count = word_count + 1 + * self.data.append(1) + * if self.use_sent_id: # <<<<<<<<<<<<<< + * self.sent_id.append(line_num) + * word_count = word_count + 1 + */ + if (__pyx_v_self->use_sent_id) { + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":87 + * self.data.append(1) + * if self.use_sent_id: + * self.sent_id.append(line_num) # <<<<<<<<<<<<<< + * word_count = word_count + 1 + * self.data.append(0) + */ + __pyx_t_6 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_id), __pyx_v_line_num); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + goto __pyx_L8; + } + __pyx_L8:; + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":88 + * if self.use_sent_id: + * self.sent_id.append(line_num) + * word_count = word_count + 1 # <<<<<<<<<<<<<< + * self.data.append(0) + * self.sent_index.append(word_count) + */ + __pyx_v_word_count = (__pyx_v_word_count + 1); + } + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":89 + * self.sent_id.append(line_num) + * word_count = word_count + 1 + * self.data.append(0) # <<<<<<<<<<<<<< + * self.sent_index.append(word_count) + * + */ + __pyx_t_1 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->data), __pyx_int_0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 89; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":90 + * word_count = word_count + 1 + * self.data.append(0) + * self.sent_index.append(word_count) # <<<<<<<<<<<<<< + * + * + */ + __pyx_t_1 = PyInt_FromLong(__pyx_v_word_count); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->sent_index), __pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_9); __Pyx_XDECREF(__pyx_t_10); - __Pyx_XDECREF(__pyx_t_13); - __Pyx_XDECREF(__pyx_t_14); - __Pyx_AddTraceback("_sa.DataArray.read_text", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("_sa.DataArray.read_text_data", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; - __Pyx_XDECREF(__pyx_v_fp); __Pyx_XDECREF(__pyx_v_line_num); __Pyx_XDECREF(__pyx_v_line); __Pyx_XDECREF(__pyx_v_word); @@ -7812,14 +8413,14 @@ static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_Data } /* Python wrapper */ -static PyObject *__pyx_pw_3_sa_9DataArray_19read_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename); /*proto*/ -static PyObject *__pyx_pw_3_sa_9DataArray_19read_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename) { +static PyObject *__pyx_pw_3_sa_9DataArray_23read_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename); /*proto*/ +static PyObject *__pyx_pw_3_sa_9DataArray_23read_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename) { char *__pyx_v_filename; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("read_binary (wrapper)", 0); assert(__pyx_arg_filename); { - __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L3_error:; @@ -7827,26 +8428,26 @@ static PyObject *__pyx_pw_3_sa_9DataArray_19read_binary(PyObject *__pyx_v_self, __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_3_sa_9DataArray_18read_binary(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((char *)__pyx_v_filename)); + __pyx_r = __pyx_pf_3_sa_9DataArray_22read_binary(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((char *)__pyx_v_filename)); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":81 - * self.sent_index.append(word_count) +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":93 + * * * def read_binary(self, char* filename): # <<<<<<<<<<<<<< * cdef FILE* f * f = fopen(filename, "r") */ -static PyObject *__pyx_pf_3_sa_9DataArray_18read_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename) { +static PyObject *__pyx_pf_3_sa_9DataArray_22read_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename) { FILE *__pyx_v_f; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("read_binary", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":83 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":95 * def read_binary(self, char* filename): * cdef FILE* f * f = fopen(filename, "r") # <<<<<<<<<<<<<< @@ -7855,7 +8456,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_18read_binary(struct __pyx_obj_3_sa_Da */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__r); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":84 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":96 * cdef FILE* f * f = fopen(filename, "r") * self.read_handle(f) # <<<<<<<<<<<<<< @@ -7864,7 +8465,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_18read_binary(struct __pyx_obj_3_sa_Da */ ((struct __pyx_vtabstruct_3_sa_DataArray *)__pyx_v_self->__pyx_vtab)->read_handle(__pyx_v_self, __pyx_v_f); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":85 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":97 * f = fopen(filename, "r") * self.read_handle(f) * fclose(f) # <<<<<<<<<<<<<< @@ -7879,7 +8480,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_18read_binary(struct __pyx_obj_3_sa_Da return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":87 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":99 * fclose(f) * * cdef void read_handle(self, FILE* f): # <<<<<<<<<<<<<< @@ -7904,7 +8505,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray int __pyx_clineno = 0; __Pyx_RefNannySetupContext("read_handle", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":92 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":104 * cdef char* c_word * cdef bytes py_word * self.data.read_handle(f) # <<<<<<<<<<<<<< @@ -7913,7 +8514,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->data->__pyx_vtab)->read_handle(__pyx_v_self->data, __pyx_v_f); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":93 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":105 * cdef bytes py_word * self.data.read_handle(f) * self.sent_index.read_handle(f) # <<<<<<<<<<<<<< @@ -7922,7 +8523,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sent_index->__pyx_vtab)->read_handle(__pyx_v_self->sent_index, __pyx_v_f); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":94 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":106 * self.data.read_handle(f) * self.sent_index.read_handle(f) * self.sent_id.read_handle(f) # <<<<<<<<<<<<<< @@ -7931,7 +8532,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sent_id->__pyx_vtab)->read_handle(__pyx_v_self->sent_id, __pyx_v_f); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":95 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":107 * self.sent_index.read_handle(f) * self.sent_id.read_handle(f) * fread(&(num_words), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -7940,7 +8541,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ fread((&__pyx_v_num_words), (sizeof(int)), 1, __pyx_v_f); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":96 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":108 * self.sent_id.read_handle(f) * fread(&(num_words), sizeof(int), 1, f) * for i in range(num_words): # <<<<<<<<<<<<<< @@ -7951,7 +8552,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_i = __pyx_t_2; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":97 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":109 * fread(&(num_words), sizeof(int), 1, f) * for i in range(num_words): * fread(&(word_len), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -7960,7 +8561,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ fread((&__pyx_v_word_len), (sizeof(int)), 1, __pyx_v_f); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":98 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":110 * for i in range(num_words): * fread(&(word_len), sizeof(int), 1, f) * c_word = malloc (word_len * sizeof(char)) # <<<<<<<<<<<<<< @@ -7969,7 +8570,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ __pyx_v_c_word = ((char *)malloc((__pyx_v_word_len * (sizeof(char))))); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":99 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":111 * fread(&(word_len), sizeof(int), 1, f) * c_word = malloc (word_len * sizeof(char)) * fread(c_word, sizeof(char), word_len, f) # <<<<<<<<<<<<<< @@ -7978,20 +8579,20 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ fread(__pyx_v_c_word, (sizeof(char)), __pyx_v_word_len, __pyx_v_f); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":100 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":112 * c_word = malloc (word_len * sizeof(char)) * fread(c_word, sizeof(char), word_len, f) * py_word = c_word # <<<<<<<<<<<<<< * free(c_word) * self.word2id[py_word] = len(self.id2word) */ - __pyx_t_3 = PyBytes_FromString(__pyx_v_c_word); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyBytes_FromString(__pyx_v_c_word); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 112; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_3)); __Pyx_XDECREF(((PyObject *)__pyx_v_py_word)); __pyx_v_py_word = __pyx_t_3; __pyx_t_3 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":101 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":113 * fread(c_word, sizeof(char), word_len, f) * py_word = c_word * free(c_word) # <<<<<<<<<<<<<< @@ -8000,7 +8601,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ free(__pyx_v_c_word); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":102 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":114 * py_word = c_word * free(c_word) * self.word2id[py_word] = len(self.id2word) # <<<<<<<<<<<<<< @@ -8009,26 +8610,26 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ __pyx_t_3 = __pyx_v_self->id2word; __Pyx_INCREF(__pyx_t_3); - __pyx_t_4 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 114; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyInt_FromSsize_t(__pyx_t_4); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromSsize_t(__pyx_t_4); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 114; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - if (PyObject_SetItem(__pyx_v_self->word2id, ((PyObject *)__pyx_v_py_word), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyObject_SetItem(__pyx_v_self->word2id, ((PyObject *)__pyx_v_py_word), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 114; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":103 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":115 * free(c_word) * self.word2id[py_word] = len(self.id2word) * self.id2word.append(py_word) # <<<<<<<<<<<<<< * if len(self.sent_id) == 0: * self.use_sent_id = False */ - __pyx_t_3 = __Pyx_PyObject_Append(__pyx_v_self->id2word, ((PyObject *)__pyx_v_py_word)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_Append(__pyx_v_self->id2word, ((PyObject *)__pyx_v_py_word)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":104 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":116 * self.word2id[py_word] = len(self.id2word) * self.id2word.append(py_word) * if len(self.sent_id) == 0: # <<<<<<<<<<<<<< @@ -8037,12 +8638,12 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ __pyx_t_3 = ((PyObject *)__pyx_v_self->sent_id); __Pyx_INCREF(__pyx_t_3); - __pyx_t_4 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_5 = (__pyx_t_4 == 0); if (__pyx_t_5) { - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":105 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":117 * self.id2word.append(py_word) * if len(self.sent_id) == 0: * self.use_sent_id = False # <<<<<<<<<<<<<< @@ -8054,7 +8655,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray } /*else*/ { - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":107 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":119 * self.use_sent_id = False * else: * self.use_sent_id = True # <<<<<<<<<<<<<< @@ -8074,7 +8675,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray __Pyx_RefNannyFinishContext(); } -/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":109 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":121 * self.use_sent_id = True * * cdef void write_handle(self, FILE* f): # <<<<<<<<<<<<<< @@ -8098,7 +8699,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_handle", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":113 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":125 * cdef int num_words * cdef char* c_word * self.data.write_handle(f) # <<<<<<<<<<<<<< @@ -8107,7 +8708,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->data->__pyx_vtab)->write_handle(__pyx_v_self->data, __pyx_v_f); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":114 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":126 * cdef char* c_word * self.data.write_handle(f) * self.sent_index.write_handle(f) # <<<<<<<<<<<<<< @@ -8116,7 +8717,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sent_index->__pyx_vtab)->write_handle(__pyx_v_self->sent_index, __pyx_v_f); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":115 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":127 * self.data.write_handle(f) * self.sent_index.write_handle(f) * self.sent_id.write_handle(f) # <<<<<<<<<<<<<< @@ -8125,7 +8726,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sent_id->__pyx_vtab)->write_handle(__pyx_v_self->sent_id, __pyx_v_f); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":116 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":128 * self.sent_index.write_handle(f) * self.sent_id.write_handle(f) * num_words = len(self.id2word) - 2 # <<<<<<<<<<<<<< @@ -8134,11 +8735,11 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray */ __pyx_t_1 = __pyx_v_self->id2word; __Pyx_INCREF(__pyx_t_1); - __pyx_t_2 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_num_words = (__pyx_t_2 - 2); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":117 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":129 * self.sent_id.write_handle(f) * num_words = len(self.id2word) - 2 * fwrite(&(num_words), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -8147,20 +8748,20 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray */ fwrite((&__pyx_v_num_words), (sizeof(int)), 1, __pyx_v_f); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":118 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":130 * num_words = len(self.id2word) - 2 * fwrite(&(num_words), sizeof(int), 1, f) * for word in self.id2word[2:]: # <<<<<<<<<<<<<< * c_word = word * word_len = strlen(c_word) + 1 */ - __pyx_t_1 = __Pyx_PySequence_GetSlice(__pyx_v_self->id2word, 2, PY_SSIZE_T_MAX); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 118; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PySequence_GetSlice(__pyx_v_self->id2word, 2, PY_SSIZE_T_MAX); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); if (PyList_CheckExact(__pyx_t_1) || PyTuple_CheckExact(__pyx_t_1)) { __pyx_t_3 = __pyx_t_1; __Pyx_INCREF(__pyx_t_3); __pyx_t_2 = 0; __pyx_t_4 = NULL; } else { - __pyx_t_2 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 118; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __pyx_t_4 = Py_TYPE(__pyx_t_3)->tp_iternext; } @@ -8171,21 +8772,21 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_1 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 118; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; #endif } else if (!__pyx_t_4 && PyTuple_CheckExact(__pyx_t_3)) { if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 118; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; #endif } else { __pyx_t_1 = __pyx_t_4(__pyx_t_3); if (unlikely(!__pyx_t_1)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 118; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -8195,17 +8796,17 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray __pyx_v_word = __pyx_t_1; __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":119 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":131 * fwrite(&(num_words), sizeof(int), 1, f) * for word in self.id2word[2:]: * c_word = word # <<<<<<<<<<<<<< * word_len = strlen(c_word) + 1 * fwrite(&(word_len), sizeof(int), 1, f) */ - __pyx_t_5 = PyBytes_AsString(__pyx_v_word); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 119; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyBytes_AsString(__pyx_v_word); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_c_word = __pyx_t_5; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":120 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":132 * for word in self.id2word[2:]: * c_word = word * word_len = strlen(c_word) + 1 # <<<<<<<<<<<<<< @@ -8214,7 +8815,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray */ __pyx_v_word_len = (strlen(__pyx_v_c_word) + 1); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":121 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":133 * c_word = word * word_len = strlen(c_word) + 1 * fwrite(&(word_len), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -8223,7 +8824,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray */ fwrite((&__pyx_v_word_len), (sizeof(int)), 1, __pyx_v_f); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":122 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":134 * word_len = strlen(c_word) + 1 * fwrite(&(word_len), sizeof(int), 1, f) * fwrite(c_word, sizeof(char), word_len, f) # <<<<<<<<<<<<<< @@ -8245,14 +8846,14 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray } /* Python wrapper */ -static PyObject *__pyx_pw_3_sa_9DataArray_21write_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename); /*proto*/ -static PyObject *__pyx_pw_3_sa_9DataArray_21write_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename) { +static PyObject *__pyx_pw_3_sa_9DataArray_25write_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename); /*proto*/ +static PyObject *__pyx_pw_3_sa_9DataArray_25write_binary(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename) { char *__pyx_v_filename; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("write_binary (wrapper)", 0); assert(__pyx_arg_filename); { - __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 136; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L3_error:; @@ -8260,12 +8861,12 @@ static PyObject *__pyx_pw_3_sa_9DataArray_21write_binary(PyObject *__pyx_v_self, __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_3_sa_9DataArray_20write_binary(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((char *)__pyx_v_filename)); + __pyx_r = __pyx_pf_3_sa_9DataArray_24write_binary(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((char *)__pyx_v_filename)); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":124 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":136 * fwrite(c_word, sizeof(char), word_len, f) * * def write_binary(self, char* filename): # <<<<<<<<<<<<<< @@ -8273,13 +8874,13 @@ static PyObject *__pyx_pw_3_sa_9DataArray_21write_binary(PyObject *__pyx_v_self, * f = fopen(filename, "w") */ -static PyObject *__pyx_pf_3_sa_9DataArray_20write_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename) { +static PyObject *__pyx_pf_3_sa_9DataArray_24write_binary(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename) { FILE *__pyx_v_f; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("write_binary", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":126 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":138 * def write_binary(self, char* filename): * cdef FILE* f * f = fopen(filename, "w") # <<<<<<<<<<<<<< @@ -8288,7 +8889,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20write_binary(struct __pyx_obj_3_sa_D */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__w); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":127 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":139 * cdef FILE* f * f = fopen(filename, "w") * self.write_handle(f) # <<<<<<<<<<<<<< @@ -8297,7 +8898,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20write_binary(struct __pyx_obj_3_sa_D */ ((struct __pyx_vtabstruct_3_sa_DataArray *)__pyx_v_self->__pyx_vtab)->write_handle(__pyx_v_self, __pyx_v_f); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":128 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":140 * f = fopen(filename, "w") * self.write_handle(f) * fclose(f) # <<<<<<<<<<<<<< @@ -8313,17 +8914,17 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20write_binary(struct __pyx_obj_3_sa_D } /* Python wrapper */ -static PyObject *__pyx_pw_3_sa_9DataArray_23write_enhanced_handle(PyObject *__pyx_v_self, PyObject *__pyx_v_f); /*proto*/ -static PyObject *__pyx_pw_3_sa_9DataArray_23write_enhanced_handle(PyObject *__pyx_v_self, PyObject *__pyx_v_f) { +static PyObject *__pyx_pw_3_sa_9DataArray_27write_enhanced_handle(PyObject *__pyx_v_self, PyObject *__pyx_v_f); /*proto*/ +static PyObject *__pyx_pw_3_sa_9DataArray_27write_enhanced_handle(PyObject *__pyx_v_self, PyObject *__pyx_v_f) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("write_enhanced_handle (wrapper)", 0); - __pyx_r = __pyx_pf_3_sa_9DataArray_22write_enhanced_handle(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((PyObject *)__pyx_v_f)); + __pyx_r = __pyx_pf_3_sa_9DataArray_26write_enhanced_handle(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((PyObject *)__pyx_v_f)); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":130 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":142 * fclose(f) * * def write_enhanced_handle(self, f): # <<<<<<<<<<<<<< @@ -8331,7 +8932,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_23write_enhanced_handle(PyObject *__py * f.write("%d " %i) */ -static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_f) { +static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, PyObject *__pyx_v_f) { PyObject *__pyx_v_i = NULL; PyObject *__pyx_v_word = NULL; PyObject *__pyx_r = NULL; @@ -8347,7 +8948,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_enhanced_handle", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":131 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":143 * * def write_enhanced_handle(self, f): * for i in self.data: # <<<<<<<<<<<<<< @@ -8358,7 +8959,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o __pyx_t_1 = ((PyObject *)__pyx_v_self->data); __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0; __pyx_t_3 = NULL; } else { - __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(((PyObject *)__pyx_v_self->data)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(((PyObject *)__pyx_v_self->data)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_3 = Py_TYPE(__pyx_t_1)->tp_iternext; } @@ -8368,21 +8969,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; #endif } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_1)) { if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; #endif } else { __pyx_t_4 = __pyx_t_3(__pyx_t_1); if (unlikely(!__pyx_t_4)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -8392,23 +8993,23 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o __pyx_v_i = __pyx_t_4; __pyx_t_4 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":132 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":144 * def write_enhanced_handle(self, f): * for i in self.data: * f.write("%d " %i) # <<<<<<<<<<<<<< * f.write("\n") * for i in self.sent_index: */ - __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 132; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_i); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 132; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_i); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_5)); - __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 132; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_t_5)); __Pyx_GIVEREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; - __pyx_t_5 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_6), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 132; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_6), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0; @@ -8416,21 +9017,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":133 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":145 * for i in self.data: * f.write("%d " %i) * f.write("\n") # <<<<<<<<<<<<<< * for i in self.sent_index: * f.write("%d " %i) */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_19), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_22), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":134 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":146 * f.write("%d " %i) * f.write("\n") * for i in self.sent_index: # <<<<<<<<<<<<<< @@ -8441,7 +9042,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o __pyx_t_5 = ((PyObject *)__pyx_v_self->sent_index); __Pyx_INCREF(__pyx_t_5); __pyx_t_2 = 0; __pyx_t_3 = NULL; } else { - __pyx_t_2 = -1; __pyx_t_5 = PyObject_GetIter(((PyObject *)__pyx_v_self->sent_index)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = -1; __pyx_t_5 = PyObject_GetIter(((PyObject *)__pyx_v_self->sent_index)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 146; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __pyx_t_3 = Py_TYPE(__pyx_t_5)->tp_iternext; } @@ -8451,21 +9052,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_1 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 146; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; #endif } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_5)) { if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_5)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_2); __Pyx_INCREF(__pyx_t_1); __pyx_t_2++; #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 146; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; #endif } else { __pyx_t_1 = __pyx_t_3(__pyx_t_5); if (unlikely(!__pyx_t_1)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 146; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -8475,23 +9076,23 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o __pyx_v_i = __pyx_t_1; __pyx_t_1 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":135 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":147 * f.write("\n") * for i in self.sent_index: * f.write("%d " %i) # <<<<<<<<<<<<<< * f.write("\n") * for i in self.sent_id: */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_i); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_i); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_6)); - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_t_6)); __Pyx_GIVEREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0; - __pyx_t_6 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; @@ -8499,21 +9100,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":136 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":148 * for i in self.sent_index: * f.write("%d " %i) * f.write("\n") # <<<<<<<<<<<<<< * for i in self.sent_id: * f.write("%d " %i) */ - __pyx_t_5 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 136; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_k_tuple_20), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 136; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_k_tuple_23), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":137 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":149 * f.write("%d " %i) * f.write("\n") * for i in self.sent_id: # <<<<<<<<<<<<<< @@ -8524,7 +9125,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o __pyx_t_6 = ((PyObject *)__pyx_v_self->sent_id); __Pyx_INCREF(__pyx_t_6); __pyx_t_2 = 0; __pyx_t_3 = NULL; } else { - __pyx_t_2 = -1; __pyx_t_6 = PyObject_GetIter(((PyObject *)__pyx_v_self->sent_id)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = -1; __pyx_t_6 = PyObject_GetIter(((PyObject *)__pyx_v_self->sent_id)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __pyx_t_3 = Py_TYPE(__pyx_t_6)->tp_iternext; } @@ -8534,21 +9135,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_5 = PyList_GET_ITEM(__pyx_t_6, __pyx_t_2); __Pyx_INCREF(__pyx_t_5); __pyx_t_2++; #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; #endif } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_6)) { if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_6)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_6, __pyx_t_2); __Pyx_INCREF(__pyx_t_5); __pyx_t_2++; #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; #endif } else { __pyx_t_5 = __pyx_t_3(__pyx_t_6); if (unlikely(!__pyx_t_5)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -8558,23 +9159,23 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o __pyx_v_i = __pyx_t_5; __pyx_t_5 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":138 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":150 * f.write("\n") * for i in self.sent_id: * f.write("%d " %i) # <<<<<<<<<<<<<< * f.write("\n") * for word in self.id2word: */ - __pyx_t_5 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_4 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_i); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_i); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_4)); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_t_4)); __Pyx_GIVEREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; - __pyx_t_4 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; @@ -8582,21 +9183,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":139 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":151 * for i in self.sent_id: * f.write("%d " %i) * f.write("\n") # <<<<<<<<<<<<<< * for word in self.id2word: * f.write("%s %d " % (word, self.word2id[word])) */ - __pyx_t_6 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 151; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_4 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_k_tuple_21), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_k_tuple_24), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 151; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":140 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":152 * f.write("%d " %i) * f.write("\n") * for word in self.id2word: # <<<<<<<<<<<<<< @@ -8607,7 +9208,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o __pyx_t_4 = __pyx_v_self->id2word; __Pyx_INCREF(__pyx_t_4); __pyx_t_2 = 0; __pyx_t_3 = NULL; } else { - __pyx_t_2 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_v_self->id2word); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_v_self->id2word); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 152; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __pyx_t_3 = Py_TYPE(__pyx_t_4)->tp_iternext; } @@ -8617,21 +9218,21 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_6 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_6); __pyx_t_2++; #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_6 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 152; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; #endif } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_4)) { if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_4)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_2); __Pyx_INCREF(__pyx_t_6); __pyx_t_2++; #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_6 = PySequence_ITEM(__pyx_t_4, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 152; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; #endif } else { __pyx_t_6 = __pyx_t_3(__pyx_t_4); if (unlikely(!__pyx_t_6)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[3]; __pyx_lineno = 152; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -8641,18 +9242,18 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o __pyx_v_word = __pyx_t_6; __pyx_t_6 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":141 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":153 * f.write("\n") * for word in self.id2word: * f.write("%s %d " % (word, self.word2id[word])) # <<<<<<<<<<<<<< * f.write("\n") * */ - __pyx_t_6 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_1 = PyObject_GetItem(__pyx_v_self->word2id, __pyx_v_word); if (!__pyx_t_1) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetItem(__pyx_v_self->word2id, __pyx_v_word); if (!__pyx_t_1) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_INCREF(__pyx_v_word); PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_v_word); @@ -8660,15 +9261,15 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_22), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_25), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; - __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_t_1)); __Pyx_GIVEREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; @@ -8676,16 +9277,16 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o } __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":142 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":154 * for word in self.id2word: * f.write("%s %d " % (word, self.word2id[word])) * f.write("\n") # <<<<<<<<<<<<<< * * def write_enhanced(self, char* filename): */ - __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_23), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_26), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -8708,14 +9309,14 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22write_enhanced_handle(struct __pyx_o } /* Python wrapper */ -static PyObject *__pyx_pw_3_sa_9DataArray_25write_enhanced(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename); /*proto*/ -static PyObject *__pyx_pw_3_sa_9DataArray_25write_enhanced(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename) { +static PyObject *__pyx_pw_3_sa_9DataArray_29write_enhanced(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename); /*proto*/ +static PyObject *__pyx_pw_3_sa_9DataArray_29write_enhanced(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename) { char *__pyx_v_filename; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("write_enhanced (wrapper)", 0); assert(__pyx_arg_filename); { - __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 156; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L3_error:; @@ -8723,12 +9324,12 @@ static PyObject *__pyx_pw_3_sa_9DataArray_25write_enhanced(PyObject *__pyx_v_sel __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_3_sa_9DataArray_24write_enhanced(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((char *)__pyx_v_filename)); + __pyx_r = __pyx_pf_3_sa_9DataArray_28write_enhanced(((struct __pyx_obj_3_sa_DataArray *)__pyx_v_self), ((char *)__pyx_v_filename)); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":144 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":156 * f.write("\n") * * def write_enhanced(self, char* filename): # <<<<<<<<<<<<<< @@ -8736,7 +9337,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_25write_enhanced(PyObject *__pyx_v_sel * self.write_enhanced_handle(self, f) */ -static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename) { +static PyObject *__pyx_pf_3_sa_9DataArray_28write_enhanced(struct __pyx_obj_3_sa_DataArray *__pyx_v_self, char *__pyx_v_filename) { PyObject *__pyx_v_f = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations @@ -8756,16 +9357,16 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_enhanced", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":145 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":157 * * def write_enhanced(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< * self.write_enhanced_handle(self, f) */ /*with:*/ { - __pyx_t_1 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_t_1)); __Pyx_GIVEREF(((PyObject *)__pyx_t_1)); @@ -8773,14 +9374,14 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa PyTuple_SET_ITEM(__pyx_t_2, 1, ((PyObject *)__pyx_n_s__w)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__w)); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_builtin_open, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_builtin_open, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____exit__); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____exit__); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____enter__); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s____enter__); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_t_4 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -8795,14 +9396,14 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa __pyx_v_f = __pyx_t_4; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":146 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":158 * def write_enhanced(self, char* filename): * with open(filename, "w") as f: * self.write_enhanced_handle(self, f) # <<<<<<<<<<<<<< */ - __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s_24); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 146; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s_27); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 146; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(((PyObject *)__pyx_v_self)); PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_self)); @@ -8810,7 +9411,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa __Pyx_INCREF(__pyx_v_f); PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_v_f); __Pyx_GIVEREF(__pyx_v_f); - __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 146; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; @@ -8825,7 +9426,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":145 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":157 * * def write_enhanced(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -8833,11 +9434,11 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa */ /*except:*/ { __Pyx_AddTraceback("_sa.DataArray.write_enhanced", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_2, &__pyx_t_1, &__pyx_t_4) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + if (__Pyx_GetException(&__pyx_t_2, &__pyx_t_1, &__pyx_t_4) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_GOTREF(__pyx_t_1); __Pyx_GOTREF(__pyx_t_4); - __pyx_t_8 = PyTuple_New(3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + __pyx_t_8 = PyTuple_New(3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} __Pyx_GOTREF(__pyx_t_8); __Pyx_INCREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_2); @@ -8850,11 +9451,11 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa __Pyx_GIVEREF(__pyx_t_4); __pyx_t_10 = PyObject_Call(__pyx_t_3, __pyx_t_8, NULL); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} __Pyx_GOTREF(__pyx_t_10); __pyx_t_9 = __Pyx_PyObject_IsTrue(__pyx_t_10); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - if (unlikely(__pyx_t_9 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + if (unlikely(__pyx_t_9 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} __pyx_t_11 = (!__pyx_t_9); if (__pyx_t_11) { __Pyx_GIVEREF(__pyx_t_2); @@ -8862,7 +9463,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa __Pyx_GIVEREF(__pyx_t_4); __Pyx_ErrRestore(__pyx_t_2, __pyx_t_1, __pyx_t_4); __pyx_t_2 = 0; __pyx_t_1 = 0; __pyx_t_4 = 0; - {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} + {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L9_except_error;} goto __pyx_L18; } __pyx_L18:; @@ -8888,13 +9489,13 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_enhanced(struct __pyx_obj_3_sa } /*finally:*/ { if (__pyx_t_3) { - __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_25, NULL); + __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_28, NULL); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_7); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - if (unlikely(__pyx_t_11 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__pyx_t_11 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } } goto __pyx_L19; @@ -9350,7 +9951,7 @@ static int __pyx_pf_3_sa_9Alignment_4__cinit__(struct __pyx_obj_3_sa_Alignment * * self.sent_index = IntList(1000,1000) * if from_binary: */ - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_k_tuple_26), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_k_tuple_29), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __Pyx_GOTREF(__pyx_v_self->links); @@ -9365,7 +9966,7 @@ static int __pyx_pf_3_sa_9Alignment_4__cinit__(struct __pyx_obj_3_sa_Alignment * * if from_binary: * self.read_binary(from_binary) */ - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_k_tuple_27), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_k_tuple_30), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __Pyx_GOTREF(__pyx_v_self->sent_index); @@ -9687,7 +10288,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_6read_text(struct __pyx_obj_3_sa_Align */ __pyx_t_3 = PyObject_GetAttr(__pyx_v_pair, __pyx_n_s__split); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_12 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_k_tuple_29), NULL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_12 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_k_tuple_32), NULL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_12); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L7_error;} @@ -9871,7 +10472,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_6read_text(struct __pyx_obj_3_sa_Align } /*finally:*/ { if (__pyx_t_4) { - __pyx_t_7 = PyObject_Call(__pyx_t_4, __pyx_k_tuple_30, NULL); + __pyx_t_7 = PyObject_Call(__pyx_t_4, __pyx_k_tuple_33, NULL); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); @@ -10174,7 +10775,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali */ __pyx_t_10 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_2 = PyObject_Call(__pyx_t_10, ((PyObject *)__pyx_k_tuple_31), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_2 = PyObject_Call(__pyx_t_10, ((PyObject *)__pyx_k_tuple_34), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; @@ -10213,7 +10814,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_12)); __pyx_t_12 = 0; - __pyx_t_12 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_32), __pyx_t_13); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_12 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_35), __pyx_t_13); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_12)); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L7_error;} @@ -10239,7 +10840,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali */ __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_33), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_1 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_36), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -10320,7 +10921,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali } /*finally:*/ { if (__pyx_t_3) { - __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_34, NULL); + __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_37, NULL); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); @@ -10591,7 +11192,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa */ __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_link); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_link); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); __pyx_t_10 = PyTuple_New(1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_10); @@ -10615,7 +11216,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa */ __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_35), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_38), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; @@ -10674,7 +11275,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa */ __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_10 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_i); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_10 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_i); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_10)); __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); @@ -10698,7 +11299,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa */ __pyx_t_2 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_10 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_36), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_10 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_39), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; @@ -10777,7 +11378,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa } /*finally:*/ { if (__pyx_t_3) { - __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_37, NULL); + __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_40, NULL); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); @@ -11269,7 +11870,7 @@ static int __pyx_pw_3_sa_5BiLex_1__cinit__(PyObject *__pyx_v_self, PyObject *__p * self.id2eword = [] */ values[0] = ((PyObject *)Py_None); - values[1] = __pyx_k_38; + values[1] = __pyx_k_41; values[2] = ((PyObject *)Py_None); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":55 @@ -12220,7 +12821,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_5 = 0; __pyx_t_11 = 0; __pyx_t_12 = 0; - __pyx_t_12 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_39), ((PyObject *)__pyx_t_13)); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_42), ((PyObject *)__pyx_t_13)); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_12)); __Pyx_DECREF(((PyObject *)__pyx_t_13)); __pyx_t_13 = 0; __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -14311,7 +14912,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ */ __pyx_t_12 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__seek); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_12); - __pyx_t_1 = PyObject_Call(__pyx_t_12, ((PyObject *)__pyx_k_tuple_40), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_1 = PyObject_Call(__pyx_t_12, ((PyObject *)__pyx_k_tuple_43), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -14642,7 +15243,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ } /*finally:*/ { if (__pyx_t_4) { - __pyx_t_7 = PyObject_Call(__pyx_t_4, __pyx_k_tuple_41, NULL); + __pyx_t_7 = PyObject_Call(__pyx_t_4, __pyx_k_tuple_44, NULL); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); @@ -14714,7 +15315,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ */ __pyx_t_20 = __Pyx_PyInt_AsInt(__pyx_v_i); if (unlikely((__pyx_t_20 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 317; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_24 = __Pyx_PyInt_AsInt(__pyx_v_j); if (unlikely((__pyx_t_24 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 317; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_3 = ((PyObject *)__pyx_kp_s_42); + __pyx_t_3 = ((PyObject *)__pyx_kp_s_45); __Pyx_INCREF(__pyx_t_3); __pyx_t_2 = ((struct __pyx_vtabstruct_3_sa_BiLex *)__pyx_v_self->__pyx_vtab)->qsort(__pyx_v_self, __pyx_t_20, __pyx_t_24, __pyx_t_3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 317; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); @@ -14937,7 +15538,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ * if i == j: #empty interval * return */ - __pyx_t_2 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_k_tuple_44), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 344; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_k_tuple_47), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 344; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_Raise(__pyx_t_2, 0, 0, 0); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; @@ -15094,7 +15695,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ * self.qsort(p+1,j, pad+" ") * */ - __pyx_t_2 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_45)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_48)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_t_4 = ((struct __pyx_vtabstruct_3_sa_BiLex *)__pyx_v_self->__pyx_vtab)->qsort(__pyx_v_self, __pyx_v_i, __pyx_v_p, __pyx_t_2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); @@ -15108,7 +15709,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ * * */ - __pyx_t_4 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_45)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 360; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_48)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 360; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __pyx_t_2 = ((struct __pyx_vtabstruct_3_sa_BiLex *)__pyx_v_self->__pyx_vtab)->qsort(__pyx_v_self, (__pyx_v_p + 1), __pyx_v_j, __pyx_t_4); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 360; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); @@ -15280,7 +15881,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL */ __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_i); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_i); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); __pyx_t_10 = PyTuple_New(1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_10); @@ -15304,7 +15905,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL */ __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_46), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_49), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; @@ -15450,7 +16051,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL __Pyx_INCREF(__pyx_v_s2); PyTuple_SET_ITEM(__pyx_t_11, 2, __pyx_v_s2); __Pyx_GIVEREF(__pyx_v_s2); - __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_47), ((PyObject *)__pyx_t_11)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_50), ((PyObject *)__pyx_t_11)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); __Pyx_DECREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0; __pyx_t_11 = PyTuple_New(1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L7_error;} @@ -15475,7 +16076,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL */ __pyx_t_2 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 370; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_48), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 370; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_51), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 370; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -15552,7 +16153,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL __Pyx_INCREF(__pyx_v_w); PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_v_w); __Pyx_GIVEREF(__pyx_v_w); - __pyx_t_10 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_49), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_10 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_52), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_10)); __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L7_error;} @@ -15578,7 +16179,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL */ __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_50), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_53), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; @@ -15655,7 +16256,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL __Pyx_INCREF(__pyx_v_w); PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_v_w); __Pyx_GIVEREF(__pyx_v_w); - __pyx_t_11 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_49), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_11 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_52), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_11)); __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L7_error;} @@ -15681,7 +16282,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL */ __pyx_t_2 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_51), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_54), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -15762,7 +16363,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL } /*finally:*/ { if (__pyx_t_3) { - __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_52, NULL); + __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_55, NULL); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); @@ -16448,7 +17049,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex * __Pyx_GIVEREF(__pyx_v_score2); __pyx_t_4 = 0; __pyx_t_2 = 0; - __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_53), ((PyObject *)__pyx_t_12)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_56), ((PyObject *)__pyx_t_12)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); __Pyx_DECREF(((PyObject *)__pyx_t_12)); __pyx_t_12 = 0; __pyx_t_12 = PyTuple_New(1); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L7_error;} @@ -16551,7 +17152,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex * } /*finally:*/ { if (__pyx_t_3) { - __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_54, NULL); + __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_57, NULL); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); @@ -17543,7 +18144,7 @@ static PyObject *__pyx_pf_3_sa_6BitSet_10__str__(struct __pyx_obj_3_sa_BitSet *_ __Pyx_XDECREF(__pyx_r); __pyx_t_1 = ((PyObject *)__pyx_f_3_sa_dec2bin(__pyx_v_self->b->bitset)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyNumber_Add(__pyx_t_1, ((PyObject *)__pyx_kp_s_55)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_t_1, ((PyObject *)__pyx_kp_s_58)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = PyInt_FromLong(__pyx_v_self->b->size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -17594,7 +18195,7 @@ static PyObject *__pyx_pf_3_sa_6BitSet_10__str__(struct __pyx_obj_3_sa_BitSet *_ __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyNumber_Add(__pyx_t_1, ((PyObject *)__pyx_kp_s_56)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_t_1, ((PyObject *)__pyx_kp_s_59)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_r = __pyx_t_2; @@ -17848,8 +18449,8 @@ static PyObject *__pyx_f_3_sa_dec2bin(long __pyx_v_i) { * cdef unsigned d * for d in range(MIN_BOTTOM_SIZE): */ - __Pyx_INCREF(((PyObject *)__pyx_kp_s_42)); - __pyx_v_result = __pyx_kp_s_42; + __Pyx_INCREF(((PyObject *)__pyx_kp_s_45)); + __pyx_v_result = __pyx_kp_s_45; /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":160 * cdef str result = "" @@ -19908,7 +20509,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__info); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_58), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_61), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -20134,7 +20735,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__info); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_60), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_63), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -20184,14 +20785,14 @@ static PyObject *__pyx_pw_3_sa_3LCP_3compute_stats(PyObject *__pyx_v_self, PyObj */ static PyObject *__pyx_pf_3_sa_3LCP_2compute_stats(struct __pyx_obj_3_sa_LCP *__pyx_v_self, int __pyx_v_max_n) { - struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *__pyx_cur_scope; + struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *__pyx_cur_scope; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("compute_stats", 0); - __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *)__pyx_ptype_3_sa___pyx_scope_struct__compute_stats->tp_new(__pyx_ptype_3_sa___pyx_scope_struct__compute_stats, __pyx_empty_tuple, NULL); + __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *)__pyx_ptype_3_sa___pyx_scope_struct_2_compute_stats->tp_new(__pyx_ptype_3_sa___pyx_scope_struct_2_compute_stats, __pyx_empty_tuple, NULL); if (unlikely(!__pyx_cur_scope)) { __Pyx_RefNannyFinishContext(); return NULL; @@ -20222,7 +20823,7 @@ static PyObject *__pyx_pf_3_sa_3LCP_2compute_stats(struct __pyx_obj_3_sa_LCP *__ static PyObject *__pyx_gb_3_sa_3LCP_4generator(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ { - struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *__pyx_cur_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *)__pyx_generator->closure); + struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *__pyx_cur_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *)__pyx_generator->closure); PyObject *__pyx_r = NULL; PyObject *__pyx_t_1 = NULL; int __pyx_t_2; @@ -21263,7 +21864,7 @@ static char *__pyx_f_3_sa_8Alphabet_tostring(struct __pyx_obj_3_sa_Alphabet *__p __Pyx_GIVEREF(__pyx_t_5); __pyx_t_2 = 0; __pyx_t_5 = 0; - __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_61), ((PyObject *)__pyx_t_6)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_64), ((PyObject *)__pyx_t_6)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_5)); __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0; if (unlikely(((PyObject *)__pyx_v_self->id2sym) == Py_None)) { @@ -21285,7 +21886,7 @@ static char *__pyx_f_3_sa_8Alphabet_tostring(struct __pyx_obj_3_sa_Alphabet *__p */ __pyx_t_5 = PyBytes_FromString(((struct __pyx_vtabstruct_3_sa_Alphabet *)__pyx_v_self->__pyx_vtab)->tocat(__pyx_v_self, __pyx_v_sym)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_5)); - __pyx_t_6 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_62), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_65), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_6)); __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; if (unlikely(((PyObject *)__pyx_v_self->id2sym) == Py_None)) { @@ -21435,7 +22036,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p */ __pyx_t_6 = PyBytes_FromString(__pyx_v_s); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_6)); - __pyx_t_7 = PyNumber_Add(((PyObject *)__pyx_kp_s_63), ((PyObject *)__pyx_t_6)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyNumber_Add(((PyObject *)__pyx_kp_s_66), ((PyObject *)__pyx_t_6)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0; __pyx_v_s1 = __pyx_t_7; @@ -22221,7 +22822,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_4__str__(struct __pyx_obj_3_sa_Phrase *__ * def handle(self): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_64), __pyx_n_s__join); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_67), __pyx_n_s__join); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); @@ -22558,7 +23159,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_8strhandle(struct __pyx_obj_3_sa_Phrase * * def arity(self): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_64), __pyx_n_s__join); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_67), __pyx_n_s__join); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); @@ -23551,14 +24152,14 @@ static PyObject *__pyx_pw_3_sa_6Phrase_29__iter__(PyObject *__pyx_v_self) { */ static PyObject *__pyx_pf_3_sa_6Phrase_28__iter__(struct __pyx_obj_3_sa_Phrase *__pyx_v_self) { - struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *__pyx_cur_scope; + struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *__pyx_cur_scope; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__iter__", 0); - __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *)__pyx_ptype_3_sa___pyx_scope_struct_1___iter__->tp_new(__pyx_ptype_3_sa___pyx_scope_struct_1___iter__, __pyx_empty_tuple, NULL); + __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *)__pyx_ptype_3_sa___pyx_scope_struct_3___iter__->tp_new(__pyx_ptype_3_sa___pyx_scope_struct_3___iter__, __pyx_empty_tuple, NULL); if (unlikely(!__pyx_cur_scope)) { __Pyx_RefNannyFinishContext(); return NULL; @@ -23588,7 +24189,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_28__iter__(struct __pyx_obj_3_sa_Phrase * static PyObject *__pyx_gb_3_sa_6Phrase_30generator1(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ { - struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *__pyx_cur_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *)__pyx_generator->closure); + struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *__pyx_cur_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *)__pyx_generator->closure); PyObject *__pyx_r = NULL; int __pyx_t_1; PyObject *__pyx_t_2 = NULL; @@ -24076,7 +24677,7 @@ static int __pyx_pf_3_sa_4Rule___cinit__(struct __pyx_obj_3_sa_Rule *__pyx_v_sel */ __pyx_t_2 = PyInt_FromLong(__pyx_v_lhs); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_65), __pyx_t_2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_68), __pyx_t_2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_3)); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -24806,7 +25407,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_14__str__(struct __pyx_obj_3_sa_Rule *__pyx __pyx_t_6 = PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 212; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_64), __pyx_n_s__join); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 212; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_67), __pyx_n_s__join); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 212; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 212; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); @@ -24894,7 +25495,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_14__str__(struct __pyx_obj_3_sa_Rule *__pyx __Pyx_GIVEREF(__pyx_t_6); __pyx_t_8 = 0; __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_66), ((PyObject *)__pyx_t_7)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 216; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_69), ((PyObject *)__pyx_t_7)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 216; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_6)); __Pyx_DECREF(((PyObject *)__pyx_t_7)); __pyx_t_7 = 0; __pyx_t_4 = PyList_Append(__pyx_v_alignstr, ((PyObject *)__pyx_t_6)); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 216; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -24908,7 +25509,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_14__str__(struct __pyx_obj_3_sa_Rule *__pyx * * return " ||| ".join(fields) */ - __pyx_t_6 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_64), __pyx_n_s__join); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 219; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_67), __pyx_n_s__join); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 219; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 219; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); @@ -24933,7 +25534,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_14__str__(struct __pyx_obj_3_sa_Rule *__pyx * property scores: */ __Pyx_XDECREF(__pyx_r); - __pyx_t_8 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_67), __pyx_n_s__join); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 221; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_18), __pyx_n_s__join); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 221; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 221; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); @@ -27041,7 +27642,7 @@ static int __pyx_pw_3_sa_14Precomputation_1__cinit__(PyObject *__pyx_v_self, PyO __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__fsarray,&__pyx_n_s__from_stats,&__pyx_n_s__from_binary,&__pyx_n_s__precompute_rank,&__pyx_n_s_68,&__pyx_n_s__max_length,&__pyx_n_s__max_nonterminals,&__pyx_n_s_69,&__pyx_n_s__train_min_gap_size,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__fsarray,&__pyx_n_s__from_stats,&__pyx_n_s__from_binary,&__pyx_n_s__precompute_rank,&__pyx_n_s_70,&__pyx_n_s__max_length,&__pyx_n_s__max_nonterminals,&__pyx_n_s_71,&__pyx_n_s__train_min_gap_size,0}; PyObject* values[9] = {0,0,0,0,0,0,0,0,0}; /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":200 @@ -27100,7 +27701,7 @@ static int __pyx_pw_3_sa_14Precomputation_1__cinit__(PyObject *__pyx_v_self, PyO } case 4: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_68); + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_70); if (value) { values[4] = value; kw_args--; } } case 5: @@ -27115,7 +27716,7 @@ static int __pyx_pw_3_sa_14Precomputation_1__cinit__(PyObject *__pyx_v_self, PyO } case 7: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_69); + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_71); if (value) { values[7] = value; kw_args--; } } case 8: @@ -28321,7 +28922,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_3 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__info); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_k_tuple_71), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_k_tuple_73), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -28622,7 +29223,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_1 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__info); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_73), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_75), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -28765,7 +29366,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_1 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__info); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_75), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_77), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -29344,9 +29945,9 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_GOTREF(__pyx_t_3); __pyx_t_8 = PyTuple_New(2); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_76)); - PyTuple_SET_ITEM(__pyx_t_8, 0, ((PyObject *)__pyx_kp_s_76)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_76)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_78)); + PyTuple_SET_ITEM(__pyx_t_8, 0, ((PyObject *)__pyx_kp_s_78)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_78)); PyTuple_SET_ITEM(__pyx_t_8, 1, __pyx_t_3); __Pyx_GIVEREF(__pyx_t_3); __pyx_t_3 = 0; @@ -29504,7 +30105,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s * J2_set.add(combined_pattern) * */ - __pyx_t_8 = PyNumber_Add(__pyx_v_pattern1, ((PyObject *)__pyx_k_tuple_77)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 393; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyNumber_Add(__pyx_v_pattern1, ((PyObject *)__pyx_k_tuple_79)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 393; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __pyx_t_7 = PyNumber_Add(__pyx_t_8, __pyx_v_pattern2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 393; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); @@ -29613,7 +30214,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s * IJ_set.add(combined_pattern) * */ - __pyx_t_7 = PyNumber_Add(__pyx_v_pattern1, ((PyObject *)__pyx_k_tuple_78)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyNumber_Add(__pyx_v_pattern1, ((PyObject *)__pyx_k_tuple_80)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __pyx_t_8 = PyNumber_Add(__pyx_t_7, __pyx_v_pattern2); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); @@ -29722,7 +30323,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s * IJ_set.add(combined_pattern) * combined_pattern = pattern2 + (-1,) + pattern1 */ - __pyx_t_8 = PyNumber_Add(__pyx_v_pattern1, ((PyObject *)__pyx_k_tuple_79)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyNumber_Add(__pyx_v_pattern1, ((PyObject *)__pyx_k_tuple_81)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __pyx_t_7 = PyNumber_Add(__pyx_t_8, __pyx_v_pattern2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); @@ -29747,7 +30348,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s * IJ_set.add(combined_pattern) * */ - __pyx_t_7 = PyNumber_Add(__pyx_v_pattern2, ((PyObject *)__pyx_k_tuple_80)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyNumber_Add(__pyx_v_pattern2, ((PyObject *)__pyx_k_tuple_82)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __pyx_t_8 = PyNumber_Add(__pyx_t_7, __pyx_v_pattern1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); @@ -29867,9 +30468,9 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s * for word_id in pattern: * if word_id == -1: */ - __Pyx_INCREF(((PyObject *)__pyx_kp_s_42)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_45)); __Pyx_XDECREF(__pyx_v_s); - __pyx_v_s = ((PyObject *)__pyx_kp_s_42); + __pyx_v_s = ((PyObject *)__pyx_kp_s_45); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":418 * if pattern not in IJ_set: @@ -29936,7 +30537,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s * else: * s = s + darray.id2word[word_id] + " " */ - __pyx_t_3 = PyNumber_Add(__pyx_v_s, ((PyObject *)__pyx_kp_s_81)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 420; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyNumber_Add(__pyx_v_s, ((PyObject *)__pyx_kp_s_83)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 420; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_v_s); __pyx_v_s = __pyx_t_3; @@ -29957,7 +30558,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_7 = PyNumber_Add(__pyx_v_s, __pyx_t_3); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyNumber_Add(__pyx_t_7, ((PyObject *)__pyx_kp_s_64)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyNumber_Add(__pyx_t_7, ((PyObject *)__pyx_kp_s_67)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_DECREF(__pyx_v_s); @@ -29982,9 +30583,9 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_t_8 = PyTuple_New(2); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_82)); - PyTuple_SET_ITEM(__pyx_t_8, 0, ((PyObject *)__pyx_kp_s_82)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_82)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_84)); + PyTuple_SET_ITEM(__pyx_t_8, 0, ((PyObject *)__pyx_kp_s_84)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_84)); __Pyx_INCREF(__pyx_v_s); PyTuple_SET_ITEM(__pyx_t_8, 1, __pyx_v_s); __Pyx_GIVEREF(__pyx_v_s); @@ -30320,9 +30921,9 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_GOTREF(__pyx_t_8); __pyx_t_7 = PyTuple_New(6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_83)); - PyTuple_SET_ITEM(__pyx_t_7, 0, ((PyObject *)__pyx_kp_s_83)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_83)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_85)); + PyTuple_SET_ITEM(__pyx_t_7, 0, ((PyObject *)__pyx_kp_s_85)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_85)); PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_7, 2, __pyx_t_3); @@ -30443,9 +31044,9 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_GOTREF(__pyx_t_8); __pyx_t_6 = PyTuple_New(4); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_84)); - PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_kp_s_84)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_84)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_86)); + PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_kp_s_86)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_86)); __Pyx_INCREF(__pyx_v_num_found_patterns); PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_v_num_found_patterns); __Pyx_GIVEREF(__pyx_v_num_found_patterns); @@ -30480,9 +31081,9 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_GOTREF(__pyx_t_8); __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_85)); - PyTuple_SET_ITEM(__pyx_t_7, 0, ((PyObject *)__pyx_kp_s_85)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_85)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_87)); + PyTuple_SET_ITEM(__pyx_t_7, 0, ((PyObject *)__pyx_kp_s_87)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_87)); PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_8); __Pyx_GIVEREF(__pyx_t_8); __pyx_t_8 = 0; @@ -30506,9 +31107,9 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_GOTREF(__pyx_t_8); __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_86)); - PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_kp_s_86)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_86)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_88)); + PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_kp_s_88)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_88)); PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_t_8); __Pyx_GIVEREF(__pyx_t_8); __pyx_t_8 = 0; @@ -30570,26 +31171,29 @@ static int __pyx_pw_3_sa_11SuffixArray_1__cinit__(PyObject *__pyx_v_self, PyObje static int __pyx_pw_3_sa_11SuffixArray_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_from_binary = 0; PyObject *__pyx_v_from_text = 0; + PyObject *__pyx_v_side = 0; int __pyx_r; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__from_binary,&__pyx_n_s__from_text,0}; - PyObject* values[2] = {0,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__from_binary,&__pyx_n_s__from_text,&__pyx_n_s__side,0}; + PyObject* values[3] = {0,0,0}; /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":11 * cdef IntList ha * - * def __cinit__(self, from_binary=None, from_text=None): # <<<<<<<<<<<<<< + * def __cinit__(self, from_binary=None, from_text=None, side=None): # <<<<<<<<<<<<<< * self.darray = DataArray() * self.sa = IntList() */ values[0] = ((PyObject *)Py_None); values[1] = ((PyObject *)Py_None); + values[2] = ((PyObject *)Py_None); if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); switch (pos_args) { + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); case 0: break; @@ -30607,12 +31211,18 @@ static int __pyx_pw_3_sa_11SuffixArray_1__cinit__(PyObject *__pyx_v_self, PyObje PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__from_text); if (value) { values[1] = value; kw_args--; } } + case 2: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__side); + if (value) { values[2] = value; kw_args--; } + } } if (unlikely(kw_args > 0)) { if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); case 0: break; @@ -30621,21 +31231,22 @@ static int __pyx_pw_3_sa_11SuffixArray_1__cinit__(PyObject *__pyx_v_self, PyObje } __pyx_v_from_binary = values[0]; __pyx_v_from_text = values[1]; + __pyx_v_side = values[2]; } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 0, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[13]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 0, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[13]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_sa.SuffixArray.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return -1; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_3_sa_11SuffixArray___cinit__(((struct __pyx_obj_3_sa_SuffixArray *)__pyx_v_self), __pyx_v_from_binary, __pyx_v_from_text); + __pyx_r = __pyx_pf_3_sa_11SuffixArray___cinit__(((struct __pyx_obj_3_sa_SuffixArray *)__pyx_v_self), __pyx_v_from_binary, __pyx_v_from_text, __pyx_v_side); __Pyx_RefNannyFinishContext(); return __pyx_r; } -static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text) { +static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_from_binary, PyObject *__pyx_v_from_text, PyObject *__pyx_v_side) { int __pyx_r; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; @@ -30649,7 +31260,7 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":12 * - * def __cinit__(self, from_binary=None, from_text=None): + * def __cinit__(self, from_binary=None, from_text=None, side=None): * self.darray = DataArray() # <<<<<<<<<<<<<< * self.sa = IntList() * self.ha = IntList() @@ -30663,7 +31274,7 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr __pyx_t_1 = 0; /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":13 - * def __cinit__(self, from_binary=None, from_text=None): + * def __cinit__(self, from_binary=None, from_text=None, side=None): * self.darray = DataArray() * self.sa = IntList() # <<<<<<<<<<<<<< * self.ha = IntList() @@ -30707,7 +31318,7 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr * if from_binary: * self.read_binary(from_binary) # <<<<<<<<<<<<<< * elif from_text: - * self.read_text(from_text) + * self.read_text(from_text, side) */ __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_binary); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -30728,7 +31339,7 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr * if from_binary: * self.read_binary(from_binary) * elif from_text: # <<<<<<<<<<<<<< - * self.read_text(from_text) + * self.read_text(from_text, side) * */ __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_from_text); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -30737,17 +31348,20 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":18 * self.read_binary(from_binary) * elif from_text: - * self.read_text(from_text) # <<<<<<<<<<<<<< + * self.read_text(from_text, side) # <<<<<<<<<<<<<< * * def __getitem__(self, i): */ __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__read_text); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_v_from_text); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_from_text); __Pyx_GIVEREF(__pyx_v_from_text); + __Pyx_INCREF(__pyx_v_side); + PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_v_side); + __Pyx_GIVEREF(__pyx_v_side); __pyx_t_1 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; @@ -30782,7 +31396,7 @@ static PyObject *__pyx_pw_3_sa_11SuffixArray_3__getitem__(PyObject *__pyx_v_self } /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":20 - * self.read_text(from_text) + * self.read_text(from_text, side) * * def __getitem__(self, i): # <<<<<<<<<<<<<< * return self.sa.arr[i] @@ -30995,7 +31609,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_8getSentPos(struct __pyx_obj_3_sa_S * def getSentPos(self, loc): * return self.darray.getSentPos(loc) # <<<<<<<<<<<<<< * - * def read_text(self, char* filename): + * def read_text(self, filename, side): */ __Pyx_XDECREF(__pyx_r); __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self->darray), __pyx_n_s__getSentPos); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -31028,23 +31642,58 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_8getSentPos(struct __pyx_obj_3_sa_S } /* Python wrapper */ -static PyObject *__pyx_pw_3_sa_11SuffixArray_11read_text(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename); /*proto*/ +static PyObject *__pyx_pw_3_sa_11SuffixArray_11read_text(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ static char __pyx_doc_3_sa_11SuffixArray_10read_text[] = "Constructs suffix array using the algorithm\n of Larsson & Sadahkane (1999)"; -static PyObject *__pyx_pw_3_sa_11SuffixArray_11read_text(PyObject *__pyx_v_self, PyObject *__pyx_arg_filename) { - char *__pyx_v_filename; +static PyObject *__pyx_pw_3_sa_11SuffixArray_11read_text(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyObject *__pyx_v_filename = 0; + PyObject *__pyx_v_side = 0; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("read_text (wrapper)", 0); - assert(__pyx_arg_filename); { - __pyx_v_filename = PyBytes_AsString(__pyx_arg_filename); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + { + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__filename,&__pyx_n_s__side,0}; + PyObject* values[2] = {0,0}; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args; + const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); + switch (pos_args) { + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = PyDict_Size(__pyx_kwds); + switch (pos_args) { + case 0: + if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__filename)) != 0)) kw_args--; + else goto __pyx_L5_argtuple_error; + case 1: + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__side)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("read_text", 1, 2, 2, 1); {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "read_text") < 0)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + } else if (PyTuple_GET_SIZE(__pyx_args) != 2) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + } + __pyx_v_filename = values[0]; + __pyx_v_side = values[1]; } goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("read_text", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[13]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_sa.SuffixArray.read_text", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_3_sa_11SuffixArray_10read_text(((struct __pyx_obj_3_sa_SuffixArray *)__pyx_v_self), ((char *)__pyx_v_filename)); + __pyx_r = __pyx_pf_3_sa_11SuffixArray_10read_text(((struct __pyx_obj_3_sa_SuffixArray *)__pyx_v_self), __pyx_v_filename, __pyx_v_side); __Pyx_RefNannyFinishContext(); return __pyx_r; } @@ -31052,12 +31701,12 @@ static PyObject *__pyx_pw_3_sa_11SuffixArray_11read_text(PyObject *__pyx_v_self, /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":32 * return self.darray.getSentPos(loc) * - * def read_text(self, char* filename): # <<<<<<<<<<<<<< + * def read_text(self, filename, side): # <<<<<<<<<<<<<< * '''Constructs suffix array using the algorithm * of Larsson & Sadahkane (1999)''' */ -static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, char *__pyx_v_filename) { +static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_SuffixArray *__pyx_v_self, PyObject *__pyx_v_filename, PyObject *__pyx_v_side) { int __pyx_v_V; int __pyx_v_N; int __pyx_v_i; @@ -31092,16 +31741,14 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":38 * cdef IntList isa, word_count * - * self.darray = DataArray(from_text=filename, use_sent_id=True) # <<<<<<<<<<<<<< + * self.darray = DataArray(from_text=filename, side=side, use_sent_id=True) # <<<<<<<<<<<<<< * N = len(self.darray) * V = len(self.darray.id2word) */ __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_t_2 = PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_2)); - if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__from_text), ((PyObject *)__pyx_t_2)) < 0) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; + if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__from_text), __pyx_v_filename) < 0) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__side), __pyx_v_side) < 0) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_2 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__use_sent_id), __pyx_t_2) < 0) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -31117,7 +31764,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":39 * - * self.darray = DataArray(from_text=filename, use_sent_id=True) + * self.darray = DataArray(from_text=filename, side=side, use_sent_id=True) * N = len(self.darray) # <<<<<<<<<<<<<< * V = len(self.darray.id2word) * @@ -31129,7 +31776,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S __pyx_v_N = __pyx_t_3; /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":40 - * self.darray = DataArray(from_text=filename, use_sent_id=True) + * self.darray = DataArray(from_text=filename, side=side, use_sent_id=True) * N = len(self.darray) * V = len(self.darray.id2word) # <<<<<<<<<<<<<< * @@ -31461,9 +32108,9 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S __Pyx_GOTREF(__pyx_t_2); __pyx_t_9 = PyTuple_New(2); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_87)); - PyTuple_SET_ITEM(__pyx_t_9, 0, ((PyObject *)__pyx_kp_s_87)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_87)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_89)); + PyTuple_SET_ITEM(__pyx_t_9, 0, ((PyObject *)__pyx_kp_s_89)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_89)); PyTuple_SET_ITEM(__pyx_t_9, 1, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); __pyx_t_2 = 0; @@ -31518,9 +32165,9 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S __Pyx_GOTREF(__pyx_t_2); __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_88)); - PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_kp_s_88)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_88)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_90)); + PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_kp_s_90)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_90)); PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); __pyx_t_2 = 0; @@ -31724,9 +32371,9 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S __Pyx_GOTREF(__pyx_t_10); __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_89)); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_89)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_89)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_91)); + PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_91)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_91)); PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_10); __Pyx_GIVEREF(__pyx_t_10); __pyx_t_10 = 0; @@ -31749,7 +32396,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S __pyx_t_2 = PyObject_GetAttr(__pyx_t_10, __pyx_n_s__info); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_91), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_93), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; @@ -31799,9 +32446,9 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_text(struct __pyx_obj_3_sa_S __Pyx_GOTREF(__pyx_t_10); __pyx_t_11 = PyTuple_New(2); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_92)); - PyTuple_SET_ITEM(__pyx_t_11, 0, ((PyObject *)__pyx_kp_s_92)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_92)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_94)); + PyTuple_SET_ITEM(__pyx_t_11, 0, ((PyObject *)__pyx_kp_s_94)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_94)); PyTuple_SET_ITEM(__pyx_t_11, 1, __pyx_t_10); __Pyx_GIVEREF(__pyx_t_10); __pyx_t_10 = 0; @@ -31844,7 +32491,7 @@ static PyObject *__pyx_pw_3_sa_11SuffixArray_13q3sort(PyObject *__pyx_v_self, Py { static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__i,&__pyx_n_s__j,&__pyx_n_s__h,&__pyx_n_s__isa,&__pyx_n_s__pad,0}; PyObject* values[5] = {0,0,0,0,0}; - values[4] = ((PyObject *)__pyx_kp_s_42); + values[4] = ((PyObject *)__pyx_kp_s_45); if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); @@ -31980,7 +32627,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_12q3sort(struct __pyx_obj_3_sa_Suff __Pyx_GIVEREF(__pyx_t_3); __pyx_t_2 = 0; __pyx_t_3 = 0; - __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_93), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_95), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_3)); __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -32339,7 +32986,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_12q3sort(struct __pyx_obj_3_sa_Suff __Pyx_GOTREF(__pyx_t_2); __pyx_t_6 = PyInt_FromLong(__pyx_v_h); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_45)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_48)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __pyx_t_8 = PyTuple_New(5); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); @@ -32421,7 +33068,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_12q3sort(struct __pyx_obj_3_sa_Suff __Pyx_GOTREF(__pyx_t_3); __pyx_t_6 = PyInt_FromLong(__pyx_v_h); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_2 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_45)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_v_pad, ((PyObject *)__pyx_kp_s_48)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_t_4 = PyTuple_New(5); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); @@ -32811,7 +33458,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_20write_enhanced(struct __pyx_obj_3 * for a_i in self.sa: * f.write("%d " % a_i) */ - __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self->darray), __pyx_n_s_24); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 199; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self->darray), __pyx_n_s_27); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 199; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_4); __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 199; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); @@ -32878,7 +33525,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_20write_enhanced(struct __pyx_obj_3 */ __pyx_t_1 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_a_i); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_4 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_a_i); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_4)); __pyx_t_10 = PyTuple_New(1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_10); @@ -32902,7 +33549,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_20write_enhanced(struct __pyx_obj_3 */ __pyx_t_2 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_94), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_4 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_96), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; @@ -32961,7 +33608,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_20write_enhanced(struct __pyx_obj_3 */ __pyx_t_2 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 204; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_10 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_v_w_i); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 204; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_10 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_v_w_i); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 204; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_10)); __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 204; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_1); @@ -32985,7 +33632,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_20write_enhanced(struct __pyx_obj_3 */ __pyx_t_4 = PyObject_GetAttr(__pyx_v_f, __pyx_n_s__write); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 205; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_10 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_95), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 205; __pyx_clineno = __LINE__; goto __pyx_L7_error;} + __pyx_t_10 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_97), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 205; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; @@ -33064,7 +33711,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_20write_enhanced(struct __pyx_obj_3 } /*finally:*/ { if (__pyx_t_3) { - __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_96, NULL); + __pyx_t_7 = PyObject_Call(__pyx_t_3, __pyx_k_tuple_98, NULL); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); @@ -34267,7 +34914,7 @@ static int __pyx_pw_3_sa_9TrieTable_1__cinit__(PyObject *__pyx_v_self, PyObject { static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__extended,0}; PyObject* values[1] = {0}; - values[0] = __pyx_k_97; + values[0] = __pyx_k_99; if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); @@ -35014,9 +35661,9 @@ static int __pyx_pf_3_sa_7Sampler___cinit__(struct __pyx_obj_3_sa_Sampler *__pyx __Pyx_GOTREF(__pyx_t_2); __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_98)); - PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_kp_s_98)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_98)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_100)); + PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_kp_s_100)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_100)); PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); __pyx_t_2 = 0; @@ -35041,7 +35688,7 @@ static int __pyx_pf_3_sa_7Sampler___cinit__(struct __pyx_obj_3_sa_Sampler *__pyx __pyx_t_4 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__info); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_100), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_k_tuple_102), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; @@ -35829,7 +36476,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__alignment,&__pyx_n_s__by_slack_factor,&__pyx_n_s__category,&__pyx_n_s__max_chunks,&__pyx_n_s__max_initial_size,&__pyx_n_s__max_length,&__pyx_n_s__max_nonterminals,&__pyx_n_s__max_target_chunks,&__pyx_n_s__max_target_length,&__pyx_n_s__min_gap_size,&__pyx_n_s__precompute_file,&__pyx_n_s_68,&__pyx_n_s__precompute_rank,&__pyx_n_s_101,&__pyx_n_s_102,&__pyx_n_s_69,&__pyx_n_s__train_min_gap_size,&__pyx_n_s__tight_phrases,&__pyx_n_s__use_baeza_yates,&__pyx_n_s__use_collocations,&__pyx_n_s__use_index,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__alignment,&__pyx_n_s__by_slack_factor,&__pyx_n_s__category,&__pyx_n_s__max_chunks,&__pyx_n_s__max_initial_size,&__pyx_n_s__max_length,&__pyx_n_s__max_nonterminals,&__pyx_n_s__max_target_chunks,&__pyx_n_s__max_target_length,&__pyx_n_s__min_gap_size,&__pyx_n_s__precompute_file,&__pyx_n_s_70,&__pyx_n_s__precompute_rank,&__pyx_n_s_103,&__pyx_n_s_104,&__pyx_n_s_71,&__pyx_n_s__train_min_gap_size,&__pyx_n_s__tight_phrases,&__pyx_n_s__use_baeza_yates,&__pyx_n_s__use_collocations,&__pyx_n_s__use_index,0}; PyObject* values[21] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":257 @@ -35952,7 +36599,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_ } case 11: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_68); + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_70); if (value) { values[11] = value; kw_args--; } } case 12: @@ -35962,17 +36609,17 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_ } case 13: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_101); + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_103); if (value) { values[13] = value; kw_args--; } } case 14: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_102); + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_104); if (value) { values[14] = value; kw_args--; } } case 15: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_69); + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_71); if (value) { values[15] = value; kw_args--; } } case 16: @@ -36048,7 +36695,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_ if (values[2]) { __pyx_v_category = PyBytes_AsString(values[2]); if (unlikely((!__pyx_v_category) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } else { - __pyx_v_category = ((char *)__pyx_k_103); + __pyx_v_category = ((char *)__pyx_k_105); } __pyx_v_max_chunks = values[3]; if (values[4]) { @@ -36275,7 +36922,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ * self.alignment = alignment * */ - __pyx_t_2 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_k_tuple_105), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_k_tuple_107), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_Raise(__pyx_t_2, 0, 0, 0); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; @@ -37756,9 +38403,9 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_106)); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_106)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_106)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_108)); + PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_108)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_108)); __Pyx_INCREF(__pyx_v_self->precompute_file); PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_v_self->precompute_file); __Pyx_GIVEREF(__pyx_v_self->precompute_file); @@ -37812,9 +38459,9 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __Pyx_GOTREF(__pyx_t_3); __pyx_t_5 = PyTuple_New(3); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 426; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_107)); - PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_kp_s_107)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_107)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_109)); + PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_kp_s_109)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_109)); PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_t_3); @@ -37858,9 +38505,9 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __Pyx_GOTREF(__pyx_t_4); __pyx_t_2 = PyTuple_New(3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 428; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_108)); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_108)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_108)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_110)); + PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_110)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_110)); PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_3); __Pyx_GIVEREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_2, 2, __pyx_t_4); @@ -37905,7 +38552,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __Pyx_GIVEREF(__pyx_t_2); __pyx_t_4 = 0; __pyx_t_2 = 0; - __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_109), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_111), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -37952,7 +38599,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __Pyx_GIVEREF(__pyx_t_5); __pyx_t_2 = 0; __pyx_t_5 = 0; - __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_110), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_112), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_5)); __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -37999,9 +38646,9 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __Pyx_GOTREF(__pyx_t_5); __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 434; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_111)); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_111)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_111)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_113)); + PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_113)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_113)); PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_5); __Pyx_GIVEREF(__pyx_t_5); __pyx_t_5 = 0; @@ -38153,9 +38800,9 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __Pyx_GOTREF(__pyx_t_5); __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 440; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_112)); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_112)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_112)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_114)); + PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_114)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_114)); PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_5); __Pyx_GIVEREF(__pyx_t_5); __pyx_t_5 = 0; @@ -38259,9 +38906,9 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __Pyx_GOTREF(__pyx_t_5); __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_113)); - PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_113)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_113)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_115)); + PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_115)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_115)); PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_5); __Pyx_GIVEREF(__pyx_t_5); __pyx_t_5 = 0; @@ -40981,8 +41628,8 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st * i = 0 * while i < loc.arr_high: */ - __Pyx_INCREF(((PyObject *)__pyx_kp_s_114)); - __pyx_v_result = ((PyObject *)__pyx_kp_s_114); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_116)); + __pyx_v_result = ((PyObject *)__pyx_kp_s_116); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":793 * cdef int i, j @@ -41011,7 +41658,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st * for j from i <= j < i + loc.num_subpatterns: * result = result + ("%d " %loc.arr[j]) */ - __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_115)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 795; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_117)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 795; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_v_result); __pyx_v_result = __pyx_t_2; @@ -41036,7 +41683,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st */ __pyx_t_2 = __Pyx_GetItemInt(((PyObject *)__pyx_v_loc->arr), __pyx_v_j, sizeof(int), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_18), __pyx_t_2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_t_2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_4)); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -41054,7 +41701,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st * i = i + loc.num_subpatterns * result = result + "}" */ - __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_56)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_59)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_v_result); __pyx_v_result = __pyx_t_2; @@ -41077,7 +41724,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st * return result * */ - __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_116)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 800; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_118)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 800; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_v_result); __pyx_v_result = __pyx_t_2; @@ -41194,7 +41841,7 @@ static struct __pyx_obj_3_sa_PhraseLocation *__pyx_f_3_sa_23HieroCachingRuleFact * if result is not None: * intersect_method = "precomputed" */ - __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s_117); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s_119); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); @@ -41272,9 +41919,9 @@ static struct __pyx_obj_3_sa_PhraseLocation *__pyx_f_3_sa_23HieroCachingRuleFact * else: * result = self.intersect_helper(prefix, suffix, prefix_loc, suffix_loc, MERGE) */ - __Pyx_INCREF(((PyObject *)__pyx_kp_s_118)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_120)); __Pyx_XDECREF(__pyx_v_intersect_method); - __pyx_v_intersect_method = ((PyObject *)__pyx_kp_s_118); + __pyx_v_intersect_method = ((PyObject *)__pyx_kp_s_120); goto __pyx_L5; } /*else*/ { @@ -43798,14 +44445,14 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_23input(PyObject *__pyx */ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_22input(struct __pyx_obj_3_sa_HieroCachingRuleFactory *__pyx_v_self, PyObject *__pyx_v_fwords, PyObject *__pyx_v_models) { - struct __pyx_obj_3_sa___pyx_scope_struct_2_input *__pyx_cur_scope; + struct __pyx_obj_3_sa___pyx_scope_struct_4_input *__pyx_cur_scope; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("input", 0); - __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct_2_input *)__pyx_ptype_3_sa___pyx_scope_struct_2_input->tp_new(__pyx_ptype_3_sa___pyx_scope_struct_2_input, __pyx_empty_tuple, NULL); + __pyx_cur_scope = (struct __pyx_obj_3_sa___pyx_scope_struct_4_input *)__pyx_ptype_3_sa___pyx_scope_struct_4_input->tp_new(__pyx_ptype_3_sa___pyx_scope_struct_4_input, __pyx_empty_tuple, NULL); if (unlikely(!__pyx_cur_scope)) { __Pyx_RefNannyFinishContext(); return NULL; @@ -43841,7 +44488,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_22input(struct __pyx_ob static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator2(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ { - struct __pyx_obj_3_sa___pyx_scope_struct_2_input *__pyx_cur_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct_2_input *)__pyx_generator->closure); + struct __pyx_obj_3_sa___pyx_scope_struct_4_input *__pyx_cur_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct_4_input *)__pyx_generator->closure); PyObject *__pyx_r = NULL; Py_ssize_t __pyx_t_1; PyObject *__pyx_t_2 = NULL; @@ -44879,7 +45526,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator2(__pyx_Gene * # checking whether lookup_required * if lookup_required: */ - __pyx_t_3 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_k_tuple_120), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1004; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_k_tuple_122), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1004; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -46643,7 +47290,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator2(__pyx_Gene * nodes_isteps_away_buffer[key] = frontier_nodes * */ - __pyx_t_10 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_self), __pyx_n_s_121); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_self), __pyx_n_s_123); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __pyx_t_9 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_self->min_gap_size); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); @@ -46907,9 +47554,9 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator2(__pyx_Gene __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; __pyx_t_12 = PyTuple_New(2); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1125; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_12); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_122)); - PyTuple_SET_ITEM(__pyx_t_12, 0, ((PyObject *)__pyx_kp_s_122)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_122)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_124)); + PyTuple_SET_ITEM(__pyx_t_12, 0, ((PyObject *)__pyx_kp_s_124)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_124)); PyTuple_SET_ITEM(__pyx_t_12, 1, __pyx_t_14); __Pyx_GIVEREF(__pyx_t_14); __pyx_t_14 = 0; @@ -46952,9 +47599,9 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator2(__pyx_Gene __Pyx_GOTREF(__pyx_t_14); __pyx_t_15 = PyTuple_New(2); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_123)); - PyTuple_SET_ITEM(__pyx_t_15, 0, ((PyObject *)__pyx_kp_s_123)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_123)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_125)); + PyTuple_SET_ITEM(__pyx_t_15, 0, ((PyObject *)__pyx_kp_s_125)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_125)); PyTuple_SET_ITEM(__pyx_t_15, 1, __pyx_t_14); __Pyx_GIVEREF(__pyx_t_14); __pyx_t_14 = 0; @@ -49754,8 +50401,8 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj * * for i from 0 <= i < e_sent_len: */ - __Pyx_INCREF(((PyObject *)__pyx_kp_s_42)); - __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_42); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_45)); + __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_45); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1446 * reason_for_failure = "" @@ -50129,9 +50776,9 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj * met_constraints = 0 * if self.require_aligned_chunks and num_aligned_chunks < num_chunks: */ - __Pyx_INCREF(((PyObject *)__pyx_kp_s_124)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_126)); __Pyx_DECREF(__pyx_v_reason_for_failure); - __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_124); + __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_126); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1486 * if num_aligned_chunks == 0: @@ -50167,9 +50814,9 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj * met_constraints = 0 * */ - __Pyx_INCREF(((PyObject *)__pyx_kp_s_125)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_127)); __Pyx_DECREF(__pyx_v_reason_for_failure); - __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_125); + __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_127); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1489 * if self.require_aligned_chunks and num_aligned_chunks < num_chunks: @@ -50227,9 +50874,9 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj * met_constraints = 0 * break */ - __Pyx_INCREF(((PyObject *)__pyx_kp_s_126)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_128)); __Pyx_DECREF(__pyx_v_reason_for_failure); - __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_126); + __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_128); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1496 * if f_links_low[matching.arr[matching.start+i]+chunklen[i]-f_sent_start] == -1: @@ -50269,9 +50916,9 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj * met_constraints = 0 * break */ - __Pyx_INCREF(((PyObject *)__pyx_kp_s_126)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_128)); __Pyx_DECREF(__pyx_v_reason_for_failure); - __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_126); + __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_128); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1500 * if f_links_low[matching.arr[matching.start+i+1]-1-f_sent_start] == -1: @@ -50483,9 +51130,9 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj * else: * gap_start = 1 */ - __Pyx_INCREF(((PyObject *)__pyx_kp_s_127)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_129)); __Pyx_DECREF(__pyx_v_reason_for_failure); - __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_127); + __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_129); goto __pyx_L38; } __pyx_L38:; @@ -50681,9 +51328,9 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj * else: * if self.tight_phrases and f_links_low[f_high-1] == -1: */ - __Pyx_INCREF(((PyObject *)__pyx_kp_s_128)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_130)); __Pyx_DECREF(__pyx_v_reason_for_failure); - __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_128); + __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_130); goto __pyx_L45; } __pyx_L45:; @@ -50801,7 +51448,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_GIVEREF(__pyx_t_1); __pyx_t_10 = 0; __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_129), ((PyObject *)__pyx_t_2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1565; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_131), ((PyObject *)__pyx_t_2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1565; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_v_reason_for_failure); @@ -51117,7 +51764,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_2 = 0; __pyx_t_1 = 0; __pyx_t_14 = 0; - __pyx_t_14 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_130), ((PyObject *)__pyx_t_15)); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1595; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_132), ((PyObject *)__pyx_t_15)); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1595; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_14)); __Pyx_DECREF(((PyObject *)__pyx_t_15)); __pyx_t_15 = 0; __Pyx_DECREF(__pyx_v_reason_for_failure); @@ -53335,9 +53982,9 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj * * free(sent_links) */ - __Pyx_INCREF(((PyObject *)__pyx_kp_s_131)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_133)); __Pyx_DECREF(__pyx_v_reason_for_failure); - __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_131); + __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_133); } __pyx_L34:; goto __pyx_L33; @@ -54631,10 +55278,12 @@ static PyMethodDef __pyx_methods_3_sa_DataArray[] = { {__Pyx_NAMESTR("get_word"), (PyCFunction)__pyx_pw_3_sa_9DataArray_13get_word, METH_O, __Pyx_DOCSTR(0)}, {__Pyx_NAMESTR("write_text"), (PyCFunction)__pyx_pw_3_sa_9DataArray_15write_text, METH_O, __Pyx_DOCSTR(0)}, {__Pyx_NAMESTR("read_text"), (PyCFunction)__pyx_pw_3_sa_9DataArray_17read_text, METH_O, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("read_binary"), (PyCFunction)__pyx_pw_3_sa_9DataArray_19read_binary, METH_O, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("write_binary"), (PyCFunction)__pyx_pw_3_sa_9DataArray_21write_binary, METH_O, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("write_enhanced_handle"), (PyCFunction)__pyx_pw_3_sa_9DataArray_23write_enhanced_handle, METH_O, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("write_enhanced"), (PyCFunction)__pyx_pw_3_sa_9DataArray_25write_enhanced, METH_O, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("read_bitext"), (PyCFunction)__pyx_pw_3_sa_9DataArray_19read_bitext, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("read_text_data"), (PyCFunction)__pyx_pw_3_sa_9DataArray_21read_text_data, METH_O, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("read_binary"), (PyCFunction)__pyx_pw_3_sa_9DataArray_23read_binary, METH_O, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("write_binary"), (PyCFunction)__pyx_pw_3_sa_9DataArray_25write_binary, METH_O, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("write_enhanced_handle"), (PyCFunction)__pyx_pw_3_sa_9DataArray_27write_enhanced_handle, METH_O, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("write_enhanced"), (PyCFunction)__pyx_pw_3_sa_9DataArray_29write_enhanced, METH_O, __Pyx_DOCSTR(0)}, {0, 0, 0, 0} }; @@ -56869,7 +57518,7 @@ static PyMethodDef __pyx_methods_3_sa_SuffixArray[] = { {__Pyx_NAMESTR("getSentId"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_5getSentId, METH_O, __Pyx_DOCSTR(0)}, {__Pyx_NAMESTR("getSent"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_7getSent, METH_O, __Pyx_DOCSTR(0)}, {__Pyx_NAMESTR("getSentPos"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_9getSentPos, METH_O, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("read_text"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_11read_text, METH_O, __Pyx_DOCSTR(__pyx_doc_3_sa_11SuffixArray_10read_text)}, + {__Pyx_NAMESTR("read_text"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_11read_text, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3_sa_11SuffixArray_10read_text)}, {__Pyx_NAMESTR("q3sort"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_13q3sort, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3_sa_11SuffixArray_12q3sort)}, {__Pyx_NAMESTR("write_text"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_15write_text, METH_O, __Pyx_DOCSTR(0)}, {__Pyx_NAMESTR("read_binary"), (PyCFunction)__pyx_pw_3_sa_11SuffixArray_17read_binary, METH_O, __Pyx_DOCSTR(0)}, @@ -58040,7 +58689,517 @@ static PyNumberMethods __pyx_tp_as_number_Sampler = { #endif }; -static PySequenceMethods __pyx_tp_as_sequence_Sampler = { +static PySequenceMethods __pyx_tp_as_sequence_Sampler = { + 0, /*sq_length*/ + 0, /*sq_concat*/ + 0, /*sq_repeat*/ + 0, /*sq_item*/ + 0, /*sq_slice*/ + 0, /*sq_ass_item*/ + 0, /*sq_ass_slice*/ + 0, /*sq_contains*/ + 0, /*sq_inplace_concat*/ + 0, /*sq_inplace_repeat*/ +}; + +static PyMappingMethods __pyx_tp_as_mapping_Sampler = { + 0, /*mp_length*/ + 0, /*mp_subscript*/ + 0, /*mp_ass_subscript*/ +}; + +static PyBufferProcs __pyx_tp_as_buffer_Sampler = { + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getreadbuffer*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getwritebuffer*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getsegcount*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getcharbuffer*/ + #endif + #if PY_VERSION_HEX >= 0x02060000 + 0, /*bf_getbuffer*/ + #endif + #if PY_VERSION_HEX >= 0x02060000 + 0, /*bf_releasebuffer*/ + #endif +}; + +static PyTypeObject __pyx_type_3_sa_Sampler = { + PyVarObject_HEAD_INIT(0, 0) + __Pyx_NAMESTR("_sa.Sampler"), /*tp_name*/ + sizeof(struct __pyx_obj_3_sa_Sampler), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + __pyx_tp_dealloc_3_sa_Sampler, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + #if PY_MAJOR_VERSION < 3 + 0, /*tp_compare*/ + #else + 0, /*reserved*/ + #endif + 0, /*tp_repr*/ + &__pyx_tp_as_number_Sampler, /*tp_as_number*/ + &__pyx_tp_as_sequence_Sampler, /*tp_as_sequence*/ + &__pyx_tp_as_mapping_Sampler, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + &__pyx_tp_as_buffer_Sampler, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + __Pyx_DOCSTR("A Sampler implements a logic for choosing\n samples from a population range"), /*tp_doc*/ + __pyx_tp_traverse_3_sa_Sampler, /*tp_traverse*/ + __pyx_tp_clear_3_sa_Sampler, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + __pyx_methods_3_sa_Sampler, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + 0, /*tp_init*/ + 0, /*tp_alloc*/ + __pyx_tp_new_3_sa_Sampler, /*tp_new*/ + 0, /*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, /*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + #if PY_VERSION_HEX >= 0x02060000 + 0, /*tp_version_tag*/ + #endif +}; +static struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory __pyx_vtable_3_sa_HieroCachingRuleFactory; + +static PyObject *__pyx_tp_new_3_sa_HieroCachingRuleFactory(PyTypeObject *t, PyObject *a, PyObject *k) { + struct __pyx_obj_3_sa_HieroCachingRuleFactory *p; + PyObject *o = (*t->tp_alloc)(t, 0); + if (!o) return 0; + p = ((struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o); + p->__pyx_vtab = __pyx_vtabptr_3_sa_HieroCachingRuleFactory; + p->rules = ((struct __pyx_obj_3_sa_TrieTable *)Py_None); Py_INCREF(Py_None); + p->sampler = ((struct __pyx_obj_3_sa_Sampler *)Py_None); Py_INCREF(Py_None); + p->precomputed_index = Py_None; Py_INCREF(Py_None); + p->precomputed_collocations = Py_None; Py_INCREF(Py_None); + p->precompute_file = Py_None; Py_INCREF(Py_None); + p->max_rank = Py_None; Py_INCREF(Py_None); + p->prev_norm_prefix = Py_None; Py_INCREF(Py_None); + p->fsa = ((struct __pyx_obj_3_sa_SuffixArray *)Py_None); Py_INCREF(Py_None); + p->fda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None); + p->eda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None); + p->alignment = ((struct __pyx_obj_3_sa_Alignment *)Py_None); Py_INCREF(Py_None); + p->eid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); + p->fid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); + p->findexes = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); + p->findexes1 = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); + if (__pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(o, a, k) < 0) { + Py_DECREF(o); o = 0; + } + return o; +} + +static void __pyx_tp_dealloc_3_sa_HieroCachingRuleFactory(PyObject *o) { + struct __pyx_obj_3_sa_HieroCachingRuleFactory *p = (struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o; + Py_XDECREF(((PyObject *)p->rules)); + Py_XDECREF(((PyObject *)p->sampler)); + Py_XDECREF(p->precomputed_index); + Py_XDECREF(p->precomputed_collocations); + Py_XDECREF(p->precompute_file); + Py_XDECREF(p->max_rank); + Py_XDECREF(p->prev_norm_prefix); + Py_XDECREF(((PyObject *)p->fsa)); + Py_XDECREF(((PyObject *)p->fda)); + Py_XDECREF(((PyObject *)p->eda)); + Py_XDECREF(((PyObject *)p->alignment)); + Py_XDECREF(((PyObject *)p->eid2symid)); + Py_XDECREF(((PyObject *)p->fid2symid)); + Py_XDECREF(((PyObject *)p->findexes)); + Py_XDECREF(((PyObject *)p->findexes1)); + (*Py_TYPE(o)->tp_free)(o); +} + +static int __pyx_tp_traverse_3_sa_HieroCachingRuleFactory(PyObject *o, visitproc v, void *a) { + int e; + struct __pyx_obj_3_sa_HieroCachingRuleFactory *p = (struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o; + if (p->rules) { + e = (*v)(((PyObject*)p->rules), a); if (e) return e; + } + if (p->sampler) { + e = (*v)(((PyObject*)p->sampler), a); if (e) return e; + } + if (p->precomputed_index) { + e = (*v)(p->precomputed_index, a); if (e) return e; + } + if (p->precomputed_collocations) { + e = (*v)(p->precomputed_collocations, a); if (e) return e; + } + if (p->precompute_file) { + e = (*v)(p->precompute_file, a); if (e) return e; + } + if (p->max_rank) { + e = (*v)(p->max_rank, a); if (e) return e; + } + if (p->prev_norm_prefix) { + e = (*v)(p->prev_norm_prefix, a); if (e) return e; + } + if (p->fsa) { + e = (*v)(((PyObject*)p->fsa), a); if (e) return e; + } + if (p->fda) { + e = (*v)(((PyObject*)p->fda), a); if (e) return e; + } + if (p->eda) { + e = (*v)(((PyObject*)p->eda), a); if (e) return e; + } + if (p->alignment) { + e = (*v)(((PyObject*)p->alignment), a); if (e) return e; + } + if (p->eid2symid) { + e = (*v)(((PyObject*)p->eid2symid), a); if (e) return e; + } + if (p->fid2symid) { + e = (*v)(((PyObject*)p->fid2symid), a); if (e) return e; + } + if (p->findexes) { + e = (*v)(((PyObject*)p->findexes), a); if (e) return e; + } + if (p->findexes1) { + e = (*v)(((PyObject*)p->findexes1), a); if (e) return e; + } + return 0; +} + +static int __pyx_tp_clear_3_sa_HieroCachingRuleFactory(PyObject *o) { + struct __pyx_obj_3_sa_HieroCachingRuleFactory *p = (struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o; + PyObject* tmp; + tmp = ((PyObject*)p->rules); + p->rules = ((struct __pyx_obj_3_sa_TrieTable *)Py_None); Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->sampler); + p->sampler = ((struct __pyx_obj_3_sa_Sampler *)Py_None); Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->precomputed_index); + p->precomputed_index = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->precomputed_collocations); + p->precomputed_collocations = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->precompute_file); + p->precompute_file = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->max_rank); + p->max_rank = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->prev_norm_prefix); + p->prev_norm_prefix = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->fsa); + p->fsa = ((struct __pyx_obj_3_sa_SuffixArray *)Py_None); Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->fda); + p->fda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->eda); + p->eda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->alignment); + p->alignment = ((struct __pyx_obj_3_sa_Alignment *)Py_None); Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->eid2symid); + p->eid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->fid2symid); + p->fid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->findexes); + p->findexes = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->findexes1); + p->findexes1 = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); + Py_XDECREF(tmp); + return 0; +} + +static PyMethodDef __pyx_methods_3_sa_HieroCachingRuleFactory[] = { + {__Pyx_NAMESTR("configure"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_3configure, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3_sa_23HieroCachingRuleFactory_2configure)}, + {__Pyx_NAMESTR("pattern2phrase"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_5pattern2phrase, METH_O, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("pattern2phrase_plus"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_7pattern2phrase_plus, METH_O, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("precompute"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_9precompute, METH_NOARGS, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("get_precomputed_collocation"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_11get_precomputed_collocation, METH_O, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("advance"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_13advance, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("get_all_nodes_isteps_away"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_15get_all_nodes_isteps_away, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("reachable"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_17reachable, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("shortest"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_19shortest, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("get_next_states"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_21get_next_states, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("input"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_23input, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3_sa_23HieroCachingRuleFactory_22input)}, + {0, 0, 0, 0} +}; + +static PyNumberMethods __pyx_tp_as_number_HieroCachingRuleFactory = { + 0, /*nb_add*/ + 0, /*nb_subtract*/ + 0, /*nb_multiply*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_divide*/ + #endif + 0, /*nb_remainder*/ + 0, /*nb_divmod*/ + 0, /*nb_power*/ + 0, /*nb_negative*/ + 0, /*nb_positive*/ + 0, /*nb_absolute*/ + 0, /*nb_nonzero*/ + 0, /*nb_invert*/ + 0, /*nb_lshift*/ + 0, /*nb_rshift*/ + 0, /*nb_and*/ + 0, /*nb_xor*/ + 0, /*nb_or*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_coerce*/ + #endif + 0, /*nb_int*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_long*/ + #else + 0, /*reserved*/ + #endif + 0, /*nb_float*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_oct*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*nb_hex*/ + #endif + 0, /*nb_inplace_add*/ + 0, /*nb_inplace_subtract*/ + 0, /*nb_inplace_multiply*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_inplace_divide*/ + #endif + 0, /*nb_inplace_remainder*/ + 0, /*nb_inplace_power*/ + 0, /*nb_inplace_lshift*/ + 0, /*nb_inplace_rshift*/ + 0, /*nb_inplace_and*/ + 0, /*nb_inplace_xor*/ + 0, /*nb_inplace_or*/ + 0, /*nb_floor_divide*/ + 0, /*nb_true_divide*/ + 0, /*nb_inplace_floor_divide*/ + 0, /*nb_inplace_true_divide*/ + #if PY_VERSION_HEX >= 0x02050000 + 0, /*nb_index*/ + #endif +}; + +static PySequenceMethods __pyx_tp_as_sequence_HieroCachingRuleFactory = { + 0, /*sq_length*/ + 0, /*sq_concat*/ + 0, /*sq_repeat*/ + 0, /*sq_item*/ + 0, /*sq_slice*/ + 0, /*sq_ass_item*/ + 0, /*sq_ass_slice*/ + 0, /*sq_contains*/ + 0, /*sq_inplace_concat*/ + 0, /*sq_inplace_repeat*/ +}; + +static PyMappingMethods __pyx_tp_as_mapping_HieroCachingRuleFactory = { + 0, /*mp_length*/ + 0, /*mp_subscript*/ + 0, /*mp_ass_subscript*/ +}; + +static PyBufferProcs __pyx_tp_as_buffer_HieroCachingRuleFactory = { + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getreadbuffer*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getwritebuffer*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getsegcount*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getcharbuffer*/ + #endif + #if PY_VERSION_HEX >= 0x02060000 + 0, /*bf_getbuffer*/ + #endif + #if PY_VERSION_HEX >= 0x02060000 + 0, /*bf_releasebuffer*/ + #endif +}; + +static PyTypeObject __pyx_type_3_sa_HieroCachingRuleFactory = { + PyVarObject_HEAD_INIT(0, 0) + __Pyx_NAMESTR("_sa.HieroCachingRuleFactory"), /*tp_name*/ + sizeof(struct __pyx_obj_3_sa_HieroCachingRuleFactory), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + __pyx_tp_dealloc_3_sa_HieroCachingRuleFactory, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + #if PY_MAJOR_VERSION < 3 + 0, /*tp_compare*/ + #else + 0, /*reserved*/ + #endif + 0, /*tp_repr*/ + &__pyx_tp_as_number_HieroCachingRuleFactory, /*tp_as_number*/ + &__pyx_tp_as_sequence_HieroCachingRuleFactory, /*tp_as_sequence*/ + &__pyx_tp_as_mapping_HieroCachingRuleFactory, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + &__pyx_tp_as_buffer_HieroCachingRuleFactory, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + __Pyx_DOCSTR("This RuleFactory implements a caching \n method using TrieTable, which makes phrase\n generation somewhat speedier -- phrases only\n need to be extracted once (however, it is\n quite possible they need to be scored \n for each input sentence, for contextual models)"), /*tp_doc*/ + __pyx_tp_traverse_3_sa_HieroCachingRuleFactory, /*tp_traverse*/ + __pyx_tp_clear_3_sa_HieroCachingRuleFactory, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + __pyx_methods_3_sa_HieroCachingRuleFactory, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + 0, /*tp_init*/ + 0, /*tp_alloc*/ + __pyx_tp_new_3_sa_HieroCachingRuleFactory, /*tp_new*/ + 0, /*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, /*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + #if PY_VERSION_HEX >= 0x02060000 + 0, /*tp_version_tag*/ + #endif +}; + +static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct__read_bitext(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { + struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *p; + PyObject *o = (*t->tp_alloc)(t, 0); + if (!o) return 0; + p = ((struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *)o); + p->__pyx_v_fp = 0; + return o; +} + +static void __pyx_tp_dealloc_3_sa___pyx_scope_struct__read_bitext(PyObject *o) { + struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *p = (struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *)o; + Py_XDECREF(p->__pyx_v_fp); + (*Py_TYPE(o)->tp_free)(o); +} + +static int __pyx_tp_traverse_3_sa___pyx_scope_struct__read_bitext(PyObject *o, visitproc v, void *a) { + int e; + struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *p = (struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *)o; + if (p->__pyx_v_fp) { + e = (*v)(p->__pyx_v_fp, a); if (e) return e; + } + return 0; +} + +static int __pyx_tp_clear_3_sa___pyx_scope_struct__read_bitext(PyObject *o) { + struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *p = (struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *)o; + PyObject* tmp; + tmp = ((PyObject*)p->__pyx_v_fp); + p->__pyx_v_fp = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + return 0; +} + +static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct__read_bitext[] = { + {0, 0, 0, 0} +}; + +static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct__read_bitext = { + 0, /*nb_add*/ + 0, /*nb_subtract*/ + 0, /*nb_multiply*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_divide*/ + #endif + 0, /*nb_remainder*/ + 0, /*nb_divmod*/ + 0, /*nb_power*/ + 0, /*nb_negative*/ + 0, /*nb_positive*/ + 0, /*nb_absolute*/ + 0, /*nb_nonzero*/ + 0, /*nb_invert*/ + 0, /*nb_lshift*/ + 0, /*nb_rshift*/ + 0, /*nb_and*/ + 0, /*nb_xor*/ + 0, /*nb_or*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_coerce*/ + #endif + 0, /*nb_int*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_long*/ + #else + 0, /*reserved*/ + #endif + 0, /*nb_float*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_oct*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*nb_hex*/ + #endif + 0, /*nb_inplace_add*/ + 0, /*nb_inplace_subtract*/ + 0, /*nb_inplace_multiply*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_inplace_divide*/ + #endif + 0, /*nb_inplace_remainder*/ + 0, /*nb_inplace_power*/ + 0, /*nb_inplace_lshift*/ + 0, /*nb_inplace_rshift*/ + 0, /*nb_inplace_and*/ + 0, /*nb_inplace_xor*/ + 0, /*nb_inplace_or*/ + 0, /*nb_floor_divide*/ + 0, /*nb_true_divide*/ + 0, /*nb_inplace_floor_divide*/ + 0, /*nb_inplace_true_divide*/ + #if PY_VERSION_HEX >= 0x02050000 + 0, /*nb_index*/ + #endif +}; + +static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct__read_bitext = { 0, /*sq_length*/ 0, /*sq_concat*/ 0, /*sq_repeat*/ @@ -58053,13 +59212,13 @@ static PySequenceMethods __pyx_tp_as_sequence_Sampler = { 0, /*sq_inplace_repeat*/ }; -static PyMappingMethods __pyx_tp_as_mapping_Sampler = { +static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct__read_bitext = { 0, /*mp_length*/ 0, /*mp_subscript*/ 0, /*mp_ass_subscript*/ }; -static PyBufferProcs __pyx_tp_as_buffer_Sampler = { +static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct__read_bitext = { #if PY_MAJOR_VERSION < 3 0, /*bf_getreadbuffer*/ #endif @@ -58080,12 +59239,12 @@ static PyBufferProcs __pyx_tp_as_buffer_Sampler = { #endif }; -static PyTypeObject __pyx_type_3_sa_Sampler = { +static PyTypeObject __pyx_type_3_sa___pyx_scope_struct__read_bitext = { PyVarObject_HEAD_INIT(0, 0) - __Pyx_NAMESTR("_sa.Sampler"), /*tp_name*/ - sizeof(struct __pyx_obj_3_sa_Sampler), /*tp_basicsize*/ + __Pyx_NAMESTR("_sa.__pyx_scope_struct__read_bitext"), /*tp_name*/ + sizeof(struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext), /*tp_basicsize*/ 0, /*tp_itemsize*/ - __pyx_tp_dealloc_3_sa_Sampler, /*tp_dealloc*/ + __pyx_tp_dealloc_3_sa___pyx_scope_struct__read_bitext, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ @@ -58095,24 +59254,24 @@ static PyTypeObject __pyx_type_3_sa_Sampler = { 0, /*reserved*/ #endif 0, /*tp_repr*/ - &__pyx_tp_as_number_Sampler, /*tp_as_number*/ - &__pyx_tp_as_sequence_Sampler, /*tp_as_sequence*/ - &__pyx_tp_as_mapping_Sampler, /*tp_as_mapping*/ + &__pyx_tp_as_number___pyx_scope_struct__read_bitext, /*tp_as_number*/ + &__pyx_tp_as_sequence___pyx_scope_struct__read_bitext, /*tp_as_sequence*/ + &__pyx_tp_as_mapping___pyx_scope_struct__read_bitext, /*tp_as_mapping*/ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ - &__pyx_tp_as_buffer_Sampler, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - __Pyx_DOCSTR("A Sampler implements a logic for choosing\n samples from a population range"), /*tp_doc*/ - __pyx_tp_traverse_3_sa_Sampler, /*tp_traverse*/ - __pyx_tp_clear_3_sa_Sampler, /*tp_clear*/ + &__pyx_tp_as_buffer___pyx_scope_struct__read_bitext, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + 0, /*tp_doc*/ + __pyx_tp_traverse_3_sa___pyx_scope_struct__read_bitext, /*tp_traverse*/ + __pyx_tp_clear_3_sa___pyx_scope_struct__read_bitext, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ 0, /*tp_iter*/ 0, /*tp_iternext*/ - __pyx_methods_3_sa_Sampler, /*tp_methods*/ + __pyx_methods_3_sa___pyx_scope_struct__read_bitext, /*tp_methods*/ 0, /*tp_members*/ 0, /*tp_getset*/ 0, /*tp_base*/ @@ -58122,7 +59281,7 @@ static PyTypeObject __pyx_type_3_sa_Sampler = { 0, /*tp_dictoffset*/ 0, /*tp_init*/ 0, /*tp_alloc*/ - __pyx_tp_new_3_sa_Sampler, /*tp_new*/ + __pyx_tp_new_3_sa___pyx_scope_struct__read_bitext, /*tp_new*/ 0, /*tp_free*/ 0, /*tp_is_gc*/ 0, /*tp_bases*/ @@ -58135,173 +59294,61 @@ static PyTypeObject __pyx_type_3_sa_Sampler = { 0, /*tp_version_tag*/ #endif }; -static struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory __pyx_vtable_3_sa_HieroCachingRuleFactory; -static PyObject *__pyx_tp_new_3_sa_HieroCachingRuleFactory(PyTypeObject *t, PyObject *a, PyObject *k) { - struct __pyx_obj_3_sa_HieroCachingRuleFactory *p; +static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct_1_genexpr(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { + struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *p; PyObject *o = (*t->tp_alloc)(t, 0); if (!o) return 0; - p = ((struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o); - p->__pyx_vtab = __pyx_vtabptr_3_sa_HieroCachingRuleFactory; - p->rules = ((struct __pyx_obj_3_sa_TrieTable *)Py_None); Py_INCREF(Py_None); - p->sampler = ((struct __pyx_obj_3_sa_Sampler *)Py_None); Py_INCREF(Py_None); - p->precomputed_index = Py_None; Py_INCREF(Py_None); - p->precomputed_collocations = Py_None; Py_INCREF(Py_None); - p->precompute_file = Py_None; Py_INCREF(Py_None); - p->max_rank = Py_None; Py_INCREF(Py_None); - p->prev_norm_prefix = Py_None; Py_INCREF(Py_None); - p->fsa = ((struct __pyx_obj_3_sa_SuffixArray *)Py_None); Py_INCREF(Py_None); - p->fda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None); - p->eda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None); - p->alignment = ((struct __pyx_obj_3_sa_Alignment *)Py_None); Py_INCREF(Py_None); - p->eid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); - p->fid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); - p->findexes = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); - p->findexes1 = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); - if (__pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(o, a, k) < 0) { - Py_DECREF(o); o = 0; - } + p = ((struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *)o); + p->__pyx_outer_scope = 0; + p->__pyx_v_line = 0; + p->__pyx_t_0 = 0; return o; } -static void __pyx_tp_dealloc_3_sa_HieroCachingRuleFactory(PyObject *o) { - struct __pyx_obj_3_sa_HieroCachingRuleFactory *p = (struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o; - Py_XDECREF(((PyObject *)p->rules)); - Py_XDECREF(((PyObject *)p->sampler)); - Py_XDECREF(p->precomputed_index); - Py_XDECREF(p->precomputed_collocations); - Py_XDECREF(p->precompute_file); - Py_XDECREF(p->max_rank); - Py_XDECREF(p->prev_norm_prefix); - Py_XDECREF(((PyObject *)p->fsa)); - Py_XDECREF(((PyObject *)p->fda)); - Py_XDECREF(((PyObject *)p->eda)); - Py_XDECREF(((PyObject *)p->alignment)); - Py_XDECREF(((PyObject *)p->eid2symid)); - Py_XDECREF(((PyObject *)p->fid2symid)); - Py_XDECREF(((PyObject *)p->findexes)); - Py_XDECREF(((PyObject *)p->findexes1)); +static void __pyx_tp_dealloc_3_sa___pyx_scope_struct_1_genexpr(PyObject *o) { + struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *p = (struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *)o; + Py_XDECREF(((PyObject *)p->__pyx_outer_scope)); + Py_XDECREF(p->__pyx_v_line); + Py_XDECREF(p->__pyx_t_0); (*Py_TYPE(o)->tp_free)(o); } -static int __pyx_tp_traverse_3_sa_HieroCachingRuleFactory(PyObject *o, visitproc v, void *a) { +static int __pyx_tp_traverse_3_sa___pyx_scope_struct_1_genexpr(PyObject *o, visitproc v, void *a) { int e; - struct __pyx_obj_3_sa_HieroCachingRuleFactory *p = (struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o; - if (p->rules) { - e = (*v)(((PyObject*)p->rules), a); if (e) return e; - } - if (p->sampler) { - e = (*v)(((PyObject*)p->sampler), a); if (e) return e; - } - if (p->precomputed_index) { - e = (*v)(p->precomputed_index, a); if (e) return e; - } - if (p->precomputed_collocations) { - e = (*v)(p->precomputed_collocations, a); if (e) return e; - } - if (p->precompute_file) { - e = (*v)(p->precompute_file, a); if (e) return e; - } - if (p->max_rank) { - e = (*v)(p->max_rank, a); if (e) return e; - } - if (p->prev_norm_prefix) { - e = (*v)(p->prev_norm_prefix, a); if (e) return e; - } - if (p->fsa) { - e = (*v)(((PyObject*)p->fsa), a); if (e) return e; - } - if (p->fda) { - e = (*v)(((PyObject*)p->fda), a); if (e) return e; - } - if (p->eda) { - e = (*v)(((PyObject*)p->eda), a); if (e) return e; - } - if (p->alignment) { - e = (*v)(((PyObject*)p->alignment), a); if (e) return e; - } - if (p->eid2symid) { - e = (*v)(((PyObject*)p->eid2symid), a); if (e) return e; + struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *p = (struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *)o; + if (p->__pyx_outer_scope) { + e = (*v)(((PyObject*)p->__pyx_outer_scope), a); if (e) return e; } - if (p->fid2symid) { - e = (*v)(((PyObject*)p->fid2symid), a); if (e) return e; + if (p->__pyx_v_line) { + e = (*v)(p->__pyx_v_line, a); if (e) return e; } - if (p->findexes) { - e = (*v)(((PyObject*)p->findexes), a); if (e) return e; - } - if (p->findexes1) { - e = (*v)(((PyObject*)p->findexes1), a); if (e) return e; + if (p->__pyx_t_0) { + e = (*v)(p->__pyx_t_0, a); if (e) return e; } return 0; } -static int __pyx_tp_clear_3_sa_HieroCachingRuleFactory(PyObject *o) { - struct __pyx_obj_3_sa_HieroCachingRuleFactory *p = (struct __pyx_obj_3_sa_HieroCachingRuleFactory *)o; +static int __pyx_tp_clear_3_sa___pyx_scope_struct_1_genexpr(PyObject *o) { + struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *p = (struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr *)o; PyObject* tmp; - tmp = ((PyObject*)p->rules); - p->rules = ((struct __pyx_obj_3_sa_TrieTable *)Py_None); Py_INCREF(Py_None); + tmp = ((PyObject*)p->__pyx_outer_scope); + p->__pyx_outer_scope = ((struct __pyx_obj_3_sa___pyx_scope_struct__read_bitext *)Py_None); Py_INCREF(Py_None); Py_XDECREF(tmp); - tmp = ((PyObject*)p->sampler); - p->sampler = ((struct __pyx_obj_3_sa_Sampler *)Py_None); Py_INCREF(Py_None); + tmp = ((PyObject*)p->__pyx_v_line); + p->__pyx_v_line = Py_None; Py_INCREF(Py_None); Py_XDECREF(tmp); - tmp = ((PyObject*)p->precomputed_index); - p->precomputed_index = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->precomputed_collocations); - p->precomputed_collocations = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->precompute_file); - p->precompute_file = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->max_rank); - p->max_rank = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->prev_norm_prefix); - p->prev_norm_prefix = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->fsa); - p->fsa = ((struct __pyx_obj_3_sa_SuffixArray *)Py_None); Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->fda); - p->fda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->eda); - p->eda = ((struct __pyx_obj_3_sa_DataArray *)Py_None); Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->alignment); - p->alignment = ((struct __pyx_obj_3_sa_Alignment *)Py_None); Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->eid2symid); - p->eid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->fid2symid); - p->fid2symid = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->findexes); - p->findexes = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->findexes1); - p->findexes1 = ((struct __pyx_obj_3_sa_IntList *)Py_None); Py_INCREF(Py_None); + tmp = ((PyObject*)p->__pyx_t_0); + p->__pyx_t_0 = Py_None; Py_INCREF(Py_None); Py_XDECREF(tmp); return 0; } -static PyMethodDef __pyx_methods_3_sa_HieroCachingRuleFactory[] = { - {__Pyx_NAMESTR("configure"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_3configure, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3_sa_23HieroCachingRuleFactory_2configure)}, - {__Pyx_NAMESTR("pattern2phrase"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_5pattern2phrase, METH_O, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("pattern2phrase_plus"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_7pattern2phrase_plus, METH_O, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("precompute"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_9precompute, METH_NOARGS, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("get_precomputed_collocation"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_11get_precomputed_collocation, METH_O, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("advance"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_13advance, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("get_all_nodes_isteps_away"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_15get_all_nodes_isteps_away, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("reachable"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_17reachable, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("shortest"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_19shortest, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("get_next_states"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_21get_next_states, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, - {__Pyx_NAMESTR("input"), (PyCFunction)__pyx_pw_3_sa_23HieroCachingRuleFactory_23input, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_3_sa_23HieroCachingRuleFactory_22input)}, +static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct_1_genexpr[] = { {0, 0, 0, 0} }; -static PyNumberMethods __pyx_tp_as_number_HieroCachingRuleFactory = { +static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_1_genexpr = { 0, /*nb_add*/ 0, /*nb_subtract*/ 0, /*nb_multiply*/ @@ -58359,7 +59406,7 @@ static PyNumberMethods __pyx_tp_as_number_HieroCachingRuleFactory = { #endif }; -static PySequenceMethods __pyx_tp_as_sequence_HieroCachingRuleFactory = { +static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_1_genexpr = { 0, /*sq_length*/ 0, /*sq_concat*/ 0, /*sq_repeat*/ @@ -58372,13 +59419,13 @@ static PySequenceMethods __pyx_tp_as_sequence_HieroCachingRuleFactory = { 0, /*sq_inplace_repeat*/ }; -static PyMappingMethods __pyx_tp_as_mapping_HieroCachingRuleFactory = { +static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct_1_genexpr = { 0, /*mp_length*/ 0, /*mp_subscript*/ 0, /*mp_ass_subscript*/ }; -static PyBufferProcs __pyx_tp_as_buffer_HieroCachingRuleFactory = { +static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_1_genexpr = { #if PY_MAJOR_VERSION < 3 0, /*bf_getreadbuffer*/ #endif @@ -58399,12 +59446,12 @@ static PyBufferProcs __pyx_tp_as_buffer_HieroCachingRuleFactory = { #endif }; -static PyTypeObject __pyx_type_3_sa_HieroCachingRuleFactory = { +static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_1_genexpr = { PyVarObject_HEAD_INIT(0, 0) - __Pyx_NAMESTR("_sa.HieroCachingRuleFactory"), /*tp_name*/ - sizeof(struct __pyx_obj_3_sa_HieroCachingRuleFactory), /*tp_basicsize*/ + __Pyx_NAMESTR("_sa.__pyx_scope_struct_1_genexpr"), /*tp_name*/ + sizeof(struct __pyx_obj_3_sa___pyx_scope_struct_1_genexpr), /*tp_basicsize*/ 0, /*tp_itemsize*/ - __pyx_tp_dealloc_3_sa_HieroCachingRuleFactory, /*tp_dealloc*/ + __pyx_tp_dealloc_3_sa___pyx_scope_struct_1_genexpr, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ @@ -58414,24 +59461,24 @@ static PyTypeObject __pyx_type_3_sa_HieroCachingRuleFactory = { 0, /*reserved*/ #endif 0, /*tp_repr*/ - &__pyx_tp_as_number_HieroCachingRuleFactory, /*tp_as_number*/ - &__pyx_tp_as_sequence_HieroCachingRuleFactory, /*tp_as_sequence*/ - &__pyx_tp_as_mapping_HieroCachingRuleFactory, /*tp_as_mapping*/ + &__pyx_tp_as_number___pyx_scope_struct_1_genexpr, /*tp_as_number*/ + &__pyx_tp_as_sequence___pyx_scope_struct_1_genexpr, /*tp_as_sequence*/ + &__pyx_tp_as_mapping___pyx_scope_struct_1_genexpr, /*tp_as_mapping*/ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ - &__pyx_tp_as_buffer_HieroCachingRuleFactory, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - __Pyx_DOCSTR("This RuleFactory implements a caching \n method using TrieTable, which makes phrase\n generation somewhat speedier -- phrases only\n need to be extracted once (however, it is\n quite possible they need to be scored \n for each input sentence, for contextual models)"), /*tp_doc*/ - __pyx_tp_traverse_3_sa_HieroCachingRuleFactory, /*tp_traverse*/ - __pyx_tp_clear_3_sa_HieroCachingRuleFactory, /*tp_clear*/ + &__pyx_tp_as_buffer___pyx_scope_struct_1_genexpr, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + 0, /*tp_doc*/ + __pyx_tp_traverse_3_sa___pyx_scope_struct_1_genexpr, /*tp_traverse*/ + __pyx_tp_clear_3_sa___pyx_scope_struct_1_genexpr, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ 0, /*tp_iter*/ 0, /*tp_iternext*/ - __pyx_methods_3_sa_HieroCachingRuleFactory, /*tp_methods*/ + __pyx_methods_3_sa___pyx_scope_struct_1_genexpr, /*tp_methods*/ 0, /*tp_members*/ 0, /*tp_getset*/ 0, /*tp_base*/ @@ -58441,7 +59488,7 @@ static PyTypeObject __pyx_type_3_sa_HieroCachingRuleFactory = { 0, /*tp_dictoffset*/ 0, /*tp_init*/ 0, /*tp_alloc*/ - __pyx_tp_new_3_sa_HieroCachingRuleFactory, /*tp_new*/ + __pyx_tp_new_3_sa___pyx_scope_struct_1_genexpr, /*tp_new*/ 0, /*tp_free*/ 0, /*tp_is_gc*/ 0, /*tp_bases*/ @@ -58455,11 +59502,11 @@ static PyTypeObject __pyx_type_3_sa_HieroCachingRuleFactory = { #endif }; -static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct__compute_stats(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *p; +static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct_2_compute_stats(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { + struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *p; PyObject *o = (*t->tp_alloc)(t, 0); if (!o) return 0; - p = ((struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *)o); + p = ((struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *)o); p->__pyx_v_ngram = 0; p->__pyx_v_ngram_start = 0; p->__pyx_v_ngram_starts = 0; @@ -58469,8 +59516,8 @@ static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct__compute_stats(PyTypeObjec return o; } -static void __pyx_tp_dealloc_3_sa___pyx_scope_struct__compute_stats(PyObject *o) { - struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *p = (struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *)o; +static void __pyx_tp_dealloc_3_sa___pyx_scope_struct_2_compute_stats(PyObject *o) { + struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *p = (struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *)o; Py_XDECREF(((PyObject *)p->__pyx_v_ngram)); Py_XDECREF(((PyObject *)p->__pyx_v_ngram_start)); Py_XDECREF(((PyObject *)p->__pyx_v_ngram_starts)); @@ -58480,9 +59527,9 @@ static void __pyx_tp_dealloc_3_sa___pyx_scope_struct__compute_stats(PyObject *o) (*Py_TYPE(o)->tp_free)(o); } -static int __pyx_tp_traverse_3_sa___pyx_scope_struct__compute_stats(PyObject *o, visitproc v, void *a) { +static int __pyx_tp_traverse_3_sa___pyx_scope_struct_2_compute_stats(PyObject *o, visitproc v, void *a) { int e; - struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *p = (struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *)o; + struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *p = (struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *)o; if (p->__pyx_v_ngram) { e = (*v)(p->__pyx_v_ngram, a); if (e) return e; } @@ -58504,8 +59551,8 @@ static int __pyx_tp_traverse_3_sa___pyx_scope_struct__compute_stats(PyObject *o, return 0; } -static int __pyx_tp_clear_3_sa___pyx_scope_struct__compute_stats(PyObject *o) { - struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *p = (struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats *)o; +static int __pyx_tp_clear_3_sa___pyx_scope_struct_2_compute_stats(PyObject *o) { + struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *p = (struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats *)o; PyObject* tmp; tmp = ((PyObject*)p->__pyx_v_ngram); p->__pyx_v_ngram = ((PyObject*)Py_None); Py_INCREF(Py_None); @@ -58528,11 +59575,11 @@ static int __pyx_tp_clear_3_sa___pyx_scope_struct__compute_stats(PyObject *o) { return 0; } -static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct__compute_stats[] = { +static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct_2_compute_stats[] = { {0, 0, 0, 0} }; -static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct__compute_stats = { +static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_2_compute_stats = { 0, /*nb_add*/ 0, /*nb_subtract*/ 0, /*nb_multiply*/ @@ -58590,7 +59637,7 @@ static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct__compute_stats = { #endif }; -static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct__compute_stats = { +static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_2_compute_stats = { 0, /*sq_length*/ 0, /*sq_concat*/ 0, /*sq_repeat*/ @@ -58603,13 +59650,13 @@ static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct__compute_stats 0, /*sq_inplace_repeat*/ }; -static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct__compute_stats = { +static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct_2_compute_stats = { 0, /*mp_length*/ 0, /*mp_subscript*/ 0, /*mp_ass_subscript*/ }; -static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct__compute_stats = { +static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_2_compute_stats = { #if PY_MAJOR_VERSION < 3 0, /*bf_getreadbuffer*/ #endif @@ -58630,12 +59677,12 @@ static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct__compute_stats = { #endif }; -static PyTypeObject __pyx_type_3_sa___pyx_scope_struct__compute_stats = { +static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_2_compute_stats = { PyVarObject_HEAD_INIT(0, 0) - __Pyx_NAMESTR("_sa.__pyx_scope_struct__compute_stats"), /*tp_name*/ - sizeof(struct __pyx_obj_3_sa___pyx_scope_struct__compute_stats), /*tp_basicsize*/ + __Pyx_NAMESTR("_sa.__pyx_scope_struct_2_compute_stats"), /*tp_name*/ + sizeof(struct __pyx_obj_3_sa___pyx_scope_struct_2_compute_stats), /*tp_basicsize*/ 0, /*tp_itemsize*/ - __pyx_tp_dealloc_3_sa___pyx_scope_struct__compute_stats, /*tp_dealloc*/ + __pyx_tp_dealloc_3_sa___pyx_scope_struct_2_compute_stats, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ @@ -58645,24 +59692,24 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct__compute_stats = { 0, /*reserved*/ #endif 0, /*tp_repr*/ - &__pyx_tp_as_number___pyx_scope_struct__compute_stats, /*tp_as_number*/ - &__pyx_tp_as_sequence___pyx_scope_struct__compute_stats, /*tp_as_sequence*/ - &__pyx_tp_as_mapping___pyx_scope_struct__compute_stats, /*tp_as_mapping*/ + &__pyx_tp_as_number___pyx_scope_struct_2_compute_stats, /*tp_as_number*/ + &__pyx_tp_as_sequence___pyx_scope_struct_2_compute_stats, /*tp_as_sequence*/ + &__pyx_tp_as_mapping___pyx_scope_struct_2_compute_stats, /*tp_as_mapping*/ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ - &__pyx_tp_as_buffer___pyx_scope_struct__compute_stats, /*tp_as_buffer*/ + &__pyx_tp_as_buffer___pyx_scope_struct_2_compute_stats, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ 0, /*tp_doc*/ - __pyx_tp_traverse_3_sa___pyx_scope_struct__compute_stats, /*tp_traverse*/ - __pyx_tp_clear_3_sa___pyx_scope_struct__compute_stats, /*tp_clear*/ + __pyx_tp_traverse_3_sa___pyx_scope_struct_2_compute_stats, /*tp_traverse*/ + __pyx_tp_clear_3_sa___pyx_scope_struct_2_compute_stats, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ 0, /*tp_iter*/ 0, /*tp_iternext*/ - __pyx_methods_3_sa___pyx_scope_struct__compute_stats, /*tp_methods*/ + __pyx_methods_3_sa___pyx_scope_struct_2_compute_stats, /*tp_methods*/ 0, /*tp_members*/ 0, /*tp_getset*/ 0, /*tp_base*/ @@ -58672,7 +59719,7 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct__compute_stats = { 0, /*tp_dictoffset*/ 0, /*tp_init*/ 0, /*tp_alloc*/ - __pyx_tp_new_3_sa___pyx_scope_struct__compute_stats, /*tp_new*/ + __pyx_tp_new_3_sa___pyx_scope_struct_2_compute_stats, /*tp_new*/ 0, /*tp_free*/ 0, /*tp_is_gc*/ 0, /*tp_bases*/ @@ -58686,32 +59733,32 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct__compute_stats = { #endif }; -static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct_1___iter__(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *p; +static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct_3___iter__(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { + struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *p; PyObject *o = (*t->tp_alloc)(t, 0); if (!o) return 0; - p = ((struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *)o); + p = ((struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *)o); p->__pyx_v_self = 0; return o; } -static void __pyx_tp_dealloc_3_sa___pyx_scope_struct_1___iter__(PyObject *o) { - struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *p = (struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *)o; +static void __pyx_tp_dealloc_3_sa___pyx_scope_struct_3___iter__(PyObject *o) { + struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *p = (struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *)o; Py_XDECREF(((PyObject *)p->__pyx_v_self)); (*Py_TYPE(o)->tp_free)(o); } -static int __pyx_tp_traverse_3_sa___pyx_scope_struct_1___iter__(PyObject *o, visitproc v, void *a) { +static int __pyx_tp_traverse_3_sa___pyx_scope_struct_3___iter__(PyObject *o, visitproc v, void *a) { int e; - struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *p = (struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *)o; + struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *p = (struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *)o; if (p->__pyx_v_self) { e = (*v)(((PyObject*)p->__pyx_v_self), a); if (e) return e; } return 0; } -static int __pyx_tp_clear_3_sa___pyx_scope_struct_1___iter__(PyObject *o) { - struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *p = (struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__ *)o; +static int __pyx_tp_clear_3_sa___pyx_scope_struct_3___iter__(PyObject *o) { + struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *p = (struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__ *)o; PyObject* tmp; tmp = ((PyObject*)p->__pyx_v_self); p->__pyx_v_self = ((struct __pyx_obj_3_sa_Phrase *)Py_None); Py_INCREF(Py_None); @@ -58719,11 +59766,11 @@ static int __pyx_tp_clear_3_sa___pyx_scope_struct_1___iter__(PyObject *o) { return 0; } -static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct_1___iter__[] = { +static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct_3___iter__[] = { {0, 0, 0, 0} }; -static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_1___iter__ = { +static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_3___iter__ = { 0, /*nb_add*/ 0, /*nb_subtract*/ 0, /*nb_multiply*/ @@ -58781,7 +59828,7 @@ static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_1___iter__ = { #endif }; -static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_1___iter__ = { +static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_3___iter__ = { 0, /*sq_length*/ 0, /*sq_concat*/ 0, /*sq_repeat*/ @@ -58794,13 +59841,13 @@ static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_1___iter__ = { 0, /*sq_inplace_repeat*/ }; -static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct_1___iter__ = { +static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct_3___iter__ = { 0, /*mp_length*/ 0, /*mp_subscript*/ 0, /*mp_ass_subscript*/ }; -static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_1___iter__ = { +static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_3___iter__ = { #if PY_MAJOR_VERSION < 3 0, /*bf_getreadbuffer*/ #endif @@ -58821,12 +59868,12 @@ static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_1___iter__ = { #endif }; -static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_1___iter__ = { +static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_3___iter__ = { PyVarObject_HEAD_INIT(0, 0) - __Pyx_NAMESTR("_sa.__pyx_scope_struct_1___iter__"), /*tp_name*/ - sizeof(struct __pyx_obj_3_sa___pyx_scope_struct_1___iter__), /*tp_basicsize*/ + __Pyx_NAMESTR("_sa.__pyx_scope_struct_3___iter__"), /*tp_name*/ + sizeof(struct __pyx_obj_3_sa___pyx_scope_struct_3___iter__), /*tp_basicsize*/ 0, /*tp_itemsize*/ - __pyx_tp_dealloc_3_sa___pyx_scope_struct_1___iter__, /*tp_dealloc*/ + __pyx_tp_dealloc_3_sa___pyx_scope_struct_3___iter__, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ @@ -58836,24 +59883,24 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_1___iter__ = { 0, /*reserved*/ #endif 0, /*tp_repr*/ - &__pyx_tp_as_number___pyx_scope_struct_1___iter__, /*tp_as_number*/ - &__pyx_tp_as_sequence___pyx_scope_struct_1___iter__, /*tp_as_sequence*/ - &__pyx_tp_as_mapping___pyx_scope_struct_1___iter__, /*tp_as_mapping*/ + &__pyx_tp_as_number___pyx_scope_struct_3___iter__, /*tp_as_number*/ + &__pyx_tp_as_sequence___pyx_scope_struct_3___iter__, /*tp_as_sequence*/ + &__pyx_tp_as_mapping___pyx_scope_struct_3___iter__, /*tp_as_mapping*/ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ - &__pyx_tp_as_buffer___pyx_scope_struct_1___iter__, /*tp_as_buffer*/ + &__pyx_tp_as_buffer___pyx_scope_struct_3___iter__, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ 0, /*tp_doc*/ - __pyx_tp_traverse_3_sa___pyx_scope_struct_1___iter__, /*tp_traverse*/ - __pyx_tp_clear_3_sa___pyx_scope_struct_1___iter__, /*tp_clear*/ + __pyx_tp_traverse_3_sa___pyx_scope_struct_3___iter__, /*tp_traverse*/ + __pyx_tp_clear_3_sa___pyx_scope_struct_3___iter__, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ 0, /*tp_iter*/ 0, /*tp_iternext*/ - __pyx_methods_3_sa___pyx_scope_struct_1___iter__, /*tp_methods*/ + __pyx_methods_3_sa___pyx_scope_struct_3___iter__, /*tp_methods*/ 0, /*tp_members*/ 0, /*tp_getset*/ 0, /*tp_base*/ @@ -58863,7 +59910,7 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_1___iter__ = { 0, /*tp_dictoffset*/ 0, /*tp_init*/ 0, /*tp_alloc*/ - __pyx_tp_new_3_sa___pyx_scope_struct_1___iter__, /*tp_new*/ + __pyx_tp_new_3_sa___pyx_scope_struct_3___iter__, /*tp_new*/ 0, /*tp_free*/ 0, /*tp_is_gc*/ 0, /*tp_bases*/ @@ -58877,11 +59924,11 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_1___iter__ = { #endif }; -static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct_2_input(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - struct __pyx_obj_3_sa___pyx_scope_struct_2_input *p; +static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct_4_input(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { + struct __pyx_obj_3_sa___pyx_scope_struct_4_input *p; PyObject *o = (*t->tp_alloc)(t, 0); if (!o) return 0; - p = ((struct __pyx_obj_3_sa___pyx_scope_struct_2_input *)o); + p = ((struct __pyx_obj_3_sa___pyx_scope_struct_4_input *)o); p->__pyx_v_alignment = 0; p->__pyx_v_als = 0; p->__pyx_v_alslist = 0; @@ -58935,8 +59982,8 @@ static PyObject *__pyx_tp_new_3_sa___pyx_scope_struct_2_input(PyTypeObject *t, C return o; } -static void __pyx_tp_dealloc_3_sa___pyx_scope_struct_2_input(PyObject *o) { - struct __pyx_obj_3_sa___pyx_scope_struct_2_input *p = (struct __pyx_obj_3_sa___pyx_scope_struct_2_input *)o; +static void __pyx_tp_dealloc_3_sa___pyx_scope_struct_4_input(PyObject *o) { + struct __pyx_obj_3_sa___pyx_scope_struct_4_input *p = (struct __pyx_obj_3_sa___pyx_scope_struct_4_input *)o; Py_XDECREF(p->__pyx_v_alignment); Py_XDECREF(p->__pyx_v_als); Py_XDECREF(p->__pyx_v_alslist); @@ -58990,9 +60037,9 @@ static void __pyx_tp_dealloc_3_sa___pyx_scope_struct_2_input(PyObject *o) { (*Py_TYPE(o)->tp_free)(o); } -static int __pyx_tp_traverse_3_sa___pyx_scope_struct_2_input(PyObject *o, visitproc v, void *a) { +static int __pyx_tp_traverse_3_sa___pyx_scope_struct_4_input(PyObject *o, visitproc v, void *a) { int e; - struct __pyx_obj_3_sa___pyx_scope_struct_2_input *p = (struct __pyx_obj_3_sa___pyx_scope_struct_2_input *)o; + struct __pyx_obj_3_sa___pyx_scope_struct_4_input *p = (struct __pyx_obj_3_sa___pyx_scope_struct_4_input *)o; if (p->__pyx_v_alignment) { e = (*v)(p->__pyx_v_alignment, a); if (e) return e; } @@ -59146,8 +60193,8 @@ static int __pyx_tp_traverse_3_sa___pyx_scope_struct_2_input(PyObject *o, visitp return 0; } -static int __pyx_tp_clear_3_sa___pyx_scope_struct_2_input(PyObject *o) { - struct __pyx_obj_3_sa___pyx_scope_struct_2_input *p = (struct __pyx_obj_3_sa___pyx_scope_struct_2_input *)o; +static int __pyx_tp_clear_3_sa___pyx_scope_struct_4_input(PyObject *o) { + struct __pyx_obj_3_sa___pyx_scope_struct_4_input *p = (struct __pyx_obj_3_sa___pyx_scope_struct_4_input *)o; PyObject* tmp; tmp = ((PyObject*)p->__pyx_v_alignment); p->__pyx_v_alignment = Py_None; Py_INCREF(Py_None); @@ -59302,11 +60349,11 @@ static int __pyx_tp_clear_3_sa___pyx_scope_struct_2_input(PyObject *o) { return 0; } -static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct_2_input[] = { +static PyMethodDef __pyx_methods_3_sa___pyx_scope_struct_4_input[] = { {0, 0, 0, 0} }; -static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_2_input = { +static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_4_input = { 0, /*nb_add*/ 0, /*nb_subtract*/ 0, /*nb_multiply*/ @@ -59364,7 +60411,7 @@ static PyNumberMethods __pyx_tp_as_number___pyx_scope_struct_2_input = { #endif }; -static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_2_input = { +static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_4_input = { 0, /*sq_length*/ 0, /*sq_concat*/ 0, /*sq_repeat*/ @@ -59377,13 +60424,13 @@ static PySequenceMethods __pyx_tp_as_sequence___pyx_scope_struct_2_input = { 0, /*sq_inplace_repeat*/ }; -static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct_2_input = { +static PyMappingMethods __pyx_tp_as_mapping___pyx_scope_struct_4_input = { 0, /*mp_length*/ 0, /*mp_subscript*/ 0, /*mp_ass_subscript*/ }; -static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_2_input = { +static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_4_input = { #if PY_MAJOR_VERSION < 3 0, /*bf_getreadbuffer*/ #endif @@ -59404,12 +60451,12 @@ static PyBufferProcs __pyx_tp_as_buffer___pyx_scope_struct_2_input = { #endif }; -static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_2_input = { +static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_4_input = { PyVarObject_HEAD_INIT(0, 0) - __Pyx_NAMESTR("_sa.__pyx_scope_struct_2_input"), /*tp_name*/ - sizeof(struct __pyx_obj_3_sa___pyx_scope_struct_2_input), /*tp_basicsize*/ + __Pyx_NAMESTR("_sa.__pyx_scope_struct_4_input"), /*tp_name*/ + sizeof(struct __pyx_obj_3_sa___pyx_scope_struct_4_input), /*tp_basicsize*/ 0, /*tp_itemsize*/ - __pyx_tp_dealloc_3_sa___pyx_scope_struct_2_input, /*tp_dealloc*/ + __pyx_tp_dealloc_3_sa___pyx_scope_struct_4_input, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ @@ -59419,24 +60466,24 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_2_input = { 0, /*reserved*/ #endif 0, /*tp_repr*/ - &__pyx_tp_as_number___pyx_scope_struct_2_input, /*tp_as_number*/ - &__pyx_tp_as_sequence___pyx_scope_struct_2_input, /*tp_as_sequence*/ - &__pyx_tp_as_mapping___pyx_scope_struct_2_input, /*tp_as_mapping*/ + &__pyx_tp_as_number___pyx_scope_struct_4_input, /*tp_as_number*/ + &__pyx_tp_as_sequence___pyx_scope_struct_4_input, /*tp_as_sequence*/ + &__pyx_tp_as_mapping___pyx_scope_struct_4_input, /*tp_as_mapping*/ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ - &__pyx_tp_as_buffer___pyx_scope_struct_2_input, /*tp_as_buffer*/ + &__pyx_tp_as_buffer___pyx_scope_struct_4_input, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ 0, /*tp_doc*/ - __pyx_tp_traverse_3_sa___pyx_scope_struct_2_input, /*tp_traverse*/ - __pyx_tp_clear_3_sa___pyx_scope_struct_2_input, /*tp_clear*/ + __pyx_tp_traverse_3_sa___pyx_scope_struct_4_input, /*tp_traverse*/ + __pyx_tp_clear_3_sa___pyx_scope_struct_4_input, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ 0, /*tp_iter*/ 0, /*tp_iternext*/ - __pyx_methods_3_sa___pyx_scope_struct_2_input, /*tp_methods*/ + __pyx_methods_3_sa___pyx_scope_struct_4_input, /*tp_methods*/ 0, /*tp_members*/ 0, /*tp_getset*/ 0, /*tp_base*/ @@ -59446,7 +60493,7 @@ static PyTypeObject __pyx_type_3_sa___pyx_scope_struct_2_input = { 0, /*tp_dictoffset*/ 0, /*tp_init*/ 0, /*tp_alloc*/ - __pyx_tp_new_3_sa___pyx_scope_struct_2_input, /*tp_new*/ + __pyx_tp_new_3_sa___pyx_scope_struct_4_input, /*tp_new*/ 0, /*tp_free*/ 0, /*tp_is_gc*/ 0, /*tp_bases*/ @@ -59480,11 +60527,11 @@ static struct PyModuleDef __pyx_moduledef = { static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_s_1, __pyx_k_1, sizeof(__pyx_k_1), 0, 0, 1, 0}, - {&__pyx_n_s_101, __pyx_k_101, sizeof(__pyx_k_101), 0, 0, 1, 1}, - {&__pyx_n_s_102, __pyx_k_102, sizeof(__pyx_k_102), 0, 0, 1, 1}, - {&__pyx_kp_s_104, __pyx_k_104, sizeof(__pyx_k_104), 0, 0, 1, 0}, + {&__pyx_kp_s_100, __pyx_k_100, sizeof(__pyx_k_100), 0, 0, 1, 0}, + {&__pyx_kp_s_101, __pyx_k_101, sizeof(__pyx_k_101), 0, 0, 1, 0}, + {&__pyx_n_s_103, __pyx_k_103, sizeof(__pyx_k_103), 0, 0, 1, 1}, + {&__pyx_n_s_104, __pyx_k_104, sizeof(__pyx_k_104), 0, 0, 1, 1}, {&__pyx_kp_s_106, __pyx_k_106, sizeof(__pyx_k_106), 0, 0, 1, 0}, - {&__pyx_kp_s_107, __pyx_k_107, sizeof(__pyx_k_107), 0, 0, 1, 0}, {&__pyx_kp_s_108, __pyx_k_108, sizeof(__pyx_k_108), 0, 0, 1, 0}, {&__pyx_kp_s_109, __pyx_k_109, sizeof(__pyx_k_109), 0, 0, 1, 0}, {&__pyx_kp_s_110, __pyx_k_110, sizeof(__pyx_k_110), 0, 0, 1, 0}, @@ -59494,12 +60541,12 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_s_114, __pyx_k_114, sizeof(__pyx_k_114), 0, 0, 1, 0}, {&__pyx_kp_s_115, __pyx_k_115, sizeof(__pyx_k_115), 0, 0, 1, 0}, {&__pyx_kp_s_116, __pyx_k_116, sizeof(__pyx_k_116), 0, 0, 1, 0}, - {&__pyx_n_s_117, __pyx_k_117, sizeof(__pyx_k_117), 0, 0, 1, 1}, + {&__pyx_kp_s_117, __pyx_k_117, sizeof(__pyx_k_117), 0, 0, 1, 0}, {&__pyx_kp_s_118, __pyx_k_118, sizeof(__pyx_k_118), 0, 0, 1, 0}, - {&__pyx_kp_s_119, __pyx_k_119, sizeof(__pyx_k_119), 0, 0, 1, 0}, - {&__pyx_n_s_121, __pyx_k_121, sizeof(__pyx_k_121), 0, 0, 1, 1}, - {&__pyx_kp_s_122, __pyx_k_122, sizeof(__pyx_k_122), 0, 0, 1, 0}, - {&__pyx_kp_s_123, __pyx_k_123, sizeof(__pyx_k_123), 0, 0, 1, 0}, + {&__pyx_n_s_119, __pyx_k_119, sizeof(__pyx_k_119), 0, 0, 1, 1}, + {&__pyx_kp_s_120, __pyx_k_120, sizeof(__pyx_k_120), 0, 0, 1, 0}, + {&__pyx_kp_s_121, __pyx_k_121, sizeof(__pyx_k_121), 0, 0, 1, 0}, + {&__pyx_n_s_123, __pyx_k_123, sizeof(__pyx_k_123), 0, 0, 1, 1}, {&__pyx_kp_s_124, __pyx_k_124, sizeof(__pyx_k_124), 0, 0, 1, 0}, {&__pyx_kp_s_125, __pyx_k_125, sizeof(__pyx_k_125), 0, 0, 1, 0}, {&__pyx_kp_s_126, __pyx_k_126, sizeof(__pyx_k_126), 0, 0, 1, 0}, @@ -59509,49 +60556,49 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_s_13, __pyx_k_13, sizeof(__pyx_k_13), 0, 0, 1, 0}, {&__pyx_kp_s_130, __pyx_k_130, sizeof(__pyx_k_130), 0, 0, 1, 0}, {&__pyx_kp_s_131, __pyx_k_131, sizeof(__pyx_k_131), 0, 0, 1, 0}, - {&__pyx_kp_s_134, __pyx_k_134, sizeof(__pyx_k_134), 0, 0, 1, 0}, - {&__pyx_kp_s_135, __pyx_k_135, sizeof(__pyx_k_135), 0, 0, 1, 0}, - {&__pyx_kp_s_139, __pyx_k_139, sizeof(__pyx_k_139), 0, 0, 1, 0}, + {&__pyx_kp_s_132, __pyx_k_132, sizeof(__pyx_k_132), 0, 0, 1, 0}, + {&__pyx_kp_s_133, __pyx_k_133, sizeof(__pyx_k_133), 0, 0, 1, 0}, + {&__pyx_kp_s_136, __pyx_k_136, sizeof(__pyx_k_136), 0, 0, 1, 0}, + {&__pyx_kp_s_137, __pyx_k_137, sizeof(__pyx_k_137), 0, 0, 1, 0}, {&__pyx_kp_s_14, __pyx_k_14, sizeof(__pyx_k_14), 0, 0, 1, 0}, - {&__pyx_kp_s_140, __pyx_k_140, sizeof(__pyx_k_140), 0, 0, 1, 0}, + {&__pyx_kp_s_141, __pyx_k_141, sizeof(__pyx_k_141), 0, 0, 1, 0}, + {&__pyx_kp_s_142, __pyx_k_142, sizeof(__pyx_k_142), 0, 0, 1, 0}, {&__pyx_kp_s_18, __pyx_k_18, sizeof(__pyx_k_18), 0, 0, 1, 0}, {&__pyx_kp_s_2, __pyx_k_2, sizeof(__pyx_k_2), 0, 0, 1, 0}, - {&__pyx_kp_s_22, __pyx_k_22, sizeof(__pyx_k_22), 0, 0, 1, 0}, - {&__pyx_n_s_24, __pyx_k_24, sizeof(__pyx_k_24), 0, 0, 1, 1}, - {&__pyx_kp_s_28, __pyx_k_28, sizeof(__pyx_k_28), 0, 0, 1, 0}, + {&__pyx_kp_s_21, __pyx_k_21, sizeof(__pyx_k_21), 0, 0, 1, 0}, + {&__pyx_kp_s_25, __pyx_k_25, sizeof(__pyx_k_25), 0, 0, 1, 0}, + {&__pyx_n_s_27, __pyx_k_27, sizeof(__pyx_k_27), 0, 0, 1, 1}, {&__pyx_kp_s_3, __pyx_k_3, sizeof(__pyx_k_3), 0, 0, 1, 0}, - {&__pyx_kp_s_32, __pyx_k_32, sizeof(__pyx_k_32), 0, 0, 1, 0}, - {&__pyx_kp_s_39, __pyx_k_39, sizeof(__pyx_k_39), 0, 0, 1, 0}, + {&__pyx_kp_s_31, __pyx_k_31, sizeof(__pyx_k_31), 0, 0, 1, 0}, + {&__pyx_kp_s_35, __pyx_k_35, sizeof(__pyx_k_35), 0, 0, 1, 0}, {&__pyx_kp_s_4, __pyx_k_4, sizeof(__pyx_k_4), 0, 0, 1, 0}, {&__pyx_kp_s_42, __pyx_k_42, sizeof(__pyx_k_42), 0, 0, 1, 0}, - {&__pyx_kp_s_43, __pyx_k_43, sizeof(__pyx_k_43), 0, 0, 1, 0}, {&__pyx_kp_s_45, __pyx_k_45, sizeof(__pyx_k_45), 0, 0, 1, 0}, - {&__pyx_kp_s_47, __pyx_k_47, sizeof(__pyx_k_47), 0, 0, 1, 0}, - {&__pyx_kp_s_49, __pyx_k_49, sizeof(__pyx_k_49), 0, 0, 1, 0}, + {&__pyx_kp_s_46, __pyx_k_46, sizeof(__pyx_k_46), 0, 0, 1, 0}, + {&__pyx_kp_s_48, __pyx_k_48, sizeof(__pyx_k_48), 0, 0, 1, 0}, {&__pyx_kp_s_5, __pyx_k_5, sizeof(__pyx_k_5), 0, 0, 1, 0}, - {&__pyx_kp_s_53, __pyx_k_53, sizeof(__pyx_k_53), 0, 0, 1, 0}, - {&__pyx_kp_s_55, __pyx_k_55, sizeof(__pyx_k_55), 0, 0, 1, 0}, + {&__pyx_kp_s_50, __pyx_k_50, sizeof(__pyx_k_50), 0, 0, 1, 0}, + {&__pyx_kp_s_52, __pyx_k_52, sizeof(__pyx_k_52), 0, 0, 1, 0}, {&__pyx_kp_s_56, __pyx_k_56, sizeof(__pyx_k_56), 0, 0, 1, 0}, - {&__pyx_kp_s_57, __pyx_k_57, sizeof(__pyx_k_57), 0, 0, 1, 0}, + {&__pyx_kp_s_58, __pyx_k_58, sizeof(__pyx_k_58), 0, 0, 1, 0}, {&__pyx_kp_s_59, __pyx_k_59, sizeof(__pyx_k_59), 0, 0, 1, 0}, {&__pyx_kp_s_6, __pyx_k_6, sizeof(__pyx_k_6), 0, 0, 1, 0}, - {&__pyx_kp_s_61, __pyx_k_61, sizeof(__pyx_k_61), 0, 0, 1, 0}, + {&__pyx_kp_s_60, __pyx_k_60, sizeof(__pyx_k_60), 0, 0, 1, 0}, {&__pyx_kp_s_62, __pyx_k_62, sizeof(__pyx_k_62), 0, 0, 1, 0}, - {&__pyx_kp_s_63, __pyx_k_63, sizeof(__pyx_k_63), 0, 0, 1, 0}, {&__pyx_kp_s_64, __pyx_k_64, sizeof(__pyx_k_64), 0, 0, 1, 0}, {&__pyx_kp_s_65, __pyx_k_65, sizeof(__pyx_k_65), 0, 0, 1, 0}, {&__pyx_kp_s_66, __pyx_k_66, sizeof(__pyx_k_66), 0, 0, 1, 0}, {&__pyx_kp_s_67, __pyx_k_67, sizeof(__pyx_k_67), 0, 0, 1, 0}, - {&__pyx_n_s_68, __pyx_k_68, sizeof(__pyx_k_68), 0, 0, 1, 1}, - {&__pyx_n_s_69, __pyx_k_69, sizeof(__pyx_k_69), 0, 0, 1, 1}, + {&__pyx_kp_s_68, __pyx_k_68, sizeof(__pyx_k_68), 0, 0, 1, 0}, + {&__pyx_kp_s_69, __pyx_k_69, sizeof(__pyx_k_69), 0, 0, 1, 0}, {&__pyx_kp_s_7, __pyx_k_7, sizeof(__pyx_k_7), 0, 0, 1, 0}, - {&__pyx_kp_s_70, __pyx_k_70, sizeof(__pyx_k_70), 0, 0, 1, 0}, + {&__pyx_n_s_70, __pyx_k_70, sizeof(__pyx_k_70), 0, 0, 1, 1}, + {&__pyx_n_s_71, __pyx_k_71, sizeof(__pyx_k_71), 0, 0, 1, 1}, {&__pyx_kp_s_72, __pyx_k_72, sizeof(__pyx_k_72), 0, 0, 1, 0}, {&__pyx_kp_s_74, __pyx_k_74, sizeof(__pyx_k_74), 0, 0, 1, 0}, {&__pyx_kp_s_76, __pyx_k_76, sizeof(__pyx_k_76), 0, 0, 1, 0}, + {&__pyx_kp_s_78, __pyx_k_78, sizeof(__pyx_k_78), 0, 0, 1, 0}, {&__pyx_kp_s_8, __pyx_k_8, sizeof(__pyx_k_8), 0, 0, 1, 0}, - {&__pyx_kp_s_81, __pyx_k_81, sizeof(__pyx_k_81), 0, 0, 1, 0}, - {&__pyx_kp_s_82, __pyx_k_82, sizeof(__pyx_k_82), 0, 0, 1, 0}, {&__pyx_kp_s_83, __pyx_k_83, sizeof(__pyx_k_83), 0, 0, 1, 0}, {&__pyx_kp_s_84, __pyx_k_84, sizeof(__pyx_k_84), 0, 0, 1, 0}, {&__pyx_kp_s_85, __pyx_k_85, sizeof(__pyx_k_85), 0, 0, 1, 0}, @@ -59561,10 +60608,10 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_s_89, __pyx_k_89, sizeof(__pyx_k_89), 0, 0, 1, 0}, {&__pyx_kp_s_9, __pyx_k_9, sizeof(__pyx_k_9), 0, 0, 1, 0}, {&__pyx_kp_s_90, __pyx_k_90, sizeof(__pyx_k_90), 0, 0, 1, 0}, + {&__pyx_kp_s_91, __pyx_k_91, sizeof(__pyx_k_91), 0, 0, 1, 0}, {&__pyx_kp_s_92, __pyx_k_92, sizeof(__pyx_k_92), 0, 0, 1, 0}, - {&__pyx_kp_s_93, __pyx_k_93, sizeof(__pyx_k_93), 0, 0, 1, 0}, - {&__pyx_kp_s_98, __pyx_k_98, sizeof(__pyx_k_98), 0, 0, 1, 0}, - {&__pyx_kp_s_99, __pyx_k_99, sizeof(__pyx_k_99), 0, 0, 1, 0}, + {&__pyx_kp_s_94, __pyx_k_94, sizeof(__pyx_k_94), 0, 0, 1, 0}, + {&__pyx_kp_s_95, __pyx_k_95, sizeof(__pyx_k_95), 0, 0, 1, 0}, {&__pyx_kp_s__0, __pyx_k__0, sizeof(__pyx_k__0), 0, 0, 1, 0}, {&__pyx_kp_s__1, __pyx_k__1, sizeof(__pyx_k__1), 0, 0, 1, 0}, {&__pyx_n_s__END_OF_FILE, __pyx_k__END_OF_FILE, sizeof(__pyx_k__END_OF_FILE), 0, 0, 1, 1}, @@ -59682,7 +60729,9 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s__reachable, __pyx_k__reachable, sizeof(__pyx_k__reachable), 0, 0, 1, 1}, {&__pyx_n_s__reachable_buffer, __pyx_k__reachable_buffer, sizeof(__pyx_k__reachable_buffer), 0, 0, 1, 1}, {&__pyx_n_s__read_binary, __pyx_k__read_binary, sizeof(__pyx_k__read_binary), 0, 0, 1, 1}, + {&__pyx_n_s__read_bitext, __pyx_k__read_bitext, sizeof(__pyx_k__read_bitext), 0, 0, 1, 1}, {&__pyx_n_s__read_text, __pyx_k__read_text, sizeof(__pyx_k__read_text), 0, 0, 1, 1}, + {&__pyx_n_s__read_text_data, __pyx_k__read_text_data, sizeof(__pyx_k__read_text_data), 0, 0, 1, 1}, {&__pyx_n_s__res, __pyx_k__res, sizeof(__pyx_k__res), 0, 0, 1, 1}, {&__pyx_n_s__reset, __pyx_k__reset, sizeof(__pyx_k__reset), 0, 0, 1, 1}, {&__pyx_n_s__resource, __pyx_k__resource, sizeof(__pyx_k__resource), 0, 0, 1, 1}, @@ -59699,9 +60748,11 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s__seek, __pyx_k__seek, sizeof(__pyx_k__seek), 0, 0, 1, 1}, {&__pyx_n_s__setdefault, __pyx_k__setdefault, sizeof(__pyx_k__setdefault), 0, 0, 1, 1}, {&__pyx_n_s__shortest, __pyx_k__shortest, sizeof(__pyx_k__shortest), 0, 0, 1, 1}, + {&__pyx_n_s__side, __pyx_k__side, sizeof(__pyx_k__side), 0, 0, 1, 1}, {&__pyx_n_s__size, __pyx_k__size, sizeof(__pyx_k__size), 0, 0, 1, 1}, {&__pyx_n_s__skip, __pyx_k__skip, sizeof(__pyx_k__skip), 0, 0, 1, 1}, {&__pyx_n_s__sorted, __pyx_k__sorted, sizeof(__pyx_k__sorted), 0, 0, 1, 1}, + {&__pyx_n_s__source, __pyx_k__source, sizeof(__pyx_k__source), 0, 0, 1, 1}, {&__pyx_n_s__spanlen, __pyx_k__spanlen, sizeof(__pyx_k__spanlen), 0, 0, 1, 1}, {&__pyx_n_s__split, __pyx_k__split, sizeof(__pyx_k__split), 0, 0, 1, 1}, {&__pyx_n_s__start, __pyx_k__start, sizeof(__pyx_k__start), 0, 0, 1, 1}, @@ -59734,7 +60785,7 @@ static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_IndexError = __Pyx_GetName(__pyx_b, __pyx_n_s__IndexError); if (!__pyx_builtin_IndexError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_range = __Pyx_GetName(__pyx_b, __pyx_n_s__range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_TypeError = __Pyx_GetName(__pyx_b, __pyx_n_s__TypeError); if (!__pyx_builtin_TypeError) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_enumerate = __Pyx_GetName(__pyx_b, __pyx_n_s__enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_enumerate = __Pyx_GetName(__pyx_b, __pyx_n_s__enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_map = __Pyx_GetName(__pyx_b, __pyx_n_s__map); if (!__pyx_builtin_map) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_Exception = __Pyx_GetName(__pyx_b, __pyx_n_s__Exception); if (!__pyx_builtin_Exception) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_zip = __Pyx_GetName(__pyx_b, __pyx_n_s__zip); if (!__pyx_builtin_zip) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -59802,28 +60853,28 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(__pyx_int_1000); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_12)); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":62 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":65 * f.write("%s " % self.get_word(w_id)) * if w_id == 1: * f.write("\n") # <<<<<<<<<<<<<< * * def read_text(self, char* filename): */ - __pyx_k_tuple_15 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_15)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_k_tuple_15 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_15)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_k_tuple_15); __Pyx_INCREF(((PyObject *)__pyx_kp_s_14)); PyTuple_SET_ITEM(__pyx_k_tuple_15, 0, ((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_15)); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":57 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":60 * * def write_text(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< * for w_id in self.data: * if w_id > 1: */ - __pyx_k_tuple_16 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_16)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_k_tuple_16 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_16)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_k_tuple_16); __Pyx_INCREF(Py_None); PyTuple_SET_ITEM(__pyx_k_tuple_16, 0, Py_None); @@ -59836,14 +60887,14 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(Py_None); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_16)); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":66 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":68 + * * def read_text(self, char* filename): - * cdef int word_count = 0 * with gzip_or_text(filename) as fp: # <<<<<<<<<<<<<< - * for line_num, line in enumerate(fp): - * self.sent_index.append(word_count) + * self.read_text_data(fp) + * */ - __pyx_k_tuple_17 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_17)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_k_tuple_17 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_17)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_k_tuple_17); __Pyx_INCREF(Py_None); PyTuple_SET_ITEM(__pyx_k_tuple_17, 0, Py_None); @@ -59856,80 +60907,114 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(Py_None); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_17)); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":133 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":73 + * def read_bitext(self, char* filename, int side): + * with gzip_or_text(filename) as fp: + * data = (line.split(' ||| ')[side] for line in fp) # <<<<<<<<<<<<<< + * self.read_text_data(data) + * + */ + __pyx_k_tuple_19 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_19)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_19); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_18)); + PyTuple_SET_ITEM(__pyx_k_tuple_19, 0, ((PyObject *)__pyx_kp_s_18)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_18)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_19)); + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":72 + * + * def read_bitext(self, char* filename, int side): + * with gzip_or_text(filename) as fp: # <<<<<<<<<<<<<< + * data = (line.split(' ||| ')[side] for line in fp) + * self.read_text_data(data) + */ + __pyx_k_tuple_20 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_20)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_20); + __Pyx_INCREF(Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_20, 0, Py_None); + __Pyx_GIVEREF(Py_None); + __Pyx_INCREF(Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_20, 1, Py_None); + __Pyx_GIVEREF(Py_None); + __Pyx_INCREF(Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_20, 2, Py_None); + __Pyx_GIVEREF(Py_None); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_20)); + + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":145 * for i in self.data: * f.write("%d " %i) * f.write("\n") # <<<<<<<<<<<<<< * for i in self.sent_index: * f.write("%d " %i) */ - __pyx_k_tuple_19 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_19)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_19); + __pyx_k_tuple_22 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_22)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_22); __Pyx_INCREF(((PyObject *)__pyx_kp_s_14)); - PyTuple_SET_ITEM(__pyx_k_tuple_19, 0, ((PyObject *)__pyx_kp_s_14)); + PyTuple_SET_ITEM(__pyx_k_tuple_22, 0, ((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_19)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_22)); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":136 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":148 * for i in self.sent_index: * f.write("%d " %i) * f.write("\n") # <<<<<<<<<<<<<< * for i in self.sent_id: * f.write("%d " %i) */ - __pyx_k_tuple_20 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_20)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 136; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_20); + __pyx_k_tuple_23 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_23)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_23); __Pyx_INCREF(((PyObject *)__pyx_kp_s_14)); - PyTuple_SET_ITEM(__pyx_k_tuple_20, 0, ((PyObject *)__pyx_kp_s_14)); + PyTuple_SET_ITEM(__pyx_k_tuple_23, 0, ((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_20)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_23)); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":139 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":151 * for i in self.sent_id: * f.write("%d " %i) * f.write("\n") # <<<<<<<<<<<<<< * for word in self.id2word: * f.write("%s %d " % (word, self.word2id[word])) */ - __pyx_k_tuple_21 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_21)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_21); + __pyx_k_tuple_24 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_24)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 151; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_24); __Pyx_INCREF(((PyObject *)__pyx_kp_s_14)); - PyTuple_SET_ITEM(__pyx_k_tuple_21, 0, ((PyObject *)__pyx_kp_s_14)); + PyTuple_SET_ITEM(__pyx_k_tuple_24, 0, ((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_21)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_24)); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":142 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":154 * for word in self.id2word: * f.write("%s %d " % (word, self.word2id[word])) * f.write("\n") # <<<<<<<<<<<<<< * * def write_enhanced(self, char* filename): */ - __pyx_k_tuple_23 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_23)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_23); + __pyx_k_tuple_26 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_26)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_26); __Pyx_INCREF(((PyObject *)__pyx_kp_s_14)); - PyTuple_SET_ITEM(__pyx_k_tuple_23, 0, ((PyObject *)__pyx_kp_s_14)); + PyTuple_SET_ITEM(__pyx_k_tuple_26, 0, ((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_23)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_26)); - /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":145 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":157 * * def write_enhanced(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< * self.write_enhanced_handle(self, f) */ - __pyx_k_tuple_25 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_25)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_25); + __pyx_k_tuple_28 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_28)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_28); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_25, 0, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_28, 0, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_25, 1, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_28, 1, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_25, 2, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_28, 2, Py_None); __Pyx_GIVEREF(Py_None); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_25)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_28)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":46 * @@ -59938,15 +61023,15 @@ static int __Pyx_InitCachedConstants(void) { * self.sent_index = IntList(1000,1000) * if from_binary: */ - __pyx_k_tuple_26 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_26)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_26); + __pyx_k_tuple_29 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_29)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_29); __Pyx_INCREF(__pyx_int_1000); - PyTuple_SET_ITEM(__pyx_k_tuple_26, 0, __pyx_int_1000); + PyTuple_SET_ITEM(__pyx_k_tuple_29, 0, __pyx_int_1000); __Pyx_GIVEREF(__pyx_int_1000); __Pyx_INCREF(__pyx_int_1000); - PyTuple_SET_ITEM(__pyx_k_tuple_26, 1, __pyx_int_1000); + PyTuple_SET_ITEM(__pyx_k_tuple_29, 1, __pyx_int_1000); __Pyx_GIVEREF(__pyx_int_1000); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_26)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_29)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":47 * def __cinit__(self, from_binary=None, from_text=None): @@ -59955,15 +61040,15 @@ static int __Pyx_InitCachedConstants(void) { * if from_binary: * self.read_binary(from_binary) */ - __pyx_k_tuple_27 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_27)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_27); + __pyx_k_tuple_30 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_30)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_30); __Pyx_INCREF(__pyx_int_1000); - PyTuple_SET_ITEM(__pyx_k_tuple_27, 0, __pyx_int_1000); + PyTuple_SET_ITEM(__pyx_k_tuple_30, 0, __pyx_int_1000); __Pyx_GIVEREF(__pyx_int_1000); __Pyx_INCREF(__pyx_int_1000); - PyTuple_SET_ITEM(__pyx_k_tuple_27, 1, __pyx_int_1000); + PyTuple_SET_ITEM(__pyx_k_tuple_30, 1, __pyx_int_1000); __Pyx_GIVEREF(__pyx_int_1000); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_27)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_30)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":59 * pairs = line.split() @@ -59972,12 +61057,12 @@ static int __Pyx_InitCachedConstants(void) { * self.links.append(self.link(i, j)) * self.sent_index.append(len(self.links)) */ - __pyx_k_tuple_29 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_29)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_29); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_28)); - PyTuple_SET_ITEM(__pyx_k_tuple_29, 0, ((PyObject *)__pyx_kp_s_28)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_28)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_29)); + __pyx_k_tuple_32 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_32)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_32); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_31)); + PyTuple_SET_ITEM(__pyx_k_tuple_32, 0, ((PyObject *)__pyx_kp_s_31)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_31)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_32)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":54 * @@ -59986,18 +61071,18 @@ static int __Pyx_InitCachedConstants(void) { * for line in f: * self.sent_index.append(len(self.links)) */ - __pyx_k_tuple_30 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_30)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_30); + __pyx_k_tuple_33 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_33)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_33); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_30, 0, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_33, 0, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_30, 1, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_33, 1, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_30, 2, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_33, 2, Py_None); __Pyx_GIVEREF(Py_None); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_30)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_33)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":75 * for i, link in enumerate(self.links): @@ -60006,12 +61091,12 @@ static int __Pyx_InitCachedConstants(void) { * sent_num = sent_num + 1 * f.write("%d-%d " % self.unlink(link)) */ - __pyx_k_tuple_31 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_31)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_31); + __pyx_k_tuple_34 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_34)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_34); __Pyx_INCREF(((PyObject *)__pyx_kp_s_14)); - PyTuple_SET_ITEM(__pyx_k_tuple_31, 0, ((PyObject *)__pyx_kp_s_14)); + PyTuple_SET_ITEM(__pyx_k_tuple_34, 0, ((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_31)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_34)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":78 * sent_num = sent_num + 1 @@ -60020,12 +61105,12 @@ static int __Pyx_InitCachedConstants(void) { * * def write_binary(self, char* filename): */ - __pyx_k_tuple_33 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_33)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_33); + __pyx_k_tuple_36 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_36)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_36); __Pyx_INCREF(((PyObject *)__pyx_kp_s_14)); - PyTuple_SET_ITEM(__pyx_k_tuple_33, 0, ((PyObject *)__pyx_kp_s_14)); + PyTuple_SET_ITEM(__pyx_k_tuple_36, 0, ((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_33)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_36)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":71 * @@ -60034,18 +61119,18 @@ static int __Pyx_InitCachedConstants(void) { * sent_num = 0 * for i, link in enumerate(self.links): */ - __pyx_k_tuple_34 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_34)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_34); + __pyx_k_tuple_37 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_37)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_37); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_34, 0, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_37, 0, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_34, 1, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_37, 1, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_34, 2, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_37, 2, Py_None); __Pyx_GIVEREF(Py_None); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_34)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_37)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":92 * for link in self.links: @@ -60054,12 +61139,12 @@ static int __Pyx_InitCachedConstants(void) { * for i in self.sent_index: * f.write("%d " % i) */ - __pyx_k_tuple_35 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_35)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_35); + __pyx_k_tuple_38 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_38)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_38); __Pyx_INCREF(((PyObject *)__pyx_kp_s_14)); - PyTuple_SET_ITEM(__pyx_k_tuple_35, 0, ((PyObject *)__pyx_kp_s_14)); + PyTuple_SET_ITEM(__pyx_k_tuple_38, 0, ((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_35)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_38)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":95 * for i in self.sent_index: @@ -60068,12 +61153,12 @@ static int __Pyx_InitCachedConstants(void) { * * def alignment(self, i): */ - __pyx_k_tuple_36 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_36)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_36); + __pyx_k_tuple_39 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_39)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_39); __Pyx_INCREF(((PyObject *)__pyx_kp_s_14)); - PyTuple_SET_ITEM(__pyx_k_tuple_36, 0, ((PyObject *)__pyx_kp_s_14)); + PyTuple_SET_ITEM(__pyx_k_tuple_39, 0, ((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_36)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_39)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":88 * @@ -60082,18 +61167,18 @@ static int __Pyx_InitCachedConstants(void) { * sent_num = 1 * for link in self.links: */ - __pyx_k_tuple_37 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_37)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_37); + __pyx_k_tuple_40 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_40)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_40); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_37, 0, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_40, 0, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_37, 1, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_40, 1, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_37, 2, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_40, 2, Py_None); __Pyx_GIVEREF(Py_None); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_37)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_40)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":302 * @@ -60102,12 +61187,12 @@ static int __Pyx_InitCachedConstants(void) { * for line in f: * (fword, eword, score1, score2) = line.split() */ - __pyx_k_tuple_40 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_40)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_40); + __pyx_k_tuple_43 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_43)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_43); __Pyx_INCREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_k_tuple_40, 0, __pyx_int_0); + PyTuple_SET_ITEM(__pyx_k_tuple_43, 0, __pyx_int_0); __Pyx_GIVEREF(__pyx_int_0); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_40)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_43)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":278 * @@ -60116,18 +61201,18 @@ static int __Pyx_InitCachedConstants(void) { * # first loop merely establishes size of array objects * for line in f: */ - __pyx_k_tuple_41 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_41)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_41); + __pyx_k_tuple_44 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_44)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_44); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_41, 0, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_44, 0, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_41, 1, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_44, 1, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_41, 2, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_44, 2, Py_None); __Pyx_GIVEREF(Py_None); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_41)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_44)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":344 * @@ -60136,12 +61221,12 @@ static int __Pyx_InitCachedConstants(void) { * if i == j: #empty interval * return */ - __pyx_k_tuple_44 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_44)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 344; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_44); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_43)); - PyTuple_SET_ITEM(__pyx_k_tuple_44, 0, ((PyObject *)__pyx_kp_s_43)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_43)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_44)); + __pyx_k_tuple_47 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_47)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 344; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_47); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_46)); + PyTuple_SET_ITEM(__pyx_k_tuple_47, 0, ((PyObject *)__pyx_kp_s_46)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_46)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_47)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":367 * for i in self.f_index: @@ -60150,12 +61235,12 @@ static int __Pyx_InitCachedConstants(void) { * for i, s1, s2 in zip(self.e_index, self.col1, self.col2): * f.write("%d %f %f " % (i, s1, s2)) */ - __pyx_k_tuple_46 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_46)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_46); + __pyx_k_tuple_49 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_49)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_49); __Pyx_INCREF(((PyObject *)__pyx_kp_s_14)); - PyTuple_SET_ITEM(__pyx_k_tuple_46, 0, ((PyObject *)__pyx_kp_s_14)); + PyTuple_SET_ITEM(__pyx_k_tuple_49, 0, ((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_46)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_49)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":370 * for i, s1, s2 in zip(self.e_index, self.col1, self.col2): @@ -60164,12 +61249,12 @@ static int __Pyx_InitCachedConstants(void) { * for i, w in enumerate(self.id2fword): * f.write("%d %s " % (i, w)) */ - __pyx_k_tuple_48 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_48)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 370; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_48); + __pyx_k_tuple_51 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_51)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 370; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_51); __Pyx_INCREF(((PyObject *)__pyx_kp_s_14)); - PyTuple_SET_ITEM(__pyx_k_tuple_48, 0, ((PyObject *)__pyx_kp_s_14)); + PyTuple_SET_ITEM(__pyx_k_tuple_51, 0, ((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_48)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_51)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":373 * for i, w in enumerate(self.id2fword): @@ -60178,12 +61263,12 @@ static int __Pyx_InitCachedConstants(void) { * for i, w in enumerate(self.id2eword): * f.write("%d %s " % (i, w)) */ - __pyx_k_tuple_50 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_50)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_50); + __pyx_k_tuple_53 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_53)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_53); __Pyx_INCREF(((PyObject *)__pyx_kp_s_14)); - PyTuple_SET_ITEM(__pyx_k_tuple_50, 0, ((PyObject *)__pyx_kp_s_14)); + PyTuple_SET_ITEM(__pyx_k_tuple_53, 0, ((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_50)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_53)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":376 * for i, w in enumerate(self.id2eword): @@ -60192,12 +61277,12 @@ static int __Pyx_InitCachedConstants(void) { * * */ - __pyx_k_tuple_51 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_51)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_51); + __pyx_k_tuple_54 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_54)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_54); __Pyx_INCREF(((PyObject *)__pyx_kp_s_14)); - PyTuple_SET_ITEM(__pyx_k_tuple_51, 0, ((PyObject *)__pyx_kp_s_14)); + PyTuple_SET_ITEM(__pyx_k_tuple_54, 0, ((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_51)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_54)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":364 * @@ -60206,18 +61291,18 @@ static int __Pyx_InitCachedConstants(void) { * for i in self.f_index: * f.write("%d " % i) */ - __pyx_k_tuple_52 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_52)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_52); + __pyx_k_tuple_55 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_55)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_55); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_52, 0, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_55, 0, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_52, 1, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_55, 1, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_52, 2, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_55, 2, Py_None); __Pyx_GIVEREF(Py_None); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_52)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_55)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":409 * cdef i, N, e_id, f_id @@ -60226,18 +61311,18 @@ static int __Pyx_InitCachedConstants(void) { * N = len(self.e_index) * f_id = 0 */ - __pyx_k_tuple_54 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_54)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_54); + __pyx_k_tuple_57 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_57)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_57); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_54, 0, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_57, 0, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_54, 1, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_57, 1, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_54, 2, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_57, 2, Py_None); __Pyx_GIVEREF(Py_None); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_54)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_57)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":13 * cdef IntList rank @@ -60246,12 +61331,12 @@ static int __Pyx_InitCachedConstants(void) { * self.sa = sa * n = self.sa.sa.len */ - __pyx_k_tuple_58 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_58)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_58); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_57)); - PyTuple_SET_ITEM(__pyx_k_tuple_58, 0, ((PyObject *)__pyx_kp_s_57)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_57)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_58)); + __pyx_k_tuple_61 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_61)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_61); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_60)); + PyTuple_SET_ITEM(__pyx_k_tuple_61, 0, ((PyObject *)__pyx_kp_s_60)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_60)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_61)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":34 * if h > 0: @@ -60260,12 +61345,12 @@ static int __Pyx_InitCachedConstants(void) { * * def compute_stats(self, int max_n): */ - __pyx_k_tuple_60 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_60)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_60); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_59)); - PyTuple_SET_ITEM(__pyx_k_tuple_60, 0, ((PyObject *)__pyx_kp_s_59)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_59)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_60)); + __pyx_k_tuple_63 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_63)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_63); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_62)); + PyTuple_SET_ITEM(__pyx_k_tuple_63, 0, ((PyObject *)__pyx_kp_s_62)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_62)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_63)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":297 * pattern_rank = {} @@ -60274,12 +61359,12 @@ static int __Pyx_InitCachedConstants(void) { * cdef float start_time = monitor_cpu() * */ - __pyx_k_tuple_71 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_71)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_71); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_70)); - PyTuple_SET_ITEM(__pyx_k_tuple_71, 0, ((PyObject *)__pyx_kp_s_70)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_70)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_71)); + __pyx_k_tuple_73 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_73)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_73); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_72)); + PyTuple_SET_ITEM(__pyx_k_tuple_73, 0, ((PyObject *)__pyx_kp_s_72)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_72)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_73)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":314 * queue = IntList(increment=1000) @@ -60288,12 +61373,12 @@ static int __Pyx_InitCachedConstants(void) { * N = len(data) * for i from 0 <= i < N: */ - __pyx_k_tuple_73 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_73)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_73); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_72)); - PyTuple_SET_ITEM(__pyx_k_tuple_73, 0, ((PyObject *)__pyx_kp_s_72)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_72)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_73)); + __pyx_k_tuple_75 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_75)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_75); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_74)); + PyTuple_SET_ITEM(__pyx_k_tuple_75, 0, ((PyObject *)__pyx_kp_s_74)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_74)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_75)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":329 * trie_node_data_append(node, i) @@ -60302,12 +61387,12 @@ static int __Pyx_InitCachedConstants(void) { * N = len(queue) * ptr1 = 0 */ - __pyx_k_tuple_75 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_75)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_75); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_74)); - PyTuple_SET_ITEM(__pyx_k_tuple_75, 0, ((PyObject *)__pyx_kp_s_74)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_74)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_75)); + __pyx_k_tuple_77 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_77)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_77); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_76)); + PyTuple_SET_ITEM(__pyx_k_tuple_77, 0, ((PyObject *)__pyx_kp_s_76)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_76)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_77)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":393 * for pattern2 in J_set: @@ -60316,12 +61401,12 @@ static int __Pyx_InitCachedConstants(void) { * J2_set.add(combined_pattern) * */ - __pyx_k_tuple_77 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_77)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 393; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_77); + __pyx_k_tuple_79 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_79)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 393; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_79); __Pyx_INCREF(__pyx_int_neg_1); - PyTuple_SET_ITEM(__pyx_k_tuple_77, 0, __pyx_int_neg_1); + PyTuple_SET_ITEM(__pyx_k_tuple_79, 0, __pyx_int_neg_1); __Pyx_GIVEREF(__pyx_int_neg_1); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_77)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_79)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":400 * x = x+1 @@ -60330,12 +61415,12 @@ static int __Pyx_InitCachedConstants(void) { * IJ_set.add(combined_pattern) * */ - __pyx_k_tuple_78 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_78)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_78); + __pyx_k_tuple_80 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_80)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_80); __Pyx_INCREF(__pyx_int_neg_1); - PyTuple_SET_ITEM(__pyx_k_tuple_78, 0, __pyx_int_neg_1); + PyTuple_SET_ITEM(__pyx_k_tuple_80, 0, __pyx_int_neg_1); __Pyx_GIVEREF(__pyx_int_neg_1); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_78)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_80)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":407 * x = x+2 @@ -60344,12 +61429,12 @@ static int __Pyx_InitCachedConstants(void) { * IJ_set.add(combined_pattern) * combined_pattern = pattern2 + (-1,) + pattern1 */ - __pyx_k_tuple_79 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_79)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_79); + __pyx_k_tuple_81 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_81)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_81); __Pyx_INCREF(__pyx_int_neg_1); - PyTuple_SET_ITEM(__pyx_k_tuple_79, 0, __pyx_int_neg_1); + PyTuple_SET_ITEM(__pyx_k_tuple_81, 0, __pyx_int_neg_1); __Pyx_GIVEREF(__pyx_int_neg_1); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_79)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_81)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":409 * combined_pattern = pattern1 + (-1,) + pattern2 @@ -60358,12 +61443,12 @@ static int __Pyx_InitCachedConstants(void) { * IJ_set.add(combined_pattern) * */ - __pyx_k_tuple_80 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_80)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_80); + __pyx_k_tuple_82 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_82)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_82); __Pyx_INCREF(__pyx_int_neg_1); - PyTuple_SET_ITEM(__pyx_k_tuple_80, 0, __pyx_int_neg_1); + PyTuple_SET_ITEM(__pyx_k_tuple_82, 0, __pyx_int_neg_1); __Pyx_GIVEREF(__pyx_int_neg_1); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_80)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_82)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":103 * @@ -60372,12 +61457,12 @@ static int __Pyx_InitCachedConstants(void) { * for i from 0 <= i < N: * j = isa.arr[i] */ - __pyx_k_tuple_91 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_91)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_91); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_90)); - PyTuple_SET_ITEM(__pyx_k_tuple_91, 0, ((PyObject *)__pyx_kp_s_90)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_90)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_91)); + __pyx_k_tuple_93 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_93)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_93); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_92)); + PyTuple_SET_ITEM(__pyx_k_tuple_93, 0, ((PyObject *)__pyx_kp_s_92)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_92)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_93)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":202 * for a_i in self.sa: @@ -60386,12 +61471,12 @@ static int __Pyx_InitCachedConstants(void) { * for w_i in self.ha: * f.write("%d " % w_i) */ - __pyx_k_tuple_94 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_94)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_94); + __pyx_k_tuple_96 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_96)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_96); __Pyx_INCREF(((PyObject *)__pyx_kp_s_14)); - PyTuple_SET_ITEM(__pyx_k_tuple_94, 0, ((PyObject *)__pyx_kp_s_14)); + PyTuple_SET_ITEM(__pyx_k_tuple_96, 0, ((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_94)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_96)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":205 * for w_i in self.ha: @@ -60400,12 +61485,12 @@ static int __Pyx_InitCachedConstants(void) { * * cdef int __search_high(self, int word_id, int offset, int low, int high): */ - __pyx_k_tuple_95 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_95)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 205; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_95); + __pyx_k_tuple_97 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_97)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 205; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_97); __Pyx_INCREF(((PyObject *)__pyx_kp_s_14)); - PyTuple_SET_ITEM(__pyx_k_tuple_95, 0, ((PyObject *)__pyx_kp_s_14)); + PyTuple_SET_ITEM(__pyx_k_tuple_97, 0, ((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_95)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_97)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":198 * @@ -60414,18 +61499,18 @@ static int __Pyx_InitCachedConstants(void) { * self.darray.write_enhanced_handle(f) * for a_i in self.sa: */ - __pyx_k_tuple_96 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_96)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_96); + __pyx_k_tuple_98 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_98)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_98); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_96, 0, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_98, 0, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_96, 1, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_98, 1, Py_None); __Pyx_GIVEREF(Py_None); __Pyx_INCREF(Py_None); - PyTuple_SET_ITEM(__pyx_k_tuple_96, 2, Py_None); + PyTuple_SET_ITEM(__pyx_k_tuple_98, 2, Py_None); __Pyx_GIVEREF(Py_None); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_96)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_98)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":92 * logger.info("Sampling strategy: uniform, max sample size = %d", sample_size) @@ -60434,12 +61519,12 @@ static int __Pyx_InitCachedConstants(void) { * * def sample(self, PhraseLocation phrase_location): */ - __pyx_k_tuple_100 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_100)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_100); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_99)); - PyTuple_SET_ITEM(__pyx_k_tuple_100, 0, ((PyObject *)__pyx_kp_s_99)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_99)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_100)); + __pyx_k_tuple_102 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_102)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_102); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_101)); + PyTuple_SET_ITEM(__pyx_k_tuple_102, 0, ((PyObject *)__pyx_kp_s_101)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_101)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_102)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":300 * self.rules.root = ExtendedTrieNode(phrase_location=PhraseLocation()) @@ -60448,12 +61533,12 @@ static int __Pyx_InitCachedConstants(void) { * self.alignment = alignment * */ - __pyx_k_tuple_105 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_105)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_105); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_104)); - PyTuple_SET_ITEM(__pyx_k_tuple_105, 0, ((PyObject *)__pyx_kp_s_104)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_104)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_105)); + __pyx_k_tuple_107 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_107)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_107); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_106)); + PyTuple_SET_ITEM(__pyx_k_tuple_107, 0, ((PyObject *)__pyx_kp_s_106)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_106)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_107)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1004 * else: @@ -60462,12 +61547,12 @@ static int __Pyx_InitCachedConstants(void) { * # checking whether lookup_required * if lookup_required: */ - __pyx_k_tuple_120 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_120)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1004; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_120); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_119)); - PyTuple_SET_ITEM(__pyx_k_tuple_120, 0, ((PyObject *)__pyx_kp_s_119)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_119)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_120)); + __pyx_k_tuple_122 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_122)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1004; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_122); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_121)); + PyTuple_SET_ITEM(__pyx_k_tuple_122, 0, ((PyObject *)__pyx_kp_s_121)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_121)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_122)); /* "_sa.pyx":9 * resource.getrusage(resource.RUSAGE_SELF).ru_stime) @@ -60476,16 +61561,16 @@ static int __Pyx_InitCachedConstants(void) { * if filename.endswith('.gz'): * return gzip.GzipFile(filename) */ - __pyx_k_tuple_132 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_132)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_132); + __pyx_k_tuple_134 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_134)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_134); __Pyx_INCREF(((PyObject *)__pyx_n_s__filename)); - PyTuple_SET_ITEM(__pyx_k_tuple_132, 0, ((PyObject *)__pyx_n_s__filename)); + PyTuple_SET_ITEM(__pyx_k_tuple_134, 0, ((PyObject *)__pyx_n_s__filename)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__filename)); __Pyx_INCREF(((PyObject *)__pyx_n_s__filename)); - PyTuple_SET_ITEM(__pyx_k_tuple_132, 1, ((PyObject *)__pyx_n_s__filename)); + PyTuple_SET_ITEM(__pyx_k_tuple_134, 1, ((PyObject *)__pyx_n_s__filename)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__filename)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_132)); - __pyx_k_codeobj_133 = (PyObject*)__Pyx_PyCode_New(1, 0, 2, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_132, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_134, __pyx_n_s__gzip_or_text, 9, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_133)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_134)); + __pyx_k_codeobj_135 = (PyObject*)__Pyx_PyCode_New(1, 0, 2, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_134, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_136, __pyx_n_s__gzip_or_text, 9, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_135)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* "_sa.pyx":15 * return open(filename) @@ -60494,12 +61579,12 @@ static int __Pyx_InitCachedConstants(void) { * * include "float_list.pxi" */ - __pyx_k_tuple_136 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_136)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_136); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_135)); - PyTuple_SET_ITEM(__pyx_k_tuple_136, 0, ((PyObject *)__pyx_kp_s_135)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_135)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_136)); + __pyx_k_tuple_138 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_138)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_138); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_137)); + PyTuple_SET_ITEM(__pyx_k_tuple_138, 0, ((PyObject *)__pyx_kp_s_137)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_137)); + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_138)); /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":104 * return ALPHABET.setindex(sym, id) @@ -60507,16 +61592,16 @@ static int __Pyx_InitCachedConstants(void) { * def sym_fromstring(char* string, bint terminal): # <<<<<<<<<<<<<< * return ALPHABET.fromstring(string, terminal) */ - __pyx_k_tuple_137 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_137)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_k_tuple_137); + __pyx_k_tuple_139 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_139)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_k_tuple_139); __Pyx_INCREF(((PyObject *)__pyx_n_s__string)); - PyTuple_SET_ITEM(__pyx_k_tuple_137, 0, ((PyObject *)__pyx_n_s__string)); + PyTuple_SET_ITEM(__pyx_k_tuple_139, 0, ((PyObject *)__pyx_n_s__string)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__string)); __Pyx_INCREF(((PyObject *)__pyx_n_s__terminal)); - PyTuple_SET_ITEM(__pyx_k_tuple_137, 1, ((PyObject *)__pyx_n_s__terminal)); + PyTuple_SET_ITEM(__pyx_k_tuple_139, 1, ((PyObject *)__pyx_n_s__terminal)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__terminal)); - __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_137)); - __pyx_k_codeobj_138 = (PyObject*)__Pyx_PyCode_New(2, 0, 2, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_137, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_139, __pyx_n_s__sym_fromstring, 104, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_138)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_139)); + __pyx_k_codeobj_140 = (PyObject*)__Pyx_PyCode_New(2, 0, 2, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_139, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_141, __pyx_n_s__sym_fromstring, 104, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_140)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -60776,12 +61861,16 @@ PyMODINIT_FUNC PyInit__sa(void) if (__Pyx_SetVtable(__pyx_type_3_sa_HieroCachingRuleFactory.tp_dict, __pyx_vtabptr_3_sa_HieroCachingRuleFactory) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_SetAttrString(__pyx_m, "HieroCachingRuleFactory", (PyObject *)&__pyx_type_3_sa_HieroCachingRuleFactory) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_3_sa_HieroCachingRuleFactory = &__pyx_type_3_sa_HieroCachingRuleFactory; - if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct__compute_stats) < 0) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_ptype_3_sa___pyx_scope_struct__compute_stats = &__pyx_type_3_sa___pyx_scope_struct__compute_stats; - if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_1___iter__) < 0) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_ptype_3_sa___pyx_scope_struct_1___iter__ = &__pyx_type_3_sa___pyx_scope_struct_1___iter__; - if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_2_input) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 919; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_ptype_3_sa___pyx_scope_struct_2_input = &__pyx_type_3_sa___pyx_scope_struct_2_input; + if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct__read_bitext) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_ptype_3_sa___pyx_scope_struct__read_bitext = &__pyx_type_3_sa___pyx_scope_struct__read_bitext; + if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_1_genexpr) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_ptype_3_sa___pyx_scope_struct_1_genexpr = &__pyx_type_3_sa___pyx_scope_struct_1_genexpr; + if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_2_compute_stats) < 0) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_ptype_3_sa___pyx_scope_struct_2_compute_stats = &__pyx_type_3_sa___pyx_scope_struct_2_compute_stats; + if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_3___iter__) < 0) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_ptype_3_sa___pyx_scope_struct_3___iter__ = &__pyx_type_3_sa___pyx_scope_struct_3___iter__; + if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_4_input) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 919; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_ptype_3_sa___pyx_scope_struct_4_input = &__pyx_type_3_sa___pyx_scope_struct_4_input; /*--- Type import code ---*/ /*--- Variable import code ---*/ /*--- Function import code ---*/ @@ -60844,7 +61933,7 @@ PyMODINIT_FUNC PyInit__sa(void) __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__getLogger); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_136), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_k_tuple_138), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (PyObject_SetAttr(__pyx_m, __pyx_n_s__logger, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -60859,7 +61948,7 @@ PyMODINIT_FUNC PyInit__sa(void) */ __pyx_t_1 = __Pyx_PyBool_FromLong(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_k_38 = __pyx_t_1; + __pyx_k_41 = __pyx_t_1; __Pyx_GIVEREF(__pyx_t_1); __pyx_t_1 = 0; @@ -60986,9 +62075,9 @@ PyMODINIT_FUNC PyInit__sa(void) __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(((PyObject *)__pyx_kp_s_140)); - PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_kp_s_140)); - __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_140)); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_142)); + PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_kp_s_142)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_142)); PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); __pyx_t_2 = 0; @@ -61009,7 +62098,7 @@ PyMODINIT_FUNC PyInit__sa(void) */ __pyx_t_2 = __Pyx_PyBool_FromLong(0); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_k_97 = __pyx_t_2; + __pyx_k_99 = __pyx_t_2; __Pyx_GIVEREF(__pyx_t_2); __pyx_t_2 = 0; @@ -61451,6 +62540,10 @@ bad: return -1; } +static CYTHON_INLINE void __Pyx_RaiseClosureNameError(const char *varname) { + PyErr_Format(PyExc_NameError, "free variable '%s' referenced before assignment in enclosing scope", varname); +} + static CYTHON_INLINE long __Pyx_div_long(long a, long b) { long q = a / b; long r = a - q*b; @@ -61836,6 +62929,86 @@ bad: return module; } +static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) { +#if CYTHON_COMPILING_IN_PYPY + return PyObject_RichCompareBool(s1, s2, equals); +#else + if (s1 == s2) { + return (equals == Py_EQ); + } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) { + if (PyBytes_GET_SIZE(s1) != PyBytes_GET_SIZE(s2)) { + return (equals == Py_NE); + } else if (PyBytes_GET_SIZE(s1) == 1) { + if (equals == Py_EQ) + return (PyBytes_AS_STRING(s1)[0] == PyBytes_AS_STRING(s2)[0]); + else + return (PyBytes_AS_STRING(s1)[0] != PyBytes_AS_STRING(s2)[0]); + } else { + int result = memcmp(PyBytes_AS_STRING(s1), PyBytes_AS_STRING(s2), (size_t)PyBytes_GET_SIZE(s1)); + return (equals == Py_EQ) ? (result == 0) : (result != 0); + } + } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) { + return (equals == Py_NE); + } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) { + return (equals == Py_NE); + } else { + int result; + PyObject* py_result = PyObject_RichCompare(s1, s2, equals); + if (!py_result) + return -1; + result = __Pyx_PyObject_IsTrue(py_result); + Py_DECREF(py_result); + return result; + } +#endif +} + +static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) { +#if CYTHON_COMPILING_IN_PYPY + return PyObject_RichCompareBool(s1, s2, equals); +#else + if (s1 == s2) { + return (equals == Py_EQ); + } else if (PyUnicode_CheckExact(s1) & PyUnicode_CheckExact(s2)) { + #if CYTHON_PEP393_ENABLED + if ((PyUnicode_READY(s1) < 0) || (PyUnicode_READY(s2) < 0)) + return -1; + if (PyUnicode_GET_LENGTH(s1) != PyUnicode_GET_LENGTH(s2)) { + return (equals == Py_NE); + } else if (PyUnicode_GET_LENGTH(s1) == 1) { + Py_UCS4 ch1 = PyUnicode_READ_CHAR(s1, 0); + Py_UCS4 ch2 = PyUnicode_READ_CHAR(s2, 0); + return (equals == Py_EQ) ? (ch1 == ch2) : (ch1 != ch2); + #else + if (PyUnicode_GET_SIZE(s1) != PyUnicode_GET_SIZE(s2)) { + return (equals == Py_NE); + } else if (PyUnicode_GET_SIZE(s1) == 1) { + Py_UNICODE ch1 = PyUnicode_AS_UNICODE(s1)[0]; + Py_UNICODE ch2 = PyUnicode_AS_UNICODE(s2)[0]; + return (equals == Py_EQ) ? (ch1 == ch2) : (ch1 != ch2); + #endif + } else { + int result = PyUnicode_Compare(s1, s2); + if ((result == -1) && unlikely(PyErr_Occurred())) + return -1; + return (equals == Py_EQ) ? (result == 0) : (result != 0); + } + } else if ((s1 == Py_None) & PyUnicode_CheckExact(s2)) { + return (equals == Py_NE); + } else if ((s2 == Py_None) & PyUnicode_CheckExact(s1)) { + return (equals == Py_NE); + } else { + int result; + PyObject* py_result = PyObject_RichCompare(s1, s2, equals); + if (!py_result) + return -1; + result = __Pyx_PyObject_IsTrue(py_result); + Py_DECREF(py_result); + return result; + } +#endif +} + static CYTHON_INLINE unsigned char __Pyx_PyInt_AsUnsignedChar(PyObject* x) { const unsigned char neg_one = (unsigned char)-1, const_zero = 0; const int is_unsigned = neg_one > const_zero; diff --git a/python/src/sa/data_array.pxi b/python/src/sa/data_array.pxi index 1c044694..7a102a7e 100644 --- a/python/src/sa/data_array.pxi +++ b/python/src/sa/data_array.pxi @@ -14,7 +14,7 @@ cdef class DataArray: cdef IntList sent_index cdef bint use_sent_id - def __cinit__(self, from_binary=None, from_text=None, bint use_sent_id=False): + def __cinit__(self, from_binary=None, from_text=None, side=None, bint use_sent_id=False): self.word2id = {"END_OF_FILE":0, "END_OF_LINE":1} self.id2word = ["END_OF_FILE", "END_OF_LINE"] self.data = IntList(1000,1000) @@ -24,7 +24,10 @@ cdef class DataArray: if from_binary: self.read_binary(from_binary) elif from_text: - self.read_text(from_text) + if side: + self.read_bitext(from_text, (0 if side == 'source' else 1)) + else: + self.read_text(from_text) def __len__(self): return len(self.data) @@ -62,21 +65,30 @@ cdef class DataArray: f.write("\n") def read_text(self, char* filename): - cdef int word_count = 0 with gzip_or_text(filename) as fp: - for line_num, line in enumerate(fp): - self.sent_index.append(word_count) - for word in line.split(): - self.data.append(self.get_id(word)) - if self.use_sent_id: - self.sent_id.append(line_num) - word_count = word_count + 1 - self.data.append(1) + self.read_text_data(fp) + + def read_bitext(self, char* filename, int side): + with gzip_or_text(filename) as fp: + data = (line.split(' ||| ')[side] for line in fp) + self.read_text_data(data) + + def read_text_data(self, data): + cdef int word_count = 0 + for line_num, line in enumerate(data): + self.sent_index.append(word_count) + for word in line.split(): + self.data.append(self.get_id(word)) if self.use_sent_id: self.sent_id.append(line_num) word_count = word_count + 1 - self.data.append(0) - self.sent_index.append(word_count) + self.data.append(1) + if self.use_sent_id: + self.sent_id.append(line_num) + word_count = word_count + 1 + self.data.append(0) + self.sent_index.append(word_count) + def read_binary(self, char* filename): cdef FILE* f diff --git a/python/src/sa/suffix_array.pxi b/python/src/sa/suffix_array.pxi index 20e6261d..d86e8ea6 100644 --- a/python/src/sa/suffix_array.pxi +++ b/python/src/sa/suffix_array.pxi @@ -8,14 +8,14 @@ cdef class SuffixArray: cdef IntList sa cdef IntList ha - def __cinit__(self, from_binary=None, from_text=None): + def __cinit__(self, from_binary=None, from_text=None, side=None): self.darray = DataArray() self.sa = IntList() self.ha = IntList() if from_binary: self.read_binary(from_binary) elif from_text: - self.read_text(from_text) + self.read_text(from_text, side) def __getitem__(self, i): return self.sa.arr[i] @@ -29,13 +29,13 @@ cdef class SuffixArray: def getSentPos(self, loc): return self.darray.getSentPos(loc) - def read_text(self, char* filename): + def read_text(self, filename, side): '''Constructs suffix array using the algorithm of Larsson & Sadahkane (1999)''' cdef int V, N, i, j, h, a_i, n, current_run, skip cdef IntList isa, word_count - self.darray = DataArray(from_text=filename, use_sent_id=True) + self.darray = DataArray(from_text=filename, side=side, use_sent_id=True) N = len(self.darray) V = len(self.darray.id2word) -- cgit v1.2.3 From 3325742971681642adadfac180f8e0e27014bd57 Mon Sep 17 00:00:00 2001 From: Victor Chahuneau Date: Mon, 30 Jul 2012 00:04:02 -0400 Subject: [python] monolingual grammars --- python/pkg/cdec/__init__.py | 2 +- python/src/_cdec.cpp | 588 ++++++++++++++++++++++++++++++++++++++++---- python/src/grammar.pxi | 12 + 3 files changed, 549 insertions(+), 53 deletions(-) (limited to 'python/pkg/cdec') diff --git a/python/pkg/cdec/__init__.py b/python/pkg/cdec/__init__.py index 503ac787..531fea49 100644 --- a/python/pkg/cdec/__init__.py +++ b/python/pkg/cdec/__init__.py @@ -1 +1 @@ -from cdec._cdec import Decoder, Lattice, TRule, NT, NTRef, ParseFailed, InvalidConfig +from cdec._cdec import Decoder, Lattice, TRule, MRule, NT, NTRef, ParseFailed, InvalidConfig diff --git a/python/src/_cdec.cpp b/python/src/_cdec.cpp index 88c03520..dcd33448 100644 --- a/python/src/_cdec.cpp +++ b/python/src/_cdec.cpp @@ -1,4 +1,4 @@ -/* Generated by Cython 0.17.beta1 on Sat Jul 28 20:10:21 2012 */ +/* Generated by Cython 0.17.beta1 on Sun Jul 29 23:00:27 2012 */ #define PY_SSIZE_T_CLEAN #include "Python.h" @@ -417,6 +417,7 @@ struct __pyx_obj_5_cdec_CandidateSet; struct __pyx_obj_5_cdec___pyx_scope_struct_14___get__; struct __pyx_obj_5_cdec_TRule; struct __pyx_obj_4cdec_2sa_3_sa_Rule; +struct __pyx_obj_5_cdec_MRule; struct __pyx_obj_5_cdec_SegmentEvaluator; struct __pyx_obj_5_cdec___pyx_scope_struct_20___iter__; struct __pyx_obj_5_cdec_Candidate; @@ -535,8 +536,8 @@ struct __pyx_obj_4cdec_2sa_3_sa_Phrase { }; -/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":169 - * _phrase(self.f), _phrase(self.e), scores) +/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":181 + * super(MRule, self).__init__(lhs, rhs, e, scores, a) * * cdef class Grammar: # <<<<<<<<<<<<<< * cdef shared_ptr[grammar.Grammar]* grammar @@ -652,6 +653,18 @@ struct __pyx_obj_4cdec_2sa_3_sa_Rule { }; +/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":169 + * _phrase(self.f), _phrase(self.e), scores) + * + * cdef class MRule(TRule): # <<<<<<<<<<<<<< + * def __init__(self, lhs, rhs, scores, a=None): + * cdef unsigned i = 1 + */ +struct __pyx_obj_5_cdec_MRule { + struct __pyx_obj_5_cdec_TRule __pyx_base; +}; + + /* "/Users/vchahun/Sandbox/cdec/python/src/mteval.pxi":98 * self.cs.AddKBestCandidates(hypergraph.hg[0], k, self.scorer.get()) * @@ -898,7 +911,7 @@ struct __pyx_obj_5_cdec_DenseVector { }; -/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":175 +/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":187 * del self.grammar * * def __iter__(self): # <<<<<<<<<<<<<< @@ -1136,7 +1149,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_22__make_config { }; -/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":192 +/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":204 * self.grammar.get().SetGrammarName(string(name)) * * cdef class TextGrammar(Grammar): # <<<<<<<<<<<<<< @@ -1693,6 +1706,7 @@ static PyTypeObject *__pyx_ptype_5_cdec_SparseVector = 0; static PyTypeObject *__pyx_ptype_5_cdec_NT = 0; static PyTypeObject *__pyx_ptype_5_cdec_NTRef = 0; static PyTypeObject *__pyx_ptype_5_cdec_TRule = 0; +static PyTypeObject *__pyx_ptype_5_cdec_MRule = 0; static PyTypeObject *__pyx_ptype_5_cdec_Grammar = 0; static PyTypeObject *__pyx_ptype_5_cdec_TextGrammar = 0; static PyTypeObject *__pyx_ptype_5_cdec_Hypergraph = 0; @@ -1746,6 +1760,7 @@ static PyObject *__pyx_builtin_TypeError; static PyObject *__pyx_builtin_KeyError; static PyObject *__pyx_builtin_range; static PyObject *__pyx_builtin_NotImplemented; +static PyObject *__pyx_builtin_super; static PyObject *__pyx_builtin_ValueError; static PyObject *__pyx_builtin_eval; static PyObject *__pyx_builtin_enumerate; @@ -1810,6 +1825,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_3lhs___get__(struct __pyx_obj_5_cdec_TRu static int __pyx_pf_5_cdec_5TRule_3lhs_2__set__(struct __pyx_obj_5_cdec_TRule *__pyx_v_self, PyObject *__pyx_v_lhs); /* proto */ static PyObject *__pyx_pf_5_cdec_5TRule_7__str___genexpr(PyObject *__pyx_self); /* proto */ static PyObject *__pyx_pf_5_cdec_5TRule_4__str__(struct __pyx_obj_5_cdec_TRule *__pyx_v_self); /* proto */ +static int __pyx_pf_5_cdec_5MRule___init__(struct __pyx_obj_5_cdec_MRule *__pyx_v_self, PyObject *__pyx_v_lhs, PyObject *__pyx_v_rhs, PyObject *__pyx_v_scores, PyObject *__pyx_v_a); /* proto */ static void __pyx_pf_5_cdec_7Grammar___dealloc__(CYTHON_UNUSED struct __pyx_obj_5_cdec_Grammar *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_7Grammar_2__iter__(struct __pyx_obj_5_cdec_Grammar *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_7Grammar_4name___get__(struct __pyx_obj_5_cdec_Grammar *__pyx_v_self); /* proto */ @@ -1952,6 +1968,7 @@ static char __pyx_k__key[] = "key"; static char __pyx_k__lhs[] = "lhs"; static char __pyx_k__plf[] = "plf"; static char __pyx_k__ref[] = "ref"; +static char __pyx_k__rhs[] = "rhs"; static char __pyx_k__BLEU[] = "BLEU"; static char __pyx_k__eval[] = "eval"; static char __pyx_k__info[] = "info"; @@ -1973,6 +1990,7 @@ static char __pyx_k__rules[] = "rules"; static char __pyx_k__score[] = "score"; static char __pyx_k__split[] = "split"; static char __pyx_k__strip[] = "strip"; +static char __pyx_k__super[] = "super"; static char __pyx_k__value[] = "value"; static char __pyx_k__config[] = "config"; static char __pyx_k__csplit[] = "csplit"; @@ -1990,6 +2008,7 @@ static char __pyx_k__replace[] = "replace"; static char __pyx_k__IBM_BLEU[] = "IBM_BLEU"; static char __pyx_k__KeyError[] = "KeyError"; static char __pyx_k____exit__[] = "__exit__"; +static char __pyx_k____init__[] = "__init__"; static char __pyx_k____main__[] = "__main__"; static char __pyx_k____name__[] = "__name__"; static char __pyx_k____test__[] = "__test__"; @@ -2070,6 +2089,7 @@ static PyObject *__pyx_n_s__ValueError; static PyObject *__pyx_n_s____class__; static PyObject *__pyx_n_s____enter__; static PyObject *__pyx_n_s____exit__; +static PyObject *__pyx_n_s____init__; static PyObject *__pyx_n_s____main__; static PyObject *__pyx_n_s____name__; static PyObject *__pyx_n_s____test__; @@ -2124,6 +2144,7 @@ static PyObject *__pyx_n_s__range; static PyObject *__pyx_n_s__ref; static PyObject *__pyx_n_s__refs; static PyObject *__pyx_n_s__replace; +static PyObject *__pyx_n_s__rhs; static PyObject *__pyx_n_s__rules; static PyObject *__pyx_n_s__scfg; static PyObject *__pyx_n_s__score; @@ -2134,6 +2155,7 @@ static PyObject *__pyx_n_s__span; static PyObject *__pyx_n_s__split; static PyObject *__pyx_n_s__startswith; static PyObject *__pyx_n_s__strip; +static PyObject *__pyx_n_s__super; static PyObject *__pyx_n_s__tagger; static PyObject *__pyx_n_s__utf8; static PyObject *__pyx_n_s__value; @@ -7706,7 +7728,7 @@ static PyObject *__pyx_pf_5_cdec_5TRule_4__str__(struct __pyx_obj_5_cdec_TRule * * return '%s ||| %s ||| %s ||| %s' % (self.lhs, * _phrase(self.f), _phrase(self.e), scores) # <<<<<<<<<<<<<< * - * cdef class Grammar: + * cdef class MRule(TRule): */ __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s___phrase); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); @@ -7773,6 +7795,292 @@ static PyObject *__pyx_pf_5_cdec_5TRule_4__str__(struct __pyx_obj_5_cdec_TRule * return __pyx_r; } +/* Python wrapper */ +static int __pyx_pw_5_cdec_5MRule_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static int __pyx_pw_5_cdec_5MRule_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyObject *__pyx_v_lhs = 0; + PyObject *__pyx_v_rhs = 0; + PyObject *__pyx_v_scores = 0; + PyObject *__pyx_v_a = 0; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__init__ (wrapper)", 0); + { + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__lhs,&__pyx_n_s__rhs,&__pyx_n_s__scores,&__pyx_n_s__a,0}; + PyObject* values[4] = {0,0,0,0}; + + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":170 + * + * cdef class MRule(TRule): + * def __init__(self, lhs, rhs, scores, a=None): # <<<<<<<<<<<<<< + * cdef unsigned i = 1 + * e = [] + */ + values[3] = ((PyObject *)Py_None); + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args; + const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); + switch (pos_args) { + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = PyDict_Size(__pyx_kwds); + switch (pos_args) { + case 0: + if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__lhs)) != 0)) kw_args--; + else goto __pyx_L5_argtuple_error; + case 1: + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__rhs)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("__init__", 0, 3, 4, 1); {__pyx_filename = __pyx_f[2]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 2: + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__scores)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("__init__", 0, 3, 4, 2); {__pyx_filename = __pyx_f[2]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 3: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__a); + if (value) { values[3] = value; kw_args--; } + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__init__") < 0)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + } else { + switch (PyTuple_GET_SIZE(__pyx_args)) { + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + break; + default: goto __pyx_L5_argtuple_error; + } + } + __pyx_v_lhs = values[0]; + __pyx_v_rhs = values[1]; + __pyx_v_scores = values[2]; + __pyx_v_a = values[3]; + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("__init__", 0, 3, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[2]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_L3_error:; + __Pyx_AddTraceback("_cdec.MRule.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return -1; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_5_cdec_5MRule___init__(((struct __pyx_obj_5_cdec_MRule *)__pyx_v_self), __pyx_v_lhs, __pyx_v_rhs, __pyx_v_scores, __pyx_v_a); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_5_cdec_5MRule___init__(struct __pyx_obj_5_cdec_MRule *__pyx_v_self, PyObject *__pyx_v_lhs, PyObject *__pyx_v_rhs, PyObject *__pyx_v_scores, PyObject *__pyx_v_a) { + unsigned int __pyx_v_i; + PyObject *__pyx_v_e = NULL; + PyObject *__pyx_v_s = NULL; + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + Py_ssize_t __pyx_t_2; + PyObject *(*__pyx_t_3)(PyObject *); + PyObject *__pyx_t_4 = NULL; + int __pyx_t_5; + PyObject *__pyx_t_6 = NULL; + int __pyx_t_7; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__init__", 0); + + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":171 + * cdef class MRule(TRule): + * def __init__(self, lhs, rhs, scores, a=None): + * cdef unsigned i = 1 # <<<<<<<<<<<<<< + * e = [] + * for s in rhs: + */ + __pyx_v_i = 1; + + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":172 + * def __init__(self, lhs, rhs, scores, a=None): + * cdef unsigned i = 1 + * e = [] # <<<<<<<<<<<<<< + * for s in rhs: + * if isinstance(s, NT): + */ + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 172; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_v_e = __pyx_t_1; + __pyx_t_1 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":173 + * cdef unsigned i = 1 + * e = [] + * for s in rhs: # <<<<<<<<<<<<<< + * if isinstance(s, NT): + * e.append(NTRef(i)) + */ + if (PyList_CheckExact(__pyx_v_rhs) || PyTuple_CheckExact(__pyx_v_rhs)) { + __pyx_t_1 = __pyx_v_rhs; __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0; + __pyx_t_3 = NULL; + } else { + __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_rhs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = Py_TYPE(__pyx_t_1)->tp_iternext; + } + for (;;) { + if (!__pyx_t_3 && PyList_CheckExact(__pyx_t_1)) { + if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_1)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; + #else + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + #endif + } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_1)) { + if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; + #else + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + #endif + } else { + __pyx_t_4 = __pyx_t_3(__pyx_t_1); + if (unlikely(!__pyx_t_4)) { + if (PyErr_Occurred()) { + if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); + else {__pyx_filename = __pyx_f[2]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + break; + } + __Pyx_GOTREF(__pyx_t_4); + } + __Pyx_XDECREF(__pyx_v_s); + __pyx_v_s = __pyx_t_4; + __pyx_t_4 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":174 + * e = [] + * for s in rhs: + * if isinstance(s, NT): # <<<<<<<<<<<<<< + * e.append(NTRef(i)) + * i += 1 + */ + __pyx_t_4 = ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_NT)); + __Pyx_INCREF(__pyx_t_4); + __pyx_t_5 = __Pyx_TypeCheck(__pyx_v_s, __pyx_t_4); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_5) { + + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":175 + * for s in rhs: + * if isinstance(s, NT): + * e.append(NTRef(i)) # <<<<<<<<<<<<<< + * i += 1 + * else: + */ + __pyx_t_4 = PyLong_FromUnsignedLong(__pyx_v_i); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_4); + __Pyx_GIVEREF(__pyx_t_4); + __pyx_t_4 = 0; + __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_NTRef)), ((PyObject *)__pyx_t_6), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0; + __pyx_t_7 = PyList_Append(__pyx_v_e, __pyx_t_4); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":176 + * if isinstance(s, NT): + * e.append(NTRef(i)) + * i += 1 # <<<<<<<<<<<<<< + * else: + * e.append(s) + */ + __pyx_v_i = (__pyx_v_i + 1); + goto __pyx_L5; + } + /*else*/ { + + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":178 + * i += 1 + * else: + * e.append(s) # <<<<<<<<<<<<<< + * super(MRule, self).__init__(lhs, rhs, e, scores, a) + * + */ + __pyx_t_7 = PyList_Append(__pyx_v_e, __pyx_v_s); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 178; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_L5:; + } + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":179 + * else: + * e.append(s) + * super(MRule, self).__init__(lhs, rhs, e, scores, a) # <<<<<<<<<<<<<< + * + * cdef class Grammar: + */ + __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_INCREF(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_MRule))); + PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)((PyObject*)__pyx_ptype_5_cdec_MRule))); + __Pyx_GIVEREF(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_MRule))); + __Pyx_INCREF(((PyObject *)__pyx_v_self)); + PyTuple_SET_ITEM(__pyx_t_1, 1, ((PyObject *)__pyx_v_self)); + __Pyx_GIVEREF(((PyObject *)__pyx_v_self)); + __pyx_t_4 = PyObject_Call(__pyx_builtin_super, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; + __pyx_t_1 = PyObject_GetAttr(__pyx_t_4, __pyx_n_s____init__); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = PyTuple_New(5); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __Pyx_INCREF(__pyx_v_lhs); + PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_v_lhs); + __Pyx_GIVEREF(__pyx_v_lhs); + __Pyx_INCREF(__pyx_v_rhs); + PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_v_rhs); + __Pyx_GIVEREF(__pyx_v_rhs); + __Pyx_INCREF(((PyObject *)__pyx_v_e)); + PyTuple_SET_ITEM(__pyx_t_4, 2, ((PyObject *)__pyx_v_e)); + __Pyx_GIVEREF(((PyObject *)__pyx_v_e)); + __Pyx_INCREF(__pyx_v_scores); + PyTuple_SET_ITEM(__pyx_t_4, 3, __pyx_v_scores); + __Pyx_GIVEREF(__pyx_v_scores); + __Pyx_INCREF(__pyx_v_a); + PyTuple_SET_ITEM(__pyx_t_4, 4, __pyx_v_a); + __Pyx_GIVEREF(__pyx_v_a); + __pyx_t_6 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_AddTraceback("_cdec.MRule.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_e); + __Pyx_XDECREF(__pyx_v_s); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + /* Python wrapper */ static void __pyx_pw_5_cdec_7Grammar_1__dealloc__(PyObject *__pyx_v_self); /*proto*/ static void __pyx_pw_5_cdec_7Grammar_1__dealloc__(PyObject *__pyx_v_self) { @@ -7782,7 +8090,7 @@ static void __pyx_pw_5_cdec_7Grammar_1__dealloc__(PyObject *__pyx_v_self) { __Pyx_RefNannyFinishContext(); } -/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":172 +/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":184 * cdef shared_ptr[grammar.Grammar]* grammar * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -7794,7 +8102,7 @@ static void __pyx_pf_5_cdec_7Grammar___dealloc__(CYTHON_UNUSED struct __pyx_obj_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":173 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":185 * * def __dealloc__(self): * del self.grammar # <<<<<<<<<<<<<< @@ -7818,7 +8126,7 @@ static PyObject *__pyx_pw_5_cdec_7Grammar_3__iter__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":175 +/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":187 * del self.grammar * * def __iter__(self): # <<<<<<<<<<<<<< @@ -7844,7 +8152,7 @@ static PyObject *__pyx_pf_5_cdec_7Grammar_2__iter__(struct __pyx_obj_5_cdec_Gram __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_7Grammar_4generator3, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_7Grammar_4generator3, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 187; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_cur_scope); __Pyx_RefNannyFinishContext(); return (PyObject *) gen; @@ -7879,9 +8187,9 @@ static PyObject *__pyx_gb_5_cdec_7Grammar_4generator3(__pyx_GeneratorObject *__p return NULL; } __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 187; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":176 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":188 * * def __iter__(self): * cdef grammar.const_GrammarIter* root = self.grammar.get().GetRoot() # <<<<<<<<<<<<<< @@ -7890,7 +8198,7 @@ static PyObject *__pyx_gb_5_cdec_7Grammar_4generator3(__pyx_GeneratorObject *__p */ __pyx_cur_scope->__pyx_v_root = __pyx_cur_scope->__pyx_v_self->grammar->get()->GetRoot(); - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":177 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":189 * def __iter__(self): * cdef grammar.const_GrammarIter* root = self.grammar.get().GetRoot() * cdef grammar.const_RuleBin* rbin = root.GetRules() # <<<<<<<<<<<<<< @@ -7899,7 +8207,7 @@ static PyObject *__pyx_gb_5_cdec_7Grammar_4generator3(__pyx_GeneratorObject *__p */ __pyx_cur_scope->__pyx_v_rbin = __pyx_cur_scope->__pyx_v_root->GetRules(); - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":180 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":192 * cdef TRule trule * cdef unsigned i * for i in range(rbin.GetNumRules()): # <<<<<<<<<<<<<< @@ -7910,23 +8218,23 @@ static PyObject *__pyx_gb_5_cdec_7Grammar_4generator3(__pyx_GeneratorObject *__p for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":181 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":193 * cdef unsigned i * for i in range(rbin.GetNumRules()): * trule = TRule.__new__(TRule) # <<<<<<<<<<<<<< * trule.rule = new shared_ptr[grammar.TRule](rbin.GetIthRule(i)) * yield trule */ - __pyx_t_3 = __Pyx_tp_new(((PyObject*)__pyx_ptype_5_cdec_TRule)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 181; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_tp_new(((PyObject*)__pyx_ptype_5_cdec_TRule)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 193; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - if (!(likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5_cdec_TRule)))) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 181; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5_cdec_TRule)))) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 193; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_XGOTREF(((PyObject *)__pyx_cur_scope->__pyx_v_trule)); __Pyx_XDECREF(((PyObject *)__pyx_cur_scope->__pyx_v_trule)); __Pyx_GIVEREF(__pyx_t_3); __pyx_cur_scope->__pyx_v_trule = ((struct __pyx_obj_5_cdec_TRule *)__pyx_t_3); __pyx_t_3 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":182 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":194 * for i in range(rbin.GetNumRules()): * trule = TRule.__new__(TRule) * trule.rule = new shared_ptr[grammar.TRule](rbin.GetIthRule(i)) # <<<<<<<<<<<<<< @@ -7935,7 +8243,7 @@ static PyObject *__pyx_gb_5_cdec_7Grammar_4generator3(__pyx_GeneratorObject *__p */ __pyx_cur_scope->__pyx_v_trule->rule = new boost::shared_ptr(__pyx_cur_scope->__pyx_v_rbin->GetIthRule(__pyx_cur_scope->__pyx_v_i)); - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":183 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":195 * trule = TRule.__new__(TRule) * trule.rule = new shared_ptr[grammar.TRule](rbin.GetIthRule(i)) * yield trule # <<<<<<<<<<<<<< @@ -7954,7 +8262,7 @@ static PyObject *__pyx_gb_5_cdec_7Grammar_4generator3(__pyx_GeneratorObject *__p __pyx_L6_resume_from_yield:; __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 183; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 195; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } PyErr_SetNone(PyExc_StopIteration); goto __pyx_L0; @@ -7980,7 +8288,7 @@ static PyObject *__pyx_pw_5_cdec_7Grammar_4name_1__get__(PyObject *__pyx_v_self) return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":186 +/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":198 * * property name: * def __get__(self): # <<<<<<<<<<<<<< @@ -7993,7 +8301,7 @@ static PyObject *__pyx_pf_5_cdec_7Grammar_4name___get__(struct __pyx_obj_5_cdec_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__get__", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":187 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":199 * property name: * def __get__(self): * self.grammar.get().GetGrammarName().c_str() # <<<<<<<<<<<<<< @@ -8019,7 +8327,7 @@ static int __pyx_pw_5_cdec_7Grammar_4name_3__set__(PyObject *__pyx_v_self, PyObj return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":189 +/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":201 * self.grammar.get().GetGrammarName().c_str() * * def __set__(self, name): # <<<<<<<<<<<<<< @@ -8036,14 +8344,14 @@ static int __pyx_pf_5_cdec_7Grammar_4name_2__set__(struct __pyx_obj_5_cdec_Gramm int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__set__", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":190 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":202 * * def __set__(self, name): * self.grammar.get().SetGrammarName(string(name)) # <<<<<<<<<<<<<< * * cdef class TextGrammar(Grammar): */ - __pyx_t_1 = PyBytes_AsString(__pyx_v_name); if (unlikely((!__pyx_t_1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyBytes_AsString(__pyx_v_name); if (unlikely((!__pyx_t_1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_self->grammar->get()->SetGrammarName(std::string(((char *)__pyx_t_1))); __pyx_r = 0; @@ -8081,7 +8389,7 @@ static int __pyx_pw_5_cdec_11TextGrammar_1__cinit__(PyObject *__pyx_v_self, PyOb else goto __pyx_L5_argtuple_error; } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 193; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 205; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 1) { goto __pyx_L5_argtuple_error; @@ -8092,7 +8400,7 @@ static int __pyx_pw_5_cdec_11TextGrammar_1__cinit__(PyObject *__pyx_v_self, PyOb } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__cinit__", 1, 1, 1, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[2]; __pyx_lineno = 193; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("__cinit__", 1, 1, 1, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[2]; __pyx_lineno = 205; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_cdec.TextGrammar.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -8103,7 +8411,7 @@ static int __pyx_pw_5_cdec_11TextGrammar_1__cinit__(PyObject *__pyx_v_self, PyOb return __pyx_r; } -/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":193 +/* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":205 * * cdef class TextGrammar(Grammar): * def __cinit__(self, rules): # <<<<<<<<<<<<<< @@ -8128,7 +8436,7 @@ static int __pyx_pf_5_cdec_11TextGrammar___cinit__(struct __pyx_obj_5_cdec_TextG int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":194 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":206 * cdef class TextGrammar(Grammar): * def __cinit__(self, rules): * self.grammar = new shared_ptr[grammar.Grammar](new grammar.TextGrammar()) # <<<<<<<<<<<<<< @@ -8137,7 +8445,7 @@ static int __pyx_pf_5_cdec_11TextGrammar___cinit__(struct __pyx_obj_5_cdec_TextG */ __pyx_v_self->__pyx_base.grammar = new boost::shared_ptr(new TextGrammar()); - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":195 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":207 * def __cinit__(self, rules): * self.grammar = new shared_ptr[grammar.Grammar](new grammar.TextGrammar()) * cdef grammar.TextGrammar* _g = self.grammar.get() # <<<<<<<<<<<<<< @@ -8146,7 +8454,7 @@ static int __pyx_pf_5_cdec_11TextGrammar___cinit__(struct __pyx_obj_5_cdec_TextG */ __pyx_v__g = ((TextGrammar *)__pyx_v_self->__pyx_base.grammar->get()); - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":196 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":208 * self.grammar = new shared_ptr[grammar.Grammar](new grammar.TextGrammar()) * cdef grammar.TextGrammar* _g = self.grammar.get() * for trule in rules: # <<<<<<<<<<<<<< @@ -8157,7 +8465,7 @@ static int __pyx_pf_5_cdec_11TextGrammar___cinit__(struct __pyx_obj_5_cdec_TextG __pyx_t_1 = __pyx_v_rules; __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0; __pyx_t_3 = NULL; } else { - __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_rules); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_rules); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_3 = Py_TYPE(__pyx_t_1)->tp_iternext; } @@ -8167,21 +8475,21 @@ static int __pyx_pf_5_cdec_11TextGrammar___cinit__(struct __pyx_obj_5_cdec_TextG #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; #endif } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_1)) { if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; #endif } else { __pyx_t_4 = __pyx_t_3(__pyx_t_1); if (unlikely(!__pyx_t_4)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[2]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[2]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -8191,7 +8499,7 @@ static int __pyx_pf_5_cdec_11TextGrammar___cinit__(struct __pyx_obj_5_cdec_TextG __pyx_v_trule = __pyx_t_4; __pyx_t_4 = 0; - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":197 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":209 * cdef grammar.TextGrammar* _g = self.grammar.get() * for trule in rules: * if isinstance(trule, _sa.Rule): # <<<<<<<<<<<<<< @@ -8204,17 +8512,17 @@ static int __pyx_pf_5_cdec_11TextGrammar___cinit__(struct __pyx_obj_5_cdec_TextG __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_5) { - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":198 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":210 * for trule in rules: * if isinstance(trule, _sa.Rule): * trule = convert_rule(trule) # <<<<<<<<<<<<<< * elif not isinstance(trule, TRule): * raise ValueError('the grammar should contain TRule objects') */ - if (!(likely(((__pyx_v_trule) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_trule, __pyx_ptype_4cdec_2sa_3_sa_Rule))))) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_trule) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_trule, __pyx_ptype_4cdec_2sa_3_sa_Rule))))) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_4 = __pyx_v_trule; __Pyx_INCREF(__pyx_t_4); - __pyx_t_6 = ((PyObject *)__pyx_f_5_cdec_convert_rule(((struct __pyx_obj_4cdec_2sa_3_sa_Rule *)__pyx_t_4))); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = ((PyObject *)__pyx_f_5_cdec_convert_rule(((struct __pyx_obj_4cdec_2sa_3_sa_Rule *)__pyx_t_4))); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_v_trule); @@ -8223,7 +8531,7 @@ static int __pyx_pf_5_cdec_11TextGrammar___cinit__(struct __pyx_obj_5_cdec_TextG goto __pyx_L5; } - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":199 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":211 * if isinstance(trule, _sa.Rule): * trule = convert_rule(trule) * elif not isinstance(trule, TRule): # <<<<<<<<<<<<<< @@ -8237,22 +8545,22 @@ static int __pyx_pf_5_cdec_11TextGrammar___cinit__(struct __pyx_obj_5_cdec_TextG __pyx_t_7 = (!__pyx_t_5); if (__pyx_t_7) { - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":200 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":212 * trule = convert_rule(trule) * elif not isinstance(trule, TRule): * raise ValueError('the grammar should contain TRule objects') # <<<<<<<<<<<<<< * _g.AddRule(( trule).rule[0]) */ - __pyx_t_6 = PyObject_Call(__pyx_builtin_ValueError, ((PyObject *)__pyx_k_tuple_14), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 200; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_Call(__pyx_builtin_ValueError, ((PyObject *)__pyx_k_tuple_14), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 212; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_Raise(__pyx_t_6, 0, 0, 0); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - {__pyx_filename = __pyx_f[2]; __pyx_lineno = 200; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[2]; __pyx_lineno = 212; __pyx_clineno = __LINE__; goto __pyx_L1_error;} goto __pyx_L5; } __pyx_L5:; - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":201 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":213 * elif not isinstance(trule, TRule): * raise ValueError('the grammar should contain TRule objects') * _g.AddRule(( trule).rule[0]) # <<<<<<<<<<<<<< @@ -19428,6 +19736,174 @@ static PyTypeObject __pyx_type_5_cdec_TRule = { #endif }; +static PyObject *__pyx_tp_new_5_cdec_MRule(PyTypeObject *t, PyObject *a, PyObject *k) { + PyObject *o = __pyx_tp_new_5_cdec_TRule(t, a, k); + if (!o) return 0; + return o; +} + +static PyMethodDef __pyx_methods_5_cdec_MRule[] = { + {0, 0, 0, 0} +}; + +static PyNumberMethods __pyx_tp_as_number_MRule = { + 0, /*nb_add*/ + 0, /*nb_subtract*/ + 0, /*nb_multiply*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_divide*/ + #endif + 0, /*nb_remainder*/ + 0, /*nb_divmod*/ + 0, /*nb_power*/ + 0, /*nb_negative*/ + 0, /*nb_positive*/ + 0, /*nb_absolute*/ + 0, /*nb_nonzero*/ + 0, /*nb_invert*/ + 0, /*nb_lshift*/ + 0, /*nb_rshift*/ + 0, /*nb_and*/ + 0, /*nb_xor*/ + 0, /*nb_or*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_coerce*/ + #endif + 0, /*nb_int*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_long*/ + #else + 0, /*reserved*/ + #endif + 0, /*nb_float*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_oct*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*nb_hex*/ + #endif + 0, /*nb_inplace_add*/ + 0, /*nb_inplace_subtract*/ + 0, /*nb_inplace_multiply*/ + #if PY_MAJOR_VERSION < 3 + 0, /*nb_inplace_divide*/ + #endif + 0, /*nb_inplace_remainder*/ + 0, /*nb_inplace_power*/ + 0, /*nb_inplace_lshift*/ + 0, /*nb_inplace_rshift*/ + 0, /*nb_inplace_and*/ + 0, /*nb_inplace_xor*/ + 0, /*nb_inplace_or*/ + 0, /*nb_floor_divide*/ + 0, /*nb_true_divide*/ + 0, /*nb_inplace_floor_divide*/ + 0, /*nb_inplace_true_divide*/ + #if PY_VERSION_HEX >= 0x02050000 + 0, /*nb_index*/ + #endif +}; + +static PySequenceMethods __pyx_tp_as_sequence_MRule = { + 0, /*sq_length*/ + 0, /*sq_concat*/ + 0, /*sq_repeat*/ + 0, /*sq_item*/ + 0, /*sq_slice*/ + 0, /*sq_ass_item*/ + 0, /*sq_ass_slice*/ + 0, /*sq_contains*/ + 0, /*sq_inplace_concat*/ + 0, /*sq_inplace_repeat*/ +}; + +static PyMappingMethods __pyx_tp_as_mapping_MRule = { + 0, /*mp_length*/ + 0, /*mp_subscript*/ + 0, /*mp_ass_subscript*/ +}; + +static PyBufferProcs __pyx_tp_as_buffer_MRule = { + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getreadbuffer*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getwritebuffer*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getsegcount*/ + #endif + #if PY_MAJOR_VERSION < 3 + 0, /*bf_getcharbuffer*/ + #endif + #if PY_VERSION_HEX >= 0x02060000 + 0, /*bf_getbuffer*/ + #endif + #if PY_VERSION_HEX >= 0x02060000 + 0, /*bf_releasebuffer*/ + #endif +}; + +static PyTypeObject __pyx_type_5_cdec_MRule = { + PyVarObject_HEAD_INIT(0, 0) + __Pyx_NAMESTR("_cdec.MRule"), /*tp_name*/ + sizeof(struct __pyx_obj_5_cdec_MRule), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + __pyx_tp_dealloc_5_cdec_TRule, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + #if PY_MAJOR_VERSION < 3 + 0, /*tp_compare*/ + #else + 0, /*reserved*/ + #endif + 0, /*tp_repr*/ + &__pyx_tp_as_number_MRule, /*tp_as_number*/ + &__pyx_tp_as_sequence_MRule, /*tp_as_sequence*/ + &__pyx_tp_as_mapping_MRule, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + #if CYTHON_COMPILING_IN_PYPY + __pyx_pw_5_cdec_5TRule_5__str__, /*tp_str*/ + #else + 0, /*tp_str*/ + #endif + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + &__pyx_tp_as_buffer_MRule, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE, /*tp_flags*/ + 0, /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + __pyx_methods_5_cdec_MRule, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + __pyx_pw_5_cdec_5MRule_1__init__, /*tp_init*/ + 0, /*tp_alloc*/ + __pyx_tp_new_5_cdec_MRule, /*tp_new*/ + 0, /*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, /*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + #if PY_VERSION_HEX >= 0x02060000 + 0, /*tp_version_tag*/ + #endif +}; + static PyObject *__pyx_tp_new_5_cdec_Grammar(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { PyObject *o = (*t->tp_alloc)(t, 0); if (!o) return 0; @@ -27070,6 +27546,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s____class__, __pyx_k____class__, sizeof(__pyx_k____class__), 0, 0, 1, 1}, {&__pyx_n_s____enter__, __pyx_k____enter__, sizeof(__pyx_k____enter__), 0, 0, 1, 1}, {&__pyx_n_s____exit__, __pyx_k____exit__, sizeof(__pyx_k____exit__), 0, 0, 1, 1}, + {&__pyx_n_s____init__, __pyx_k____init__, sizeof(__pyx_k____init__), 0, 0, 1, 1}, {&__pyx_n_s____main__, __pyx_k____main__, sizeof(__pyx_k____main__), 0, 0, 1, 1}, {&__pyx_n_s____name__, __pyx_k____name__, sizeof(__pyx_k____name__), 0, 0, 1, 1}, {&__pyx_n_s____test__, __pyx_k____test__, sizeof(__pyx_k____test__), 0, 0, 1, 1}, @@ -27124,6 +27601,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s__ref, __pyx_k__ref, sizeof(__pyx_k__ref), 0, 0, 1, 1}, {&__pyx_n_s__refs, __pyx_k__refs, sizeof(__pyx_k__refs), 0, 0, 1, 1}, {&__pyx_n_s__replace, __pyx_k__replace, sizeof(__pyx_k__replace), 0, 0, 1, 1}, + {&__pyx_n_s__rhs, __pyx_k__rhs, sizeof(__pyx_k__rhs), 0, 0, 1, 1}, {&__pyx_n_s__rules, __pyx_k__rules, sizeof(__pyx_k__rules), 0, 0, 1, 1}, {&__pyx_n_s__scfg, __pyx_k__scfg, sizeof(__pyx_k__scfg), 0, 0, 1, 1}, {&__pyx_n_s__score, __pyx_k__score, sizeof(__pyx_k__score), 0, 0, 1, 1}, @@ -27134,6 +27612,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s__split, __pyx_k__split, sizeof(__pyx_k__split), 0, 0, 1, 1}, {&__pyx_n_s__startswith, __pyx_k__startswith, sizeof(__pyx_k__startswith), 0, 0, 1, 1}, {&__pyx_n_s__strip, __pyx_k__strip, sizeof(__pyx_k__strip), 0, 0, 1, 1}, + {&__pyx_n_s__super, __pyx_k__super, sizeof(__pyx_k__super), 0, 0, 1, 1}, {&__pyx_n_s__tagger, __pyx_k__tagger, sizeof(__pyx_k__tagger), 0, 0, 1, 1}, {&__pyx_n_s__utf8, __pyx_k__utf8, sizeof(__pyx_k__utf8), 0, 0, 1, 1}, {&__pyx_n_s__value, __pyx_k__value, sizeof(__pyx_k__value), 0, 0, 1, 1}, @@ -27146,7 +27625,8 @@ static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_KeyError = __Pyx_GetName(__pyx_b, __pyx_n_s__KeyError); if (!__pyx_builtin_KeyError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 22; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_range = __Pyx_GetName(__pyx_b, __pyx_n_s__range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_NotImplemented = __Pyx_GetName(__pyx_b, __pyx_n_s__NotImplemented); if (!__pyx_builtin_NotImplemented) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 89; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_ValueError = __Pyx_GetName(__pyx_b, __pyx_n_s__ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 200; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_super = __Pyx_GetName(__pyx_b, __pyx_n_s__super); if (!__pyx_builtin_super) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_ValueError = __Pyx_GetName(__pyx_b, __pyx_n_s__ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 212; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_eval = __Pyx_GetName(__pyx_b, __pyx_n_s__eval); if (!__pyx_builtin_eval) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_enumerate = __Pyx_GetName(__pyx_b, __pyx_n_s__enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_IndexError = __Pyx_GetName(__pyx_b, __pyx_n_s__IndexError); if (!__pyx_builtin_IndexError) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -27202,13 +27682,13 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_n_s__utf8)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_6)); - /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":200 + /* "/Users/vchahun/Sandbox/cdec/python/src/grammar.pxi":212 * trule = convert_rule(trule) * elif not isinstance(trule, TRule): * raise ValueError('the grammar should contain TRule objects') # <<<<<<<<<<<<<< * _g.AddRule(( trule).rule[0]) */ - __pyx_k_tuple_14 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_14)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 200; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_k_tuple_14 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_14)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 212; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_k_tuple_14); __Pyx_INCREF(((PyObject *)__pyx_kp_s_13)); PyTuple_SET_ITEM(__pyx_k_tuple_14, 0, ((PyObject *)__pyx_kp_s_13)); @@ -27618,12 +28098,16 @@ PyMODINIT_FUNC PyInit__cdec(void) if (PyType_Ready(&__pyx_type_5_cdec_TRule) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_SetAttrString(__pyx_m, "TRule", (PyObject *)&__pyx_type_5_cdec_TRule) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec_TRule = &__pyx_type_5_cdec_TRule; - if (PyType_Ready(&__pyx_type_5_cdec_Grammar) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__Pyx_SetAttrString(__pyx_m, "Grammar", (PyObject *)&__pyx_type_5_cdec_Grammar) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_type_5_cdec_MRule.tp_base = __pyx_ptype_5_cdec_TRule; + if (PyType_Ready(&__pyx_type_5_cdec_MRule) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_SetAttrString(__pyx_m, "MRule", (PyObject *)&__pyx_type_5_cdec_MRule) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_ptype_5_cdec_MRule = &__pyx_type_5_cdec_MRule; + if (PyType_Ready(&__pyx_type_5_cdec_Grammar) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 181; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_SetAttrString(__pyx_m, "Grammar", (PyObject *)&__pyx_type_5_cdec_Grammar) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 181; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec_Grammar = &__pyx_type_5_cdec_Grammar; __pyx_type_5_cdec_TextGrammar.tp_base = __pyx_ptype_5_cdec_Grammar; - if (PyType_Ready(&__pyx_type_5_cdec_TextGrammar) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 192; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__Pyx_SetAttrString(__pyx_m, "TextGrammar", (PyObject *)&__pyx_type_5_cdec_TextGrammar) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 192; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec_TextGrammar) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 204; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_SetAttrString(__pyx_m, "TextGrammar", (PyObject *)&__pyx_type_5_cdec_TextGrammar) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 204; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec_TextGrammar = &__pyx_type_5_cdec_TextGrammar; if (PyType_Ready(&__pyx_type_5_cdec_Hypergraph) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_SetAttrString(__pyx_m, "Hypergraph", (PyObject *)&__pyx_type_5_cdec_Hypergraph) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -27678,7 +28162,7 @@ PyMODINIT_FUNC PyInit__cdec(void) __pyx_ptype_5_cdec___pyx_scope_struct_5___str__ = &__pyx_type_5_cdec___pyx_scope_struct_5___str__; if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_6_genexpr) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_6_genexpr = &__pyx_type_5_cdec___pyx_scope_struct_6_genexpr; - if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_7___iter__) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_7___iter__) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 187; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_7___iter__ = &__pyx_type_5_cdec___pyx_scope_struct_7___iter__; if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_8_kbest) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 31; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_8_kbest = &__pyx_type_5_cdec___pyx_scope_struct_8_kbest; diff --git a/python/src/grammar.pxi b/python/src/grammar.pxi index 59266238..a9a5ea14 100644 --- a/python/src/grammar.pxi +++ b/python/src/grammar.pxi @@ -166,6 +166,18 @@ cdef class TRule: return '%s ||| %s ||| %s ||| %s' % (self.lhs, _phrase(self.f), _phrase(self.e), scores) +cdef class MRule(TRule): + def __init__(self, lhs, rhs, scores, a=None): + cdef unsigned i = 1 + e = [] + for s in rhs: + if isinstance(s, NT): + e.append(NTRef(i)) + i += 1 + else: + e.append(s) + super(MRule, self).__init__(lhs, rhs, e, scores, a) + cdef class Grammar: cdef shared_ptr[grammar.Grammar]* grammar -- cgit v1.2.3