Merge remote-tracking branch 'upstream/master'

author: Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-08-01 17:32:37 +0200
committer: Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-08-01 17:32:37 +0200
commit: 3f8e33cfe481a09c121a410e66a6074b5d05683e (patch)
tree: a41ecaf0bbb69fa91a581623abe89d41219c04f8 /python/pkg/cdec
parent: c139ce495861bb341e1b86a85ad4559f9ad53c14 (diff)
parent: 9fe0219562e5db25171cce8776381600ff9a5649 (diff)
8 files changed, 2746 insertions, 0 deletions
diff --git a/python/pkg/cdec/__init__.py b/python/pkg/cdec/__init__.py
new file mode 100644
index 00000000..531fea49
--- /dev/null
+++ b/python/pkg/cdec/__init__.py
@@ -0,0 +1 @@
+from cdec._cdec import Decoder, Lattice, TRule, MRule, NT, NTRef, ParseFailed, InvalidConfig
diff --git a/python/pkg/cdec/configobj.py b/python/pkg/cdec/configobj.py
new file mode 100644
index 00000000..c1f6e6df
--- /dev/null
+++ b/python/pkg/cdec/configobj.py
@@ -0,0 +1,2468 @@
+# configobj.py
+# A config file reader/writer that supports nested sections in config files.
+# Copyright (C) 2005-2010 Michael Foord, Nicola Larosa
+# E-mail: fuzzyman AT voidspace DOT org DOT uk
+#         nico AT tekNico DOT net
+
+# ConfigObj 4
+# http://www.voidspace.org.uk/python/configobj.html
+
+# Released subject to the BSD License
+# Please see http://www.voidspace.org.uk/python/license.shtml
+
+# Scripts maintained at http://www.voidspace.org.uk/python/index.shtml
+# For information about bugfixes, updates and support, please join the
+# ConfigObj mailing list:
+# http://lists.sourceforge.net/lists/listinfo/configobj-develop
+# Comments, suggestions and bug reports welcome.
+
+from __future__ import generators
+
+import os
+import re
+import sys
+
+from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF16_BE, BOM_UTF16_LE
+
+
+# imported lazily to avoid startup performance hit if it isn't used
+compiler = None
+
+# A dictionary mapping BOM to
+# the encoding to decode with, and what to set the
+# encoding attribute to.
+BOMS = {
+    BOM_UTF8: ('utf_8', None),
+    BOM_UTF16_BE: ('utf16_be', 'utf_16'),
+    BOM_UTF16_LE: ('utf16_le', 'utf_16'),
+    BOM_UTF16: ('utf_16', 'utf_16'),
+    }
+# All legal variants of the BOM codecs.
+# TODO: the list of aliases is not meant to be exhaustive, is there a
+#   better way ?
+BOM_LIST = {
+    'utf_16': 'utf_16',
+    'u16': 'utf_16',
+    'utf16': 'utf_16',
+    'utf-16': 'utf_16',
+    'utf16_be': 'utf16_be',
+    'utf_16_be': 'utf16_be',
+    'utf-16be': 'utf16_be',
+    'utf16_le': 'utf16_le',
+    'utf_16_le': 'utf16_le',
+    'utf-16le': 'utf16_le',
+    'utf_8': 'utf_8',
+    'u8': 'utf_8',
+    'utf': 'utf_8',
+    'utf8': 'utf_8',
+    'utf-8': 'utf_8',
+    }
+
+# Map of encodings to the BOM to write.
+BOM_SET = {
+    'utf_8': BOM_UTF8,
+    'utf_16': BOM_UTF16,
+    'utf16_be': BOM_UTF16_BE,
+    'utf16_le': BOM_UTF16_LE,
+    None: BOM_UTF8
+    }
+
+
+def match_utf8(encoding):
+    return BOM_LIST.get(encoding.lower()) == 'utf_8'
+
+
+# Quote strings used for writing values
+squot = "'%s'"
+dquot = '"%s"'
+noquot = "%s"
+wspace_plus = ' \r\n\v\t\'"'
+tsquot = '"""%s"""'
+tdquot = "'''%s'''"
+
+# Sentinel for use in getattr calls to replace hasattr
+MISSING = object()
+
+__version__ = '4.7.2'
+
+try:
+    any
+except NameError:
+    def any(iterable):
+        for entry in iterable:
+            if entry:
+                return True
+        return False
+
+
+__all__ = (
+    '__version__',
+    'DEFAULT_INDENT_TYPE',
+    'DEFAULT_INTERPOLATION',
+    'ConfigObjError',
+    'NestingError',
+    'ParseError',
+    'DuplicateError',
+    'ConfigspecError',
+    'ConfigObj',
+    'SimpleVal',
+    'InterpolationError',
+    'InterpolationLoopError',
+    'MissingInterpolationOption',
+    'RepeatSectionError',
+    'ReloadError',
+    'UnreprError',
+    'UnknownType',
+    'flatten_errors',
+    'get_extra_values'
+)
+
+DEFAULT_INTERPOLATION = 'configparser'
+DEFAULT_INDENT_TYPE = '    '
+MAX_INTERPOL_DEPTH = 10
+
+OPTION_DEFAULTS = {
+    'interpolation': True,
+    'raise_errors': False,
+    'list_values': True,
+    'create_empty': False,
+    'file_error': False,
+    'configspec': None,
+    'stringify': True,
+    # option may be set to one of ('', ' ', '\t')
+    'indent_type': None,
+    'encoding': None,
+    'default_encoding': None,
+    'unrepr': False,
+    'write_empty_values': False,
+}
+
+
+
+def getObj(s):
+    global compiler
+    if compiler is None:
+        import compiler
+    s = "a=" + s
+    p = compiler.parse(s)
+    return p.getChildren()[1].getChildren()[0].getChildren()[1]
+
+
+class UnknownType(Exception):
+    pass
+
+
+class Builder(object):
+    
+    def build(self, o):
+        m = getattr(self, 'build_' + o.__class__.__name__, None)
+        if m is None:
+            raise UnknownType(o.__class__.__name__)
+        return m(o)
+    
+    def build_List(self, o):
+        return map(self.build, o.getChildren())
+    
+    def build_Const(self, o):
+        return o.value
+    
+    def build_Dict(self, o):
+        d = {}
+        i = iter(map(self.build, o.getChildren()))
+        for el in i:
+            d[el] = i.next()
+        return d
+    
+    def build_Tuple(self, o):
+        return tuple(self.build_List(o))
+    
+    def build_Name(self, o):
+        if o.name == 'None':
+            return None
+        if o.name == 'True':
+            return True
+        if o.name == 'False':
+            return False
+        
+        # An undefined Name
+        raise UnknownType('Undefined Name')
+    
+    def build_Add(self, o):
+        real, imag = map(self.build_Const, o.getChildren())
+        try:
+            real = float(real)
+        except TypeError:
+            raise UnknownType('Add')
+        if not isinstance(imag, complex) or imag.real != 0.0:
+            raise UnknownType('Add')
+        return real+imag
+    
+    def build_Getattr(self, o):
+        parent = self.build(o.expr)
+        return getattr(parent, o.attrname)
+    
+    def build_UnarySub(self, o):
+        return -self.build_Const(o.getChildren()[0])
+    
+    def build_UnaryAdd(self, o):
+        return self.build_Const(o.getChildren()[0])
+
+
+_builder = Builder()
+
+
+def unrepr(s):
+    if not s:
+        return s
+    return _builder.build(getObj(s))
+
+
+
+class ConfigObjError(SyntaxError):
+    """
+    This is the base class for all errors that ConfigObj raises.
+    It is a subclass of SyntaxError.
+    """
+    def __init__(self, message='', line_number=None, line=''):
+        self.line = line
+        self.line_number = line_number
+        SyntaxError.__init__(self, message)
+
+
+class NestingError(ConfigObjError):
+    """
+    This error indicates a level of nesting that doesn't match.
+    """
+
+
+class ParseError(ConfigObjError):
+    """
+    This error indicates that a line is badly written.
+    It is neither a valid ``key = value`` line,
+    nor a valid section marker line.
+    """
+
+
+class ReloadError(IOError):
+    """
+    A 'reload' operation failed.
+    This exception is a subclass of ``IOError``.
+    """
+    def __init__(self):
+        IOError.__init__(self, 'reload failed, filename is not set.')
+
+
+class DuplicateError(ConfigObjError):
+    """
+    The keyword or section specified already exists.
+    """
+
+
+class ConfigspecError(ConfigObjError):
+    """
+    An error occured whilst parsing a configspec.
+    """
+
+
+class InterpolationError(ConfigObjError):
+    """Base class for the two interpolation errors."""
+
+
+class InterpolationLoopError(InterpolationError):
+    """Maximum interpolation depth exceeded in string interpolation."""
+
+    def __init__(self, option):
+        InterpolationError.__init__(
+            self,
+            'interpolation loop detected in value "%s".' % option)
+
+
+class RepeatSectionError(ConfigObjError):
+    """
+    This error indicates additional sections in a section with a
+    ``__many__`` (repeated) section.
+    """
+
+
+class MissingInterpolationOption(InterpolationError):
+    """A value specified for interpolation was missing."""
+    def __init__(self, option):
+        msg = 'missing option "%s" in interpolation.' % option
+        InterpolationError.__init__(self, msg)
+
+
+class UnreprError(ConfigObjError):
+    """An error parsing in unrepr mode."""
+
+
+
+class InterpolationEngine(object):
+    """
+    A helper class to help perform string interpolation.
+
+    This class is an abstract base class; its descendants perform
+    the actual work.
+    """
+
+    # compiled regexp to use in self.interpolate()
+    _KEYCRE = re.compile(r"%\(([^)]*)\)s")
+    _cookie = '%'
+
+    def __init__(self, section):
+        # the Section instance that "owns" this engine
+        self.section = section
+
+
+    def interpolate(self, key, value):
+        # short-cut
+        if not self._cookie in value:
+            return value
+        
+        def recursive_interpolate(key, value, section, backtrail):
+            """The function that does the actual work.
+
+            ``value``: the string we're trying to interpolate.
+            ``section``: the section in which that string was found
+            ``backtrail``: a dict to keep track of where we've been,
+            to detect and prevent infinite recursion loops
+
+            This is similar to a depth-first-search algorithm.
+            """
+            # Have we been here already?
+            if (key, section.name) in backtrail:
+                # Yes - infinite loop detected
+                raise InterpolationLoopError(key)
+            # Place a marker on our backtrail so we won't come back here again
+            backtrail[(key, section.name)] = 1
+
+            # Now start the actual work
+            match = self._KEYCRE.search(value)
+            while match:
+                # The actual parsing of the match is implementation-dependent,
+                # so delegate to our helper function
+                k, v, s = self._parse_match(match)
+                if k is None:
+                    # That's the signal that no further interpolation is needed
+                    replacement = v
+                else:
+                    # Further interpolation may be needed to obtain final value
+                    replacement = recursive_interpolate(k, v, s, backtrail)
+                # Replace the matched string with its final value
+                start, end = match.span()
+                value = ''.join((value[:start], replacement, value[end:]))
+                new_search_start = start + len(replacement)
+                # Pick up the next interpolation key, if any, for next time
+                # through the while loop
+                match = self._KEYCRE.search(value, new_search_start)
+
+            # Now safe to come back here again; remove marker from backtrail
+            del backtrail[(key, section.name)]
+
+            return value
+
+        # Back in interpolate(), all we have to do is kick off the recursive
+        # function with appropriate starting values
+        value = recursive_interpolate(key, value, self.section, {})
+        return value
+
+
+    def _fetch(self, key):
+        """Helper function to fetch values from owning section.
+
+        Returns a 2-tuple: the value, and the section where it was found.
+        """
+        # switch off interpolation before we try and fetch anything !
+        save_interp = self.section.main.interpolation
+        self.section.main.interpolation = False
+
+        # Start at section that "owns" this InterpolationEngine
+        current_section = self.section
+        while True:
+            # try the current section first
+            val = current_section.get(key)
+            if val is not None and not isinstance(val, Section):
+                break
+            # try "DEFAULT" next
+            val = current_section.get('DEFAULT', {}).get(key)
+            if val is not None and not isinstance(val, Section):
+                break
+            # move up to parent and try again
+            # top-level's parent is itself
+            if current_section.parent is current_section:
+                # reached top level, time to give up
+                break
+            current_section = current_section.parent
+
+        # restore interpolation to previous value before returning
+        self.section.main.interpolation = save_interp
+        if val is None:
+            raise MissingInterpolationOption(key)
+        return val, current_section
+
+
+    def _parse_match(self, match):
+        """Implementation-dependent helper function.
+
+        Will be passed a match object corresponding to the interpolation
+        key we just found (e.g., "%(foo)s" or "$foo"). Should look up that
+        key in the appropriate config file section (using the ``_fetch()``
+        helper function) and return a 3-tuple: (key, value, section)
+
+        ``key`` is the name of the key we're looking for
+        ``value`` is the value found for that key
+        ``section`` is a reference to the section where it was found
+
+        ``key`` and ``section`` should be None if no further
+        interpolation should be performed on the resulting value
+        (e.g., if we interpolated "$$" and returned "$").
+        """
+        raise NotImplementedError()
+    
+
+
+class ConfigParserInterpolation(InterpolationEngine):
+    """Behaves like ConfigParser."""
+    _cookie = '%'
+    _KEYCRE = re.compile(r"%\(([^)]*)\)s")
+
+    def _parse_match(self, match):
+        key = match.group(1)
+        value, section = self._fetch(key)
+        return key, value, section
+
+
+
+class TemplateInterpolation(InterpolationEngine):
+    """Behaves like string.Template."""
+    _cookie = '$'
+    _delimiter = '$'
+    _KEYCRE = re.compile(r"""
+        \$(?:
+          (?P<escaped>\$)              |   # Two $ signs
+          (?P<named>[_a-z][_a-z0-9]*)  |   # $name format
+          {(?P<braced>[^}]*)}              # ${name} format
+        )
+        """, re.IGNORECASE | re.VERBOSE)
+
+    def _parse_match(self, match):
+        # Valid name (in or out of braces): fetch value from section
+        key = match.group('named') or match.group('braced')
+        if key is not None:
+            value, section = self._fetch(key)
+            return key, value, section
+        # Escaped delimiter (e.g., $$): return single delimiter
+        if match.group('escaped') is not None:
+            # Return None for key and section to indicate it's time to stop
+            return None, self._delimiter, None
+        # Anything else: ignore completely, just return it unchanged
+        return None, match.group(), None
+
+
+interpolation_engines = {
+    'configparser': ConfigParserInterpolation,
+    'template': TemplateInterpolation,
+}
+
+
+def __newobj__(cls, *args):
+    # Hack for pickle
+    return cls.__new__(cls, *args) 
+
+class Section(dict):
+    """
+    A dictionary-like object that represents a section in a config file.
+    
+    It does string interpolation if the 'interpolation' attribute
+    of the 'main' object is set to True.
+    
+    Interpolation is tried first from this object, then from the 'DEFAULT'
+    section of this object, next from the parent and its 'DEFAULT' section,
+    and so on until the main object is reached.
+    
+    A Section will behave like an ordered dictionary - following the
+    order of the ``scalars`` and ``sections`` attributes.
+    You can use this to change the order of members.
+    
+    Iteration follows the order: scalars, then sections.
+    """
+
+    
+    def __setstate__(self, state):
+        dict.update(self, state[0])
+        self.__dict__.update(state[1])
+
+    def __reduce__(self):
+        state = (dict(self), self.__dict__)
+        return (__newobj__, (self.__class__,), state)
+    
+    
+    def __init__(self, parent, depth, main, indict=None, name=None):
+        """
+        * parent is the section above
+        * depth is the depth level of this section
+        * main is the main ConfigObj
+        * indict is a dictionary to initialise the section with
+        """
+        if indict is None:
+            indict = {}
+        dict.__init__(self)
+        # used for nesting level *and* interpolation
+        self.parent = parent
+        # used for the interpolation attribute
+        self.main = main
+        # level of nesting depth of this Section
+        self.depth = depth
+        # purely for information
+        self.name = name
+        #
+        self._initialise()
+        # we do this explicitly so that __setitem__ is used properly
+        # (rather than just passing to ``dict.__init__``)
+        for entry, value in indict.iteritems():
+            self[entry] = value
+            
+            
+    def _initialise(self):
+        # the sequence of scalar values in this Section
+        self.scalars = []
+        # the sequence of sections in this Section
+        self.sections = []
+        # for comments :-)
+        self.comments = {}
+        self.inline_comments = {}
+        # the configspec
+        self.configspec = None
+        # for defaults
+        self.defaults = []
+        self.default_values = {}
+        self.extra_values = []
+        self._created = False
+
+
+    def _interpolate(self, key, value):
+        try:
+            # do we already have an interpolation engine?
+            engine = self._interpolation_engine
+        except AttributeError:
+            # not yet: first time running _interpolate(), so pick the engine
+            name = self.main.interpolation
+            if name == True:  # note that "if name:" would be incorrect here
+                # backwards-compatibility: interpolation=True means use default
+                name = DEFAULT_INTERPOLATION
+            name = name.lower()  # so that "Template", "template", etc. all work
+            class_ = interpolation_engines.get(name, None)
+            if class_ is None:
+                # invalid value for self.main.interpolation
+                self.main.interpolation = False
+                return value
+            else:
+                # save reference to engine so we don't have to do this again
+                engine = self._interpolation_engine = class_(self)
+        # let the engine do the actual work
+        return engine.interpolate(key, value)
+
+
+    def __getitem__(self, key):
+        """Fetch the item and do string interpolation."""
+        val = dict.__getitem__(self, key)
+        if self.main.interpolation: 
+            if isinstance(val, basestring):
+                return self._interpolate(key, val)
+            if isinstance(val, list):
+                def _check(entry):
+                    if isinstance(entry, basestring):
+                        return self._interpolate(key, entry)
+                    return entry
+                new = [_check(entry) for entry in val]
+                if new != val:
+                    return new
+        return val
+
+
+    def __setitem__(self, key, value, unrepr=False):
+        """
+        Correctly set a value.
+        
+        Making dictionary values Section instances.
+        (We have to special case 'Section' instances - which are also dicts)
+        
+        Keys must be strings.
+        Values need only be strings (or lists of strings) if
+        ``main.stringify`` is set.
+        
+        ``unrepr`` must be set when setting a value to a dictionary, without
+        creating a new sub-section.
+        """
+        if not isinstance(key, basestring):
+            raise ValueError('The key "%s" is not a string.' % key)
+        
+        # add the comment
+        if key not in self.comments:
+            self.comments[key] = []
+            self.inline_comments[key] = ''
+        # remove the entry from defaults
+        if key in self.defaults:
+            self.defaults.remove(key)
+        #
+        if isinstance(value, Section):
+            if key not in self:
+                self.sections.append(key)
+            dict.__setitem__(self, key, value)
+        elif isinstance(value, dict) and not unrepr:
+            # First create the new depth level,
+            # then create the section
+            if key not in self:
+                self.sections.append(key)
+            new_depth = self.depth + 1
+            dict.__setitem__(
+                self,
+                key,
+                Section(
+                    self,
+                    new_depth,
+                    self.main,
+                    indict=value,
+                    name=key))
+        else:
+            if key not in self:
+                self.scalars.append(key)
+            if not self.main.stringify:
+                if isinstance(value, basestring):
+                    pass
+                elif isinstance(value, (list, tuple)):
+                    for entry in value:
+                        if not isinstance(entry, basestring):
+                            raise TypeError('Value is not a string "%s".' % entry)
+                else:
+                    raise TypeError('Value is not a string "%s".' % value)
+            dict.__setitem__(self, key, value)
+
+
+    def __delitem__(self, key):
+        """Remove items from the sequence when deleting."""
+        dict. __delitem__(self, key)
+        if key in self.scalars:
+            self.scalars.remove(key)
+        else:
+            self.sections.remove(key)
+        del self.comments[key]
+        del self.inline_comments[key]
+
+
+    def get(self, key, default=None):
+        """A version of ``get`` that doesn't bypass string interpolation."""
+        try:
+            return self[key]
+        except KeyError:
+            return default
+
+
+    def update(self, indict):
+        """
+        A version of update that uses our ``__setitem__``.
+        """
+        for entry in indict:
+            self[entry] = indict[entry]
+
+
+    def pop(self, key, default=MISSING):
+        """
+        'D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
+        If key is not found, d is returned if given, otherwise KeyError is raised'
+        """
+        try:
+            val = self[key]
+        except KeyError:
+            if default is MISSING:
+                raise
+            val = default
+        else:
+            del self[key]
+        return val
+
+
+    def popitem(self):
+        """Pops the first (key,val)"""
+        sequence = (self.scalars + self.sections)
+        if not sequence:
+            raise KeyError(": 'popitem(): dictionary is empty'")
+        key = sequence[0]
+        val =  self[key]
+        del self[key]
+        return key, val
+
+
+    def clear(self):
+        """
+        A version of clear that also affects scalars/sections
+        Also clears comments and configspec.
+        
+        Leaves other attributes alone :
+            depth/main/parent are not affected
+        """
+        dict.clear(self)
+        self.scalars = []
+        self.sections = []
+        self.comments = {}
+        self.inline_comments = {}
+        self.configspec = None
+        self.defaults = []
+        self.extra_values = []
+
+
+    def setdefault(self, key, default=None):
+        """A version of setdefault that sets sequence if appropriate."""
+        try:
+            return self[key]
+        except KeyError:
+            self[key] = default
+            return self[key]
+
+
+    def items(self):
+        """D.items() -> list of D's (key, value) pairs, as 2-tuples"""
+        return zip((self.scalars + self.sections), self.values())
+
+
+    def keys(self):
+        """D.keys() -> list of D's keys"""
+        return (self.scalars + self.sections)
+
+
+    def values(self):
+        """D.values() -> list of D's values"""
+        return [self[key] for key in (self.scalars + self.sections)]
+
+
+    def iteritems(self):
+        """D.iteritems() -> an iterator over the (key, value) items of D"""
+        return iter(self.items())
+
+
+    def iterkeys(self):
+        """D.iterkeys() -> an iterator over the keys of D"""
+        return iter((self.scalars + self.sections))
+
+    __iter__ = iterkeys
+
+
+    def itervalues(self):
+        """D.itervalues() -> an iterator over the values of D"""
+        return iter(self.values())
+
+
+    def __repr__(self):
+        """x.__repr__() <==> repr(x)"""
+        def _getval(key):
+            try:
+                return self[key]
+            except MissingInterpolationOption:
+                return dict.__getitem__(self, key)
+        return '{%s}' % ', '.join([('%s: %s' % (repr(key), repr(_getval(key))))
+            for key in (self.scalars + self.sections)])
+
+    __str__ = __repr__
+    __str__.__doc__ = "x.__str__() <==> str(x)"
+
+
+    # Extra methods - not in a normal dictionary
+
+    def dict(self):
+        """
+        Return a deepcopy of self as a dictionary.
+        
+        All members that are ``Section`` instances are recursively turned to
+        ordinary dictionaries - by calling their ``dict`` method.
+        
+        >>> n = a.dict()
+        >>> n == a
+        1
+        >>> n is a
+        0
+        """
+        newdict = {}
+        for entry in self:
+            this_entry = self[entry]
+            if isinstance(this_entry, Section):
+                this_entry = this_entry.dict()
+            elif isinstance(this_entry, list):
+                # create a copy rather than a reference
+                this_entry = list(this_entry)
+            elif isinstance(this_entry, tuple):
+                # create a copy rather than a reference
+                this_entry = tuple(this_entry)
+            newdict[entry] = this_entry
+        return newdict
+
+
+    def merge(self, indict):
+        """
+        A recursive update - useful for merging config files.
+        
+        >>> a = '''[section1]
+        ...     option1 = True
+        ...     [[subsection]]
+        ...     more_options = False
+        ...     # end of file'''.splitlines()
+        >>> b = '''# File is user.ini
+        ...     [section1]
+        ...     option1 = False
+        ...     # end of file'''.splitlines()
+        >>> c1 = ConfigObj(b)
+        >>> c2 = ConfigObj(a)
+        >>> c2.merge(c1)
+        >>> c2
+        ConfigObj({'section1': {'option1': 'False', 'subsection': {'more_options': 'False'}}})
+        """
+        for key, val in indict.items():
+            if (key in self and isinstance(self[key], dict) and
+                                isinstance(val, dict)):
+                self[key].merge(val)
+            else:   
+                self[key] = val
+
+
+    def rename(self, oldkey, newkey):
+        """
+        Change a keyname to another, without changing position in sequence.
+        
+        Implemented so that transformations can be made on keys,
+        as well as on values. (used by encode and decode)
+        
+        Also renames comments.
+        """
+        if oldkey in self.scalars:
+            the_list = self.scalars
+        elif oldkey in self.sections:
+            the_list = self.sections
+        else:
+            raise KeyError('Key "%s" not found.' % oldkey)
+        pos = the_list.index(oldkey)
+        #
+        val = self[oldkey]
+        dict.__delitem__(self, oldkey)
+        dict.__setitem__(self, newkey, val)
+        the_list.remove(oldkey)
+        the_list.insert(pos, newkey)
+        comm = self.comments[oldkey]
+        inline_comment = self.inline_comments[oldkey]
+        del self.comments[oldkey]
+        del self.inline_comments[oldkey]
+        self.comments[newkey] = comm
+        self.inline_comments[newkey] = inline_comment
+
+
+    def walk(self, function, raise_errors=True,
+            call_on_sections=False, **keywargs):
+        """
+        Walk every member and call a function on the keyword and value.
+        
+        Return a dictionary of the return values
+        
+        If the function raises an exception, raise the errror
+        unless ``raise_errors=False``, in which case set the return value to
+        ``False``.
+        
+        Any unrecognised keyword arguments you pass to walk, will be pased on
+        to the function you pass in.
+        
+        Note: if ``call_on_sections`` is ``True`` then - on encountering a
+        subsection, *first* the function is called for the *whole* subsection,
+        and then recurses into it's members. This means your function must be
+        able to handle strings, dictionaries and lists. This allows you
+        to change the key of subsections as well as for ordinary members. The
+        return value when called on the whole subsection has to be discarded.
+        
+        See  the encode and decode methods for examples, including functions.
+        
+        .. admonition:: caution
+        
+            You can use ``walk`` to transform the names of members of a section
+            but you mustn't add or delete members.
+        
+        >>> config = '''[XXXXsection]
+        ... XXXXkey = XXXXvalue'''.splitlines()
+        >>> cfg = ConfigObj(config)
+        >>> cfg
+        ConfigObj({'XXXXsection': {'XXXXkey': 'XXXXvalue'}})
+        >>> def transform(section, key):
+        ...     val = section[key]
+        ...     newkey = key.replace('XXXX', 'CLIENT1')
+        ...     section.rename(key, newkey)
+        ...     if isinstance(val, (tuple, list, dict)):
+        ...         pass
+        ...     else:
+        ...         val = val.replace('XXXX', 'CLIENT1')
+        ...         section[newkey] = val
+        >>> cfg.walk(transform, call_on_sections=True)
+        {'CLIENT1section': {'CLIENT1key': None}}
+        >>> cfg
+        ConfigObj({'CLIENT1section': {'CLIENT1key': 'CLIENT1value'}})
+        """
+        out = {}
+        # scalars first
+        for i in range(len(self.scalars)):
+            entry = self.scalars[i]
+            try:
+                val = function(self, entry, **keywargs)
+                # bound again in case name has changed
+                entry = self.scalars[i]
+                out[entry] = val
+            except Exception:
+                if raise_errors:
+                    raise
+                else:
+                    entry = self.scalars[i]
+                    out[entry] = False
+        # then sections
+        for i in range(len(self.sections)):
+            entry = self.sections[i]
+            if call_on_sections:
+                try:
+                    function(self, entry, **keywargs)
+                except Exception:
+                    if raise_errors:
+                        raise
+                    else:
+                        entry = self.sections[i]
+                        out[entry] = False
+                # bound again in case name has changed
+                entry = self.sections[i]
+            # previous result is discarded
+            out[entry] = self[entry].walk(
+                function,
+                raise_errors=raise_errors,
+                call_on_sections=call_on_sections,
+                **keywargs)
+        return out
+
+
+    def as_bool(self, key):
+        """
+        Accepts a key as input. The corresponding value must be a string or
+        the objects (``True`` or 1) or (``False`` or 0). We allow 0 and 1 to
+        retain compatibility with Python 2.2.
+        
+        If the string is one of  ``True``, ``On``, ``Yes``, or ``1`` it returns 
+        ``True``.
+        
+        If the string is one of  ``False``, ``Off``, ``No``, or ``0`` it returns 
+        ``False``.
+        
+        ``as_bool`` is not case sensitive.
+        
+        Any other input will raise a ``ValueError``.
+        
+        >>> a = ConfigObj()
+        >>> a['a'] = 'fish'
+        >>> a.as_bool('a')
+        Traceback (most recent call last):
+        ValueError: Value "fish" is neither True nor False
+        >>> a['b'] = 'True'
+        >>> a.as_bool('b')
+        1
+        >>> a['b'] = 'off'
+        >>> a.as_bool('b')
+        0
+        """
+        val = self[key]
+        if val == True:
+            return True
+        elif val == False:
+            return False
+        else:
+            try:
+                if not isinstance(val, basestring):
+                    # TODO: Why do we raise a KeyError here?
+                    raise KeyError()
+                else:
+                    return self.main._bools[val.lower()]
+            except KeyError:
+                raise ValueError('Value "%s" is neither True nor False' % val)
+
+
+    def as_int(self, key):
+        """
+        A convenience method which coerces the specified value to an integer.
+        
+        If the value is an invalid literal for ``int``, a ``ValueError`` will
+        be raised.
+        
+        >>> a = ConfigObj()
+        >>> a['a'] = 'fish'
+        >>> a.as_int('a')
+        Traceback (most recent call last):
+        ValueError: invalid literal for int() with base 10: 'fish'
+        >>> a['b'] = '1'
+        >>> a.as_int('b')
+        1
+        >>> a['b'] = '3.2'
+        >>> a.as_int('b')
+        Traceback (most recent call last):
+        ValueError: invalid literal for int() with base 10: '3.2'
+        """
+        return int(self[key])
+
+
+    def as_float(self, key):
+        """
+        A convenience method which coerces the specified value to a float.
+        
+        If the value is an invalid literal for ``float``, a ``ValueError`` will
+        be raised.
+        
+        >>> a = ConfigObj()
+        >>> a['a'] = 'fish'
+        >>> a.as_float('a')
+        Traceback (most recent call last):
+        ValueError: invalid literal for float(): fish
+        >>> a['b'] = '1'
+        >>> a.as_float('b')
+        1.0
+        >>> a['b'] = '3.2'
+        >>> a.as_float('b')
+        3.2000000000000002
+        """
+        return float(self[key])
+    
+    
+    def as_list(self, key):
+        """
+        A convenience method which fetches the specified value, guaranteeing
+        that it is a list.
+        
+        >>> a = ConfigObj()
+        >>> a['a'] = 1
+        >>> a.as_list('a')
+        [1]
+        >>> a['a'] = (1,)
+        >>> a.as_list('a')
+        [1]
+        >>> a['a'] = [1]
+        >>> a.as_list('a')
+        [1]
+        """
+        result = self[key]
+        if isinstance(result, (tuple, list)):
+            return list(result)
+        return [result]
+        
+
+    def restore_default(self, key):
+        """
+        Restore (and return) default value for the specified key.
+        
+        This method will only work for a ConfigObj that was created
+        with a configspec and has been validated.
+        
+        If there is no default value for this key, ``KeyError`` is raised.
+        """
+        default = self.default_values[key]
+        dict.__setitem__(self, key, default)
+        if key not in self.defaults:
+            self.defaults.append(key)
+        return default
+
+    
+    def restore_defaults(self):
+        """
+        Recursively restore default values to all members
+        that have them.
+        
+        This method will only work for a ConfigObj that was created
+        with a configspec and has been validated.
+        
+        It doesn't delete or modify entries without default values.
+        """
+        for key in self.default_values:
+            self.restore_default(key)
+            
+        for section in self.sections:
+            self[section].restore_defaults()
+
+
+class ConfigObj(Section):
+    """An object to read, create, and write config files."""
+
+    _keyword = re.compile(r'''^ # line start
+        (\s*)                   # indentation
+        (                       # keyword
+            (?:".*?")|          # double quotes
+            (?:'.*?')|          # single quotes
+            (?:[^'"=].*?)       # no quotes
+        )
+        \s*=\s*                 # divider
+        (.*)                    # value (including list values and comments)
+        $   # line end
+        ''',
+        re.VERBOSE)
+
+    _sectionmarker = re.compile(r'''^
+        (\s*)                     # 1: indentation
+        ((?:\[\s*)+)              # 2: section marker open
+        (                         # 3: section name open
+            (?:"\s*\S.*?\s*")|    # at least one non-space with double quotes
+            (?:'\s*\S.*?\s*')|    # at least one non-space with single quotes
+            (?:[^'"\s].*?)        # at least one non-space unquoted
+        )                         # section name close
+        ((?:\s*\])+)              # 4: section marker close
+        \s*(\#.*)?                # 5: optional comment
+        $''',
+        re.VERBOSE)
+
+    # this regexp pulls list values out as a single string
+    # or single values and comments
+    # FIXME: this regex adds a '' to the end of comma terminated lists
+    #   workaround in ``_handle_value``
+    _valueexp = re.compile(r'''^
+        (?:
+            (?:
+                (
+                    (?:
+                        (?:
+                            (?:".*?")|              # double quotes
+                            (?:'.*?')|              # single quotes
+                            (?:[^'",\#][^,\#]*?)    # unquoted
+                        )
+                        \s*,\s*                     # comma
+                    )*      # match all list items ending in a comma (if any)
+                )
+                (
+                    (?:".*?")|                      # double quotes
+                    (?:'.*?')|                      # single quotes
+                    (?:[^'",\#\s][^,]*?)|           # unquoted
+                    (?:(?<!,))                      # Empty value
+                )?          # last item in a list - or string value
+            )|
+            (,)             # alternatively a single comma - empty list
+        )
+        \s*(\#.*)?          # optional comment
+        $''',
+        re.VERBOSE)
+
+    # use findall to get the members of a list value
+    _listvalueexp = re.compile(r'''
+        (
+            (?:".*?")|          # double quotes
+            (?:'.*?')|          # single quotes
+            (?:[^'",\#]?.*?)       # unquoted
+        )
+        \s*,\s*                 # comma
+        ''',
+        re.VERBOSE)
+
+    # this regexp is used for the value
+    # when lists are switched off
+    _nolistvalue = re.compile(r'''^
+        (
+            (?:".*?")|          # double quotes
+            (?:'.*?')|          # single quotes
+            (?:[^'"\#].*?)|     # unquoted
+            (?:)                # Empty value
+        )
+        \s*(\#.*)?              # optional comment
+        $''',
+        re.VERBOSE)
+
+    # regexes for finding triple quoted values on one line
+    _single_line_single = re.compile(r"^'''(.*?)'''\s*(#.*)?$")
+    _single_line_double = re.compile(r'^"""(.*?)"""\s*(#.*)?$')
+    _multi_line_single = re.compile(r"^(.*?)'''\s*(#.*)?$")
+    _multi_line_double = re.compile(r'^(.*?)"""\s*(#.*)?$')
+
+    _triple_quote = {
+        "'''": (_single_line_single, _multi_line_single),
+        '"""': (_single_line_double, _multi_line_double),
+    }
+
+    # Used by the ``istrue`` Section method
+    _bools = {
+        'yes': True, 'no': False,
+        'on': True, 'off': False,
+        '1': True, '0': False,
+        'true': True, 'false': False,
+        }
+
+
+    def __init__(self, infile=None, options=None, configspec=None, encoding=None,
+                 interpolation=True, raise_errors=False, list_values=True,
+                 create_empty=False, file_error=False, stringify=True,
+                 indent_type=None, default_encoding=None, unrepr=False,
+                 write_empty_values=False, _inspec=False):
+        """
+        Parse a config file or create a config file object.
+        
+        ``ConfigObj(infile=None, configspec=None, encoding=None,
+                    interpolation=True, raise_errors=False, list_values=True,
+                    create_empty=False, file_error=False, stringify=True,
+                    indent_type=None, default_encoding=None, unrepr=False,
+                    write_empty_values=False, _inspec=False)``
+        """
+        self._inspec = _inspec
+        # init the superclass
+        Section.__init__(self, self, 0, self)
+        
+        infile = infile or []
+        
+        _options = {'configspec': configspec,
+                    'encoding': encoding, 'interpolation': interpolation,
+                    'raise_errors': raise_errors, 'list_values': list_values,
+                    'create_empty': create_empty, 'file_error': file_error,
+                    'stringify': stringify, 'indent_type': indent_type,
+                    'default_encoding': default_encoding, 'unrepr': unrepr,
+                    'write_empty_values': write_empty_values}
+
+        if options is None:
+            options = _options
+        else:
+            import warnings
+            warnings.warn('Passing in an options dictionary to ConfigObj() is '
+                          'deprecated. Use **options instead.',
+                          DeprecationWarning, stacklevel=2)
+            
+            # TODO: check the values too.
+            for entry in options:
+                if entry not in OPTION_DEFAULTS:
+                    raise TypeError('Unrecognised option "%s".' % entry)
+            for entry, value in OPTION_DEFAULTS.items():
+                if entry not in options:
+                    options[entry] = value
+                keyword_value = _options[entry]
+                if value != keyword_value:
+                    options[entry] = keyword_value
+        
+        # XXXX this ignores an explicit list_values = True in combination
+        # with _inspec. The user should *never* do that anyway, but still...
+        if _inspec:
+            options['list_values'] = False
+        
+        self._initialise(options)
+        configspec = options['configspec']
+        self._original_configspec = configspec
+        self._load(infile, configspec)
+        
+        
+    def _load(self, infile, configspec):
+        if isinstance(infile, basestring):
+            self.filename = infile
+            if os.path.isfile(infile):
+                h = open(infile, 'rb')
+                infile = h.read() or []
+                h.close()
+            elif self.file_error:
+                # raise an error if the file doesn't exist
+                raise IOError('Config file not found: "%s".' % self.filename)
+            else:
+                # file doesn't already exist
+                if self.create_empty:
+                    # this is a good test that the filename specified
+                    # isn't impossible - like on a non-existent device
+                    h = open(infile, 'w')
+                    h.write('')
+                    h.close()
+                infile = []
+                
+        elif isinstance(infile, (list, tuple)):
+            infile = list(infile)
+            
+        elif isinstance(infile, dict):
+            # initialise self
+            # the Section class handles creating subsections
+            if isinstance(infile, ConfigObj):
+                # get a copy of our ConfigObj
+                def set_section(in_section, this_section):
+                    for entry in in_section.scalars:
+                        this_section[entry] = in_section[entry]
+                    for section in in_section.sections:
+                        this_section[section] = {}
+                        set_section(in_section[section], this_section[section])
+                set_section(infile, self)
+                
+            else:
+                for entry in infile:
+                    self[entry] = infile[entry]
+            del self._errors
+            
+            if configspec is not None:
+                self._handle_configspec(configspec)
+            else:
+                self.configspec = None
+            return
+        
+        elif getattr(infile, 'read', MISSING) is not MISSING:
+            # This supports file like objects
+            infile = infile.read() or []
+            # needs splitting into lines - but needs doing *after* decoding
+            # in case it's not an 8 bit encoding
+        else:
+            raise TypeError('infile must be a filename, file like object, or list of lines.')
+        
+        if infile:
+            # don't do it for the empty ConfigObj
+            infile = self._handle_bom(infile)
+            # infile is now *always* a list
+            #
+            # Set the newlines attribute (first line ending it finds)
+            # and strip trailing '\n' or '\r' from lines
+            for line in infile:
+                if (not line) or (line[-1] not in ('\r', '\n', '\r\n')):
+                    continue
+                for end in ('\r\n', '\n', '\r'):
+                    if line.endswith(end):
+                        self.newlines = end
+                        break
+                break
+
+            infile = [line.rstrip('\r\n') for line in infile]
+            
+        self._parse(infile)
+        # if we had any errors, now is the time to raise them
+        if self._errors:
+            info = "at line %s." % self._errors[0].line_number
+            if len(self._errors) > 1:
+                msg = "Parsing failed with several errors.\nFirst error %s" % info
+                error = ConfigObjError(msg)
+            else:
+                error = self._errors[0]
+            # set the errors attribute; it's a list of tuples:
+            # (error_type, message, line_number)
+            error.errors = self._errors
+            # set the config attribute
+            error.config = self
+            raise error
+        # delete private attributes
+        del self._errors
+        
+        if configspec is None:
+            self.configspec = None
+        else:
+            self._handle_configspec(configspec)
+    
+    
+    def _initialise(self, options=None):
+        if options is None:
+            options = OPTION_DEFAULTS
+            
+        # initialise a few variables
+        self.filename = None
+        self._errors = []
+        self.raise_errors = options['raise_errors']
+        self.interpolation = options['interpolation']
+        self.list_values = options['list_values']
+        self.create_empty = options['create_empty']
+        self.file_error = options['file_error']
+        self.stringify = options['stringify']
+        self.indent_type = options['indent_type']
+        self.encoding = options['encoding']
+        self.default_encoding = options['default_encoding']
+        self.BOM = False
+        self.newlines = None
+        self.write_empty_values = options['write_empty_values']
+        self.unrepr = options['unrepr']
+        
+        self.initial_comment = []
+        self.final_comment = []
+        self.configspec = None
+        
+        if self._inspec:
+            self.list_values = False
+        
+        # Clear section attributes as well
+        Section._initialise(self)
+        
+        
+    def __repr__(self):
+        def _getval(key):
+            try:
+                return self[key]
+            except MissingInterpolationOption:
+                return dict.__getitem__(self, key)
+        return ('ConfigObj({%s})' % 
+                ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) 
+                for key in (self.scalars + self.sections)]))
+    
+    
+    def _handle_bom(self, infile):
+        """
+        Handle any BOM, and decode if necessary.
+        
+        If an encoding is specified, that *must* be used - but the BOM should
+        still be removed (and the BOM attribute set).
+        
+        (If the encoding is wrongly specified, then a BOM for an alternative
+        encoding won't be discovered or removed.)
+        
+        If an encoding is not specified, UTF8 or UTF16 BOM will be detected and
+        removed. The BOM attribute will be set. UTF16 will be decoded to
+        unicode.
+        
+        NOTE: This method must not be called with an empty ``infile``.
+        
+        Specifying the *wrong* encoding is likely to cause a
+        ``UnicodeDecodeError``.
+        
+        ``infile`` must always be returned as a list of lines, but may be
+        passed in as a single string.
+        """
+        if ((self.encoding is not None) and
+            (self.encoding.lower() not in BOM_LIST)):
+            # No need to check for a BOM
+            # the encoding specified doesn't have one
+            # just decode
+            return self._decode(infile, self.encoding)
+        
+        if isinstance(infile, (list, tuple)):
+            line = infile[0]
+        else:
+            line = infile
+        if self.encoding is not None:
+            # encoding explicitly supplied
+            # And it could have an associated BOM
+            # TODO: if encoding is just UTF16 - we ought to check for both
+            # TODO: big endian and little endian versions.
+            enc = BOM_LIST[self.encoding.lower()]
+            if enc == 'utf_16':
+                # For UTF16 we try big endian and little endian
+                for BOM, (encoding, final_encoding) in BOMS.items():
+                    if not final_encoding:
+                        # skip UTF8
+                        continue
+                    if infile.startswith(BOM):
+                        ### BOM discovered
+                        ##self.BOM = True
+                        # Don't need to remove BOM
+                        return self._decode(infile, encoding)
+                    
+                # If we get this far, will *probably* raise a DecodeError
+                # As it doesn't appear to start with a BOM
+                return self._decode(infile, self.encoding)
+            
+            # Must be UTF8
+            BOM = BOM_SET[enc]
+            if not line.startswith(BOM):
+                return self._decode(infile, self.encoding)
+            
+            newline = line[len(BOM):]
+            
+            # BOM removed
+            if isinstance(infile, (list, tuple)):
+                infile[0] = newline
+            else:
+                infile = newline
+            self.BOM = True
+            return self._decode(infile, self.encoding)
+        
+        # No encoding specified - so we need to check for UTF8/UTF16
+        for BOM, (encoding, final_encoding) in BOMS.items():
+            if not line.startswith(BOM):
+                continue
+            else:
+                # BOM discovered
+                self.encoding = final_encoding
+                if not final_encoding:
+                    self.BOM = True
+                    # UTF8
+                    # remove BOM
+                    newline = line[len(BOM):]
+                    if isinstance(infile, (list, tuple)):
+                        infile[0] = newline
+                    else:
+                        infile = newline
+                    # UTF8 - don't decode
+                    if isinstance(infile, basestring):
+                        return infile.splitlines(True)
+                    else:
+                        return infile
+                # UTF16 - have to decode
+                return self._decode(infile, encoding)
+            
+        # No BOM discovered and no encoding specified, just return
+        if isinstance(infile, basestring):
+            # infile read from a file will be a single string
+            return infile.splitlines(True)
+        return infile
+
+
+    def _a_to_u(self, aString):
+        """Decode ASCII strings to unicode if a self.encoding is specified."""
+        if self.encoding:
+            return aString.decode('ascii')
+        else:
+            return aString
+
+
+    def _decode(self, infile, encoding):
+        """
+        Decode infile to unicode. Using the specified encoding.
+        
+        if is a string, it also needs converting to a list.
+        """
+        if isinstance(infile, basestring):
+            # can't be unicode
+            # NOTE: Could raise a ``UnicodeDecodeError``
+            return infile.decode(encoding).splitlines(True)
+        for i, line in enumerate(infile):
+            if not isinstance(line, unicode):
+                # NOTE: The isinstance test here handles mixed lists of unicode/string
+                # NOTE: But the decode will break on any non-string values
+                # NOTE: Or could raise a ``UnicodeDecodeError``
+                infile[i] = line.decode(encoding)
+        return infile
+
+
+    def _decode_element(self, line):
+        """Decode element to unicode if necessary."""
+        if not self.encoding:
+            return line
+        if isinstance(line, str) and self.default_encoding:
+            return line.decode(self.default_encoding)
+        return line
+
+
+    def _str(self, value):
+        """
+        Used by ``stringify`` within validate, to turn non-string values
+        into strings.
+        """
+        if not isinstance(value, basestring):
+            return str(value)
+        else:
+            return value
+
+
+    def _parse(self, infile):
+        """Actually parse the config file."""
+        temp_list_values = self.list_values
+        if self.unrepr:
+            self.list_values = False
+            
+        comment_list = []
+        done_start = False
+        this_section = self
+        maxline = len(infile) - 1
+        cur_index = -1
+        reset_comment = False
+        
+        while cur_index < maxline:
+            if reset_comment:
+                comment_list = []
+            cur_index += 1
+            line = infile[cur_index]
+            sline = line.strip()
+            # do we have anything on the line ?
+            if not sline or sline.startswith('#'):
+                reset_comment = False
+                comment_list.append(line)
+                continue
+            
+            if not done_start:
+                # preserve initial comment
+                self.initial_comment = comment_list
+                comment_list = []
+                done_start = True
+                
+            reset_comment = True
+            # first we check if it's a section marker
+            mat = self._sectionmarker.match(line)
+            if mat is not None:
+                # is a section line
+                (indent, sect_open, sect_name, sect_close, comment) = mat.groups()
+                if indent and (self.indent_type is None):
+                    self.indent_type = indent
+                cur_depth = sect_open.count('[')
+                if cur_depth != sect_close.count(']'):
+                    self._handle_error("Cannot compute the section depth at line %s.",
+                                       NestingError, infile, cur_index)
+                    continue
+                
+                if cur_depth < this_section.depth:
+                    # the new section is dropping back to a previous level
+                    try:
+                        parent = self._match_depth(this_section,
+                                                   cur_depth).parent
+                    except SyntaxError:
+                        self._handle_error("Cannot compute nesting level at line %s.",
+                                           NestingError, infile, cur_index)
+                        continue
+                elif cur_depth == this_section.depth:
+                    # the new section is a sibling of the current section
+                    parent = this_section.parent
+                elif cur_depth == this_section.depth + 1:
+                    # the new section is a child the current section
+                    parent = this_section
+                else:
+                    self._handle_error("Section too nested at line %s.",
+                                       NestingError, infile, cur_index)
+                    
+                sect_name = self._unquote(sect_name)
+                if sect_name in parent:
+                    self._handle_error('Duplicate section name at line %s.',
+                                       DuplicateError, infile, cur_index)
+                    continue
+                
+                # create the new section
+                this_section = Section(
+                    parent,
+                    cur_depth,
+                    self,
+                    name=sect_name)
+                parent[sect_name] = this_section
+                parent.inline_comments[sect_name] = comment
+                parent.comments[sect_name] = comment_list
+                continue
+            #
+            # it's not a section marker,
+            # so it should be a valid ``key = value`` line
+            mat = self._keyword.match(line)
+            if mat is None:
+                # it neither matched as a keyword
+                # or a section marker
+                self._handle_error(
+                    'Invalid line at line "%s".',
+                    ParseError, infile, cur_index)
+            else:
+                # is a keyword value
+                # value will include any inline comment
+                (indent, key, value) = mat.groups()
+                if indent and (self.indent_type is None):
+                    self.indent_type = indent
+                # check for a multiline value
+                if value[:3] in ['"""', "'''"]:
+                    try:
+                        value, comment, cur_index = self._multiline(
+                            value, infile, cur_index, maxline)
+                    except SyntaxError:
+                        self._handle_error(
+                            'Parse error in value at line %s.',
+                            ParseError, infile, cur_index)
+                        continue
+                    else:
+                        if self.unrepr:
+                            comment = ''
+                            try:
+                                value = unrepr(value)
+                            except Exception, e:
+                                if type(e) == UnknownType:
+                                    msg = 'Unknown name or type in value at line %s.'
+                                else:
+                                    msg = 'Parse error in value at line %s.'
+                                self._handle_error(msg, UnreprError, infile,
+                                    cur_index)
+                                continue
+                else:
+                    if self.unrepr:
+                        comment = ''
+                        try:
+                            value = unrepr(value)
+                        except Exception, e:
+                            if isinstance(e, UnknownType):
+                                msg = 'Unknown name or type in value at line %s.'
+                            else:
+                                msg = 'Parse error in value at line %s.'
+                            self._handle_error(msg, UnreprError, infile,
+                                cur_index)
+                            continue
+                    else:
+                        # extract comment and lists
+                        try:
+                            (value, comment) = self._handle_value(value)
+                        except SyntaxError:
+                            self._handle_error(
+                                'Parse error in value at line %s.',
+                                ParseError, infile, cur_index)
+                            continue
+                #
+                key = self._unquote(key)
+                if key in this_section:
+                    self._handle_error(
+                        'Duplicate keyword name at line %s.',
+                        DuplicateError, infile, cur_index)
+                    continue
+                # add the key.
+                # we set unrepr because if we have got this far we will never
+                # be creating a new section
+                this_section.__setitem__(key, value, unrepr=True)
+                this_section.inline_comments[key] = comment
+                this_section.comments[key] = comment_list
+                continue
+        #
+        if self.indent_type is None:
+            # no indentation used, set the type accordingly
+            self.indent_type = ''
+
+        # preserve the final comment
+        if not self and not self.initial_comment:
+            self.initial_comment = comment_list
+        elif not reset_comment:
+            self.final_comment = comment_list
+        self.list_values = temp_list_values
+
+
+    def _match_depth(self, sect, depth):
+        """
+        Given a section and a depth level, walk back through the sections
+        parents to see if the depth level matches a previous section.
+        
+        Return a reference to the right section,
+        or raise a SyntaxError.
+        """
+        while depth < sect.depth:
+            if sect is sect.parent:
+                # we've reached the top level already
+                raise SyntaxError()
+            sect = sect.parent
+        if sect.depth == depth:
+            return sect
+        # shouldn't get here
+        raise SyntaxError()
+
+
+    def _handle_error(self, text, ErrorClass, infile, cur_index):
+        """
+        Handle an error according to the error settings.
+        
+        Either raise the error or store it.
+        The error will have occured at ``cur_index``
+        """
+        line = infile[cur_index]
+        cur_index += 1
+        message = text % cur_index
+        error = ErrorClass(message, cur_index, line)
+        if self.raise_errors:
+            # raise the error - parsing stops here
+            raise error
+        # store the error
+        # reraise when parsing has finished
+        self._errors.append(error)
+
+
+    def _unquote(self, value):
+        """Return an unquoted version of a value"""
+        if not value:
+            # should only happen during parsing of lists
+            raise SyntaxError
+        if (value[0] == value[-1]) and (value[0] in ('"', "'")):
+            value = value[1:-1]
+        return value
+
+
+    def _quote(self, value, multiline=True):
+        """
+        Return a safely quoted version of a value.
+        
+        Raise a ConfigObjError if the value cannot be safely quoted.
+        If multiline is ``True`` (default) then use triple quotes
+        if necessary.
+        
+        * Don't quote values that don't need it.
+        * Recursively quote members of a list and return a comma joined list.
+        * Multiline is ``False`` for lists.
+        * Obey list syntax for empty and single member lists.
+        
+        If ``list_values=False`` then the value is only quoted if it contains
+        a ``\\n`` (is multiline) or '#'.
+        
+        If ``write_empty_values`` is set, and the value is an empty string, it
+        won't be quoted.
+        """
+        if multiline and self.write_empty_values and value == '':
+            # Only if multiline is set, so that it is used for values not
+            # keys, and not values that are part of a list
+            return ''
+        
+        if multiline and isinstance(value, (list, tuple)):
+            if not value:
+                return ','
+            elif len(value) == 1:
+                return self._quote(value[0], multiline=False) + ','
+            return ', '.join([self._quote(val, multiline=False)
+                for val in value])
+        if not isinstance(value, basestring):
+            if self.stringify:
+                value = str(value)
+            else:
+                raise TypeError('Value "%s" is not a string.' % value)
+
+        if not value:
+            return '""'
+        
+        no_lists_no_quotes = not self.list_values and '\n' not in value and '#' not in value
+        need_triple = multiline and ((("'" in value) and ('"' in value)) or ('\n' in value ))
+        hash_triple_quote = multiline and not need_triple and ("'" in value) and ('"' in value) and ('#' in value)
+        check_for_single = (no_lists_no_quotes or not need_triple) and not hash_triple_quote
+        
+        if check_for_single:
+            if not self.list_values:
+                # we don't quote if ``list_values=False``
+                quot = noquot
+            # for normal values either single or double quotes will do
+            elif '\n' in value:
+                # will only happen if multiline is off - e.g. '\n' in key
+                raise ConfigObjError('Value "%s" cannot be safely quoted.' % value)
+            elif ((value[0] not in wspace_plus) and
+                    (value[-1] not in wspace_plus) and
+                    (',' not in value)):
+                quot = noquot
+            else:
+                quot = self._get_single_quote(value)
+        else:
+            # if value has '\n' or "'" *and* '"', it will need triple quotes
+            quot = self._get_triple_quote(value)
+        
+        if quot == noquot and '#' in value and self.list_values:
+            quot = self._get_single_quote(value)
+                
+        return quot % value
+    
+    
+    def _get_single_quote(self, value):
+        if ("'" in value) and ('"' in value):
+            raise ConfigObjError('Value "%s" cannot be safely quoted.' % value)
+        elif '"' in value:
+            quot = squot
+        else:
+            quot = dquot
+        return quot
+    
+    
+    def _get_triple_quote(self, value):
+        if (value.find('"""') != -1) and (value.find("'''") != -1):
+            raise ConfigObjError('Value "%s" cannot be safely quoted.' % value)
+        if value.find('"""') == -1:
+            quot = tdquot
+        else:
+            quot = tsquot 
+        return quot
+
+
+    def _handle_value(self, value):
+        """
+        Given a value string, unquote, remove comment,
+        handle lists. (including empty and single member lists)
+        """
+        if self._inspec:
+            # Parsing a configspec so don't handle comments
+            return (value, '')
+        # do we look for lists in values ?
+        if not self.list_values:
+            mat = self._nolistvalue.match(value)
+            if mat is None:
+                raise SyntaxError()
+            # NOTE: we don't unquote here
+            return mat.groups()
+        #
+        mat = self._valueexp.match(value)
+        if mat is None:
+            # the value is badly constructed, probably badly quoted,
+            # or an invalid list
+            raise SyntaxError()
+        (list_values, single, empty_list, comment) = mat.groups()
+        if (list_values == '') and (single is None):
+            # change this if you want to accept empty values
+            raise SyntaxError()
+        # NOTE: note there is no error handling from here if the regex
+        # is wrong: then incorrect values will slip through
+        if empty_list is not None:
+            # the single comma - meaning an empty list
+            return ([], comment)
+        if single is not None:
+            # handle empty values
+            if list_values and not single:
+                # FIXME: the '' is a workaround because our regex now matches
+                #   '' at the end of a list if it has a trailing comma
+                single = None
+            else:
+                single = single or '""'
+                single = self._unquote(single)
+        if list_values == '':
+            # not a list value
+            return (single, comment)
+        the_list = self._listvalueexp.findall(list_values)
+        the_list = [self._unquote(val) for val in the_list]
+        if single is not None:
+            the_list += [single]
+        return (the_list, comment)
+
+
+    def _multiline(self, value, infile, cur_index, maxline):
+        """Extract the value, where we are in a multiline situation."""
+        quot = value[:3]
+        newvalue = value[3:]
+        single_line = self._triple_quote[quot][0]
+        multi_line = self._triple_quote[quot][1]
+        mat = single_line.match(value)
+        if mat is not None:
+            retval = list(mat.groups())
+            retval.append(cur_index)
+            return retval
+        elif newvalue.find(quot) != -1:
+            # somehow the triple quote is missing
+            raise SyntaxError()
+        #
+        while cur_index < maxline:
+            cur_index += 1
+            newvalue += '\n'
+            line = infile[cur_index]
+            if line.find(quot) == -1:
+                newvalue += line
+            else:
+                # end of multiline, process it
+                break
+        else:
+            # we've got to the end of the config, oops...
+            raise SyntaxError()
+        mat = multi_line.match(line)
+        if mat is None:
+            # a badly formed line
+            raise SyntaxError()
+        (value, comment) = mat.groups()
+        return (newvalue + value, comment, cur_index)
+
+
+    def _handle_configspec(self, configspec):
+        """Parse the configspec."""
+        # FIXME: Should we check that the configspec was created with the 
+        #        correct settings ? (i.e. ``list_values=False``)
+        if not isinstance(configspec, ConfigObj):
+            try:
+                configspec = ConfigObj(configspec,
+                                       raise_errors=True,
+                                       file_error=True,
+                                       _inspec=True)
+            except ConfigObjError, e:
+                # FIXME: Should these errors have a reference
+                #        to the already parsed ConfigObj ?
+                raise ConfigspecError('Parsing configspec failed: %s' % e)
+            except IOError, e:
+                raise IOError('Reading configspec failed: %s' % e)
+        
+        self.configspec = configspec
+            
+
+        
+    def _set_configspec(self, section, copy):
+        """
+        Called by validate. Handles setting the configspec on subsections
+        including sections to be validated by __many__
+        """
+        configspec = section.configspec
+        many = configspec.get('__many__')
+        if isinstance(many, dict):
+            for entry in section.sections:
+                if entry not in configspec:
+                    section[entry].configspec = many
+                    
+        for entry in configspec.sections:
+            if entry == '__many__':
+                continue
+            if entry not in section:
+                section[entry] = {}
+                section[entry]._created = True
+                if copy:
+                    # copy comments
+                    section.comments[entry] = configspec.comments.get(entry, [])
+                    section.inline_comments[entry] = configspec.inline_comments.get(entry, '')
+                
+            # Could be a scalar when we expect a section
+            if isinstance(section[entry], Section):
+                section[entry].configspec = configspec[entry]
+                        
+
+    def _write_line(self, indent_string, entry, this_entry, comment):
+        """Write an individual line, for the write method"""
+        # NOTE: the calls to self._quote here handles non-StringType values.
+        if not self.unrepr:
+            val = self._decode_element(self._quote(this_entry))
+        else:
+            val = repr(this_entry)
+        return '%s%s%s%s%s' % (indent_string,
+                               self._decode_element(self._quote(entry, multiline=False)),
+                               self._a_to_u(' = '),
+                               val,
+                               self._decode_element(comment))
+
+
+    def _write_marker(self, indent_string, depth, entry, comment):
+        """Write a section marker line"""
+        return '%s%s%s%s%s' % (indent_string,
+                               self._a_to_u('[' * depth),
+                               self._quote(self._decode_element(entry), multiline=False),
+                               self._a_to_u(']' * depth),
+                               self._decode_element(comment))
+
+
+    def _handle_comment(self, comment):
+        """Deal with a comment."""
+        if not comment:
+            return ''
+        start = self.indent_type
+        if not comment.startswith('#'):
+            start += self._a_to_u(' # ')
+        return (start + comment)
+
+
+    # Public methods
+
+    def write(self, outfile=None, section=None):
+        """
+        Write the current ConfigObj as a file
+        
+        tekNico: FIXME: use StringIO instead of real files
+        
+        >>> filename = a.filename
+        >>> a.filename = 'test.ini'
+        >>> a.write()
+        >>> a.filename = filename
+        >>> a == ConfigObj('test.ini', raise_errors=True)
+        1
+        >>> import os
+        >>> os.remove('test.ini')
+        """
+        if self.indent_type is None:
+            # this can be true if initialised from a dictionary
+            self.indent_type = DEFAULT_INDENT_TYPE
+            
+        out = []
+        cs = self._a_to_u('#')
+        csp = self._a_to_u('# ')
+        if section is None:
+            int_val = self.interpolation
+            self.interpolation = False
+            section = self
+            for line in self.initial_comment:
+                line = self._decode_element(line)
+                stripped_line = line.strip()
+                if stripped_line and not stripped_line.startswith(cs):
+                    line = csp + line
+                out.append(line)
+                
+        indent_string = self.indent_type * section.depth
+        for entry in (section.scalars + section.sections):
+            if entry in section.defaults:
+                # don't write out default values
+                continue
+            for comment_line in section.comments[entry]:
+                comment_line = self._decode_element(comment_line.lstrip())
+                if comment_line and not comment_line.startswith(cs):
+                    comment_line = csp + comment_line
+                out.append(indent_string + comment_line)
+            this_entry = section[entry]
+            comment = self._handle_comment(section.inline_comments[entry])
+            
+            if isinstance(this_entry, dict):
+                # a section
+                out.append(self._write_marker(
+                    indent_string,
+                    this_entry.depth,
+                    entry,
+                    comment))
+                out.extend(self.write(section=this_entry))
+            else:
+                out.append(self._write_line(
+                    indent_string,
+                    entry,
+                    this_entry,
+                    comment))
+                
+        if section is self:
+            for line in self.final_comment:
+                line = self._decode_element(line)
+                stripped_line = line.strip()
+                if stripped_line and not stripped_line.startswith(cs):
+                    line = csp + line
+                out.append(line)
+            self.interpolation = int_val
+            
+        if section is not self:
+            return out
+        
+        if (self.filename is None) and (outfile is None):
+            # output a list of lines
+            # might need to encode
+            # NOTE: This will *screw* UTF16, each line will start with the BOM
+            if self.encoding:
+                out = [l.encode(self.encoding) for l in out]
+            if (self.BOM and ((self.encoding is None) or
+                (BOM_LIST.get(self.encoding.lower()) == 'utf_8'))):
+                # Add the UTF8 BOM
+                if not out:
+                    out.append('')
+                out[0] = BOM_UTF8 + out[0]
+            return out
+        
+        # Turn the list to a string, joined with correct newlines
+        newline = self.newlines or os.linesep
+        if (getattr(outfile, 'mode', None) is not None and outfile.mode == 'w'
+            and sys.platform == 'win32' and newline == '\r\n'):
+            # Windows specific hack to avoid writing '\r\r\n'
+            newline = '\n'
+        output = self._a_to_u(newline).join(out)
+        if self.encoding:
+            output = output.encode(self.encoding)
+        if self.BOM and ((self.encoding is None) or match_utf8(self.encoding)):
+            # Add the UTF8 BOM
+            output = BOM_UTF8 + output
+            
+        if not output.endswith(newline):
+            output += newline
+        if outfile is not None:
+            outfile.write(output)
+        else:
+            h = open(self.filename, 'wb')
+            h.write(output)
+            h.close()
+
+
+    def validate(self, validator, preserve_errors=False, copy=False,
+                 section=None):
+        """
+        Test the ConfigObj against a configspec.
+        
+        It uses the ``validator`` object from *validate.py*.
+        
+        To run ``validate`` on the current ConfigObj, call: ::
+        
+            test = config.validate(validator)
+        
+        (Normally having previously passed in the configspec when the ConfigObj
+        was created - you can dynamically assign a dictionary of checks to the
+        ``configspec`` attribute of a section though).
+        
+        It returns ``True`` if everything passes, or a dictionary of
+        pass/fails (True/False). If every member of a subsection passes, it
+        will just have the value ``True``. (It also returns ``False`` if all
+        members fail).
+        
+        In addition, it converts the values from strings to their native
+        types if their checks pass (and ``stringify`` is set).
+        
+        If ``preserve_errors`` is ``True`` (``False`` is default) then instead
+        of a marking a fail with a ``False``, it will preserve the actual
+        exception object. This can contain info about the reason for failure.
+        For example the ``VdtValueTooSmallError`` indicates that the value
+        supplied was too small. If a value (or section) is missing it will
+        still be marked as ``False``.
+        
+        You must have the validate module to use ``preserve_errors=True``.
+        
+        You can then use the ``flatten_errors`` function to turn your nested
+        results dictionary into a flattened list of failures - useful for
+        displaying meaningful error messages.
+        """
+        if section is None:
+            if self.configspec is None:
+                raise ValueError('No configspec supplied.')
+            if preserve_errors:
+                # We do this once to remove a top level dependency on the validate module
+                # Which makes importing configobj faster
+                from validate import VdtMissingValue
+                self._vdtMissingValue = VdtMissingValue
+                
+            section = self
+
+            if copy:
+                section.initial_comment = section.configspec.initial_comment
+                section.final_comment = section.configspec.final_comment
+                section.encoding = section.configspec.encoding
+                section.BOM = section.configspec.BOM
+                section.newlines = section.configspec.newlines
+                section.indent_type = section.configspec.indent_type
+            
+        #
+        # section.default_values.clear() #??
+        configspec = section.configspec
+        self._set_configspec(section, copy)
+
+        
+        def validate_entry(entry, spec, val, missing, ret_true, ret_false):
+            section.default_values.pop(entry, None)
+                
+            try:
+                section.default_values[entry] = validator.get_default_value(configspec[entry])
+            except (KeyError, AttributeError, validator.baseErrorClass):
+                # No default, bad default or validator has no 'get_default_value'
+                # (e.g. SimpleVal)
+                pass
+            
+            try:
+                check = validator.check(spec,
+                                        val,
+                                        missing=missing
+                                        )
+            except validator.baseErrorClass, e:
+                if not preserve_errors or isinstance(e, self._vdtMissingValue):
+                    out[entry] = False
+                else:
+                    # preserve the error
+                    out[entry] = e
+                    ret_false = False
+                ret_true = False
+            else:
+                ret_false = False
+                out[entry] = True
+                if self.stringify or missing:
+                    # if we are doing type conversion
+                    # or the value is a supplied default
+                    if not self.stringify:
+                        if isinstance(check, (list, tuple)):
+                            # preserve lists
+                            check = [self._str(item) for item in check]
+                        elif missing and check is None:
+                            # convert the None from a default to a ''
+                            check = ''
+                        else:
+                            check = self._str(check)
+                    if (check != val) or missing:
+                        section[entry] = check
+                if not copy and missing and entry not in section.defaults:
+                    section.defaults.append(entry)
+            return ret_true, ret_false
+        
+        #
+        out = {}
+        ret_true = True
+        ret_false = True
+        
+        unvalidated = [k for k in section.scalars if k not in configspec]
+        incorrect_sections = [k for k in configspec.sections if k in section.scalars]        
+        incorrect_scalars = [k for k in configspec.scalars if k in section.sections]
+        
+        for entry in configspec.scalars:
+            if entry in ('__many__', '___many___'):
+                # reserved names
+                continue
+            if (not entry in section.scalars) or (entry in section.defaults):
+                # missing entries
+                # or entries from defaults
+                missing = True
+                val = None
+                if copy and entry not in section.scalars:
+                    # copy comments
+                    section.comments[entry] = (
+                        configspec.comments.get(entry, []))
+                    section.inline_comments[entry] = (
+                        configspec.inline_comments.get(entry, ''))
+                #
+            else:
+                missing = False
+                val = section[entry]
+            
+            ret_true, ret_false = validate_entry(entry, configspec[entry], val, 
+                                                 missing, ret_true, ret_false)
+        
+        many = None
+        if '__many__' in configspec.scalars:
+            many = configspec['__many__']
+        elif '___many___' in configspec.scalars:
+            many = configspec['___many___']
+        
+        if many is not None:
+            for entry in unvalidated:
+                val = section[entry]
+                ret_true, ret_false = validate_entry(entry, many, val, False,
+                                                     ret_true, ret_false)
+            unvalidated = []
+
+        for entry in incorrect_scalars:
+            ret_true = False
+            if not preserve_errors:
+                out[entry] = False
+            else:
+                ret_false = False
+                msg = 'Value %r was provided as a section' % entry
+                out[entry] = validator.baseErrorClass(msg)
+        for entry in incorrect_sections:
+            ret_true = False
+            if not preserve_errors:
+                out[entry] = False
+            else:
+                ret_false = False
+                msg = 'Section %r was provided as a single value' % entry
+                out[entry] = validator.baseErrorClass(msg)
+                
+        # Missing sections will have been created as empty ones when the
+        # configspec was read.
+        for entry in section.sections:
+            # FIXME: this means DEFAULT is not copied in copy mode
+            if section is self and entry == 'DEFAULT':
+                continue
+            if section[entry].configspec is None:
+                unvalidated.append(entry)
+                continue
+            if copy:
+                section.comments[entry] = configspec.comments.get(entry, [])
+                section.inline_comments[entry] = configspec.inline_comments.get(entry, '')
+            check = self.validate(validator, preserve_errors=preserve_errors, copy=copy, section=section[entry])
+            out[entry] = check
+            if check == False:
+                ret_true = False
+            elif check == True:
+                ret_false = False
+            else:
+                ret_true = False
+        
+        section.extra_values = unvalidated
+        if preserve_errors and not section._created:
+            # If the section wasn't created (i.e. it wasn't missing)
+            # then we can't return False, we need to preserve errors
+            ret_false = False
+        #
+        if ret_false and preserve_errors and out:
+            # If we are preserving errors, but all
+            # the failures are from missing sections / values
+            # then we can return False. Otherwise there is a
+            # real failure that we need to preserve.
+            ret_false = not any(out.values())
+        if ret_true:
+            return True
+        elif ret_false:
+            return False
+        return out
+
+
+    def reset(self):
+        """Clear ConfigObj instance and restore to 'freshly created' state."""
+        self.clear()
+        self._initialise()
+        # FIXME: Should be done by '_initialise', but ConfigObj constructor (and reload)
+        #        requires an empty dictionary
+        self.configspec = None
+        # Just to be sure ;-)
+        self._original_configspec = None
+        
+        
+    def reload(self):
+        """
+        Reload a ConfigObj from file.
+        
+        This method raises a ``ReloadError`` if the ConfigObj doesn't have
+        a filename attribute pointing to a file.
+        """
+        if not isinstance(self.filename, basestring):
+            raise ReloadError()
+
+        filename = self.filename
+        current_options = {}
+        for entry in OPTION_DEFAULTS:
+            if entry == 'configspec':
+                continue
+            current_options[entry] = getattr(self, entry)
+            
+        configspec = self._original_configspec
+        current_options['configspec'] = configspec
+            
+        self.clear()
+        self._initialise(current_options)
+        self._load(filename, configspec)
+        
+
+
+class SimpleVal(object):
+    """
+    A simple validator.
+    Can be used to check that all members expected are present.
+    
+    To use it, provide a configspec with all your members in (the value given
+    will be ignored). Pass an instance of ``SimpleVal`` to the ``validate``
+    method of your ``ConfigObj``. ``validate`` will return ``True`` if all
+    members are present, or a dictionary with True/False meaning
+    present/missing. (Whole missing sections will be replaced with ``False``)
+    """
+    
+    def __init__(self):
+        self.baseErrorClass = ConfigObjError
+    
+    def check(self, check, member, missing=False):
+        """A dummy check method, always returns the value unchanged."""
+        if missing:
+            raise self.baseErrorClass()
+        return member
+
+
+def flatten_errors(cfg, res, levels=None, results=None):
+    """
+    An example function that will turn a nested dictionary of results
+    (as returned by ``ConfigObj.validate``) into a flat list.
+    
+    ``cfg`` is the ConfigObj instance being checked, ``res`` is the results
+    dictionary returned by ``validate``.
+    
+    (This is a recursive function, so you shouldn't use the ``levels`` or
+    ``results`` arguments - they are used by the function.)
+    
+    Returns a list of keys that failed. Each member of the list is a tuple::
+    
+        ([list of sections...], key, result)
+    
+    If ``validate`` was called with ``preserve_errors=False`` (the default)
+    then ``result`` will always be ``False``.
+
+    *list of sections* is a flattened list of sections that the key was found
+    in.
+    
+    If the section was missing (or a section was expected and a scalar provided
+    - or vice-versa) then key will be ``None``.
+    
+    If the value (or section) was missing then ``result`` will be ``False``.
+    
+    If ``validate`` was called with ``preserve_errors=True`` and a value
+    was present, but failed the check, then ``result`` will be the exception
+    object returned. You can use this as a string that describes the failure.
+    
+    For example *The value "3" is of the wrong type*.
+    """
+    if levels is None:
+        # first time called
+        levels = []
+        results = []
+    if res == True:
+        return results
+    if res == False or isinstance(res, Exception):
+        results.append((levels[:], None, res))
+        if levels:
+            levels.pop()
+        return results
+    for (key, val) in res.items():
+        if val == True:
+            continue
+        if isinstance(cfg.get(key), dict):
+            # Go down one level
+            levels.append(key)
+            flatten_errors(cfg[key], val, levels, results)
+            continue
+        results.append((levels[:], key, val))
+    #
+    # Go up one level
+    if levels:
+        levels.pop()
+    #
+    return results
+
+
+def get_extra_values(conf, _prepend=()):
+    """
+    Find all the values and sections not in the configspec from a validated
+    ConfigObj.
+    
+    ``get_extra_values`` returns a list of tuples where each tuple represents
+    either an extra section, or an extra value.
+    
+    The tuples contain two values, a tuple representing the section the value 
+    is in and the name of the extra values. For extra values in the top level
+    section the first member will be an empty tuple. For values in the 'foo'
+    section the first member will be ``('foo',)``. For members in the 'bar'
+    subsection of the 'foo' section the first member will be ``('foo', 'bar')``.
+    
+    NOTE: If you call ``get_extra_values`` on a ConfigObj instance that hasn't
+    been validated it will return an empty list.
+    """
+    out = []
+    
+    out.extend([(_prepend, name) for name in conf.extra_values])
+    for name in conf.sections:
+        if name not in conf.extra_values:
+            out.extend(get_extra_values(conf[name], _prepend + (name,)))
+    return out
+
+
+"""*A programming language is a medium of expression.* - Paul Graham"""
diff --git a/python/pkg/cdec/sa/__init__.py b/python/pkg/cdec/sa/__init__.py
new file mode 100644
index 00000000..fd4a4148
--- /dev/null
+++ b/python/pkg/cdec/sa/__init__.py
@@ -0,0 +1,4 @@
+from cdec.sa._sa import sym_fromstring,\
+        SuffixArray, DataArray, LCP, Precomputation, Alignment, BiLex,\
+        HieroCachingRuleFactory, Sampler
+from cdec.sa.extractor import GrammarExtractor
diff --git a/python/pkg/cdec/sa/compile.py b/python/pkg/cdec/sa/compile.py
new file mode 100644
index 00000000..2a89243b
--- /dev/null
+++ b/python/pkg/cdec/sa/compile.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+import argparse
+import os
+import logging
+import cdec.configobj
+import cdec.sa
+
+MAX_PHRASE_LENGTH = 4
+def precompute(f_sa, max_len, max_nt, max_size, min_gap, rank1, rank2):
+    lcp = cdec.sa.LCP(f_sa)
+    stats = sorted(lcp.compute_stats(MAX_PHRASE_LENGTH), reverse=True)
+    precomp = cdec.sa.Precomputation(from_stats=stats,
+            fsarray=f_sa,
+            precompute_rank=rank1,
+            precompute_secondary_rank=rank2,
+            max_length=max_len,
+            max_nonterminals=max_nt,
+            train_max_initial_size=max_size,
+            train_min_gap_size=min_gap)
+    return precomp
+
+def main():
+    logging.basicConfig(level=logging.INFO)
+    logger = logging.getLogger('cdec.sa.compile')
+    parser = argparse.ArgumentParser(description='Compile a corpus into a suffix array.')
+    parser.add_argument('--maxnt', '-n', type=int, default=2,
+                        help='Maximum number of non-terminal symbols')
+    parser.add_argument('--maxlen', '-l', type=int, default=5,
+                        help='Maximum number of terminals')
+    parser.add_argument('--maxsize', '-s', type=int, default=15,
+                        help='Maximum rule span')
+    parser.add_argument('--mingap', '-g', type=int, default=1,
+                        help='Minimum gap size')
+    parser.add_argument('--rank1', '-r1', type=int, default=100,
+                        help='Number of pre-computed frequent patterns')
+    parser.add_argument('--rank2', '-r2', type=int, default=10,
+                        help='Number of pre-computed super-frequent patterns)')
+    parser.add_argument('-c', '--config', default='/dev/stdout',
+                        help='Output configuration')
+    parser.add_argument('-f', '--source',
+                        help='Source language corpus')
+    parser.add_argument('-e', '--target',
+                        help='Target language corpus')
+    parser.add_argument('-b', '--bitext',
+                        help='Parallel text (source ||| target)')
+    parser.add_argument('-a', '--alignment', required=True,
+                        help='Bitext word alignment')
+    parser.add_argument('-o', '--output', required=True,
+                        help='Output path')
+    args = parser.parse_args()
+
+    if not ((args.source and args.target) or args.bitext):
+        parser.error('a parallel corpus is required\n'
+        '\tuse -f (source) with -e (target) or -b (bitext)')
+
+    param_names = ("max_len", "max_nt", "max_size", "min_gap", "rank1", "rank2")
+    params = (args.maxlen, args.maxnt, args.maxsize, args.mingap, args.rank1, args.rank2)
+
+    if not os.path.exists(args.output):
+        os.mkdir(args.output)
+
+    f_sa_bin = os.path.join(args.output, 'f.sa.bin')
+    e_bin = os.path.join(args.output, 'e.bin')
+    precomp_file = 'precomp.{0}.{1}.{2}.{3}.{4}.{5}.bin'.format(*params)
+    precomp_bin = os.path.join(args.output, precomp_file)
+    a_bin = os.path.join(args.output, 'a.bin')
+    lex_bin = os.path.join(args.output, 'lex.bin')
+
+    logger.info('Compiling source suffix array')
+    if args.bitext:
+        f_sa = cdec.sa.SuffixArray(from_text=args.bitext, side='source')
+    else:
+        f_sa = cdec.sa.SuffixArray(from_text=args.source)
+    f_sa.write_binary(f_sa_bin)
+
+    logger.info('Compiling target data array')
+    if args.bitext:
+        e = cdec.sa.DataArray(from_text=args.bitext, side='target')
+    else:
+        e = cdec.sa.DataArray(from_text=args.target)
+    e.write_binary(e_bin)
+
+    logger.info('Precomputing frequent phrases')
+    precompute(f_sa, *params).write_binary(precomp_bin)
+
+    logger.info('Compiling alignment')
+    a = cdec.sa.Alignment(from_text=args.alignment)
+    a.write_binary(a_bin)
+
+    logger.info('Compiling bilexical dictionary')
+    lex = cdec.sa.BiLex(from_data=True, alignment=a, earray=e, fsarray=f_sa)
+    lex.write_binary(lex_bin)
+    
+    # Write configuration
+    config = cdec.configobj.ConfigObj(args.config, unrepr=True)
+    config['f_sa_file'] = f_sa_bin
+    config['e_file'] = e_bin
+    config['a_file'] = a_bin
+    config['lex_file'] = lex_bin
+    config['precompute_file'] = precomp_bin
+    for name, value in zip(param_names, params):
+        config[name] = value
+    config.write()
+
+if __name__ == '__main__':
+    main()
diff --git a/python/pkg/cdec/sa/extract.py b/python/pkg/cdec/sa/extract.py
new file mode 100644
index 00000000..918aa3bb
--- /dev/null
+++ b/python/pkg/cdec/sa/extract.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+import sys
+import os
+import argparse
+import logging
+import cdec.sa
+
+def main():
+    logging.basicConfig(level=logging.INFO)
+    parser = argparse.ArgumentParser(description='Extract grammars from a compiled corpus.')
+    parser.add_argument('-c', '--config', required=True,
+                        help='Extractor configuration')
+    parser.add_argument('-g', '--grammars', required=True,
+                        help='Grammar output path')
+    args = parser.parse_args()
+
+    if not os.path.exists(args.grammars):
+        os.mkdir(args.grammars)
+
+    extractor = cdec.sa.GrammarExtractor(args.config)
+    for i, sentence in enumerate(sys.stdin):
+        sentence = sentence[:-1]
+        grammar_file = os.path.join(args.grammars, 'grammar.{0}'.format(i))
+        with open(grammar_file, 'w') as output:
+            for rule in extractor.grammar(sentence):
+                output.write(str(rule)+'\n')
+        grammar_file = os.path.abspath(grammar_file)
+        print('<seg grammar="{0}">{1}</seg>'.format(grammar_file, sentence))
+
+if __name__ == '__main__':
+    main()
diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py
new file mode 100644
index 00000000..bb912e16
--- /dev/null
+++ b/python/pkg/cdec/sa/extractor.py
@@ -0,0 +1,78 @@
+from itertools import chain
+import os
+import cdec.configobj
+from cdec.sa.features import EgivenFCoherent, SampleCountF, CountEF,\
+        MaxLexEgivenF, MaxLexFgivenE, IsSingletonF, IsSingletonFE
+import cdec.sa
+
+# maximum span of a grammar rule in TEST DATA
+MAX_INITIAL_SIZE = 15
+
+class GrammarExtractor:
+    def __init__(self, config):
+        if isinstance(config, str) or isinstance(config, unicode):
+            if not os.path.exists(config):
+                raise IOError('cannot read configuration from {0}'.format(config))
+            config = cdec.configobj.ConfigObj(config, unrepr=True)
+        alignment = cdec.sa.Alignment(from_binary=config['a_file'])
+        self.factory = cdec.sa.HieroCachingRuleFactory(
+                # compiled alignment object (REQUIRED)
+                alignment,
+                # name of generic nonterminal used by Hiero
+                category="[X]",
+                # maximum number of contiguous chunks of terminal symbols in RHS of a rule
+                max_chunks=config['max_nt']+1,
+                # maximum span of a grammar rule in TEST DATA
+                max_initial_size=MAX_INITIAL_SIZE,
+                # maximum number of symbols (both T and NT) allowed in a rule
+                max_length=config['max_len'],
+                # maximum number of nonterminals allowed in a rule (set >2 at your own risk)
+                max_nonterminals=config['max_nt'],
+                # maximum number of contiguous chunks of terminal symbols
+                # in target-side RHS of a rule.
+                max_target_chunks=config['max_nt']+1,
+                # maximum number of target side symbols (both T and NT) allowed in a rule.
+                max_target_length=MAX_INITIAL_SIZE,
+                # minimum span of a nonterminal in the RHS of a rule in TEST DATA
+                min_gap_size=1,
+                # filename of file containing precomputed collocations
+                precompute_file=config['precompute_file'],
+                # maximum frequency rank of patterns used to compute triples (< 20)
+                precompute_secondary_rank=config['rank2'],
+                # maximum frequency rank of patterns used to compute collocations (< 300)
+                precompute_rank=config['rank1'],
+                # require extracted rules to have at least one aligned word
+                require_aligned_terminal=True,
+                # require each contiguous chunk of extracted rules
+                # to have at least one aligned word
+                require_aligned_chunks=False,
+                # maximum span of a grammar rule extracted from TRAINING DATA
+                train_max_initial_size=config['max_size'],
+                # minimum span of an RHS nonterminal in a rule extracted from TRAINING DATA
+                train_min_gap_size=config['min_gap'],
+                # True if phrases should be tight, False otherwise (better but slower)
+                tight_phrases=True,
+                )
+
+        # lexical weighting tables
+        tt = cdec.sa.BiLex(from_binary=config['lex_file'])
+
+        self.models = (EgivenFCoherent, SampleCountF, CountEF, 
+                MaxLexFgivenE(tt), MaxLexEgivenF(tt), IsSingletonF, IsSingletonFE)
+
+        fsarray = cdec.sa.SuffixArray(from_binary=config['f_sa_file'])
+        edarray = cdec.sa.DataArray(from_binary=config['e_file'])
+
+        # lower=faster, higher=better; improvements level off above 200-300 range,
+        # -1 = don't sample, use all data (VERY SLOW!)
+        sampler = cdec.sa.Sampler(300, fsarray)
+
+        self.factory.configure(fsarray, edarray, sampler)
+
+    def grammar(self, sentence):
+        if isinstance(sentence, unicode):
+            sentence = sentence.encode('utf8')
+        cnet = chain(('<s>',), sentence.split(), ('</s>',))
+        cnet = (cdec.sa.sym_fromstring(word, terminal=True) for word in cnet)
+        cnet = tuple(((word, None, 1), ) for word in cnet)
+        return self.factory.input(cnet, self.models)
diff --git a/python/pkg/cdec/sa/features.py b/python/pkg/cdec/sa/features.py
new file mode 100644
index 00000000..325b9e13
--- /dev/null
+++ b/python/pkg/cdec/sa/features.py
@@ -0,0 +1,57 @@
+from __future__ import division
+import math
+
+MAXSCORE = 99
+
+def EgivenF(fphrase, ephrase, paircount, fcount, fsample_count): # p(e|f)
+    return -math.log10(paircount/fcount)
+
+def CountEF(fphrase, ephrase, paircount, fcount, fsample_count):
+    return math.log10(1 + paircount)
+
+def SampleCountF(fphrase, ephrase, paircount, fcount, fsample_count):
+    return math.log10(1 + fsample_count)
+
+def EgivenFCoherent(fphrase, ephrase, paircount, fcount, fsample_count):
+    prob = paircount/fsample_count
+    return -math.log10(prob) if prob > 0 else MAXSCORE
+
+def CoherenceProb(fphrase, ephrase, paircount, fcount, fsample_count):
+    return -math.log10(fcount/fsample_count)
+
+def MaxLexEgivenF(ttable):
+    def feature(fphrase, ephrase, paircount, fcount, fsample_count):
+        fwords = fphrase.words
+        fwords.append('NULL')
+        def score():
+            for e in ephrase.words:
+              maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
+              yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+        return sum(score())
+    return feature
+
+def MaxLexFgivenE(ttable):
+    def feature(fphrase, ephrase, paircount, fcount, fsample_count):
+        ewords = ephrase.words
+        ewords.append('NULL')
+        def score():
+            for f in fphrase.words:
+              maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
+              yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+        return sum(score())
+    return feature
+
+def IsSingletonF(fphrase, ephrase, paircount, fcount, fsample_count):
+    return (fcount == 1)
+
+def IsSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count):
+    return (paircount == 1)
+
+def IsNotSingletonF(fphrase, ephrase, paircount, fcount, fsample_count):
+    return (fcount > 1)
+
+def IsNotSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count):
+    return (paircount > 1)
+
+def IsFEGreaterThanZero(fphrase, ephrase, paircount, fcount, fsample_count):
+    return (paircount > 0.01)
diff --git a/python/pkg/cdec/score.py b/python/pkg/cdec/score.py
new file mode 100644
index 00000000..22257774
--- /dev/null
+++ b/python/pkg/cdec/score.py
@@ -0,0 +1 @@
+from _cdec import BLEU, TER, CER, Metric
author	Patrick Simianer <simianer@cl.uni-heidelberg.de>	2012-08-01 17:32:37 +0200
committer	Patrick Simianer <simianer@cl.uni-heidelberg.de>	2012-08-01 17:32:37 +0200
commit	3f8e33cfe481a09c121a410e66a6074b5d05683e (patch)
tree	a41ecaf0bbb69fa91a581623abe89d41219c04f8 /python/pkg/cdec
parent	c139ce495861bb341e1b86a85ad4559f9ad53c14 (diff)
parent	9fe0219562e5db25171cce8776381600ff9a5649 (diff)