[python] conversion from cdec.sa.Rule to cdec.TRule

+ remove configobj dependency
+ re-structure packages (no more top-level library)
+ "const" stuff
+ use __new__ instead of constructor for some objects
author: Victor Chahuneau <vchahune@cs.cmu.edu> 2012-07-27 22:25:15 -0400
committer: Victor Chahuneau <vchahune@cs.cmu.edu> 2012-07-27 22:25:15 -0400
commit: 1d481414a2fa8505a2591c88e2b7b8f86a682ca2 (patch)
tree: ed5e9dff569d89da453578ce3d109991623d9303 /python/cdec/sa
parent: b317e0efd2398d75d70e027bb1e2cf442e683981 (diff)
5 files changed, 14 insertions, 13 deletions
diff --git a/python/cdec/sa/__init__.py b/python/cdec/sa/__init__.py
index ddefa280..8645e837 100644
--- a/python/cdec/sa/__init__.py
+++ b/python/cdec/sa/__init__.py
@@ -1,4 +1,4 @@
-from _cdec_sa import sym_tostring, sym_isvar, sym_fromstring,\
+from _sa import sym_fromstring,\
         SuffixArray, DataArray, LCP, Precomputation, Alignment, BiLex,\
         HieroCachingRuleFactory, Sampler
 from extractor import GrammarExtractor
diff --git a/python/cdec/sa/compile.py b/python/cdec/sa/compile.py
index 061cdab2..30e605a6 100644
--- a/python/cdec/sa/compile.py
+++ b/python/cdec/sa/compile.py
@@ -2,7 +2,7 @@
 import argparse
 import os
 import logging
-import configobj
+import cdec.configobj
 import cdec.sa
 
 MAX_PHRASE_LENGTH = 4
@@ -80,7 +80,7 @@ def main():
     lex.write_binary(lex_bin)
     
     # Write configuration
-    config = configobj.ConfigObj(args.config, unrepr=True)
+    config = cdec.configobj.ConfigObj(args.config, unrepr=True)
     config['f_sa_file'] = f_sa_bin
     config['e_file'] = e_bin
     config['a_file'] = a_bin
diff --git a/python/cdec/sa/extract.py b/python/cdec/sa/extract.py
index c6da5e9d..918aa3bb 100644
--- a/python/cdec/sa/extract.py
+++ b/python/cdec/sa/extract.py
@@ -3,7 +3,6 @@ import sys
 import os
 import argparse
 import logging
-import configobj
 import cdec.sa
 
 def main():
@@ -18,7 +17,7 @@ def main():
     if not os.path.exists(args.grammars):
         os.mkdir(args.grammars)
 
-    extractor = cdec.sa.GrammarExtractor(configobj.ConfigObj(args.config, unrepr=True))
+    extractor = cdec.sa.GrammarExtractor(args.config)
     for i, sentence in enumerate(sys.stdin):
         sentence = sentence[:-1]
         grammar_file = os.path.join(args.grammars, 'grammar.{0}'.format(i))
diff --git a/python/cdec/sa/extractor.py b/python/cdec/sa/extractor.py
index c97b3c6f..bb912e16 100644
--- a/python/cdec/sa/extractor.py
+++ b/python/cdec/sa/extractor.py
@@ -1,4 +1,6 @@
 from itertools import chain
+import os
+import cdec.configobj
 from cdec.sa.features import EgivenFCoherent, SampleCountF, CountEF,\
         MaxLexEgivenF, MaxLexFgivenE, IsSingletonF, IsSingletonFE
 import cdec.sa
@@ -8,7 +10,10 @@ MAX_INITIAL_SIZE = 15
 
 class GrammarExtractor:
     def __init__(self, config):
-        # TODO if str, read config
+        if isinstance(config, str) or isinstance(config, unicode):
+            if not os.path.exists(config):
+                raise IOError('cannot read configuration from {0}'.format(config))
+            config = cdec.configobj.ConfigObj(config, unrepr=True)
         alignment = cdec.sa.Alignment(from_binary=config['a_file'])
         self.factory = cdec.sa.HieroCachingRuleFactory(
                 # compiled alignment object (REQUIRED)
diff --git a/python/cdec/sa/features.py b/python/cdec/sa/features.py
index 8d35d8e6..325b9e13 100644
--- a/python/cdec/sa/features.py
+++ b/python/cdec/sa/features.py
@@ -1,6 +1,5 @@
 from __future__ import division
 import math
-import cdec.sa
 
 MAXSCORE = 99
 
@@ -22,11 +21,10 @@ def CoherenceProb(fphrase, ephrase, paircount, fcount, fsample_count):
 
 def MaxLexEgivenF(ttable):
     def feature(fphrase, ephrase, paircount, fcount, fsample_count):
-        fwords = [cdec.sa.sym_tostring(w) for w in fphrase if not cdec.sa.sym_isvar(w)]
+        fwords = fphrase.words
         fwords.append('NULL')
-        ewords = (cdec.sa.sym_tostring(w) for w in ephrase if not cdec.sa.sym_isvar(w))
         def score():
-            for e in ewords:
+            for e in ephrase.words:
               maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
               yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE
         return sum(score())
@@ -34,11 +32,10 @@ def MaxLexEgivenF(ttable):
 
 def MaxLexFgivenE(ttable):
     def feature(fphrase, ephrase, paircount, fcount, fsample_count):
-        fwords = (cdec.sa.sym_tostring(w) for w in fphrase if not cdec.sa.sym_isvar(w))
-        ewords = [cdec.sa.sym_tostring(w) for w in ephrase if not cdec.sa.sym_isvar(w)]
+        ewords = ephrase.words
         ewords.append('NULL')
         def score():
-            for f in fwords:
+            for f in fphrase.words:
               maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
               yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE
         return sum(score())
author	Victor Chahuneau <vchahune@cs.cmu.edu>	2012-07-27 22:25:15 -0400
committer	Victor Chahuneau <vchahune@cs.cmu.edu>	2012-07-27 22:25:15 -0400
commit	1d481414a2fa8505a2591c88e2b7b8f86a682ca2 (patch)
tree	ed5e9dff569d89da453578ce3d109991623d9303 /python/cdec/sa
parent	b317e0efd2398d75d70e027bb1e2cf442e683981 (diff)