summary | refs | log | tree | commit | diff
path: root/python/pkg/cdec/sa
diff options
context:
space:
mode:
author: Michael Denkowski <michael.j.denkowski@gmail.com> 2012-12-27 23:45:06 -0500
committer: Michael Denkowski <michael.j.denkowski@gmail.com> 2012-12-27 23:45:06 -0500
commit: 07efbe1156be5a4d92f4f76a1a0e44e9ef409a45 (patch)
tree: 87a9e7cfc871acb4036f642be4845c599fee639b /python/pkg/cdec/sa
parent: 0a4ee9a74cb84595880d815b4d7610664cbd7655 (diff)
Online phrase extraction speaks rulefactory's language.
Diffstat (limited to 'python/pkg/cdec/sa')
-rw-r--r-- python/pkg/cdec/sa/extract.py 1
-rw-r--r-- python/pkg/cdec/sa/extractor.py 6
2 files changed, 6 insertions, 1 deletions
diff --git a/python/pkg/cdec/sa/extract.py b/python/pkg/cdec/sa/extract.py
index d1861101..9fc37345 100644
--- a/python/pkg/cdec/sa/extract.py
+++ b/python/pkg/cdec/sa/extract.py
@@ -53,6 +53,7 @@ def extract(inp):
# Add training instance _after_ extracting grammars
if online:
extractor.add_instance(sentence, reference, alignment)
+ extractor.dump_online_stats()
grammar_file = os.path.abspath(grammar_file)
return '<seg grammar="{0}" id="{1}"> {2} </seg>{3}'.format(grammar_file, i, sentence, suffix)
diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py
index f3a86d9d..62a251a7 100644
--- a/python/pkg/cdec/sa/extractor.py
+++ b/python/pkg/cdec/sa/extractor.py
@@ -88,4 +88,8 @@ class GrammarExtractor:
f_words = cdec.sa.encode_words(sentence.split())
e_words = cdec.sa.encode_words(reference.split())
al = sorted(tuple(int(i) for i in pair.split('-')) for pair in alignment.split())
- self.factory.add_instance(f_words, e_words, al) \ No newline at end of file
+ self.factory.add_instance(f_words, e_words, al)
+
+ # Debugging
+ def dump_online_stats(self):
+ self.factory.dump_online_stats() \ No newline at end of file