summary | refs | log | tree | commit | diff
path: root/python/pkg/cdec
diff options
context:
space:
mode:
author Michael Denkowski <michael.j.denkowski@gmail.com> 2012-12-27 23:45:06 -0500
committer Michael Denkowski <michael.j.denkowski@gmail.com> 2012-12-27 23:45:06 -0500
commit 608d670b9d94805aacfb94b96b7e1659881630b2 (patch)
tree e588dbf2061586f16567c1deea8c200f99c81ce7 /python/pkg/cdec
parent d4cb5aac6e7e083572dfb4d3393ceceb0dbad99c (diff)
Online phrase extraction speaks rulefactory's language.
Diffstat (limited to 'python/pkg/cdec')
-rw-r--r-- python/pkg/cdec/sa/extract.py 1
-rw-r--r-- python/pkg/cdec/sa/extractor.py 6
2 files changed, 6 insertions, 1 deletions
diff --git a/python/pkg/cdec/sa/extract.py b/python/pkg/cdec/sa/extract.py
index d1861101..9fc37345 100644
--- a/python/pkg/cdec/sa/extract.py
+++ b/python/pkg/cdec/sa/extract.py
@@ -53,6 +53,7 @@ def extract(inp):
# Add training instance _after_ extracting grammars
if online:
extractor.add_instance(sentence, reference, alignment)
+ extractor.dump_online_stats()
grammar_file = os.path.abspath(grammar_file)
return '<seg grammar="{0}" id="{1}"> {2} </seg>{3}'.format(grammar_file, i, sentence, suffix)
diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py
index f3a86d9d..62a251a7 100644
--- a/python/pkg/cdec/sa/extractor.py
+++ b/python/pkg/cdec/sa/extractor.py
@@ -88,4 +88,8 @@ class GrammarExtractor:
f_words = cdec.sa.encode_words(sentence.split())
e_words = cdec.sa.encode_words(reference.split())
al = sorted(tuple(int(i) for i in pair.split('-')) for pair in alignment.split())
- self.factory.add_instance(f_words, e_words, al)
\ No newline at end of file
+ self.factory.add_instance(f_words, e_words, al)
+
+ # Debugging
+ def dump_online_stats(self):
+ self.factory.dump_online_stats()
\ No newline at end of file