diff options
author | Michael Denkowski <michael.j.denkowski@gmail.com> | 2012-12-27 23:45:06 -0500 |
---|---|---|
committer | Michael Denkowski <michael.j.denkowski@gmail.com> | 2012-12-27 23:45:06 -0500 |
commit | 608d670b9d94805aacfb94b96b7e1659881630b2 (patch) | |
tree | e588dbf2061586f16567c1deea8c200f99c81ce7 /python/pkg/cdec/sa | |
parent | d4cb5aac6e7e083572dfb4d3393ceceb0dbad99c (diff) |
Online phrase extraction speaks rulefactory's language.
Diffstat (limited to 'python/pkg/cdec/sa')
-rw-r--r-- | python/pkg/cdec/sa/extract.py | 1 | ||||
-rw-r--r-- | python/pkg/cdec/sa/extractor.py | 6 |
2 files changed, 6 insertions, 1 deletion
```diff
diff --git a/python/pkg/cdec/sa/extract.py b/python/pkg/cdec/sa/extract.py
index d1861101..9fc37345 100644
--- a/python/pkg/cdec/sa/extract.py
+++ b/python/pkg/cdec/sa/extract.py
@@ -53,6 +53,7 @@ def extract(inp):
     # Add training instance _after_ extracting grammars
     if online:
         extractor.add_instance(sentence, reference, alignment)
+        extractor.dump_online_stats()
     grammar_file = os.path.abspath(grammar_file)
     return '<seg grammar="{0}" id="{1}"> {2} </seg>{3}'.format(grammar_file, i, sentence, suffix)
 
diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py
index f3a86d9d..62a251a7 100644
--- a/python/pkg/cdec/sa/extractor.py
+++ b/python/pkg/cdec/sa/extractor.py
@@ -88,4 +88,8 @@ class GrammarExtractor:
         f_words = cdec.sa.encode_words(sentence.split())
         e_words = cdec.sa.encode_words(reference.split())
         al = sorted(tuple(int(i) for i in pair.split('-')) for pair in alignment.split())
-        self.factory.add_instance(f_words, e_words, al)
\ No newline at end of file
+        self.factory.add_instance(f_words, e_words, al)
+
+    # Debugging
+    def dump_online_stats(self):
+        self.factory.dump_online_stats()
\ No newline at end of file
```