summary | refs | log | tree | commit | diff
path: root/python/cdec/sa/extractor.py
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de> 2013-11-13 18:12:10 +0100
committer: Patrick Simianer <p@simianer.de> 2013-11-13 18:12:10 +0100
commit: d6e6babf2cfe49fed040b651624b7e34d1a9b507 (patch)
tree: 2a00ab18f10a7f93e7e172551c01b48cc9f20b8c /python/cdec/sa/extractor.py
parent: 2d2d5eced93d58bc77894d8c328195cd9950b96d (diff)
parent: 8a24bb77bc2e9fd17a6f6529a2942cde96a6af49 (diff)
merge w/ upstream
Diffstat (limited to 'python/cdec/sa/extractor.py')
-rw-r--r-- python/cdec/sa/extractor.py 18
1 files changed, 8 insertions, 10 deletions
diff --git a/python/cdec/sa/extractor.py b/python/cdec/sa/extractor.py
index acc13cbc..5a95ded8 100644
--- a/python/cdec/sa/extractor.py
+++ b/python/cdec/sa/extractor.py
@@ -84,23 +84,21 @@ class GrammarExtractor:
for fn in cdec.sa._SA_CONFIGURE:
fn(config)
- def grammar(self, sentence):
+ def grammar(self, sentence, ctx_name=None):
if isinstance(sentence, unicode):
sentence = sentence.encode('utf8')
words = tuple(chain(('<s>',), sentence.split(), ('</s>',)))
meta = cdec.sa.annotate(words)
cnet = cdec.sa.make_lattice(words)
- return self.factory.input(cnet, meta)
+ return self.factory.input(cnet, meta, ctx_name)
# Add training instance to data
- def add_instance(self, sentence, reference, alignment):
+ def add_instance(self, sentence, reference, alignment, ctx_name=None):
f_words = cdec.sa.encode_words(sentence.split())
e_words = cdec.sa.encode_words(reference.split())
al = sorted(tuple(int(i) for i in pair.split('-')) for pair in alignment.split())
- self.factory.add_instance(f_words, e_words, al)
-
- # Debugging
- def dump_online_stats(self):
- self.factory.dump_online_stats()
- def dump_online_rules(self):
- self.factory.dump_online_rules()
\ No newline at end of file
+ self.factory.add_instance(f_words, e_words, al, ctx_name)
+
+ # Remove all incremental data for a context
+ def drop_ctx(self, ctx_name=None):
+ self.factory.drop_ctx(ctx_name)