summary refs log tree commit diff
path: root/python/pkg/cdec/sa/extractor.py
diff options
context:
space:
mode:
author Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-12-28 10:28:55 +0100
committer Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-12-28 10:28:55 +0100
commit cf7d5f83d2fd5731bda7b1265775a395a5d3c1ba (patch)
tree 3223f3e7ffba5126e3efda46aadb4ca09c737718 /python/pkg/cdec/sa/extractor.py
parent f9ec8ec31e5a0b6ccc352c834733462eae2481a0 (diff)
parent a72b677092b44d35fec07cc2acf1064210c14ec5 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'python/pkg/cdec/sa/extractor.py')
-rw-r--r-- python/pkg/cdec/sa/extractor.py 13
1 files changed, 12 insertions, 1 deletions
diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py
index e09f79ea..62a251a7 100644
--- a/python/pkg/cdec/sa/extractor.py
+++ b/python/pkg/cdec/sa/extractor.py
@@ -1,5 +1,5 @@
from itertools import chain
-import os
+import os, sys
import cdec.configobj
from cdec.sa.features import EgivenFCoherent, SampleCountF, CountEF,\
MaxLexEgivenF, MaxLexFgivenE, IsSingletonF, IsSingletonFE
@@ -82,3 +82,14 @@ class GrammarExtractor:
meta = cdec.sa.annotate(words)
cnet = cdec.sa.make_lattice(words)
return self.factory.input(cnet, meta)
+
+ # Add training instance to data
+ def add_instance(self, sentence, reference, alignment):
+ f_words = cdec.sa.encode_words(sentence.split())
+ e_words = cdec.sa.encode_words(reference.split())
+ al = sorted(tuple(int(i) for i in pair.split('-')) for pair in alignment.split())
+ self.factory.add_instance(f_words, e_words, al)
+
+ # Debugging
+ def dump_online_stats(self):
+ self.factory.dump_online_stats()
\ No newline at end of file