diff options
author | Michael Denkowski <michael.j.denkowski@gmail.com> | 2012-12-23 21:58:24 -0500 |
---|---|---|
committer | Michael Denkowski <michael.j.denkowski@gmail.com> | 2012-12-23 21:58:24 -0500 |
commit | 80686468fc5afcacc1c47820d4498b3e87859905 (patch) | |
tree | acf86d05046ad941c4898e38de9f942349fbe588 /python/src/sa | |
parent | ea90d5c6d38365122beff2ac615c8223d87b4c0f (diff) |
debugging
Diffstat (limited to 'python/src/sa')
-rwxr-xr-x | python/src/sa/online_extractor.py | 3 |
1 files changed, 2 insertions, 1 deletions
diff --git a/python/src/sa/online_extractor.py b/python/src/sa/online_extractor.py
index 90087f30..8aae3959 100755
--- a/python/src/sa/online_extractor.py
+++ b/python/src/sa/online_extractor.py
@@ -111,7 +111,7 @@ class OnlineGrammarExtractor:
         # f_ i and j are current, e_ i and j are previous
         def extract(f_i, f_j, e_i, e_j, wc, links, nt, nt_open):
             # Phrase extraction limits
-            if wc > self.max_length or (f_j + 1) > f_len or \
+            if wc + len(nt) > self.max_length or (f_j + 1) > f_len or \
                     (f_j - f_i) + 1 > self.max_size:
                 return
             # Unaligned word
@@ -327,6 +327,7 @@ def main(argv):
     extractor = OnlineGrammarExtractor()
 
     for line in sys.stdin:
+        print >> sys.stderr, line.strip()
         f_words, e_words, a_str = (x.split() for x in line.split('|||'))
         alignment = sorted(tuple(int(y) for y in x.split('-')) for x in a_str)
         extractor.add_instance(f_words, e_words, alignment)