From 80686468fc5afcacc1c47820d4498b3e87859905 Mon Sep 17 00:00:00 2001
From: Michael Denkowski <michael.j.denkowski@gmail.com>
Date: Sun, 23 Dec 2012 21:58:24 -0500
Subject: debugging: count len(nt) toward max_length in extract; echo input lines to stderr

---
 python/src/sa/online_extractor.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'python/src')

diff --git a/python/src/sa/online_extractor.py b/python/src/sa/online_extractor.py
index 90087f30..8aae3959 100755
--- a/python/src/sa/online_extractor.py
+++ b/python/src/sa/online_extractor.py
@@ -111,7 +111,7 @@ class OnlineGrammarExtractor:
         # f_ i and j are current, e_ i and j are previous
         def extract(f_i, f_j, e_i, e_j, wc, links, nt, nt_open):
             # Phrase extraction limits
-            if wc > self.max_length or (f_j + 1) > f_len or \
+            if wc + len(nt) > self.max_length or (f_j + 1) > f_len or \
                     (f_j - f_i) + 1 > self.max_size:
                 return
             # Unaligned word
@@ -327,6 +327,7 @@ def main(argv):
     extractor = OnlineGrammarExtractor()
 
     for line in sys.stdin:
+        print >> sys.stderr, line.strip()
         f_words, e_words, a_str = (x.split() for x in line.split('|||'))
         alignment = sorted(tuple(int(y) for y in x.split('-')) for x in a_str)
         extractor.add_instance(f_words, e_words, alignment)
-- 
cgit v1.2.3