diff options
author | Patrick Simianer <p@simianer.de> | 2016-06-03 13:34:29 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2016-06-03 13:34:29 +0200 |
commit | 0885987afd448fe1aedba7c6a2fdeff64c426623 (patch) | |
tree | 2d6a8b957d3514769ee80d7a66cf0231133160ae /phrase2_extraction | |
parent | aeec004a2d99b595365e991d66d959adb010ae97 (diff) |
NOGRAMMAR, summary, debug/admin, sessions, phrase2_extraction fix
Diffstat (limited to 'phrase2_extraction')
-rwxr-xr-x | phrase2_extraction/phrase2_extraction.rb | 23 |
1 files changed, 18 insertions, 5 deletions
```diff
diff --git a/phrase2_extraction/phrase2_extraction.rb b/phrase2_extraction/phrase2_extraction.rb
index 547e0be..01bdae9 100755
--- a/phrase2_extraction/phrase2_extraction.rb
+++ b/phrase2_extraction/phrase2_extraction.rb
@@ -5,9 +5,9 @@ require 'zipf'
 module PhrasePhraseExtraction
 DEBUG = false
-MAX_NT = 2 # Chiang: 2
-MAX_SEED_NUM_WORDS = 4 # Chiang: 10 words, -> phrases!
-MAX_SRC_SZ = 10 # Chiang: 5 words, -> words!
+MAX_NT = 1 # Chiang: 2
+MAX_SEED_NUM_WORDS = 3 # Chiang: 10 words, -> phrases!
+MAX_SRC_SZ = 7 # Chiang: 5 words, -> words!
 FORBID_SRC_ADJACENT_SRC_NT = true # Chiang:true
 class Rule
@@ -544,7 +544,7 @@ def PhrasePhraseExtraction.extract fstart, fend, estart, eend, f, e, a, flen, el
       }
       rules.last.rebase_alignment fs, estart
       fe += 1
-      break if has_alignment(a, fe, "src")||fe>=elen
+      break if has_alignment(a, fe, "src")||fe>=flen
     end
     fs -= 1
     break has_alignment(a, fs, "src")||fs<0
@@ -649,7 +649,20 @@ def PhrasePhraseExtraction.remove_adjacent_nt rules
         prev = false
       end
     }
-    b
+    c = false
+    prev = false
+    r.target.each { |i|
+      if i.is_a? String
+        if prev
+          c = true
+          break
+        end
+        prev = true
+      else
+        prev = false
+      end
+    }
+    b || c
   }
 end
```