summaryrefslogtreecommitdiff
path: root/phrase2_extraction
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2016-06-03 13:36:55 +0200
committerPatrick Simianer <p@simianer.de>2016-06-03 13:36:55 +0200
commit6c62dc57ae90fc6b016da507cae22d96fc128af2 (patch)
tree28e955964b96e7a568517d3f25b2574d86b64e1c /phrase2_extraction
parent8596c2775346757c7f7c37e603f015a671f86b88 (diff)
parent0885987afd448fe1aedba7c6a2fdeff64c426623 (diff)
Merge branch 'master' of github.com:pks/lfpe
Diffstat (limited to 'phrase2_extraction')
-rwxr-xr-xphrase2_extraction/phrase2_extraction.rb23
1 files changed, 18 insertions, 5 deletions
diff --git a/phrase2_extraction/phrase2_extraction.rb b/phrase2_extraction/phrase2_extraction.rb
index 547e0be..01bdae9 100755
--- a/phrase2_extraction/phrase2_extraction.rb
+++ b/phrase2_extraction/phrase2_extraction.rb
@@ -5,9 +5,9 @@ require 'zipf'
module PhrasePhraseExtraction
DEBUG = false
-MAX_NT = 2 # Chiang: 2
-MAX_SEED_NUM_WORDS = 4 # Chiang: 10 words, -> phrases!
-MAX_SRC_SZ = 10 # Chiang: 5 words, -> words!
+MAX_NT = 1 # Chiang: 2
+MAX_SEED_NUM_WORDS = 3 # Chiang: 10 words, -> phrases!
+MAX_SRC_SZ = 7 # Chiang: 5 words, -> words!
FORBID_SRC_ADJACENT_SRC_NT = true # Chiang:true
class Rule
@@ -544,7 +544,7 @@ def PhrasePhraseExtraction.extract fstart, fend, estart, eend, f, e, a, flen, el
}
rules.last.rebase_alignment fs, estart
fe += 1
- break if has_alignment(a, fe, "src")||fe>=elen
+ break if has_alignment(a, fe, "src")||fe>=flen
end
fs -= 1
break has_alignment(a, fs, "src")||fs<0
@@ -649,7 +649,20 @@ def PhrasePhraseExtraction.remove_adjacent_nt rules
prev = false
end
}
- b
+ c = false
+ prev = false
+ r.target.each { |i|
+ if i.is_a? String
+ if prev
+ c = true
+ break
+ end
+ prev = true
+ else
+ prev = false
+ end
+ }
+ b || c
}
end