diff options
author | Patrick Simianer <p@simianer.de> | 2016-02-09 19:22:02 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2016-02-09 19:22:02 +0100 |
commit | aa2832b55b1b9825ad626aa0483a97c5ba9c991c (patch) | |
tree | 23fc14c36329ecd0fd1e0239b54beade82e6cc8c /phrase2_extraction | |
parent | fee5d5a36f373f6d1f02bbddfbfa960f3af2f9dd (diff) |
corrected rule extraction, fixed some bugs, nicer interface
Diffstat (limited to 'phrase2_extraction')
-rwxr-xr-x | phrase2_extraction/phrase2_extraction.rb | 28 |
1 files changed, 17 insertions, 11 deletions
diff --git a/phrase2_extraction/phrase2_extraction.rb b/phrase2_extraction/phrase2_extraction.rb index 6540626..48dfd73 100755 --- a/phrase2_extraction/phrase2_extraction.rb +++ b/phrase2_extraction/phrase2_extraction.rb @@ -110,7 +110,7 @@ class Rule } end - def as_trule_string + def get_source_string source_string = "" @source.each { |i| if i.is_a? Range @@ -119,6 +119,12 @@ class Rule source_string += " #{i} " end } + source_string = source_string.lstrip.strip + + return source_string + end + + def get_target_string target_string = "" @target.each { |i| if i.is_a? Range @@ -127,29 +133,31 @@ class Rule target_string += " #{i} " end } - source_string = source_string.lstrip.strip target_string = target_string.lstrip.strip + return target_string + end + + def as_trule_string + source_string = get_source_string + target_string = get_target_string + astr = "" @alignment.each { |p| astr += " #{p.first}-#{p.last}" } astr.strip! - #source_string.gsub!(/\[X,\d+\]/, "[X]") return "[X] ||| #{source_string} ||| #{target_string} ||| NewRule=1 ||| #{astr}" end def is_terminal? - #return false if @source.size>1 - #return false if @target.size>1 @source.each { |i| return false if !i.is_a? Range } @target.each { |i| return false if !i.is_a? Range } return true end - # check if other_rule is a part of self - def mergeable_with? other_rule + def mergeable_with? other_rule # check if other_rule is a part of self return false if !other_rule.is_terminal? other_source_begin = other_rule.source.first.first other_source_end = other_rule.source.first.last @@ -559,7 +567,7 @@ def PhrasePhraseExtraction.extract_rules f, e, as, expand=false } rules = PhrasePhraseExtraction.make_seed_rules a, e,f seed_rules = PhrasePhraseExtraction.remove_too_large_seed_phrases rules - seed_rules.uniq! + seed_rules.uniq! { |r| "#{r.get_source_string} ||| #{r.get_target_string}" } if DEBUG STDERR.write "seed rules:\n" @@ -584,8 +592,6 @@ def PhrasePhraseExtraction.extract_rules f, e, as, expand=false r.alignment.size == 0 } - rules.uniq! - return rules end @@ -741,7 +747,7 @@ def main rules = PhrasePhraseExtraction.remove_too_long_src_sides rules - rules.uniq! + rules.uniq! { |r| "#{r.get_source_string} ||| #{r.get_target_string}" } rules.each { |r| puts r.as_trule_string |