summary refs log tree commit diff
path: root/phrase2_extraction
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2016-02-09 19:22:02 +0100
committer Patrick Simianer <p@simianer.de> 2016-02-09 19:22:02 +0100
commit aa2832b55b1b9825ad626aa0483a97c5ba9c991c (patch)
tree 23fc14c36329ecd0fd1e0239b54beade82e6cc8c /phrase2_extraction
parent fee5d5a36f373f6d1f02bbddfbfa960f3af2f9dd (diff)
corrected rule extraction, fixed some bugs, nicer interface
Diffstat (limited to 'phrase2_extraction')
-rwxr-xr-x phrase2_extraction/phrase2_extraction.rb 28
1 files changed, 17 insertions, 11 deletions
diff --git a/phrase2_extraction/phrase2_extraction.rb b/phrase2_extraction/phrase2_extraction.rb
index 6540626..48dfd73 100755
--- a/phrase2_extraction/phrase2_extraction.rb
+++ b/phrase2_extraction/phrase2_extraction.rb
@@ -110,7 +110,7 @@ class Rule
}
end
- def as_trule_string
+ def get_source_string
source_string = ""
@source.each { |i|
if i.is_a? Range
@@ -119,6 +119,12 @@ class Rule
source_string += " #{i} "
end
}
+ source_string = source_string.lstrip.strip
+
+ return source_string
+ end
+
+ def get_target_string
target_string = ""
@target.each { |i|
if i.is_a? Range
@@ -127,29 +133,31 @@ class Rule
target_string += " #{i} "
end
}
- source_string = source_string.lstrip.strip
target_string = target_string.lstrip.strip
+ return target_string
+ end
+
+ def as_trule_string
+ source_string = get_source_string
+ target_string = get_target_string
+
astr = ""
@alignment.each { |p|
astr += " #{p.first}-#{p.last}"
}
astr.strip!
- #source_string.gsub!(/\[X,\d+\]/, "[X]")
return "[X] ||| #{source_string} ||| #{target_string} ||| NewRule=1 ||| #{astr}"
end
def is_terminal?
- #return false if @source.size>1
- #return false if @target.size>1
@source.each { |i| return false if !i.is_a? Range }
@target.each { |i| return false if !i.is_a? Range }
return true
end
- # check if other_rule is a part of self
- def mergeable_with? other_rule
+ def mergeable_with? other_rule # check if other_rule is a part of self
return false if !other_rule.is_terminal?
other_source_begin = other_rule.source.first.first
other_source_end = other_rule.source.first.last
@@ -559,7 +567,7 @@ def PhrasePhraseExtraction.extract_rules f, e, as, expand=false
}
rules = PhrasePhraseExtraction.make_seed_rules a, e,f
seed_rules = PhrasePhraseExtraction.remove_too_large_seed_phrases rules
- seed_rules.uniq!
+ seed_rules.uniq! { |r| "#{r.get_source_string} ||| #{r.get_target_string}" }
if DEBUG
STDERR.write "seed rules:\n"
@@ -584,8 +592,6 @@ def PhrasePhraseExtraction.extract_rules f, e, as, expand=false
r.alignment.size == 0
}
- rules.uniq!
-
return rules
end
@@ -741,7 +747,7 @@ def main
rules = PhrasePhraseExtraction.remove_too_long_src_sides rules
- rules.uniq!
+ rules.uniq! { |r| "#{r.get_source_string} ||| #{r.get_target_string}" }
rules.each { |r|
puts r.as_trule_string