From fac00976168c6b3c94d01d76babede147e4a0710 Mon Sep 17 00:00:00 2001 From: Patrick Simianer
Date: Wed, 20 Jul 2016 10:55:13 +0200 Subject: noloo new rules: align all --- phrase2_extraction/phrase2_extraction.rb | 10 ++++++++++ server.rb | 12 +++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/phrase2_extraction/phrase2_extraction.rb b/phrase2_extraction/phrase2_extraction.rb index 1f268cd..b376953 100755 --- a/phrase2_extraction/phrase2_extraction.rb +++ b/phrase2_extraction/phrase2_extraction.rb @@ -178,6 +178,16 @@ class Rule } astr.strip! + #a = [] + #source_string.strip.lstrip.split.each_with_index { |s,i| + # target_string.strip.lstrip.split.each_with_index { |t,j| + # if !s.match /\[X,\d+\]/ and !t.match /\[X,\d+\]/ + # a << "#{i}-#{j}" + # end + # } + #} + #astr = a.join ' ' + return "[X] ||| #{source_string} ||| #{target_string} ||| NewRule=1 ||| #{astr}" end diff --git a/server.rb b/server.rb index 4a3e8f6..752d0d5 100755 --- a/server.rb +++ b/server.rb @@ -307,7 +307,17 @@ def process_next reply } tmp_rules_known = tmp_rules - tmp_rules_new tmp_rules_known.each { |i| no_loo_known_rules << "[X] ||| #{i[0]} ||| #{i[1]} ||| KnownRule=1 ||| 0-0" } - tmp_rules_new.each { |i| no_loo_new_rules << "[X] ||| #{i[0]} ||| #{i[1]} ||| NewRule=1 ||| 0-0" } + tmp_rules_new.each { |i| + a = [] + i[0].strip.lstrip.split.each_with_index { |s,ii| + i[1].strip.lstrip.split.each_with_index { |t,j| + if !s.match /\[X,\d+\]/ and !t.match /\[X,\d+\]/ + a << "#{ii}-#{j}" + end + } + } + no_loo_new_rules << "[X] ||| #{i[0]} ||| #{i[1]} ||| NewRule=1 ||| #{a.join ' '}" + } end # regular new_rules = PhrasePhraseExtraction.extract_rules f, e, data["align"], true -- cgit v1.2.3