From fbf4cb550ebdcefc4552167e5c6938a5fce2b86d Mon Sep 17 00:00:00 2001 From: Patrick Simianer
Date: Wed, 11 Nov 2015 16:09:50 +0100 Subject: handle un-aligned groups --- derivation_to_json/derivation_to_json.rb | 39 ++++++++++++++++++++++++++++++++ derivation_to_json/example.4.raw | 16 +++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 derivation_to_json/example.4.raw (limited to 'derivation_to_json') diff --git a/derivation_to_json/derivation_to_json.rb b/derivation_to_json/derivation_to_json.rb index f7307cc..3a4eb65 100755 --- a/derivation_to_json/derivation_to_json.rb +++ b/derivation_to_json/derivation_to_json.rb @@ -245,8 +245,47 @@ def proc_deriv s count_target.clear } + # find non-aligned target + rgroups.each_with_index { |i,j| + if !phrase_align.flatten.index(j) + add_to = [] + phrase_align.each_with_index { |a,k| + a.each { |q| + if rgroups[q]==i + add_to << k + end + } + } + puts add_to.to_s + puts phrase_align.to_s + add_to.each { |k| + phrase_align[k] << j + } + puts phrase_align.to_s + end + } + + # find non-aligned source + phrase_align.each_with_index { |i,j| + add = [] + if i.size == 0 + x = source_rgroups[j] + rgroups.each_with_index { |j,k| + if j==x + add << k + end + } + end + add.each { |k| + phrase_align[j] << k + } + } + h = {} h[:phrase_alignment] = phrase_align + h[:source_rgroups] = source_rgroups + h[:target_rgroups] = rgroups + h[:rules_by_span_id] = rules_by_span_id h[:source_groups] = source_groups.map { |a| a.map { |i| i.first }.join " " } h[:target_groups] = groups.map { |a| a.map { |i| i.first }.join " " } diff --git a/derivation_to_json/example.4.raw b/derivation_to_json/example.4.raw new file mode 100644 index 0000000..453d2ce --- /dev/null +++ b/derivation_to_json/example.4.raw @@ -0,0 +1,16 @@ +({<0,29> [Goal] ||| [S] ||| [1]}({<0,29> [S] ||| [S] [X] ||| [1] [2]}({<0,14> [S] ||| [X] ||| [1]}({<0,14> [X] ||| [X] zur Erfindung [X] ||| the invention [1] relates to [2]}({<0,2> [X] ||| weiterhin gehört ||| also}) ({<4,14> [X] ||| [X] Herz-Kreislauf-Erkrankungen ||| [1] cardiovascular diseases}({<4,13> [X] ||| [X] Therapie und Prophylaxe von ||| [1] therapy and prophylaxis of}({<4,9> [X] ||| die Verwendung dieser Zusammensetzungen zur ||| the use of said compositions for the}) ) ) ) ) ({<14,29> [X] ||| , Erkrankungen [X] Krebserkrankungen [X] ||| , diseases [1] cancerous diseases [2]}({<16,27> [X] ||| im Zusammenhang [X] oder ||| in conjunction [1] or}({<18,26> [X] ||| [X] , Knochenerkrankungen ||| [1] , osteopathy}({<18,24> [X] ||| mit einer erhöhten [X] ||| with an increased [1]}({<21,24> [X] ||| Thrombozytenaggregation , [X] ||| the platelet aggregation , [1]}({<23,24> [X] ||| Stoffwechsel-Erkrankungen ||| metabolism illnesses}) ) ) ) ) ({<28,29> [X] ||| . ||| .}) ) ) ) +[X] ||| weiterhin gehört ||| also ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=0.458975 MaxLexFgivenE=4.79441 CountEF=0.30103 SampleCountF=0.477121 EgivenFCoherent=0.30103 ||| 0-0 1-0 +[X] ||| die Verwendung dieser Zusammensetzungen zur ||| the use of said compositions for the ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=2.93053 MaxLexFgivenE=3.26928 CountEF=0.30103 SampleCountF=0.69897 EgivenFCoherent=0.60206 ||| 0-0 1-1 2-3 3-4 4-5 +[X] ||| [X] Therapie und Prophylaxe von ||| [1] therapy and prophylaxis of ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.926982 MaxLexFgivenE=1.42237 CountEF=0.90309 SampleCountF=1.47712 EgivenFCoherent=0.6173 ||| 1-1 2-2 3-3 4-4 +[X] ||| [X] Herz-Kreislauf-Erkrankungen ||| [1] cardiovascular diseases ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.739399 MaxLexFgivenE=0.797149 CountEF=1.38021 SampleCountF=1.69897 EgivenFCoherent=0.328468 ||| 1-1 1-2 +[X] ||| [X] zur Erfindung [X] ||| the invention [1] relates to [2] ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=0 MaxLexEgivenF=3.63316 MaxLexFgivenE=1.80404 CountEF=0.30103 SampleCountF=1.27875 EgivenFCoherent=1.25527 ||| 1-4 2-3 +[S] ||| [X] ||| [1] +[X] ||| Stoffwechsel-Erkrankungen ||| metabolism illnesses ||| ForceRule=1 ||| 0-0 0-1 +[X] ||| Thrombozytenaggregation , [X] ||| the platelet aggregation , [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=2.11327 MaxLexFgivenE=1.16228 CountEF=0.477121 SampleCountF=0.69897 EgivenFCoherent=0.30103 ||| 0-1 0-2 +[X] ||| mit einer erhöhten [X] ||| with an increased [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.72017 MaxLexFgivenE=2.04771 CountEF=0.954243 SampleCountF=1.6902 EgivenFCoherent=0.778151 ||| 0-0 1-1 2-2 +[X] ||| [X] , Knochenerkrankungen ||| [1] , osteopathy ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.11792 MaxLexFgivenE=0.186321 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 1-1 2-2 +[X] ||| im Zusammenhang [X] oder ||| in conjunction [1] or ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.43987 MaxLexFgivenE=2.45332 CountEF=0.778151 SampleCountF=1.44716 EgivenFCoherent=0.732394 ||| 0-0 1-1 3-3 +[X] ||| . ||| . ||| IsSupportedOnline=1 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.0201086 MaxLexFgivenE=0.135104 CountEF=2.39967 SampleCountF=2.48287 EgivenFCoherent=0.0835026 ||| 0-0 +[X] ||| , Erkrankungen [X] Krebserkrankungen [X] ||| , diseases [1] cancerous diseases [2] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.3856 MaxLexFgivenE=0.862494 CountEF=0.477121 SampleCountF=0.778151 EgivenFCoherent=0.39794 ||| 0-0 1-1 3-3 3-4 +[S] ||| [S] [X] ||| [1] [2] ||| Glue=1 +[Goal] ||| [S] ||| [1] -- cgit v1.2.3