summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2015-11-11 16:09:50 +0100
committer Patrick Simianer <p@simianer.de> 2015-11-11 16:09:50 +0100
commit fbf4cb550ebdcefc4552167e5c6938a5fce2b86d (patch)
tree f26130683fae36f799b9e8cce4c5e99ab03795b0
parent 7d4e419c0eeef01bfd673e49266eef07e3f714ca (diff)
handle un-aligned groups
-rwxr-xr-x derivation_to_json/derivation_to_json.rb 39
-rw-r--r-- derivation_to_json/example.4.raw 16
2 files changed, 55 insertions, 0 deletions
diff --git a/derivation_to_json/derivation_to_json.rb b/derivation_to_json/derivation_to_json.rb
index f7307cc..3a4eb65 100755
--- a/derivation_to_json/derivation_to_json.rb
+++ b/derivation_to_json/derivation_to_json.rb
@@ -245,8 +245,47 @@ def proc_deriv s
count_target.clear
}
+ # find non-aligned target
+ rgroups.each_with_index { |i,j|
+ if !phrase_align.flatten.index(j)
+ add_to = []
+ phrase_align.each_with_index { |a,k|
+ a.each { |q|
+ if rgroups[q]==i
+ add_to << k
+ end
+ }
+ }
+ puts add_to.to_s
+ puts phrase_align.to_s
+ add_to.each { |k|
+ phrase_align[k] << j
+ }
+ puts phrase_align.to_s
+ end
+ }
+
+ # find non-aligned source
+ phrase_align.each_with_index { |i,j|
+ add = []
+ if i.size == 0
+ x = source_rgroups[j]
+ rgroups.each_with_index { |j,k|
+ if j==x
+ add << k
+ end
+ }
+ end
+ add.each { |k|
+ phrase_align[j] << k
+ }
+ }
+
h = {}
h[:phrase_alignment] = phrase_align
+ h[:source_rgroups] = source_rgroups
+ h[:target_rgroups] = rgroups
+ h[:rules_by_span_id] = rules_by_span_id
h[:source_groups] = source_groups.map { |a| a.map { |i| i.first }.join " " }
h[:target_groups] = groups.map { |a| a.map { |i| i.first }.join " " }
diff --git a/derivation_to_json/example.4.raw b/derivation_to_json/example.4.raw
new file mode 100644
index 0000000..453d2ce
--- /dev/null
+++ b/derivation_to_json/example.4.raw
@@ -0,0 +1,16 @@
+({<0,29> [Goal] ||| [S] ||| [1]}({<0,29> [S] ||| [S] [X] ||| [1] [2]}({<0,14> [S] ||| [X] ||| [1]}({<0,14> [X] ||| [X] zur Erfindung [X] ||| the invention [1] relates to [2]}({<0,2> [X] ||| weiterhin gehört ||| also}) ({<4,14> [X] ||| [X] Herz-Kreislauf-Erkrankungen ||| [1] cardiovascular diseases}({<4,13> [X] ||| [X] Therapie und Prophylaxe von ||| [1] therapy and prophylaxis of}({<4,9> [X] ||| die Verwendung dieser Zusammensetzungen zur ||| the use of said compositions for the}) ) ) ) ) ({<14,29> [X] ||| , Erkrankungen [X] Krebserkrankungen [X] ||| , diseases [1] cancerous diseases [2]}({<16,27> [X] ||| im Zusammenhang [X] oder ||| in conjunction [1] or}({<18,26> [X] ||| [X] , Knochenerkrankungen ||| [1] , osteopathy}({<18,24> [X] ||| mit einer erhöhten [X] ||| with an increased [1]}({<21,24> [X] ||| Thrombozytenaggregation , [X] ||| the platelet aggregation , [1]}({<23,24> [X] ||| Stoffwechsel-Erkrankungen ||| metabolism illnesses}) ) ) ) ) ({<28,29> [X] ||| . ||| .}) ) ) )
+[X] ||| weiterhin gehört ||| also ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=0.458975 MaxLexFgivenE=4.79441 CountEF=0.30103 SampleCountF=0.477121 EgivenFCoherent=0.30103 ||| 0-0 1-0
+[X] ||| die Verwendung dieser Zusammensetzungen zur ||| the use of said compositions for the ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=2.93053 MaxLexFgivenE=3.26928 CountEF=0.30103 SampleCountF=0.69897 EgivenFCoherent=0.60206 ||| 0-0 1-1 2-3 3-4 4-5
+[X] ||| [X] Therapie und Prophylaxe von ||| [1] therapy and prophylaxis of ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.926982 MaxLexFgivenE=1.42237 CountEF=0.90309 SampleCountF=1.47712 EgivenFCoherent=0.6173 ||| 1-1 2-2 3-3 4-4
+[X] ||| [X] Herz-Kreislauf-Erkrankungen ||| [1] cardiovascular diseases ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.739399 MaxLexFgivenE=0.797149 CountEF=1.38021 SampleCountF=1.69897 EgivenFCoherent=0.328468 ||| 1-1 1-2
+[X] ||| [X] zur Erfindung [X] ||| the invention [1] relates to [2] ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=0 MaxLexEgivenF=3.63316 MaxLexFgivenE=1.80404 CountEF=0.30103 SampleCountF=1.27875 EgivenFCoherent=1.25527 ||| 1-4 2-3
+[S] ||| [X] ||| [1]
+[X] ||| Stoffwechsel-Erkrankungen ||| metabolism illnesses ||| ForceRule=1 ||| 0-0 0-1
+[X] ||| Thrombozytenaggregation , [X] ||| the platelet aggregation , [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=2.11327 MaxLexFgivenE=1.16228 CountEF=0.477121 SampleCountF=0.69897 EgivenFCoherent=0.30103 ||| 0-1 0-2
+[X] ||| mit einer erhöhten [X] ||| with an increased [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.72017 MaxLexFgivenE=2.04771 CountEF=0.954243 SampleCountF=1.6902 EgivenFCoherent=0.778151 ||| 0-0 1-1 2-2
+[X] ||| [X] , Knochenerkrankungen ||| [1] , osteopathy ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.11792 MaxLexFgivenE=0.186321 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 1-1 2-2
+[X] ||| im Zusammenhang [X] oder ||| in conjunction [1] or ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.43987 MaxLexFgivenE=2.45332 CountEF=0.778151 SampleCountF=1.44716 EgivenFCoherent=0.732394 ||| 0-0 1-1 3-3
+[X] ||| . ||| . ||| IsSupportedOnline=1 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.0201086 MaxLexFgivenE=0.135104 CountEF=2.39967 SampleCountF=2.48287 EgivenFCoherent=0.0835026 ||| 0-0
+[X] ||| , Erkrankungen [X] Krebserkrankungen [X] ||| , diseases [1] cancerous diseases [2] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.3856 MaxLexFgivenE=0.862494 CountEF=0.477121 SampleCountF=0.778151 EgivenFCoherent=0.39794 ||| 0-0 1-1 3-3 3-4
+[S] ||| [S] [X] ||| [1] [2] ||| Glue=1
+[Goal] ||| [S] ||| [1]