summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--derivation_to_json/README9
-rw-r--r--derivation_to_json/after.json53
-rw-r--r--derivation_to_json/before.json127
-rw-r--r--derivation_to_json/example.1.json1
-rw-r--r--derivation_to_json/example.1.output2
-rw-r--r--derivation_to_json/example.1.raw1
-rw-r--r--derivation_to_json/example.json1
-rw-r--r--derivation_to_json/example.output1
-rw-r--r--derivation_to_json/example.raw1
-rw-r--r--derivation_to_json/in.json162
-rw-r--r--derivation_to_json/in2.json331
-rw-r--r--derivation_to_json/in3.json64
-rw-r--r--derivation_to_json/in4.json85
-rw-r--r--derivation_to_json/in5.json170
-rw-r--r--derivation_to_json/in6.json85
-rw-r--r--derivation_to_json/in7.json233
-rw-r--r--derivation_to_json/out2.json52
-rw-r--r--derivation_to_json/out3.json20
-rw-r--r--derivation_to_json/out4.json23
-rw-r--r--derivation_to_json/out5.json33
-rw-r--r--derivation_to_json/out6.json20
-rw-r--r--derivation_to_json/out7.json44
-rwxr-xr-xderivation_to_json/rec.rb79
-rwxr-xr-xderivation_to_json/rules.rb42
24 files changed, 6 insertions, 1633 deletions
diff --git a/derivation_to_json/README b/derivation_to_json/README
index 947cefd..0f83d5a 100644
--- a/derivation_to_json/README
+++ b/derivation_to_json/README
@@ -1,4 +1,7 @@
-This (horrid) hack reads cdec's "--show_derivations" and "--extract_rules"
-into data structures and tries to align "groups" in source and target sides
-of rules in a smart, presentable way.
+This (horrid) hack reads cdec's "--show_derivations" and "--extract_rules" into data structures and tries to align "groups" in source and target sides
+of rules in a smart, presentable way. The result resembles a phrase-based
+system, given that the word alignment gives enough hints.
+
+To run:
+ ./derivation_to_json.rb < <one of the .raw files>
diff --git a/derivation_to_json/after.json b/derivation_to_json/after.json
deleted file mode 100644
index fb58467..0000000
--- a/derivation_to_json/after.json
+++ /dev/null
@@ -1,53 +0,0 @@
-{
- "source": [
- "Weiterhin gehört",
- "zur Erfindung",
- "die Verwendung dieser Zusammensetzungen zur",
- "Therapie und Prophylaxe von",
- "Herz-Kreislauf-Erkrankungen",
- ", Erkrankungen",
- "im Zusammenhang",
- "mit einer erhöhten",
- "Thrombozytenaggregation,",
- "Stoffwechsel-Erkrankungen",
- ", Knochenerkrankungen",
- "oder",
- "Krebserkrankungen",
- "."
- ],
- "target": [
- "Additionally,",
- "the invention relates to",
- "the use of said compositions for the",
- "therapy and prophylaxis of",
- "cardiovascular diseases",
- ", diseases",
- "in conjunction",
- "with an increased",
- "platelet aggregation,",
- "\tmetabolic diseases",
- ", osteopathy",
- "or",
- "cancerous diseases",
- "."
- ],
- "align": [
- "0-0",
- "1-1",
- "2-2",
- "3-3",
- "4-4",
- "5-5",
- "6-6",
- "7-7",
- "8-8",
- "9-9",
- "10-10",
- "11-11",
- "12-12",
- "13-13"
- ],
- "post_edit": "Additionally, the invention relates to the use of said compositions for the therapy and prophylaxis of cardiovascular diseases , diseases in conjunction with an increased platelet aggregation, metabolic diseases , osteopathy or cancerous diseases .",
- "duration": 212272,
- "source_value": "weiterhin gehört zur Erfindung die Verwendung dieser Zusammensetzungen zur Therapie und Prophylaxe von Herz-Kreislauf-Erkrankungen , Erkrankungen im Zusammenhang mit einer erhöhten Thrombozytenaggregation , Stoffwechsel-Erkrankungen , Knochenerkrankungen oder Krebserkrankungen ."
-}
diff --git a/derivation_to_json/before.json b/derivation_to_json/before.json
deleted file mode 100644
index 1d2c911..0000000
--- a/derivation_to_json/before.json
+++ /dev/null
@@ -1,127 +0,0 @@
-{
- "phrase_alignment": [
- [
- 1
- ],
- [
- 2,
- 0
- ],
- [
- 3
- ],
- [
- 4
- ],
- [
- 5
- ],
- [
- 6
- ],
- [
- 7
- ],
- [
- 8
- ],
- [
- 9
- ],
- [
- 10
- ],
- [
- 11
- ],
- [
- 12
- ],
- [
- 13
- ],
- [
- 14
- ]
- ],
- "source_rgroups": [
- 4,
- 3,
- 7,
- 6,
- 5,
- 8,
- 9,
- 11,
- 12,
- 13,
- 10,
- 9,
- 8,
- 14
- ],
- "target_rgroups": [
- 3,
- 4,
- 3,
- 7,
- 6,
- 5,
- 8,
- 9,
- 11,
- 12,
- 13,
- 10,
- 9,
- 8,
- 14
- ],
- "rules_by_span_id": {
- "4": "[X] ||| weiterhin gehört ||| also ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=0.458975 MaxLexFgivenE=4.79441 CountEF=0.30103 SampleCountF=0.477121 EgivenFCoherent=0.30103 ||| 0-0 1-0\n",
- "3": "[X] ||| [X] zur Erfindung [X] ||| the invention [X] relates to [X] ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=0 MaxLexEgivenF=3.63316 MaxLexFgivenE=1.80404 CountEF=0.30103 SampleCountF=1.27875 EgivenFCoherent=1.25527 ||| 1-4 2-3\n",
- "7": "[X] ||| die Verwendung dieser Zusammensetzungen zur ||| the use of said compositions for the ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=2.93053 MaxLexFgivenE=3.26928 CountEF=0.30103 SampleCountF=0.69897 EgivenFCoherent=0.60206 ||| 0-0 1-1 2-3 3-4 4-5\n",
- "6": "[X] ||| [X] Therapie und Prophylaxe von ||| [X] therapy and prophylaxis of ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.926982 MaxLexFgivenE=1.42237 CountEF=0.90309 SampleCountF=1.47712 EgivenFCoherent=0.6173 ||| 1-1 2-2 3-3 4-4\n",
- "5": "[X] ||| [X] Herz-Kreislauf-Erkrankungen ||| [X] cardiovascular diseases ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.739399 MaxLexFgivenE=0.797149 CountEF=1.38021 SampleCountF=1.69897 EgivenFCoherent=0.328468 ||| 1-1 1-2\n",
- "8": "[X] ||| , Erkrankungen [X] Krebserkrankungen [X] ||| , diseases [X] cancerous diseases [X] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.3856 MaxLexFgivenE=0.862494 CountEF=0.477121 SampleCountF=0.778151 EgivenFCoherent=0.39794 ||| 0-0 1-1 3-3 3-4\n",
- "9": "[X] ||| im Zusammenhang [X] oder ||| in conjunction [X] or ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.43987 MaxLexFgivenE=2.45332 CountEF=0.778151 SampleCountF=1.44716 EgivenFCoherent=0.732394 ||| 0-0 1-1 3-3\n",
- "11": "[X] ||| mit einer erhöhten [X] ||| with an increased [X] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.72017 MaxLexFgivenE=2.04771 CountEF=0.954243 SampleCountF=1.6902 EgivenFCoherent=0.778151 ||| 0-0 1-1 2-2\n",
- "12": "[X] ||| Thrombozytenaggregation , [X] ||| platelet aggregation [X] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.886057 MaxLexFgivenE=1.98654 CountEF=0.477121 SampleCountF=0.69897 EgivenFCoherent=0.30103 ||| 0-0 0-1\n",
- "13": "[X] ||| Stoffwechsel-Erkrankungen ||| asdf ||| ForceRule=1 ||| 0-0\n",
- "10": "[X] ||| [X] , Knochenerkrankungen ||| [X] , osteopathy ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.11792 MaxLexFgivenE=0.186321 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 1-1 2-2\n",
- "14": "[X] ||| . ||| . ||| IsSupportedOnline=1 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.0201086 MaxLexFgivenE=0.135104 CountEF=2.39967 SampleCountF=2.48287 EgivenFCoherent=0.0835026 ||| 0-0\n"
- },
- "source_groups": [
- "weiterhin gehört",
- "zur Erfindung",
- "die Verwendung dieser Zusammensetzungen zur",
- "Therapie und Prophylaxe von",
- "Herz-Kreislauf-Erkrankungen",
- ", Erkrankungen",
- "im Zusammenhang",
- "mit einer erhöhten",
- "Thrombozytenaggregation ,",
- "Stoffwechsel-Erkrankungen",
- ", Knochenerkrankungen",
- "oder",
- "Krebserkrankungen",
- "."
- ],
- "target_groups": [
- "the invention",
- "also",
- "relates to",
- "the use of said compositions for the",
- "therapy and prophylaxis of",
- "cardiovascular diseases",
- ", diseases",
- "in conjunction",
- "with an increased",
- "platelet aggregation",
- "asdf",
- ", osteopathy",
- "or",
- "cancerous diseases",
- "."
- ]
-}
diff --git a/derivation_to_json/example.1.json b/derivation_to_json/example.1.json
deleted file mode 100644
index ab793c2..0000000
--- a/derivation_to_json/example.1.json
+++ /dev/null
@@ -1 +0,0 @@
-{"phrase_alignment":[[0],[1],[2],[3]],"source_groups":["(","ein","test",")"],"target_groups":["(","another","test","level )"]}
diff --git a/derivation_to_json/example.1.output b/derivation_to_json/example.1.output
deleted file mode 100644
index 65e4e27..0000000
--- a/derivation_to_json/example.1.output
+++ /dev/null
@@ -1,2 +0,0 @@
-["<0,4> [Goal] ||| [S] ||| [1]", "<0,4> [S] ||| [S] [X] ||| [1] [2]", "<0,2> [S] ||| [S] [X] ||| [1] [2]", "<0,1> [S] ||| [X] ||| [1]", "<0,1> [X] ||| ( ||| (", "<1,2> [X] ||| ein ||| another", "<2,4> [X] ||| [X] ) ||| [1] level )", "<2,3> [X] ||| test ||| test"]
-( another test level
diff --git a/derivation_to_json/example.1.raw b/derivation_to_json/example.1.raw
deleted file mode 100644
index 0a2a6b7..0000000
--- a/derivation_to_json/example.1.raw
+++ /dev/null
@@ -1 +0,0 @@
-({<0,4> [Goal] ||| [S] ||| [1]}({<0,4> [S] ||| [S] [X] ||| [1] [2]}({<0,2> [S] ||| [S] [X] ||| [1] [2]}({<0,1> [S] ||| [X] ||| [1]}({<0,1> [X] ||| ( ||| (}) ) ({<1,2> [X] ||| ein ||| another}) ) ({<2,4> [X] ||| [X] ) ||| [1] level )}({<2,3> [X] ||| test ||| test}) ) ) )
diff --git a/derivation_to_json/example.json b/derivation_to_json/example.json
deleted file mode 100644
index 6bb2b19..0000000
--- a/derivation_to_json/example.json
+++ /dev/null
@@ -1 +0,0 @@
-{"phrase_alignment":[[0,2],[1],[0,2],[3],[4],[5],[6],[7,9],[8],[7,9],[10,18],[11],[10,18],[12],[13],[14],[15],[16],[17],[10,18],[19],[20],[21,23],[22],[21,23],[24],[25]],"source_groups":["hier also","ein bescheidener",",","auf alle","demokratien","anzuwendender","vorschlag",":","der markt für","ideen","funktioniert","besser",",","wenn es den","bürgern","leichter","fällt , die","zielkonflikte zwischen","treffsicherheit","der","aussagen und","unterhaltung","oder zwischen","treffsicherheit","und","parteitreue","zu erkennen ."],"target_groups":["so here","a modest",",","to all","democracies","anzuwendender","proposal",":","the market for","ideas","works","better","if","citizens","easier",", the","trade @-@ offs between","treffsicherheit","the","statements and","entertainment","or","treffsicherheit","and","parteitreue","."]}
diff --git a/derivation_to_json/example.output b/derivation_to_json/example.output
deleted file mode 100644
index ac66de7..0000000
--- a/derivation_to_json/example.output
+++ /dev/null
@@ -1 +0,0 @@
-so here a modest , to all democracies anzuwendender proposal : the market for ideas works better if citizens easier , the trade @-@ offs between treffsicherheit the statements and entertainment or treffsicherheit and parteitreue .
diff --git a/derivation_to_json/example.raw b/derivation_to_json/example.raw
deleted file mode 100644
index 5128e09..0000000
--- a/derivation_to_json/example.raw
+++ /dev/null
@@ -1 +0,0 @@
-({<0,41> [Goal] ||| [S] ||| [1]}({<0,41> [S] ||| [S] [X] ||| [1] [2]}({<0,37> [S] ||| [S] [X] ||| [1] [2]}({<0,33> [S] ||| [S] [X] ||| [1] [2]}({<0,32> [S] ||| [S] [X] ||| [1] [2]}({<0,30> [S] ||| [S] [X] ||| [1] [2]}({<0,15> [S] ||| [S] [X] ||| [1] [2]}({<0,10> [S] ||| [S] [X] ||| [1] [2]}({<0,9> [S] ||| [S] [X] ||| [1] [2]}({<0,8> [S] ||| [S] [X] ||| [1] [2]}({<0,7> [S] ||| [S] [X] ||| [1] [2]}({<0,5> [S] ||| [X] ||| [1]}({<0,5> [X] ||| hier also [X] , ||| so here [1] ,}({<2,4> [X] ||| ein bescheidener ||| a modest}) ) ) ({<5,7> [X] ||| auf alle ||| to all}) ) ({<7,8> [X] ||| demokratien ||| democracies}) ) ({<8,9> [X] ||| anzuwendender ||| anzuwendender}) ) ({<9,10> [X] ||| vorschlag ||| proposal}) ) ({<10,15> [X] ||| : [X] ideen ||| : [1] ideas}({<11,14> [X] ||| der markt für ||| the market for}) ) ) ({<15,30> [X] ||| funktioniert [X] , [X] der ||| works [1] [2] the}({<16,17> [X] ||| besser ||| better}) ({<18,29> [X] ||| wenn es den [X] ||| if [1]}({<21,29> [X] ||| [X] fällt , die [X] ||| [1] , the [2]}({<21,23> [X] ||| [X] leichter ||| [1] easier}({<21,22> [X] ||| bürgern ||| citizens}) ) ({<26,29> [X] ||| zielkonflikte zwischen [X] ||| trade @-@ offs between [1]}({<28,29> [X] ||| treffsicherheit ||| treffsicherheit}) ) ) ) ) ) ({<30,32> [X] ||| aussagen und ||| statements and}) ) ({<32,33> [X] ||| unterhaltung ||| entertainment}) ) ({<33,37> [X] ||| oder zwischen [X] und ||| or [1] and}({<35,36> [X] ||| treffsicherheit ||| treffsicherheit}) ) ) ({<37,41> [X] ||| [X] zu erkennen . ||| [1] .}({<37,38> [X] ||| parteitreue ||| parteitreue}) ) ) )
diff --git a/derivation_to_json/in.json b/derivation_to_json/in.json
deleted file mode 100644
index 6fd2c88..0000000
--- a/derivation_to_json/in.json
+++ /dev/null
@@ -1,162 +0,0 @@
-{
- "phrase_alignment": [
- [
- 0
- ],
- [
- 1
- ],
- [
- 2
- ],
- [
- 3
- ],
- [
- 4
- ],
- [
- 5
- ],
- [
- 6
- ],
- [
- 7
- ]
- ],
- "source_rgroups": [
- 4,
- 6,
- 5,
- 4,
- 3,
- 7,
- 8,
- 7
- ],
- "target_rgroups": [
- 4,
- 6,
- 5,
- 4,
- 3,
- 7,
- 8,
- 7
- ],
- "rules_by_span_id": {
- "4": "[X] ||| die [X] auf basis von ||| the [1] which are based on metal ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=5.95916 MaxLexFgivenE=3.2265 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 2-4 3-4 3-5 4-6",
- "6": "[X] ||| neuerung ||| invention ||| ForceRule=1 ||| 0-0",
- "5": "[X] ||| [X] bezieht sich auf gassensoren ||| [1] relates to gas sensors ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.45124 MaxLexFgivenE=2.73473 CountEF=0.477121 SampleCountF=0.477121 EgivenFCoherent=-0 ||| 1-1 1-2 2-1 4-3 4-4",
- "3": "[X] ||| [X] metalloxid @-@ halbleitern , ||| [1] @-@ oxide semiconductors and which ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=3.79715 MaxLexFgivenE=2.26688 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 1-2 2-1 3-3 4-5",
- "7": "[X] ||| die sehr [X] sind . ||| are very [1] . ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=0.456219 MaxLexFgivenE=2.16613 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 3-3 4-3",
- "8": "[X] ||| empfindlich und wenig temperaturabhängig ||| sensitive and not appreciably temperature @-@ dependent ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=6.5059 MaxLexFgivenE=1.51642 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 2-3 3-3 3-4 3-6"
- },
- "source_groups": [
- "die",
- "neuerung",
- "bezieht sich auf gassensoren",
- "auf basis von",
- "metalloxid @-@ halbleitern ,",
- "die sehr",
- "empfindlich und wenig temperaturabhängig",
- "sind ."
- ],
- "target_groups": [
- "the",
- "invention",
- "relates to gas sensors",
- "which are based on metal",
- "@-@ oxide semiconductors and which",
- "are very",
- "sensitive and not appreciably temperature @-@ dependent",
- "."
- ],
- "span_info": {
- "1": [
- [
- 0,
- 21
- ],
- [
-
- ]
- ],
- "3": [
- [
- 0,
- 13
- ],
- [
- [
- 0,
- 9
- ]
- ]
- ],
- "4": [
- [
- 0,
- 9
- ],
- [
- [
- 1,
- 6
- ]
- ]
- ],
- "5": [
- [
- 1,
- 6
- ],
- [
- [
- 1,
- 2
- ]
- ]
- ],
- "6": [
- [
- 1,
- 2
- ],
- [
-
- ]
- ],
- "7": [
- [
- 13,
- 21
- ],
- [
- [
- 15,
- 19
- ]
- ]
- ],
- "8": [
- [
- 15,
- 19
- ],
- [
-
- ]
- ]
- },
- "span2id": {
- "[0, 21]": 1,
- "[0, 13]": 3,
- "[0, 9]": 4,
- "[1, 6]": 5,
- "[1, 2]": 6,
- "[13, 21]": 7,
- "[15, 19]": 8
- }
-}
diff --git a/derivation_to_json/in2.json b/derivation_to_json/in2.json
deleted file mode 100644
index 8b7859e..0000000
--- a/derivation_to_json/in2.json
+++ /dev/null
@@ -1,331 +0,0 @@
-{
- "phrase_alignment": [
- [
- 0
- ],
- [
- 1
- ],
- [
- 2
- ],
- [
- 4
- ],
- [
- 3
- ],
- [
- 5
- ],
- [
- 6
- ],
- [
- 7
- ],
- [
- 8
- ],
- [
- 9
- ],
- [
- 10
- ],
- [
- 11
- ],
- [
- 12
- ],
- [
- 13
- ]
- ],
- "source_rgroups": [
- 6,
- 7,
- 5,
- 9,
- 8,
- 11,
- 10,
- 12,
- 13,
- 14,
- 15,
- 16,
- 17,
- 18
- ],
- "target_rgroups": [
- 6,
- 7,
- 5,
- 8,
- 9,
- 11,
- 10,
- 12,
- 13,
- 14,
- 15,
- 16,
- 17,
- 18
- ],
- "rules_by_span_id": {
- "6": "[X] ||| die [X] ||| the [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.3972 MaxLexFgivenE=0.936969 CountEF=1.72428 SampleCountF=2.48144 EgivenFCoherent=0.764004 ||| 0-0",
- "7": "[X] ||| neuerung ||| invention ||| ForceRule=1 ||| 0-0",
- "5": "[X] ||| [X] sieht ||| [1] provides ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=0.30103 MaxLexFgivenE=0.425969 CountEF=0.30103 SampleCountF=0.69897 EgivenFCoherent=0.60206 ||| 1-1",
- "9": "[X] ||| hierfür ||| this ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0 MaxLexFgivenE=1.79588 CountEF=0.477121 SampleCountF=0.477121 EgivenFCoherent=-0 ||| 0-0",
- "8": "[X] ||| [X] gassensoren ||| gas sensors [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.671584 MaxLexFgivenE=0 CountEF=0.477121 SampleCountF=0.90309 EgivenFCoherent=0.544068 ||| 1-0 1-1",
- "11": "[X] ||| vor ||| , ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=0 MaxLexEgivenF=1.29576 MaxLexFgivenE=3.07287 CountEF=0.30103 SampleCountF=1.34242 EgivenFCoherent=1.32222 ||| 0-0",
- "10": "[X] ||| [X] , bei denen [X] ||| [1] in which [2] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.3792 MaxLexFgivenE=3.12049 CountEF=0.60206 SampleCountF=0.954243 EgivenFCoherent=0.425969 ||| 1-2 2-1 3-2",
- "12": "[X] ||| der metalloxid @-@ [X] ||| the metal @-@ oxide [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.46878 MaxLexFgivenE=2.10214 CountEF=0.60206 SampleCountF=0.60206 EgivenFCoherent=-0 ||| 0-0 1-1 1-3 2-2",
- "13": "[X] ||| halbleiter auf dem [X] ||| semiconductor is on the [1] ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=2.07042 MaxLexFgivenE=2.63438 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-2 2-1 2-3",
- "14": "[X] ||| substrat ( 1 ) ||| substrate ( 1 ) ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.754079 MaxLexFgivenE=1.55196 CountEF=0.477121 SampleCountF=0.477121 EgivenFCoherent=-0 ||| 0-0 1-1 2-2 3-3",
- "15": "[X] ||| in form einer amorphen [X] ||| in the form of an amorphous [1] ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=3.39105 MaxLexFgivenE=2.14549 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-2 1-3 1-4 2-4 3-5",
- "16": "[X] ||| metalloxid @-@ halbleiter @-@ [X] ||| metal @-@ oxide semiconductor [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.08127 MaxLexFgivenE=2.41368 CountEF=0.477121 SampleCountF=0.477121 EgivenFCoherent=-0 ||| 0-0 0-2 1-1 2-3 3-1",
- "17": "[X] ||| dünnschicht [X] ||| thin layer [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=3.04136 MaxLexFgivenE=0.929419 CountEF=0.477121 SampleCountF=0.477121 EgivenFCoherent=-0 ||| 0-0",
- "18": "[X] ||| ( 2 ) vorliegt . ||| ( 2 ) . ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=0.776902 MaxLexFgivenE=4.75559 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 2-0 2-2 4-3"
- },
- "source_groups": [
- "die",
- "neuerung",
- "sieht",
- "hierfür",
- "gassensoren",
- "vor",
- ", bei denen",
- "der metalloxid @-@",
- "halbleiter auf dem",
- "substrat ( 1 )",
- "in form einer amorphen",
- "metalloxid @-@ halbleiter @-@",
- "dünnschicht",
- "( 2 ) vorliegt ."
- ],
- "target_groups": [
- "the",
- "invention",
- "provides",
- "gas sensors",
- "this",
- ",",
- "in which",
- "the metal @-@ oxide",
- "semiconductor is on the",
- "substrate ( 1 )",
- "in the form of an amorphous",
- "metal @-@ oxide semiconductor",
- "thin layer",
- "( 2 ) ."
- ],
- "span_info": {
- "1": [
- [
- 0,
- 33
- ],
- [
-
- ]
- ],
- "2": [
- [
- 0,
- 19
- ],
- [
-
- ]
- ],
- "3": [
- [
- 0,
- 5
- ],
- [
-
- ]
- ],
- "5": [
- [
- 0,
- 3
- ],
- [
- [
- 0,
- 2
- ]
- ]
- ],
- "6": [
- [
- 0,
- 2
- ],
- [
- [
- 1,
- 2
- ]
- ]
- ],
- "7": [
- [
- 1,
- 2
- ],
- [
-
- ]
- ],
- "8": [
- [
- 3,
- 5
- ],
- [
- [
- 3,
- 4
- ]
- ]
- ],
- "9": [
- [
- 3,
- 4
- ],
- [
-
- ]
- ],
- "10": [
- [
- 5,
- 19
- ],
- [
- [
- 5,
- 6
- ],
- [
- 9,
- 19
- ]
- ]
- ],
- "11": [
- [
- 5,
- 6
- ],
- [
-
- ]
- ],
- "12": [
- [
- 9,
- 19
- ],
- [
- [
- 12,
- 19
- ]
- ]
- ],
- "13": [
- [
- 12,
- 19
- ],
- [
- [
- 15,
- 19
- ]
- ]
- ],
- "14": [
- [
- 15,
- 19
- ],
- [
-
- ]
- ],
- "15": [
- [
- 19,
- 33
- ],
- [
- [
- 23,
- 33
- ]
- ]
- ],
- "16": [
- [
- 23,
- 33
- ],
- [
- [
- 27,
- 33
- ]
- ]
- ],
- "17": [
- [
- 27,
- 33
- ],
- [
- [
- 28,
- 33
- ]
- ]
- ],
- "18": [
- [
- 28,
- 33
- ],
- [
-
- ]
- ]
- },
- "span2id": {
- "[0, 33]": 1,
- "[0, 19]": 2,
- "[0, 5]": 3,
- "[0, 3]": 5,
- "[0, 2]": 6,
- "[1, 2]": 7,
- "[3, 5]": 8,
- "[3, 4]": 9,
- "[5, 19]": 10,
- "[5, 6]": 11,
- "[9, 19]": 12,
- "[12, 19]": 13,
- "[15, 19]": 14,
- "[19, 33]": 15,
- "[23, 33]": 16,
- "[27, 33]": 17,
- "[28, 33]": 18
- }
-}
diff --git a/derivation_to_json/in3.json b/derivation_to_json/in3.json
deleted file mode 100644
index 2181e2a..0000000
--- a/derivation_to_json/in3.json
+++ /dev/null
@@ -1,64 +0,0 @@
-{
- "phrase_alignment": [
- [
- 0
- ],
- [
- 1
- ],
- [
- 2
- ]
- ],
- "source_rgroups": [
- 2,
- 3,
- 2
- ],
- "target_rgroups": [
- 2,
- 3,
- 2
- ],
- "rules_by_span_id": {
- "2": "[X] ||| das [X] kristallin sein . ||| the [1] crystalline . ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=0.317153 MaxLexFgivenE=2.02464 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 2-2 3-2 4-3",
- "3": "[X] ||| substrat kann amorph oder ||| substrate may be amorphous or ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.89753 MaxLexFgivenE=1.43405 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 2-2 2-3 3-4"
- },
- "source_groups": [
- "das",
- "substrat kann amorph oder",
- "kristallin sein ."
- ],
- "target_groups": [
- "the",
- "substrate may be amorphous or",
- "crystalline ."
- ],
- "span_info": {
- "2": [
- [
- 0,
- 8
- ],
- [
- [
- 1,
- 5
- ]
- ]
- ],
- "3": [
- [
- 1,
- 5
- ],
- [
-
- ]
- ]
- },
- "span2id": {
- "[0, 8]": 2,
- "[1, 5]": 3
- }
-}
diff --git a/derivation_to_json/in4.json b/derivation_to_json/in4.json
deleted file mode 100644
index 566b91f..0000000
--- a/derivation_to_json/in4.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
- "phrase_alignment": [
- [
- 0
- ],
- [
- 1
- ],
- [
- 2
- ],
- [
- 3
- ]
- ],
- "source_rgroups": [
- 2,
- 3,
- 4,
- 3
- ],
- "target_rgroups": [
- 2,
- 3,
- 4,
- 3
- ],
- "rules_by_span_id": {
- "2": "[X] ||| der metalloxid @-@ halbleiter [X] ||| the metal @-@ oxide semiconductor [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.56569 MaxLexFgivenE=2.8425 CountEF=0.477121 SampleCountF=0.69897 EgivenFCoherent=0.30103 ||| 0-0 1-1 1-3 2-2 3-4",
- "3": "[X] ||| kann [X] dotierungselement enthalten . ||| may contain [1] doping element . ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=2.14956 MaxLexFgivenE=1.87655 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 2-3 2-4 3-3 4-5",
- "4": "[X] ||| mindestens ein ||| at least one ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.61108 MaxLexFgivenE=1.75632 CountEF=0.477121 SampleCountF=0.778151 EgivenFCoherent=0.39794 ||| 0-0 0-1 1-2"
- },
- "source_groups": [
- "der metalloxid @-@ halbleiter",
- "kann",
- "mindestens ein",
- "dotierungselement enthalten ."
- ],
- "target_groups": [
- "the metal @-@ oxide semiconductor",
- "may contain",
- "at least one",
- "doping element ."
- ],
- "span_info": {
- "2": [
- [
- 0,
- 10
- ],
- [
- [
- 4,
- 10
- ]
- ]
- ],
- "3": [
- [
- 4,
- 10
- ],
- [
- [
- 5,
- 7
- ]
- ]
- ],
- "4": [
- [
- 5,
- 7
- ],
- [
-
- ]
- ]
- },
- "span2id": {
- "[0, 10]": 2,
- "[4, 10]": 3,
- "[5, 7]": 4
- }
-}
diff --git a/derivation_to_json/in5.json b/derivation_to_json/in5.json
deleted file mode 100644
index 9d320ae..0000000
--- a/derivation_to_json/in5.json
+++ /dev/null
@@ -1,170 +0,0 @@
-{
- "phrase_alignment": [
- [
- 0
- ],
- [
- 1
- ],
- [
- 2
- ],
- [
- 3
- ],
- [
- 4
- ],
- [
- 5
- ],
- [
- 6
- ]
- ],
- "source_rgroups": [
- 5,
- 6,
- 7,
- 9,
- 8,
- 10,
- 8
- ],
- "target_rgroups": [
- 5,
- 6,
- 7,
- 9,
- 8,
- 10,
- 8
- ],
- "rules_by_span_id": {
- "5": "[X] ||| die ||| the ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.3972 MaxLexFgivenE=0.936969 CountEF=1.97313 SampleCountF=2.48287 EgivenFCoherent=0.51296 ||| 0-0",
- "6": "[X] ||| apparatur ||| device ||| ForceRule=1 ||| 0-0",
- "7": "[X] ||| dient der ||| is used ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.11454 MaxLexFgivenE=2.49742 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 0-1 1-0",
- "9": "[X] ||| messung des auf eine arbeitsmaschine ||| measuring the load moment of a working ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=8.74061 MaxLexFgivenE=5.92883 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 2-4 3-5 4-6",
- "8": "[X] ||| [X] , [X] lastmomentes . ||| [1] machine , [2] appliance . ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=3.9196 MaxLexFgivenE=1.76182 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 1-2 3-4 4-5",
- "10": "[X] ||| insbesondere ein hebezeug wirkenden ||| in particular a lifting ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.67342 MaxLexFgivenE=4.61451 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 0-1 1-2 2-1 3-3"
- },
- "source_groups": [
- "die",
- "apparatur",
- "dient der",
- "messung des auf eine arbeitsmaschine",
- ",",
- "insbesondere ein hebezeug wirkenden",
- "lastmomentes ."
- ],
- "target_groups": [
- "the",
- "device",
- "is used",
- "measuring the load moment of a working",
- "machine ,",
- "in particular a lifting",
- "appliance ."
- ],
- "span_info": {
- "1": [
- [
- 0,
- 16
- ],
- [
-
- ]
- ],
- "2": [
- [
- 0,
- 4
- ],
- [
-
- ]
- ],
- "3": [
- [
- 0,
- 2
- ],
- [
-
- ]
- ],
- "5": [
- [
- 0,
- 1
- ],
- [
-
- ]
- ],
- "6": [
- [
- 1,
- 2
- ],
- [
-
- ]
- ],
- "7": [
- [
- 2,
- 4
- ],
- [
-
- ]
- ],
- "8": [
- [
- 4,
- 16
- ],
- [
- [
- 4,
- 9
- ],
- [
- 10,
- 14
- ]
- ]
- ],
- "9": [
- [
- 4,
- 9
- ],
- [
-
- ]
- ],
- "10": [
- [
- 10,
- 14
- ],
- [
-
- ]
- ]
- },
- "span2id": {
- "[0, 16]": 1,
- "[0, 4]": 2,
- "[0, 2]": 3,
- "[0, 1]": 5,
- "[1, 2]": 6,
- "[2, 4]": 7,
- "[4, 16]": 8,
- "[4, 9]": 9,
- "[10, 14]": 10
- }
-}
diff --git a/derivation_to_json/in6.json b/derivation_to_json/in6.json
deleted file mode 100644
index 0aa652e..0000000
--- a/derivation_to_json/in6.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
- "phrase_alignment": [
- [
- 0
- ],
- [
- 1
- ],
- [
- 2
- ]
- ],
- "source_rgroups": [
- 3,
- 4,
- 5
- ],
- "target_rgroups": [
- 3,
- 4,
- 5
- ],
- "rules_by_span_id": {
- "3": "[X] ||| diese komponente ist ein [X] ||| this component is a [1] ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.35358 MaxLexFgivenE=3.52666 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 2-2 3-3",
- "4": "[X] ||| unmittelbares ||| immediate ||| ForceRule=1 ||| 0-0",
- "5": "[X] ||| mass für das lastmoment . ||| measure for the load moment . ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.13152 MaxLexFgivenE=3.40485 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 2-2 3-3 3-4 4-5"
- },
- "source_groups": [
- "diese komponente ist ein",
- "unmittelbares",
- "mass für das lastmoment ."
- ],
- "target_groups": [
- "this component is a",
- "immediate",
- "measure for the load moment ."
- ],
- "span_info": {
- "1": [
- [
- 0,
- 10
- ],
- [
-
- ]
- ],
- "3": [
- [
- 0,
- 5
- ],
- [
- [
- 4,
- 5
- ]
- ]
- ],
- "4": [
- [
- 4,
- 5
- ],
- [
-
- ]
- ],
- "5": [
- [
- 5,
- 10
- ],
- [
-
- ]
- ]
- },
- "span2id": {
- "[0, 10]": 1,
- "[0, 5]": 3,
- "[4, 5]": 4,
- "[5, 10]": 5
- }
-}
diff --git a/derivation_to_json/in7.json b/derivation_to_json/in7.json
deleted file mode 100644
index 8dd3531..0000000
--- a/derivation_to_json/in7.json
+++ /dev/null
@@ -1,233 +0,0 @@
-{
- "phrase_alignment": [
- [
- 0
- ],
- [
- 1
- ],
- [
- 5
- ],
- [
- 2,
- 4
- ],
- [
- 3
- ],
- [
- 5
- ],
- [
- 6
- ],
- [
- 8
- ],
- [
- 7
- ],
- [
- 8
- ],
- [
- 9
- ]
- ],
- "source_rgroups": [
- 4,
- 6,
- 5,
- 7,
- 8,
- 5,
- 9,
- 10,
- 11,
- 10,
- 12
- ],
- "target_rgroups": [
- 4,
- 6,
- 7,
- 8,
- 7,
- 5,
- 9,
- 11,
- 10,
- 12
- ],
- "rules_by_span_id": {
- "4": "[X] ||| in einer ||| in a ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.605196 MaxLexFgivenE=1.51196 CountEF=0.90309 SampleCountF=1.63347 EgivenFCoherent=0.778151 ||| 0-0 1-1",
- "6": "[X] ||| favorisierten ||| favorited ||| ForceRule=1 ||| 0-0",
- "5": "[X] ||| [X] ausführung [X] zugleich ||| [1] [2] at the ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=2.87793 MaxLexFgivenE=7.30253 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 3-3",
- "7": "[X] ||| dient einer der [X] ||| of the [1] is used ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=2.41969 MaxLexFgivenE=4.25111 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-3 0-4 1-0 2-1",
- "8": "[X] ||| schwenkbolzen ( 8 ) ||| hinge bolts ( 8 ) ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=3.81279 MaxLexFgivenE=2.05233 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-1 1-0 1-2 1-4 2-3 3-4",
- "9": "[X] ||| als messbolzen , indem [X] ||| time as a measuring bolt , [1] ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=8.72269 MaxLexFgivenE=5.55315 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-1 1-0 2-0",
- "10": "[X] ||| an ihm [X] ( [X] ||| [1] strips ( [2] ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.86875 MaxLexFgivenE=4.61571 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-1 3-2",
- "11": "[X] ||| dehnungsmess @-@ streifen ||| by having strain gauge ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=5.6656 MaxLexFgivenE=2.456 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-2 1-0 2-3",
- "12": "[X] ||| 12 ) angebracht sind . ||| 12 ) attached to it . ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=5.60745 MaxLexFgivenE=3.33055 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 2-2 3-3 4-5"
- },
- "source_groups": [
- "in einer",
- "favorisierten",
- "ausführung",
- "dient einer der",
- "schwenkbolzen ( 8 )",
- "zugleich",
- "als messbolzen , indem",
- "an ihm",
- "dehnungsmess @-@ streifen",
- "(",
- "12 ) angebracht sind ."
- ],
- "target_groups": [
- "in a",
- "favorited",
- "of the",
- "hinge bolts ( 8 )",
- "is used",
- "at the",
- "time as a measuring bolt ,",
- "by having strain gauge",
- "strips (",
- "12 ) attached to it ."
- ],
- "span_info": {
- "1": [
- [
- 0,
- 27
- ],
- [
-
- ]
- ],
- "2": [
- [
- 0,
- 12
- ],
- [
-
- ]
- ],
- "4": [
- [
- 0,
- 2
- ],
- [
-
- ]
- ],
- "5": [
- [
- 2,
- 12
- ],
- [
- [
- 2,
- 3
- ],
- [
- 4,
- 11
- ]
- ]
- ],
- "6": [
- [
- 2,
- 3
- ],
- [
-
- ]
- ],
- "7": [
- [
- 4,
- 11
- ],
- [
- [
- 7,
- 11
- ]
- ]
- ],
- "8": [
- [
- 7,
- 11
- ],
- [
-
- ]
- ],
- "9": [
- [
- 12,
- 27
- ],
- [
- [
- 16,
- 27
- ]
- ]
- ],
- "10": [
- [
- 16,
- 27
- ],
- [
- [
- 18,
- 21
- ],
- [
- 22,
- 27
- ]
- ]
- ],
- "11": [
- [
- 18,
- 21
- ],
- [
-
- ]
- ],
- "12": [
- [
- 22,
- 27
- ],
- [
-
- ]
- ]
- },
- "span2id": {
- "[0, 27]": 1,
- "[0, 12]": 2,
- "[0, 2]": 4,
- "[2, 12]": 5,
- "[2, 3]": 6,
- "[4, 11]": 7,
- "[7, 11]": 8,
- "[12, 27]": 9,
- "[16, 27]": 10,
- "[18, 21]": 11,
- "[22, 27]": 12
- }
-}
diff --git a/derivation_to_json/out2.json b/derivation_to_json/out2.json
deleted file mode 100644
index 84913e1..0000000
--- a/derivation_to_json/out2.json
+++ /dev/null
@@ -1,52 +0,0 @@
-{
- "source": [
- "Die",
- "neuerung",
- "sieht",
- "hierfür",
- "gassensoren",
- "vor",
- ", Bei denen",
- "der metalloxid-",
- "halbleiter auf dem",
- "substrat (1)",
- "in form einer amorphen",
- "metalloxid-halbleiter-",
- "dünnschicht",
- "(2) vorliegt."
- ],
- "target": [
- "The",
- "invention",
- "provides",
- "gas sensors",
- "for this purpose",
- ",",
- "in which",
- "the metal-oxide",
- "semiconductor is on the",
- "substrate (1)",
- "in the form of an amorphous",
- "metal-oxide semiconductor",
- "thin layer",
- "(2)."
- ],
- "align": [
- "0-0",
- "1-1",
- "2-2",
- "3-4",
- "4-3",
- "6-6",
- "7-7",
- "8-8",
- "9-9",
- "10-10",
- "11-11",
- "12-12",
- "13-13"
- ],
- "post_edit": "The invention provides gas sensors for this purpose , in which the metal-oxide semiconductor is on the substrate (1) in the form of an amorphous metal-oxide semiconductor thin layer (2).",
- "duration": 1735996,
- "source_value": "die neuerung sieht hierfür gassensoren vor , bei denen der metalloxid @-@ halbleiter auf dem substrat ( 1 ) in form einer amorphen metalloxid @-@ halbleiter @-@ dünnschicht ( 2 ) vorliegt ."
-}
diff --git a/derivation_to_json/out3.json b/derivation_to_json/out3.json
deleted file mode 100644
index b1282d0..0000000
--- a/derivation_to_json/out3.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
- "source": [
- "Das",
- "substrat kann amorph oder",
- "kristallin sein."
- ],
- "target": [
- "c",
- "b",
- "a"
- ],
- "align": [
- "0-2",
- "1-1",
- "2-0"
- ],
- "post_edit": "c b a",
- "duration": 320043,
- "source_value": "das substrat kann amorph oder kristallin sein ."
-}
diff --git a/derivation_to_json/out4.json b/derivation_to_json/out4.json
deleted file mode 100644
index 0ef13fa..0000000
--- a/derivation_to_json/out4.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
- "source": [
- "Der metalloxid-halbleiter",
- "kann",
- "mindestens ein",
- "dotierungselement enthalten."
- ],
- "target": [
- "doping element.",
- "may contain",
- "The metal-oxide semiconductor",
- "at least one"
- ],
- "align": [
- "0-2",
- "1-1",
- "2-3",
- "3-0"
- ],
- "post_edit": "doping element. may contain The metal-oxide semiconductor at least one",
- "duration": 228277,
- "source_value": "der metalloxid @-@ halbleiter kann mindestens ein dotierungselement enthalten ."
-}
diff --git a/derivation_to_json/out5.json b/derivation_to_json/out5.json
deleted file mode 100644
index 775e7e0..0000000
--- a/derivation_to_json/out5.json
+++ /dev/null
@@ -1,33 +0,0 @@
-{
- "source": [
- "Die",
- "apparatur",
- "dient der",
- "messung des auf eine arbeitsmaschine",
- ",",
- "insbesondere ein hebezeug wirkenden",
- "lastmomentes."
- ],
- "target": [
- "measuring the load moment of a working",
- "appliance.",
- "in particular a lifting",
- "is used",
- "machine,",
- "device",
- "The"
- ],
- "align": [
- "0-6",
- "1-5",
- "2-3",
- "3-0",
- "4-4",
- "5-2",
- "6-1",
- "1-0"
- ],
- "post_edit": "measuring the load moment of a working appliance. in particular a lifting is used machine, device The",
- "duration": 49037,
- "source_value": "die apparatur dient der messung des auf eine arbeitsmaschine , insbesondere ein hebezeug wirkenden lastmomentes ."
-}
diff --git a/derivation_to_json/out6.json b/derivation_to_json/out6.json
deleted file mode 100644
index 628133e..0000000
--- a/derivation_to_json/out6.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
- "source": [
- "Diese komponente ist ein",
- "unmittelbares",
- "mass für das lastmoment."
- ],
- "target": [
- "measure for the load moment.",
- "This component is a",
- "immediate"
- ],
- "align": [
- "0-1",
- "1-2",
- "2-0"
- ],
- "post_edit": "measure for the load moment. This component is a immediate",
- "duration": 18198,
- "source_value": "diese komponente ist ein unmittelbares mass für das lastmoment ."
-}
diff --git a/derivation_to_json/out7.json b/derivation_to_json/out7.json
deleted file mode 100644
index b5586a7..0000000
--- a/derivation_to_json/out7.json
+++ /dev/null
@@ -1,44 +0,0 @@
-{
- "source": [
- "In einer",
- "favorisierten",
- "ausführung",
- "dient einer der",
- "schwenkbolzen (8)",
- "zugleich",
- "als messbolzen, indem",
- "an ihm",
- "dehnungsmess-streifen",
- "(",
- "12) angebracht sind."
- ],
- "target": [
- "12) attached to it.",
- "of the",
- "is used",
- "strips (",
- "by having strain gauge",
- "time as a measuring bolt,",
- "at the",
- "hinge bolts (8)",
- "favorited",
- "In a"
- ],
- "align": [
- "0-9",
- "1-8",
- "2-6",
- "3-1",
- "3-2",
- "4-7",
- "5-6",
- "6-5",
- "7-3",
- "8-4",
- "9-3",
- "10-0"
- ],
- "post_edit": "12) attached to it. of the is used strips ( by having strain gauge time as a measuring bolt, at the hinge bolts (8) favorited In a",
- "duration": 41261,
- "source_value": "in einer favorisierten ausführung dient einer der schwenkbolzen ( 8 ) zugleich als messbolzen , indem an ihm dehnungsmess @-@ streifen ( 12 ) angebracht sind ."
-}
diff --git a/derivation_to_json/rec.rb b/derivation_to_json/rec.rb
deleted file mode 100755
index 84bdc0d..0000000
--- a/derivation_to_json/rec.rb
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'json'
-require 'zipf'
-
-
-before = JSON.parse(ReadFile.read('in7.json'))
-after = JSON.parse(ReadFile.read('out7.json'))
-
-alignment = {}
-after["align"].each { |i|
- a,b = i.split '-'
- a = a.to_i
- b = b.to_i
- if alignment[a]
- alignment[a] << b
- else
- alignment[a] = [b]
- end
-}
-
-srg2idx = {}
-before['source_rgroups'].uniq.each { |k|
- srg2idx[k] = []
- before['source_rgroups'].each_with_index { |i,j|
- if i==k
- srg2idx[k] << j
- end
- }
-}
-
-def get_target_phrases_for_source_span before, after, alignment, v, dontsort=false
- a = []
- tgt = []
- target_phrases = [] # alignment seen from target
- v.each { |i|
- a << after["source"][i]
- target_phrases << alignment[i].first if alignment[i]
- }
- target_phrases.sort! if !dontsort
- target_phrases.each { |j|
- tgt << after["target"][j]
- }
-
- return a, tgt, target_phrases
-end
-
-
-# k is a rule id in after['rules_by_span_id']
-srg2idx.each_pair { |k,v|
- a, tgt, target_phrases = get_target_phrases_for_source_span before, after, alignment, v
- rule_before = before['rules_by_span_id'][k.to_s]
- src_side_before = splitpipe(rule_before)[1]
- x = src_side_before.split
- a.first.insert(0, " [X] ") if x[0] == "[X]"
- a[a.size-1] += " [X] " if x[x.size-1] == "[X]"
- puts rule_before
- puts "#{k} #{a.join " [X] "}"
- puts tgt.to_s
- puts before["span_info"][k.to_s].to_s
- puts "target phrases #{target_phrases}"
- s = ""
- target_phrases.uniq.each { |j| s += after["target"][j]+" " }
- puts "S: #{s}"
- puts "nothing to do" if before["span_info"][k.to_s][1].size==0
- target_phrase_sub = []
- before["span_info"][k.to_s][1].each { |subspan|
- puts subspan.to_s
- subid = before["span2id"][subspan.to_s]
- puts "subid #{subid}"
- puts "XXX #{srg2idx[subid]}"
- _, _, tp = get_target_phrases_for_source_span before, after, alignment, srg2idx[subid], true
- target_phrase_sub << tp
- }
- puts "targ ph sub #{target_phrase_sub.to_s}"
- puts "---"
- puts
-}
-
diff --git a/derivation_to_json/rules.rb b/derivation_to_json/rules.rb
deleted file mode 100755
index b0d267b..0000000
--- a/derivation_to_json/rules.rb
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-
-src = ['Synergistische', 'pharmazeutische Zusammensetzung enthaltend', 'ein Peptid', 'mit 2 bis 5', 'Aminosaeuren']
-target = ["A", "synergistic", "pharmaceutical composition containing", "a peptide", "with 2 to 5", "amino acis"]
-align = [[1], [2], [0,3], [4], [5]]
-
-
-def single_nt a
- r = []
- r << a
- max_sz = a.size-2
- if max_sz<0
- return r
- end
- a.each_index { |i|
- b = Array.new a
- b[i] = "[X]"
- r << b
- c = Array.new b
- (1).upto(a.size-(i+1)) { |k|
- c = Array.new c
- c.delete_at(i+1)
- break if c.size<2
- r << c
- }
- }
-
- return r
-end
-
-src.each_with_index { |i,j|
- src[j..src.size-1].each_with_index { |k,l|
- sub = src[j..(j+l)]
- r = single_nt sub
- r.each { |i|
- puts i.to_s
- }
- }
-}
-