diff options
author | Patrick Simianer <p@simianer.de> | 2015-12-11 16:17:46 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2015-12-11 16:17:46 +0100 |
commit | eac9dd96a4ec3334a93db60a4b97a91dbb7f8257 (patch) | |
tree | 5db18f37f0548bf1bdec62fe612c62544b8712dd | |
parent | ad309390887b947d997e4040dac98126ee9a356c (diff) |
removed dead examples
-rw-r--r-- | derivation_to_json/README | 9 | ||||
-rw-r--r-- | derivation_to_json/after.json | 53 | ||||
-rw-r--r-- | derivation_to_json/before.json | 127 | ||||
-rw-r--r-- | derivation_to_json/example.1.json | 1 | ||||
-rw-r--r-- | derivation_to_json/example.1.output | 2 | ||||
-rw-r--r-- | derivation_to_json/example.1.raw | 1 | ||||
-rw-r--r-- | derivation_to_json/example.json | 1 | ||||
-rw-r--r-- | derivation_to_json/example.output | 1 | ||||
-rw-r--r-- | derivation_to_json/example.raw | 1 | ||||
-rw-r--r-- | derivation_to_json/in.json | 162 | ||||
-rw-r--r-- | derivation_to_json/in2.json | 331 | ||||
-rw-r--r-- | derivation_to_json/in3.json | 64 | ||||
-rw-r--r-- | derivation_to_json/in4.json | 85 | ||||
-rw-r--r-- | derivation_to_json/in5.json | 170 | ||||
-rw-r--r-- | derivation_to_json/in6.json | 85 | ||||
-rw-r--r-- | derivation_to_json/in7.json | 233 | ||||
-rw-r--r-- | derivation_to_json/out2.json | 52 | ||||
-rw-r--r-- | derivation_to_json/out3.json | 20 | ||||
-rw-r--r-- | derivation_to_json/out4.json | 23 | ||||
-rw-r--r-- | derivation_to_json/out5.json | 33 | ||||
-rw-r--r-- | derivation_to_json/out6.json | 20 | ||||
-rw-r--r-- | derivation_to_json/out7.json | 44 | ||||
-rwxr-xr-x | derivation_to_json/rec.rb | 79 | ||||
-rwxr-xr-x | derivation_to_json/rules.rb | 42 |
24 files changed, 6 insertions, 1633 deletions
diff --git a/derivation_to_json/README b/derivation_to_json/README index 947cefd..0f83d5a 100644 --- a/derivation_to_json/README +++ b/derivation_to_json/README @@ -1,4 +1,7 @@ -This (horrid) hack reads cdec's "--show_derivations" and "--extract_rules" -into data structures and tries to align "groups" in source and target sides -of rules in a smart, presentable way. +This (horrid) hack reads cdec's "--show_derivations" and "--extract_rules" into data structures and tries to align "groups" in source and target sides +of rules in a smart, presentable way. The result resembles a phrase-based +system, given that the word alignment gives enough hints. + +To run: + ./derivation_to_json.rb < <one of the .raw files> diff --git a/derivation_to_json/after.json b/derivation_to_json/after.json deleted file mode 100644 index fb58467..0000000 --- a/derivation_to_json/after.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "source": [ - "Weiterhin gehört", - "zur Erfindung", - "die Verwendung dieser Zusammensetzungen zur", - "Therapie und Prophylaxe von", - "Herz-Kreislauf-Erkrankungen", - ", Erkrankungen", - "im Zusammenhang", - "mit einer erhöhten", - "Thrombozytenaggregation,", - "Stoffwechsel-Erkrankungen", - ", Knochenerkrankungen", - "oder", - "Krebserkrankungen", - "." - ], - "target": [ - "Additionally,", - "the invention relates to", - "the use of said compositions for the", - "therapy and prophylaxis of", - "cardiovascular diseases", - ", diseases", - "in conjunction", - "with an increased", - "platelet aggregation,", - "\tmetabolic diseases", - ", osteopathy", - "or", - "cancerous diseases", - "." - ], - "align": [ - "0-0", - "1-1", - "2-2", - "3-3", - "4-4", - "5-5", - "6-6", - "7-7", - "8-8", - "9-9", - "10-10", - "11-11", - "12-12", - "13-13" - ], - "post_edit": "Additionally, the invention relates to the use of said compositions for the therapy and prophylaxis of cardiovascular diseases , diseases in conjunction with an increased platelet aggregation, metabolic diseases , osteopathy or cancerous diseases .", - "duration": 212272, - "source_value": "weiterhin gehört zur Erfindung die Verwendung dieser Zusammensetzungen zur Therapie und Prophylaxe von Herz-Kreislauf-Erkrankungen , Erkrankungen im Zusammenhang mit einer erhöhten Thrombozytenaggregation , Stoffwechsel-Erkrankungen , Knochenerkrankungen oder Krebserkrankungen ." -} diff --git a/derivation_to_json/before.json b/derivation_to_json/before.json deleted file mode 100644 index 1d2c911..0000000 --- a/derivation_to_json/before.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "phrase_alignment": [ - [ - 1 - ], - [ - 2, - 0 - ], - [ - 3 - ], - [ - 4 - ], - [ - 5 - ], - [ - 6 - ], - [ - 7 - ], - [ - 8 - ], - [ - 9 - ], - [ - 10 - ], - [ - 11 - ], - [ - 12 - ], - [ - 13 - ], - [ - 14 - ] - ], - "source_rgroups": [ - 4, - 3, - 7, - 6, - 5, - 8, - 9, - 11, - 12, - 13, - 10, - 9, - 8, - 14 - ], - "target_rgroups": [ - 3, - 4, - 3, - 7, - 6, - 5, - 8, - 9, - 11, - 12, - 13, - 10, - 9, - 8, - 14 - ], - "rules_by_span_id": { - "4": "[X] ||| weiterhin gehört ||| also ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=0.458975 MaxLexFgivenE=4.79441 CountEF=0.30103 SampleCountF=0.477121 EgivenFCoherent=0.30103 ||| 0-0 1-0\n", - "3": "[X] ||| [X] zur Erfindung [X] ||| the invention [X] relates to [X] ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=0 MaxLexEgivenF=3.63316 MaxLexFgivenE=1.80404 CountEF=0.30103 SampleCountF=1.27875 EgivenFCoherent=1.25527 ||| 1-4 2-3\n", - "7": "[X] ||| die Verwendung dieser Zusammensetzungen zur ||| the use of said compositions for the ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=2.93053 MaxLexFgivenE=3.26928 CountEF=0.30103 SampleCountF=0.69897 EgivenFCoherent=0.60206 ||| 0-0 1-1 2-3 3-4 4-5\n", - "6": "[X] ||| [X] Therapie und Prophylaxe von ||| [X] therapy and prophylaxis of ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.926982 MaxLexFgivenE=1.42237 CountEF=0.90309 SampleCountF=1.47712 EgivenFCoherent=0.6173 ||| 1-1 2-2 3-3 4-4\n", - "5": "[X] ||| [X] Herz-Kreislauf-Erkrankungen ||| [X] cardiovascular diseases ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.739399 MaxLexFgivenE=0.797149 CountEF=1.38021 SampleCountF=1.69897 EgivenFCoherent=0.328468 ||| 1-1 1-2\n", - "8": "[X] ||| , Erkrankungen [X] Krebserkrankungen [X] ||| , diseases [X] cancerous diseases [X] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.3856 MaxLexFgivenE=0.862494 CountEF=0.477121 SampleCountF=0.778151 EgivenFCoherent=0.39794 ||| 0-0 1-1 3-3 3-4\n", - "9": "[X] ||| im Zusammenhang [X] oder ||| in conjunction [X] or ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.43987 MaxLexFgivenE=2.45332 CountEF=0.778151 SampleCountF=1.44716 EgivenFCoherent=0.732394 ||| 0-0 1-1 3-3\n", - "11": "[X] ||| mit einer erhöhten [X] ||| with an increased [X] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.72017 MaxLexFgivenE=2.04771 CountEF=0.954243 SampleCountF=1.6902 EgivenFCoherent=0.778151 ||| 0-0 1-1 2-2\n", - "12": "[X] ||| Thrombozytenaggregation , [X] ||| platelet aggregation [X] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.886057 MaxLexFgivenE=1.98654 CountEF=0.477121 SampleCountF=0.69897 EgivenFCoherent=0.30103 ||| 0-0 0-1\n", - "13": "[X] ||| Stoffwechsel-Erkrankungen ||| asdf ||| ForceRule=1 ||| 0-0\n", - "10": "[X] ||| [X] , Knochenerkrankungen ||| [X] , osteopathy ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.11792 MaxLexFgivenE=0.186321 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 1-1 2-2\n", - "14": "[X] ||| . ||| . ||| IsSupportedOnline=1 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.0201086 MaxLexFgivenE=0.135104 CountEF=2.39967 SampleCountF=2.48287 EgivenFCoherent=0.0835026 ||| 0-0\n" - }, - "source_groups": [ - "weiterhin gehört", - "zur Erfindung", - "die Verwendung dieser Zusammensetzungen zur", - "Therapie und Prophylaxe von", - "Herz-Kreislauf-Erkrankungen", - ", Erkrankungen", - "im Zusammenhang", - "mit einer erhöhten", - "Thrombozytenaggregation ,", - "Stoffwechsel-Erkrankungen", - ", Knochenerkrankungen", - "oder", - "Krebserkrankungen", - "." - ], - "target_groups": [ - "the invention", - "also", - "relates to", - "the use of said compositions for the", - "therapy and prophylaxis of", - "cardiovascular diseases", - ", diseases", - "in conjunction", - "with an increased", - "platelet aggregation", - "asdf", - ", osteopathy", - "or", - "cancerous diseases", - "." - ] -} diff --git a/derivation_to_json/example.1.json b/derivation_to_json/example.1.json deleted file mode 100644 index ab793c2..0000000 --- a/derivation_to_json/example.1.json +++ /dev/null @@ -1 +0,0 @@ -{"phrase_alignment":[[0],[1],[2],[3]],"source_groups":["(","ein","test",")"],"target_groups":["(","another","test","level )"]} diff --git a/derivation_to_json/example.1.output b/derivation_to_json/example.1.output deleted file mode 100644 index 65e4e27..0000000 --- a/derivation_to_json/example.1.output +++ /dev/null @@ -1,2 +0,0 @@ -["<0,4> [Goal] ||| [S] ||| [1]", "<0,4> [S] ||| [S] [X] ||| [1] [2]", "<0,2> [S] ||| [S] [X] ||| [1] [2]", "<0,1> [S] ||| [X] ||| [1]", "<0,1> [X] ||| ( ||| (", "<1,2> [X] ||| ein ||| another", "<2,4> [X] ||| [X] ) ||| [1] level )", "<2,3> [X] ||| test ||| test"] -( another test level diff --git a/derivation_to_json/example.1.raw b/derivation_to_json/example.1.raw deleted file mode 100644 index 0a2a6b7..0000000 --- a/derivation_to_json/example.1.raw +++ /dev/null @@ -1 +0,0 @@ -({<0,4> [Goal] ||| [S] ||| [1]}({<0,4> [S] ||| [S] [X] ||| [1] [2]}({<0,2> [S] ||| [S] [X] ||| [1] [2]}({<0,1> [S] ||| [X] ||| [1]}({<0,1> [X] ||| ( ||| (}) ) ({<1,2> [X] ||| ein ||| another}) ) ({<2,4> [X] ||| [X] ) ||| [1] level )}({<2,3> [X] ||| test ||| test}) ) ) ) diff --git a/derivation_to_json/example.json b/derivation_to_json/example.json deleted file mode 100644 index 6bb2b19..0000000 --- a/derivation_to_json/example.json +++ /dev/null @@ -1 +0,0 @@ -{"phrase_alignment":[[0,2],[1],[0,2],[3],[4],[5],[6],[7,9],[8],[7,9],[10,18],[11],[10,18],[12],[13],[14],[15],[16],[17],[10,18],[19],[20],[21,23],[22],[21,23],[24],[25]],"source_groups":["hier also","ein bescheidener",",","auf alle","demokratien","anzuwendender","vorschlag",":","der markt für","ideen","funktioniert","besser",",","wenn es den","bürgern","leichter","fällt , die","zielkonflikte zwischen","treffsicherheit","der","aussagen und","unterhaltung","oder zwischen","treffsicherheit","und","parteitreue","zu erkennen ."],"target_groups":["so here","a modest",",","to all","democracies","anzuwendender","proposal",":","the market for","ideas","works","better","if","citizens","easier",", the","trade @-@ offs between","treffsicherheit","the","statements and","entertainment","or","treffsicherheit","and","parteitreue","."]} diff --git a/derivation_to_json/example.output b/derivation_to_json/example.output deleted file mode 100644 index ac66de7..0000000 --- a/derivation_to_json/example.output +++ /dev/null @@ -1 +0,0 @@ -so here a modest , to all democracies anzuwendender proposal : the market for ideas works better if citizens easier , the trade @-@ offs between treffsicherheit the statements and entertainment or treffsicherheit and parteitreue . diff --git a/derivation_to_json/example.raw b/derivation_to_json/example.raw deleted file mode 100644 index 5128e09..0000000 --- a/derivation_to_json/example.raw +++ /dev/null @@ -1 +0,0 @@ -({<0,41> [Goal] ||| [S] ||| [1]}({<0,41> [S] ||| [S] [X] ||| [1] [2]}({<0,37> [S] ||| [S] [X] ||| [1] [2]}({<0,33> [S] ||| [S] [X] ||| [1] [2]}({<0,32> [S] ||| [S] [X] ||| [1] [2]}({<0,30> [S] ||| [S] [X] ||| [1] [2]}({<0,15> [S] ||| [S] [X] ||| [1] [2]}({<0,10> [S] ||| [S] [X] ||| [1] [2]}({<0,9> [S] ||| [S] [X] ||| [1] [2]}({<0,8> [S] ||| [S] [X] ||| [1] [2]}({<0,7> [S] ||| [S] [X] ||| [1] [2]}({<0,5> [S] ||| [X] ||| [1]}({<0,5> [X] ||| hier also [X] , ||| so here [1] ,}({<2,4> [X] ||| ein bescheidener ||| a modest}) ) ) ({<5,7> [X] ||| auf alle ||| to all}) ) ({<7,8> [X] ||| demokratien ||| democracies}) ) ({<8,9> [X] ||| anzuwendender ||| anzuwendender}) ) ({<9,10> [X] ||| vorschlag ||| proposal}) ) ({<10,15> [X] ||| : [X] ideen ||| : [1] ideas}({<11,14> [X] ||| der markt für ||| the market for}) ) ) ({<15,30> [X] ||| funktioniert [X] , [X] der ||| works [1] [2] the}({<16,17> [X] ||| besser ||| better}) ({<18,29> [X] ||| wenn es den [X] ||| if [1]}({<21,29> [X] ||| [X] fällt , die [X] ||| [1] , the [2]}({<21,23> [X] ||| [X] leichter ||| [1] easier}({<21,22> [X] ||| bürgern ||| citizens}) ) ({<26,29> [X] ||| zielkonflikte zwischen [X] ||| trade @-@ offs between [1]}({<28,29> [X] ||| treffsicherheit ||| treffsicherheit}) ) ) ) ) ) ({<30,32> [X] ||| aussagen und ||| statements and}) ) ({<32,33> [X] ||| unterhaltung ||| entertainment}) ) ({<33,37> [X] ||| oder zwischen [X] und ||| or [1] and}({<35,36> [X] ||| treffsicherheit ||| treffsicherheit}) ) ) ({<37,41> [X] ||| [X] zu erkennen . ||| [1] .}({<37,38> [X] ||| parteitreue ||| parteitreue}) ) ) ) diff --git a/derivation_to_json/in.json b/derivation_to_json/in.json deleted file mode 100644 index 6fd2c88..0000000 --- a/derivation_to_json/in.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "phrase_alignment": [ - [ - 0 - ], - [ - 1 - ], - [ - 2 - ], - [ - 3 - ], - [ - 4 - ], - [ - 5 - ], - [ - 6 - ], - [ - 7 - ] - ], - "source_rgroups": [ - 4, - 6, - 5, - 4, - 3, - 7, - 8, - 7 - ], - "target_rgroups": [ - 4, - 6, - 5, - 4, - 3, - 7, - 8, - 7 - ], - "rules_by_span_id": { - "4": "[X] ||| die [X] auf basis von ||| the [1] which are based on metal ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=5.95916 MaxLexFgivenE=3.2265 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 2-4 3-4 3-5 4-6", - "6": "[X] ||| neuerung ||| invention ||| ForceRule=1 ||| 0-0", - "5": "[X] ||| [X] bezieht sich auf gassensoren ||| [1] relates to gas sensors ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.45124 MaxLexFgivenE=2.73473 CountEF=0.477121 SampleCountF=0.477121 EgivenFCoherent=-0 ||| 1-1 1-2 2-1 4-3 4-4", - "3": "[X] ||| [X] metalloxid @-@ halbleitern , ||| [1] @-@ oxide semiconductors and which ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=3.79715 MaxLexFgivenE=2.26688 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 1-2 2-1 3-3 4-5", - "7": "[X] ||| die sehr [X] sind . ||| are very [1] . ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=0.456219 MaxLexFgivenE=2.16613 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 3-3 4-3", - "8": "[X] ||| empfindlich und wenig temperaturabhängig ||| sensitive and not appreciably temperature @-@ dependent ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=6.5059 MaxLexFgivenE=1.51642 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 2-3 3-3 3-4 3-6" - }, - "source_groups": [ - "die", - "neuerung", - "bezieht sich auf gassensoren", - "auf basis von", - "metalloxid @-@ halbleitern ,", - "die sehr", - "empfindlich und wenig temperaturabhängig", - "sind ." - ], - "target_groups": [ - "the", - "invention", - "relates to gas sensors", - "which are based on metal", - "@-@ oxide semiconductors and which", - "are very", - "sensitive and not appreciably temperature @-@ dependent", - "." - ], - "span_info": { - "1": [ - [ - 0, - 21 - ], - [ - - ] - ], - "3": [ - [ - 0, - 13 - ], - [ - [ - 0, - 9 - ] - ] - ], - "4": [ - [ - 0, - 9 - ], - [ - [ - 1, - 6 - ] - ] - ], - "5": [ - [ - 1, - 6 - ], - [ - [ - 1, - 2 - ] - ] - ], - "6": [ - [ - 1, - 2 - ], - [ - - ] - ], - "7": [ - [ - 13, - 21 - ], - [ - [ - 15, - 19 - ] - ] - ], - "8": [ - [ - 15, - 19 - ], - [ - - ] - ] - }, - "span2id": { - "[0, 21]": 1, - "[0, 13]": 3, - "[0, 9]": 4, - "[1, 6]": 5, - "[1, 2]": 6, - "[13, 21]": 7, - "[15, 19]": 8 - } -} diff --git a/derivation_to_json/in2.json b/derivation_to_json/in2.json deleted file mode 100644 index 8b7859e..0000000 --- a/derivation_to_json/in2.json +++ /dev/null @@ -1,331 +0,0 @@ -{ - "phrase_alignment": [ - [ - 0 - ], - [ - 1 - ], - [ - 2 - ], - [ - 4 - ], - [ - 3 - ], - [ - 5 - ], - [ - 6 - ], - [ - 7 - ], - [ - 8 - ], - [ - 9 - ], - [ - 10 - ], - [ - 11 - ], - [ - 12 - ], - [ - 13 - ] - ], - "source_rgroups": [ - 6, - 7, - 5, - 9, - 8, - 11, - 10, - 12, - 13, - 14, - 15, - 16, - 17, - 18 - ], - "target_rgroups": [ - 6, - 7, - 5, - 8, - 9, - 11, - 10, - 12, - 13, - 14, - 15, - 16, - 17, - 18 - ], - "rules_by_span_id": { - "6": "[X] ||| die [X] ||| the [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.3972 MaxLexFgivenE=0.936969 CountEF=1.72428 SampleCountF=2.48144 EgivenFCoherent=0.764004 ||| 0-0", - "7": "[X] ||| neuerung ||| invention ||| ForceRule=1 ||| 0-0", - "5": "[X] ||| [X] sieht ||| [1] provides ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=0.30103 MaxLexFgivenE=0.425969 CountEF=0.30103 SampleCountF=0.69897 EgivenFCoherent=0.60206 ||| 1-1", - "9": "[X] ||| hierfür ||| this ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0 MaxLexFgivenE=1.79588 CountEF=0.477121 SampleCountF=0.477121 EgivenFCoherent=-0 ||| 0-0", - "8": "[X] ||| [X] gassensoren ||| gas sensors [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.671584 MaxLexFgivenE=0 CountEF=0.477121 SampleCountF=0.90309 EgivenFCoherent=0.544068 ||| 1-0 1-1", - "11": "[X] ||| vor ||| , ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=0 MaxLexEgivenF=1.29576 MaxLexFgivenE=3.07287 CountEF=0.30103 SampleCountF=1.34242 EgivenFCoherent=1.32222 ||| 0-0", - "10": "[X] ||| [X] , bei denen [X] ||| [1] in which [2] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.3792 MaxLexFgivenE=3.12049 CountEF=0.60206 SampleCountF=0.954243 EgivenFCoherent=0.425969 ||| 1-2 2-1 3-2", - "12": "[X] ||| der metalloxid @-@ [X] ||| the metal @-@ oxide [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.46878 MaxLexFgivenE=2.10214 CountEF=0.60206 SampleCountF=0.60206 EgivenFCoherent=-0 ||| 0-0 1-1 1-3 2-2", - "13": "[X] ||| halbleiter auf dem [X] ||| semiconductor is on the [1] ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=2.07042 MaxLexFgivenE=2.63438 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-2 2-1 2-3", - "14": "[X] ||| substrat ( 1 ) ||| substrate ( 1 ) ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.754079 MaxLexFgivenE=1.55196 CountEF=0.477121 SampleCountF=0.477121 EgivenFCoherent=-0 ||| 0-0 1-1 2-2 3-3", - "15": "[X] ||| in form einer amorphen [X] ||| in the form of an amorphous [1] ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=3.39105 MaxLexFgivenE=2.14549 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-2 1-3 1-4 2-4 3-5", - "16": "[X] ||| metalloxid @-@ halbleiter @-@ [X] ||| metal @-@ oxide semiconductor [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.08127 MaxLexFgivenE=2.41368 CountEF=0.477121 SampleCountF=0.477121 EgivenFCoherent=-0 ||| 0-0 0-2 1-1 2-3 3-1", - "17": "[X] ||| dünnschicht [X] ||| thin layer [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=3.04136 MaxLexFgivenE=0.929419 CountEF=0.477121 SampleCountF=0.477121 EgivenFCoherent=-0 ||| 0-0", - "18": "[X] ||| ( 2 ) vorliegt . ||| ( 2 ) . ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=0.776902 MaxLexFgivenE=4.75559 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 2-0 2-2 4-3" - }, - "source_groups": [ - "die", - "neuerung", - "sieht", - "hierfür", - "gassensoren", - "vor", - ", bei denen", - "der metalloxid @-@", - "halbleiter auf dem", - "substrat ( 1 )", - "in form einer amorphen", - "metalloxid @-@ halbleiter @-@", - "dünnschicht", - "( 2 ) vorliegt ." - ], - "target_groups": [ - "the", - "invention", - "provides", - "gas sensors", - "this", - ",", - "in which", - "the metal @-@ oxide", - "semiconductor is on the", - "substrate ( 1 )", - "in the form of an amorphous", - "metal @-@ oxide semiconductor", - "thin layer", - "( 2 ) ." - ], - "span_info": { - "1": [ - [ - 0, - 33 - ], - [ - - ] - ], - "2": [ - [ - 0, - 19 - ], - [ - - ] - ], - "3": [ - [ - 0, - 5 - ], - [ - - ] - ], - "5": [ - [ - 0, - 3 - ], - [ - [ - 0, - 2 - ] - ] - ], - "6": [ - [ - 0, - 2 - ], - [ - [ - 1, - 2 - ] - ] - ], - "7": [ - [ - 1, - 2 - ], - [ - - ] - ], - "8": [ - [ - 3, - 5 - ], - [ - [ - 3, - 4 - ] - ] - ], - "9": [ - [ - 3, - 4 - ], - [ - - ] - ], - "10": [ - [ - 5, - 19 - ], - [ - [ - 5, - 6 - ], - [ - 9, - 19 - ] - ] - ], - "11": [ - [ - 5, - 6 - ], - [ - - ] - ], - "12": [ - [ - 9, - 19 - ], - [ - [ - 12, - 19 - ] - ] - ], - "13": [ - [ - 12, - 19 - ], - [ - [ - 15, - 19 - ] - ] - ], - "14": [ - [ - 15, - 19 - ], - [ - - ] - ], - "15": [ - [ - 19, - 33 - ], - [ - [ - 23, - 33 - ] - ] - ], - "16": [ - [ - 23, - 33 - ], - [ - [ - 27, - 33 - ] - ] - ], - "17": [ - [ - 27, - 33 - ], - [ - [ - 28, - 33 - ] - ] - ], - "18": [ - [ - 28, - 33 - ], - [ - - ] - ] - }, - "span2id": { - "[0, 33]": 1, - "[0, 19]": 2, - "[0, 5]": 3, - "[0, 3]": 5, - "[0, 2]": 6, - "[1, 2]": 7, - "[3, 5]": 8, - "[3, 4]": 9, - "[5, 19]": 10, - "[5, 6]": 11, - "[9, 19]": 12, - "[12, 19]": 13, - "[15, 19]": 14, - "[19, 33]": 15, - "[23, 33]": 16, - "[27, 33]": 17, - "[28, 33]": 18 - } -} diff --git a/derivation_to_json/in3.json b/derivation_to_json/in3.json deleted file mode 100644 index 2181e2a..0000000 --- a/derivation_to_json/in3.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "phrase_alignment": [ - [ - 0 - ], - [ - 1 - ], - [ - 2 - ] - ], - "source_rgroups": [ - 2, - 3, - 2 - ], - "target_rgroups": [ - 2, - 3, - 2 - ], - "rules_by_span_id": { - "2": "[X] ||| das [X] kristallin sein . ||| the [1] crystalline . ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=0.317153 MaxLexFgivenE=2.02464 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 2-2 3-2 4-3", - "3": "[X] ||| substrat kann amorph oder ||| substrate may be amorphous or ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.89753 MaxLexFgivenE=1.43405 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 2-2 2-3 3-4" - }, - "source_groups": [ - "das", - "substrat kann amorph oder", - "kristallin sein ." - ], - "target_groups": [ - "the", - "substrate may be amorphous or", - "crystalline ." - ], - "span_info": { - "2": [ - [ - 0, - 8 - ], - [ - [ - 1, - 5 - ] - ] - ], - "3": [ - [ - 1, - 5 - ], - [ - - ] - ] - }, - "span2id": { - "[0, 8]": 2, - "[1, 5]": 3 - } -} diff --git a/derivation_to_json/in4.json b/derivation_to_json/in4.json deleted file mode 100644 index 566b91f..0000000 --- a/derivation_to_json/in4.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "phrase_alignment": [ - [ - 0 - ], - [ - 1 - ], - [ - 2 - ], - [ - 3 - ] - ], - "source_rgroups": [ - 2, - 3, - 4, - 3 - ], - "target_rgroups": [ - 2, - 3, - 4, - 3 - ], - "rules_by_span_id": { - "2": "[X] ||| der metalloxid @-@ halbleiter [X] ||| the metal @-@ oxide semiconductor [1] ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.56569 MaxLexFgivenE=2.8425 CountEF=0.477121 SampleCountF=0.69897 EgivenFCoherent=0.30103 ||| 0-0 1-1 1-3 2-2 3-4", - "3": "[X] ||| kann [X] dotierungselement enthalten . ||| may contain [1] doping element . ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=2.14956 MaxLexFgivenE=1.87655 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 2-3 2-4 3-3 4-5", - "4": "[X] ||| mindestens ein ||| at least one ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=1.61108 MaxLexFgivenE=1.75632 CountEF=0.477121 SampleCountF=0.778151 EgivenFCoherent=0.39794 ||| 0-0 0-1 1-2" - }, - "source_groups": [ - "der metalloxid @-@ halbleiter", - "kann", - "mindestens ein", - "dotierungselement enthalten ." - ], - "target_groups": [ - "the metal @-@ oxide semiconductor", - "may contain", - "at least one", - "doping element ." - ], - "span_info": { - "2": [ - [ - 0, - 10 - ], - [ - [ - 4, - 10 - ] - ] - ], - "3": [ - [ - 4, - 10 - ], - [ - [ - 5, - 7 - ] - ] - ], - "4": [ - [ - 5, - 7 - ], - [ - - ] - ] - }, - "span2id": { - "[0, 10]": 2, - "[4, 10]": 3, - "[5, 7]": 4 - } -} diff --git a/derivation_to_json/in5.json b/derivation_to_json/in5.json deleted file mode 100644 index 9d320ae..0000000 --- a/derivation_to_json/in5.json +++ /dev/null @@ -1,170 +0,0 @@ -{ - "phrase_alignment": [ - [ - 0 - ], - [ - 1 - ], - [ - 2 - ], - [ - 3 - ], - [ - 4 - ], - [ - 5 - ], - [ - 6 - ] - ], - "source_rgroups": [ - 5, - 6, - 7, - 9, - 8, - 10, - 8 - ], - "target_rgroups": [ - 5, - 6, - 7, - 9, - 8, - 10, - 8 - ], - "rules_by_span_id": { - "5": "[X] ||| die ||| the ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.3972 MaxLexFgivenE=0.936969 CountEF=1.97313 SampleCountF=2.48287 EgivenFCoherent=0.51296 ||| 0-0", - "6": "[X] ||| apparatur ||| device ||| ForceRule=1 ||| 0-0", - "7": "[X] ||| dient der ||| is used ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.11454 MaxLexFgivenE=2.49742 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 0-1 1-0", - "9": "[X] ||| messung des auf eine arbeitsmaschine ||| measuring the load moment of a working ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=8.74061 MaxLexFgivenE=5.92883 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 2-4 3-5 4-6", - "8": "[X] ||| [X] , [X] lastmomentes . ||| [1] machine , [2] appliance . ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=3.9196 MaxLexFgivenE=1.76182 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 1-2 3-4 4-5", - "10": "[X] ||| insbesondere ein hebezeug wirkenden ||| in particular a lifting ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.67342 MaxLexFgivenE=4.61451 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 0-1 1-2 2-1 3-3" - }, - "source_groups": [ - "die", - "apparatur", - "dient der", - "messung des auf eine arbeitsmaschine", - ",", - "insbesondere ein hebezeug wirkenden", - "lastmomentes ." - ], - "target_groups": [ - "the", - "device", - "is used", - "measuring the load moment of a working", - "machine ,", - "in particular a lifting", - "appliance ." - ], - "span_info": { - "1": [ - [ - 0, - 16 - ], - [ - - ] - ], - "2": [ - [ - 0, - 4 - ], - [ - - ] - ], - "3": [ - [ - 0, - 2 - ], - [ - - ] - ], - "5": [ - [ - 0, - 1 - ], - [ - - ] - ], - "6": [ - [ - 1, - 2 - ], - [ - - ] - ], - "7": [ - [ - 2, - 4 - ], - [ - - ] - ], - "8": [ - [ - 4, - 16 - ], - [ - [ - 4, - 9 - ], - [ - 10, - 14 - ] - ] - ], - "9": [ - [ - 4, - 9 - ], - [ - - ] - ], - "10": [ - [ - 10, - 14 - ], - [ - - ] - ] - }, - "span2id": { - "[0, 16]": 1, - "[0, 4]": 2, - "[0, 2]": 3, - "[0, 1]": 5, - "[1, 2]": 6, - "[2, 4]": 7, - "[4, 16]": 8, - "[4, 9]": 9, - "[10, 14]": 10 - } -} diff --git a/derivation_to_json/in6.json b/derivation_to_json/in6.json deleted file mode 100644 index 0aa652e..0000000 --- a/derivation_to_json/in6.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "phrase_alignment": [ - [ - 0 - ], - [ - 1 - ], - [ - 2 - ] - ], - "source_rgroups": [ - 3, - 4, - 5 - ], - "target_rgroups": [ - 3, - 4, - 5 - ], - "rules_by_span_id": { - "3": "[X] ||| diese komponente ist ein [X] ||| this component is a [1] ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.35358 MaxLexFgivenE=3.52666 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 2-2 3-3", - "4": "[X] ||| unmittelbares ||| immediate ||| ForceRule=1 ||| 0-0", - "5": "[X] ||| mass für das lastmoment . ||| measure for the load moment . ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.13152 MaxLexFgivenE=3.40485 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 2-2 3-3 3-4 4-5" - }, - "source_groups": [ - "diese komponente ist ein", - "unmittelbares", - "mass für das lastmoment ." - ], - "target_groups": [ - "this component is a", - "immediate", - "measure for the load moment ." - ], - "span_info": { - "1": [ - [ - 0, - 10 - ], - [ - - ] - ], - "3": [ - [ - 0, - 5 - ], - [ - [ - 4, - 5 - ] - ] - ], - "4": [ - [ - 4, - 5 - ], - [ - - ] - ], - "5": [ - [ - 5, - 10 - ], - [ - - ] - ] - }, - "span2id": { - "[0, 10]": 1, - "[0, 5]": 3, - "[4, 5]": 4, - "[5, 10]": 5 - } -} diff --git a/derivation_to_json/in7.json b/derivation_to_json/in7.json deleted file mode 100644 index 8dd3531..0000000 --- a/derivation_to_json/in7.json +++ /dev/null @@ -1,233 +0,0 @@ -{ - "phrase_alignment": [ - [ - 0 - ], - [ - 1 - ], - [ - 5 - ], - [ - 2, - 4 - ], - [ - 3 - ], - [ - 5 - ], - [ - 6 - ], - [ - 8 - ], - [ - 7 - ], - [ - 8 - ], - [ - 9 - ] - ], - "source_rgroups": [ - 4, - 6, - 5, - 7, - 8, - 5, - 9, - 10, - 11, - 10, - 12 - ], - "target_rgroups": [ - 4, - 6, - 7, - 8, - 7, - 5, - 9, - 11, - 10, - 12 - ], - "rules_by_span_id": { - "4": "[X] ||| in einer ||| in a ||| IsSupportedOnline=0 IsSingletonFE=0 IsSingletonF=0 MaxLexEgivenF=0.605196 MaxLexFgivenE=1.51196 CountEF=0.90309 SampleCountF=1.63347 EgivenFCoherent=0.778151 ||| 0-0 1-1", - "6": "[X] ||| favorisierten ||| favorited ||| ForceRule=1 ||| 0-0", - "5": "[X] ||| [X] ausführung [X] zugleich ||| [1] [2] at the ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=2.87793 MaxLexFgivenE=7.30253 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 3-3", - "7": "[X] ||| dient einer der [X] ||| of the [1] is used ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=2.41969 MaxLexFgivenE=4.25111 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-3 0-4 1-0 2-1", - "8": "[X] ||| schwenkbolzen ( 8 ) ||| hinge bolts ( 8 ) ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=3.81279 MaxLexFgivenE=2.05233 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-1 1-0 1-2 1-4 2-3 3-4", - "9": "[X] ||| als messbolzen , indem [X] ||| time as a measuring bolt , [1] ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=8.72269 MaxLexFgivenE=5.55315 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-1 1-0 2-0", - "10": "[X] ||| an ihm [X] ( [X] ||| [1] strips ( [2] ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=1.86875 MaxLexFgivenE=4.61571 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-1 3-2", - "11": "[X] ||| dehnungsmess @-@ streifen ||| by having strain gauge ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=5.6656 MaxLexFgivenE=2.456 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-2 1-0 2-3", - "12": "[X] ||| 12 ) angebracht sind . ||| 12 ) attached to it . ||| IsSupportedOnline=0 IsSingletonFE=1 IsSingletonF=1 MaxLexEgivenF=5.60745 MaxLexFgivenE=3.33055 CountEF=0.30103 SampleCountF=0.30103 EgivenFCoherent=-0 ||| 0-0 1-1 2-2 3-3 4-5" - }, - "source_groups": [ - "in einer", - "favorisierten", - "ausführung", - "dient einer der", - "schwenkbolzen ( 8 )", - "zugleich", - "als messbolzen , indem", - "an ihm", - "dehnungsmess @-@ streifen", - "(", - "12 ) angebracht sind ." - ], - "target_groups": [ - "in a", - "favorited", - "of the", - "hinge bolts ( 8 )", - "is used", - "at the", - "time as a measuring bolt ,", - "by having strain gauge", - "strips (", - "12 ) attached to it ." - ], - "span_info": { - "1": [ - [ - 0, - 27 - ], - [ - - ] - ], - "2": [ - [ - 0, - 12 - ], - [ - - ] - ], - "4": [ - [ - 0, - 2 - ], - [ - - ] - ], - "5": [ - [ - 2, - 12 - ], - [ - [ - 2, - 3 - ], - [ - 4, - 11 - ] - ] - ], - "6": [ - [ - 2, - 3 - ], - [ - - ] - ], - "7": [ - [ - 4, - 11 - ], - [ - [ - 7, - 11 - ] - ] - ], - "8": [ - [ - 7, - 11 - ], - [ - - ] - ], - "9": [ - [ - 12, - 27 - ], - [ - [ - 16, - 27 - ] - ] - ], - "10": [ - [ - 16, - 27 - ], - [ - [ - 18, - 21 - ], - [ - 22, - 27 - ] - ] - ], - "11": [ - [ - 18, - 21 - ], - [ - - ] - ], - "12": [ - [ - 22, - 27 - ], - [ - - ] - ] - }, - "span2id": { - "[0, 27]": 1, - "[0, 12]": 2, - "[0, 2]": 4, - "[2, 12]": 5, - "[2, 3]": 6, - "[4, 11]": 7, - "[7, 11]": 8, - "[12, 27]": 9, - "[16, 27]": 10, - "[18, 21]": 11, - "[22, 27]": 12 - } -} diff --git a/derivation_to_json/out2.json b/derivation_to_json/out2.json deleted file mode 100644 index 84913e1..0000000 --- a/derivation_to_json/out2.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "source": [ - "Die", - "neuerung", - "sieht", - "hierfür", - "gassensoren", - "vor", - ", Bei denen", - "der metalloxid-", - "halbleiter auf dem", - "substrat (1)", - "in form einer amorphen", - "metalloxid-halbleiter-", - "dünnschicht", - "(2) vorliegt." - ], - "target": [ - "The", - "invention", - "provides", - "gas sensors", - "for this purpose", - ",", - "in which", - "the metal-oxide", - "semiconductor is on the", - "substrate (1)", - "in the form of an amorphous", - "metal-oxide semiconductor", - "thin layer", - "(2)." - ], - "align": [ - "0-0", - "1-1", - "2-2", - "3-4", - "4-3", - "6-6", - "7-7", - "8-8", - "9-9", - "10-10", - "11-11", - "12-12", - "13-13" - ], - "post_edit": "The invention provides gas sensors for this purpose , in which the metal-oxide semiconductor is on the substrate (1) in the form of an amorphous metal-oxide semiconductor thin layer (2).", - "duration": 1735996, - "source_value": "die neuerung sieht hierfür gassensoren vor , bei denen der metalloxid @-@ halbleiter auf dem substrat ( 1 ) in form einer amorphen metalloxid @-@ halbleiter @-@ dünnschicht ( 2 ) vorliegt ." -} diff --git a/derivation_to_json/out3.json b/derivation_to_json/out3.json deleted file mode 100644 index b1282d0..0000000 --- a/derivation_to_json/out3.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "source": [ - "Das", - "substrat kann amorph oder", - "kristallin sein." - ], - "target": [ - "c", - "b", - "a" - ], - "align": [ - "0-2", - "1-1", - "2-0" - ], - "post_edit": "c b a", - "duration": 320043, - "source_value": "das substrat kann amorph oder kristallin sein ." -} diff --git a/derivation_to_json/out4.json b/derivation_to_json/out4.json deleted file mode 100644 index 0ef13fa..0000000 --- a/derivation_to_json/out4.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "source": [ - "Der metalloxid-halbleiter", - "kann", - "mindestens ein", - "dotierungselement enthalten." - ], - "target": [ - "doping element.", - "may contain", - "The metal-oxide semiconductor", - "at least one" - ], - "align": [ - "0-2", - "1-1", - "2-3", - "3-0" - ], - "post_edit": "doping element. may contain The metal-oxide semiconductor at least one", - "duration": 228277, - "source_value": "der metalloxid @-@ halbleiter kann mindestens ein dotierungselement enthalten ." -} diff --git a/derivation_to_json/out5.json b/derivation_to_json/out5.json deleted file mode 100644 index 775e7e0..0000000 --- a/derivation_to_json/out5.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "source": [ - "Die", - "apparatur", - "dient der", - "messung des auf eine arbeitsmaschine", - ",", - "insbesondere ein hebezeug wirkenden", - "lastmomentes." - ], - "target": [ - "measuring the load moment of a working", - "appliance.", - "in particular a lifting", - "is used", - "machine,", - "device", - "The" - ], - "align": [ - "0-6", - "1-5", - "2-3", - "3-0", - "4-4", - "5-2", - "6-1", - "1-0" - ], - "post_edit": "measuring the load moment of a working appliance. in particular a lifting is used machine, device The", - "duration": 49037, - "source_value": "die apparatur dient der messung des auf eine arbeitsmaschine , insbesondere ein hebezeug wirkenden lastmomentes ." -} diff --git a/derivation_to_json/out6.json b/derivation_to_json/out6.json deleted file mode 100644 index 628133e..0000000 --- a/derivation_to_json/out6.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "source": [ - "Diese komponente ist ein", - "unmittelbares", - "mass für das lastmoment." - ], - "target": [ - "measure for the load moment.", - "This component is a", - "immediate" - ], - "align": [ - "0-1", - "1-2", - "2-0" - ], - "post_edit": "measure for the load moment. This component is a immediate", - "duration": 18198, - "source_value": "diese komponente ist ein unmittelbares mass für das lastmoment ." -} diff --git a/derivation_to_json/out7.json b/derivation_to_json/out7.json deleted file mode 100644 index b5586a7..0000000 --- a/derivation_to_json/out7.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "source": [ - "In einer", - "favorisierten", - "ausführung", - "dient einer der", - "schwenkbolzen (8)", - "zugleich", - "als messbolzen, indem", - "an ihm", - "dehnungsmess-streifen", - "(", - "12) angebracht sind." - ], - "target": [ - "12) attached to it.", - "of the", - "is used", - "strips (", - "by having strain gauge", - "time as a measuring bolt,", - "at the", - "hinge bolts (8)", - "favorited", - "In a" - ], - "align": [ - "0-9", - "1-8", - "2-6", - "3-1", - "3-2", - "4-7", - "5-6", - "6-5", - "7-3", - "8-4", - "9-3", - "10-0" - ], - "post_edit": "12) attached to it. of the is used strips ( by having strain gauge time as a measuring bolt, at the hinge bolts (8) favorited In a", - "duration": 41261, - "source_value": "in einer favorisierten ausführung dient einer der schwenkbolzen ( 8 ) zugleich als messbolzen , indem an ihm dehnungsmess @-@ streifen ( 12 ) angebracht sind ." -} diff --git a/derivation_to_json/rec.rb b/derivation_to_json/rec.rb deleted file mode 100755 index 84bdc0d..0000000 --- a/derivation_to_json/rec.rb +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env ruby - -require 'json' -require 'zipf' - - -before = JSON.parse(ReadFile.read('in7.json')) -after = JSON.parse(ReadFile.read('out7.json')) - -alignment = {} -after["align"].each { |i| - a,b = i.split '-' - a = a.to_i - b = b.to_i - if alignment[a] - alignment[a] << b - else - alignment[a] = [b] - end -} - -srg2idx = {} -before['source_rgroups'].uniq.each { |k| - srg2idx[k] = [] - before['source_rgroups'].each_with_index { |i,j| - if i==k - srg2idx[k] << j - end - } -} - -def get_target_phrases_for_source_span before, after, alignment, v, dontsort=false - a = [] - tgt = [] - target_phrases = [] # alignment seen from target - v.each { |i| - a << after["source"][i] - target_phrases << alignment[i].first if alignment[i] - } - target_phrases.sort! if !dontsort - target_phrases.each { |j| - tgt << after["target"][j] - } - - return a, tgt, target_phrases -end - - -# k is a rule id in after['rules_by_span_id'] -srg2idx.each_pair { |k,v| - a, tgt, target_phrases = get_target_phrases_for_source_span before, after, alignment, v - rule_before = before['rules_by_span_id'][k.to_s] - src_side_before = splitpipe(rule_before)[1] - x = src_side_before.split - a.first.insert(0, " [X] ") if x[0] == "[X]" - a[a.size-1] += " [X] " if x[x.size-1] == "[X]" - puts rule_before - puts "#{k} #{a.join " [X] "}" - puts tgt.to_s - puts before["span_info"][k.to_s].to_s - puts "target phrases #{target_phrases}" - s = "" - target_phrases.uniq.each { |j| s += after["target"][j]+" " } - puts "S: #{s}" - puts "nothing to do" if before["span_info"][k.to_s][1].size==0 - target_phrase_sub = [] - before["span_info"][k.to_s][1].each { |subspan| - puts subspan.to_s - subid = before["span2id"][subspan.to_s] - puts "subid #{subid}" - puts "XXX #{srg2idx[subid]}" - _, _, tp = get_target_phrases_for_source_span before, after, alignment, srg2idx[subid], true - target_phrase_sub << tp - } - puts "targ ph sub #{target_phrase_sub.to_s}" - puts "---" - puts -} - diff --git a/derivation_to_json/rules.rb b/derivation_to_json/rules.rb deleted file mode 100755 index b0d267b..0000000 --- a/derivation_to_json/rules.rb +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' - -src = ['Synergistische', 'pharmazeutische Zusammensetzung enthaltend', 'ein Peptid', 'mit 2 bis 5', 'Aminosaeuren'] -target = ["A", "synergistic", "pharmaceutical composition containing", "a peptide", "with 2 to 5", "amino acis"] -align = [[1], [2], [0,3], [4], [5]] - - -def single_nt a - r = [] - r << a - max_sz = a.size-2 - if max_sz<0 - return r - end - a.each_index { |i| - b = Array.new a - b[i] = "[X]" - r << b - c = Array.new b - (1).upto(a.size-(i+1)) { |k| - c = Array.new c - c.delete_at(i+1) - break if c.size<2 - r << c - } - } - - return r -end - -src.each_with_index { |i,j| - src[j..src.size-1].each_with_index { |k,l| - sub = src[j..(j+l)] - r = single_nt sub - r.each { |i| - puts i.to_s - } - } -} - |