summaryrefslogtreecommitdiff
path: root/compound-split
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2011-01-17 12:49:06 -0500
committerChris Dyer <cdyer@cs.cmu.edu>2011-01-17 12:49:06 -0500
commitb49944ee0e5f347a936df244a7c354a867c79c93 (patch)
treea06616eb7b3a7aa44c69463fc599fd0e5c764cff /compound-split
parent0ae0a535ddca7d85894c48722fc90c488cc059cb (diff)
more german fixes
Diffstat (limited to 'compound-split')
-rw-r--r--compound-split/de/badlist.de.gzbin373 -> 391 bytes
-rw-r--r--compound-split/de/dev.in-ref10
-rw-r--r--compound-split/de/weights.trained40
3 files changed, 29 insertions, 21 deletions
diff --git a/compound-split/de/badlist.de.gz b/compound-split/de/badlist.de.gz
index d845f3b6..bda4fde9 100644
--- a/compound-split/de/badlist.de.gz
+++ b/compound-split/de/badlist.de.gz
Binary files differ
diff --git a/compound-split/de/dev.in-ref b/compound-split/de/dev.in-ref
index b4b91f77..ab6af9dd 100644
--- a/compound-split/de/dev.in-ref
+++ b/compound-split/de/dev.in-ref
@@ -725,7 +725,7 @@ vergiftet ||| # vergiftet
kuklina ||| # kuklina
trägerin ||| # trägerin
alternativen ||| # alternativen
-nobelpreises ||| ((('#',0,1),),(('nobel',0,1),),(('preises',0,1),),)
+nobelpreis ||| ((('#',0,1),),(('nobel',0,1),),(('preis',0,1),),)
kämpft ||| # kämpft
rechte ||| # rechte
soldaten ||| # soldaten
@@ -790,3 +790,11 @@ schwellenländer ||| ((('#',0,1),),(('schwellen',0,1),('schwelle',0,1),),(('län
brasilien ||| # brasilien
russland ||| # russland
indien ||| # indien
+frühstück ||| # frühstück
+fortschritt ||| # fortschritt
+frühstückstisch ||| ((('#',0,1),),(('frühstück',0,1),('frühstücks',0,1),),(('tisch',0,1),),)
+unserer ||| # unserer
+familie ||| # familie
+vielen ||| # vielen
+jahren ||| # jahren
+tageszeitung ||| ((('#',0,1),),(('tag',0,1),('tages',0,1),),(('zeitung',0,1),),)
diff --git a/compound-split/de/weights.trained b/compound-split/de/weights.trained
index f19cfb87..4ae8a8ce 100644
--- a/compound-split/de/weights.trained
+++ b/compound-split/de/weights.trained
@@ -1,20 +1,20 @@
-# Objective = 141.257 (eval count=260)
-LettersSq -0.043739909283617769
-LettersSqrt 0.1872289898773126
-RevCharLM 0.42554069360897689
-FugS 0.19456803361089897
-FugN -0.52139851618458022
-WordCount -0.15691017588076511
-InDict -0.5625646425495513
-InDictSubWord 0.93167610469172124
-Short 0.75149167149253815
-Long -0.73284751373263413
-OOV 0.40565446666620508
-OOVSubWord -0.69173632880670455
-ShortRange -1.1747803070666263
-HighFreq -3.6846138678893623
-MedFreq 0.043969281682716951
-Freq -0.2997699217323242
-Bad -2.9862583497002633
-FreqLen1 -0.35008877438893016
-FreqLen2 -0.15783550188513254
+# Objective = 141.249 (eval count=281)
+LettersSq -0.04232699523807458
+LettersSqrt 0.4355587430228624
+RevCharLM 0.41198831478844122
+FugS 0.075512682701211239
+FugN -0.61902217202456356
+WordCount -0.0082286209848003913
+InDict -0.98529136326577915
+InDictSubWord 1.0386001157542868
+Short 0.70242841302446457
+Long -0.69651861257390713
+OOV 0.97706274228074586
+OOVSubWord -0.76138571782502074
+ShortRange -1.1864424374105051
+HighFreq -4.1150415279961052
+MedFreq 0.014790338975451987
+Freq -0.28901069668114737
+Bad -3.8059407890457644
+FreqLen1 -0.3827361966178347
+FreqLen2 -0.17308899259418953