From b49944ee0e5f347a936df244a7c354a867c79c93 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 17 Jan 2011 12:49:06 -0500 Subject: more german fixes --- compound-split/de/badlist.de.gz | Bin 373 -> 391 bytes compound-split/de/dev.in-ref | 10 +++++++++- compound-split/de/weights.trained | 40 +++++++++++++++++++------------------- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/compound-split/de/badlist.de.gz b/compound-split/de/badlist.de.gz index d845f3b6..bda4fde9 100644 Binary files a/compound-split/de/badlist.de.gz and b/compound-split/de/badlist.de.gz differ diff --git a/compound-split/de/dev.in-ref b/compound-split/de/dev.in-ref index b4b91f77..ab6af9dd 100644 --- a/compound-split/de/dev.in-ref +++ b/compound-split/de/dev.in-ref @@ -725,7 +725,7 @@ vergiftet ||| # vergiftet kuklina ||| # kuklina trägerin ||| # trägerin alternativen ||| # alternativen -nobelpreises ||| ((('#',0,1),),(('nobel',0,1),),(('preises',0,1),),) +nobelpreis ||| ((('#',0,1),),(('nobel',0,1),),(('preis',0,1),),) kämpft ||| # kämpft rechte ||| # rechte soldaten ||| # soldaten @@ -790,3 +790,11 @@ schwellenländer ||| ((('#',0,1),),(('schwellen',0,1),('schwelle',0,1),),(('län brasilien ||| # brasilien russland ||| # russland indien ||| # indien +frühstück ||| # frühstück +fortschritt ||| # fortschritt +frühstückstisch ||| ((('#',0,1),),(('frühstück',0,1),('frühstücks',0,1),),(('tisch',0,1),),) +unserer ||| # unserer +familie ||| # familie +vielen ||| # vielen +jahren ||| # jahren +tageszeitung ||| ((('#',0,1),),(('tag',0,1),('tages',0,1),),(('zeitung',0,1),),) diff --git a/compound-split/de/weights.trained b/compound-split/de/weights.trained index f19cfb87..4ae8a8ce 100644 --- a/compound-split/de/weights.trained +++ b/compound-split/de/weights.trained @@ -1,20 +1,20 @@ -# Objective = 141.257 (eval count=260) -LettersSq -0.043739909283617769 -LettersSqrt 0.1872289898773126 -RevCharLM 0.42554069360897689 -FugS 0.19456803361089897 -FugN -0.52139851618458022 -WordCount -0.15691017588076511 -InDict -0.5625646425495513 -InDictSubWord 0.93167610469172124 -Short 0.75149167149253815 -Long -0.73284751373263413 -OOV 0.40565446666620508 -OOVSubWord -0.69173632880670455 -ShortRange -1.1747803070666263 -HighFreq -3.6846138678893623 -MedFreq 0.043969281682716951 -Freq -0.2997699217323242 -Bad -2.9862583497002633 -FreqLen1 -0.35008877438893016 -FreqLen2 -0.15783550188513254 +# Objective = 141.249 (eval count=281) +LettersSq -0.04232699523807458 +LettersSqrt 0.4355587430228624 +RevCharLM 0.41198831478844122 +FugS 0.075512682701211239 +FugN -0.61902217202456356 +WordCount -0.0082286209848003913 +InDict -0.98529136326577915 +InDictSubWord 1.0386001157542868 +Short 0.70242841302446457 +Long -0.69651861257390713 +OOV 0.97706274228074586 +OOVSubWord -0.76138571782502074 +ShortRange -1.1864424374105051 +HighFreq -4.1150415279961052 +MedFreq 0.014790338975451987 +Freq -0.28901069668114737 +Bad -3.8059407890457644 +FreqLen1 -0.3827361966178347 +FreqLen2 -0.17308899259418953 -- cgit v1.2.3