summary | refs | log | tree | commit | diff
path: root/decoder/ff_csplit.cc
diff options
context:
space:
mode:
author: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2014-01-18 21:57:32 -0500
committer: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2014-01-18 21:57:32 -0500
commit: 3c1e7365d8708846dfffd598158571d4c4abd097 (patch)
tree: bb1865a9e47f82ec5cef353e9ccce5043a255866 /decoder/ff_csplit.cc
parent: 1bc1a92c0f72fe4266182f9cb467b75e670a1dac (diff)
new tuning of crf compound splitter for wmt14
Diffstat (limited to 'decoder/ff_csplit.cc')
-rw-r--r-- decoder/ff_csplit.cc 9
1 files changed, 6 insertions, 3 deletions
diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc
index 33b6cea8..a0e538d3 100644
--- a/decoder/ff_csplit.cc
+++ b/decoder/ff_csplit.cc
@@ -37,7 +37,8 @@ struct BasicCSplitFeaturesImpl {
short_range_(FD::Convert("ShortRange")),
high_freq_(FD::Convert("HighFreq")),
med_freq_(FD::Convert("MedFreq")),
- freq_(FD::Convert("Freq")),
+ logfreq_(FD::Convert("LogFreq")),
+ loglogfreq_(FD::Convert("LogLogFreq")),
in_dict_full_word_(FD::Convert("InDictFullWord")),
fl1_(FD::Convert("FreqLen1")),
fl2_(FD::Convert("FreqLen2")),
@@ -86,7 +87,8 @@ struct BasicCSplitFeaturesImpl {
const int short_range_;
const int high_freq_;
const int med_freq_;
- const int freq_;
+ const int logfreq_;
+ const int loglogfreq_;
const int in_dict_full_word_;
const int fl1_;
const int fl2_;
@@ -139,7 +141,8 @@ void BasicCSplitFeaturesImpl::TraversalFeaturesImpl(
float freq = freq_dict_.LookUp(word);
if (freq) {
- features->set_value(freq_, freq);
+ features->set_value(logfreq_, freq);
+ features->set_value(loglogfreq_, log(freq) / log(1.69));
features->set_value(in_dict_, 1.0);
if (subword) features->set_value(in_dict_sub_word_, 1.0);
} else {