From c09150011c4d00ee4af82d504dc89f4eb92b670e Mon Sep 17 00:00:00 2001 From: redpony Date: Tue, 13 Jul 2010 19:12:40 +0000 Subject: singleton features git-svn-id: https://ws10smt.googlecode.com/svn/trunk@237 ec762483-ff6d-05da-a07a-a48fb63a330f --- extools/featurize_grammar.cc | 27 ++++++++++++++++++++------- gi/pipeline/evaluation-pipeline.pl | 4 +++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/extools/featurize_grammar.cc b/extools/featurize_grammar.cc index 9a4af4d8..cb80a79f 100644 --- a/extools/featurize_grammar.cc +++ b/extools/featurize_grammar.cc @@ -332,6 +332,8 @@ struct XFeatures: public FeatureExtractor { fid_xef(FD::Convert("XEF")), fid_labelledfe(FD::Convert("LabelledFE")), fid_labelledef(FD::Convert("LabelledEF")), + fid_xesingleton(FD::Convert("XE_Singleton")), + fid_xfsingleton(FD::Convert("XF_Singleton")), kCFE(FD::Convert("CFE")) {} virtual void ObserveFilteredRule(const WordID /*lhs*/, const vector& src, @@ -349,9 +351,11 @@ struct XFeatures: public FeatureExtractor { const RuleStatistics& info) { RuleTuple r(-1, src, trg); map_rule(r); - rule_counts.inc_if_exists(r, info.counts.value(kCFE)); - source_counts.inc_if_exists(r.source(), info.counts.value(kCFE)); - target_counts.inc_if_exists(r.target(), info.counts.value(kCFE)); + const int count = info.counts.value(kCFE); + assert(count > 0); + rule_counts.inc_if_exists(r, count); + source_counts.inc_if_exists(r.source(), count); + target_counts.inc_if_exists(r.target(), count); } virtual void ExtractFeatures(const WordID /*lhs*/, @@ -363,11 +367,19 @@ struct XFeatures: public FeatureExtractor { map_rule(r); double l_r_freq = log(rule_counts(r)); - result->set_value(fid_xfe, log(target_counts(r.target())) - l_r_freq); - result->set_value(fid_labelledfe, log(target_counts(r.target())) - log(info.counts.value(kCFE))); + const int t_c = target_counts(r.target()); + assert(t_c > 0); + result->set_value(fid_xfe, log(t_c) - l_r_freq); + result->set_value(fid_labelledfe, log(t_c) - log(info.counts.value(kCFE))); + if (t_c == 1) + result->set_value(fid_xesingleton, 1.0); - result->set_value(fid_xef, log(source_counts(r.source())) - l_r_freq); - result->set_value(fid_labelledef, log(source_counts(r.source())) - log(info.counts.value(kCFE))); + const int s_c = source_counts(r.source()); + assert(s_c > 0); + result->set_value(fid_xef, log(s_c) - l_r_freq); + result->set_value(fid_labelledef, log(s_c) - log(info.counts.value(kCFE))); + if (s_c == 1) + result->set_value(fid_xfsingleton, 1.0); } void map_rule(RuleTuple& r) const { @@ -384,6 +396,7 @@ struct XFeatures: public FeatureExtractor { const int fid_xfe, fid_xef; const int fid_labelledfe, fid_labelledef; + const int fid_xesingleton, fid_xfsingleton; const int kCFE; RuleFreqCount rule_counts; FreqCount< vector > source_counts, target_counts; diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl index c0cd9a69..37863df3 100755 --- a/gi/pipeline/evaluation-pipeline.pl +++ b/gi/pipeline/evaluation-pipeline.pl @@ -11,7 +11,7 @@ my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR my $feat_map = { "LogRuleCount" => [ "LogRuleCount", "SingletonRule" ] , # "XFeatures" => [ "XFE","XEF" ] , - "XFeatures" => [ "XFE","XEF","LabelledEF","LabelledFE"] , + "XFeatures" => [ "XFE","XEF","LabelledEF","LabelledFE","XE_Singleton","XF_Singleton"] , "LabelledRuleConditionals" => [ "LabelledFE","LabelledEF" ] , "LexProb" => [ "LexE2F", "LexF2E" ] , "BackoffRule" => [ "BackoffRule" ] , @@ -35,6 +35,8 @@ my %init_weights = qw( BackoffRule 0.5 XFE -0.256706 XEF -0.256706 + XF_Singleton -0.05 + XE_Singleton -0.8 LabelledFE -0.256706 LabelledEF -0.256706 PassThrough -0.9304905 -- cgit v1.2.3