summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Chris Dyer <cdyer@Chriss-MacBook-Air.local> 2013-05-06 22:18:08 -0400
committer: Chris Dyer <cdyer@Chriss-MacBook-Air.local> 2013-05-06 22:18:08 -0400
commit: 22f37b828e2e9d8fc2cb9a7f55081b5a327f709f (patch)
tree: 739229c251928833113040e536f1ddc3b6347a87
parent: 14ed53426726202813a8e82d706b44266f015fe1 (diff)
add passthrough length features
-rw-r--r-- decoder/scfg_translator.cc 7
1 files changed, 6 insertions, 1 deletions
diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc
index 3b43b586..6f0b003b 100644
--- a/decoder/scfg_translator.cc
+++ b/decoder/scfg_translator.cc
@@ -12,6 +12,7 @@
#include "grammar.h"
#include "bottom_up_parser.h"
#include "sentence_metadata.h"
+#include "stringlib.h"
#include "tdict.h"
#include "viterbi.h"
#include "verbose.h"
@@ -68,7 +69,11 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat,
const int j = alts[k].dist2next + i;
const string& src = TD::Convert(alts[k].label);
if (ss.count(alts[k].label) == 0) {
- TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1"));
+ int length = static_cast<int>(log(UTF8StringLen(src)) / log(1.6)) + 1;
+ if (length > 6) length = 6;
+ string len_feat = "PassThrough_0=1";
+ len_feat[12] += length;
+ TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat));
pt->a_.push_back(AlignmentPoint(0,0));
AddRule(pt);
RefineRule(pt, ctf_level);