From b7ea2615bc9bb69031ff714ddce1539c9f1bda2d Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 1 May 2013 17:09:20 -0400 Subject: fix wu ke's unique k-best extraction bug --- decoder/kbest.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'decoder') diff --git a/decoder/kbest.h b/decoder/kbest.h index 9a55f653..44c23151 100644 --- a/decoder/kbest.h +++ b/decoder/kbest.h @@ -6,6 +6,7 @@ #include #include +#include #include "wordid.h" #include "hg.h" @@ -134,7 +135,7 @@ namespace KBest { } add_next = false; - if (cand.size() > 0) { + while (!add_next && cand.size() > 0) { std::pop_heap(cand.begin(), cand.end(), HeapCompare()); Derivation* d = cand.back(); cand.pop_back(); @@ -145,10 +146,15 @@ namespace KBest { if (!filter(d->yield)) { D.push_back(d); add_next = true; + } else { + // just because a node already derived a string (or whatever + // equivalent derivation class), you need to add its successors + // to the node's candidate pool + LazyNext(d, &cand, &s.ds); } - } else { - break; } + if (!add_next) + break; } if (k < D.size()) return D[k]; else return NULL; } @@ -184,7 +190,11 @@ namespace KBest { s.cand.push_back(d); } - const unsigned effective_k = std::min(k_prime, s.cand.size()); + unsigned effective_k = s.cand.size(); + if (boost::is_same >::value) { + // if there's no filter you can use this optimization + effective_k = std::min(k_prime, s.cand.size()); + } const typename CandidateHeap::iterator kth = s.cand.begin() + effective_k; std::nth_element(s.cand.begin(), kth, s.cand.end(), DerivationCompare()); s.cand.resize(effective_k); -- cgit v1.2.3 From 2e4b60f35c40af366be40fe7089f21a5c1ad8e71 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 6 May 2013 22:18:08 -0400 Subject: add passthrough length features --- decoder/scfg_translator.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'decoder') diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index 3b43b586..6f0b003b 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -12,6 +12,7 @@ #include "grammar.h" #include "bottom_up_parser.h" #include "sentence_metadata.h" +#include "stringlib.h" #include "tdict.h" #include "viterbi.h" #include "verbose.h" @@ -68,7 +69,11 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const int j = alts[k].dist2next + i; const string& src = TD::Convert(alts[k].label); if (ss.count(alts[k].label) == 0) { - TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1")); + int length = static_cast(log(UTF8StringLen(src)) / log(1.6)) + 1; + if (length > 6) length = 6; + string len_feat = "PassThrough_0=1"; + len_feat[12] += length; + TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat)); pt->a_.push_back(AlignmentPoint(0,0)); AddRule(pt); RefineRule(pt, ctf_level); -- cgit v1.2.3