summary refs log tree commit diff
path: root/decoder/tree2string_translator.cc
diff options
context:
space:
mode:
author Chris Dyer <redpony@gmail.com> 2014-06-15 03:16:37 -0400
committer Chris Dyer <redpony@gmail.com> 2014-06-15 03:16:37 -0400
commit 284c2a9009d60e787566eec1cdebfdde248aa0c5 (patch)
tree a879f7a752fb4e522a2b9bffd467aba299b4885c /decoder/tree2string_translator.cc
parent b4ce7c0b51d8615abf84c022ec3a981bee3277fe (diff)
reduce ambiguity due to pass through rules in t2s translation
Diffstat (limited to 'decoder/tree2string_translator.cc')
-rw-r--r-- decoder/tree2string_translator.cc 30
1 files changed, 30 insertions, 0 deletions
diff --git a/decoder/tree2string_translator.cc b/decoder/tree2string_translator.cc
index d61b9aba..c9c91a37 100644
--- a/decoder/tree2string_translator.cc
+++ b/decoder/tree2string_translator.cc
@@ -29,6 +29,7 @@ static void ReadTree2StringGrammar(istream* in, Tree2StringGrammarNode* root, bo
int lc = 0;
while(getline(*in, line)) {
++lc;
+ if (line.size() == 0 || line[0] == '#') continue;
std::vector<StringPiece> fields = TokenizeMultisep(line, " ||| ");
if (has_multiple_states && fields.size() != 4) {
cerr << "Expected 4 fields in rule file but line " << lc << " is:\n" << line << endl;
@@ -178,6 +179,32 @@ struct Tree2StringTranslatorImpl {
ReadTree2StringGrammar(rf.stream(), root.back().get(), has_multiple_states);
}
+ // src must be fully abstract. Returns true when some entry of root other than the last holds, under `state`, a rule reached by following src's symbols whose e_ equals the frontier-variable sequence 0, -1, -2, ...
+ bool DoesAbstractPassThroughRuleExist(unsigned state, const cdec::TreeFragment& src) const {
+ unsigned len = root.size();  // number of grammar tries held in root
+ if (len <= 1) return false;  // with at most one entry there is nothing left to scan once the last is excluded
+ --len;  // exclude root.back() from the scan (presumably the trie that receives the pass-through rules; confirm against caller)
+ for (unsigned i = 0; i < len; ++i) {
+ const Tree2StringGrammarNode* cur = &*root[i];
+ auto it = cur->next.find(state);  // the first edge out of a trie root is keyed by the transducer state
+ if (it == cur->next.end()) continue;  // this trie has nothing under `state`
+ cur = &it->second;
+ bool failed = false;
+ vector<int> trg;  // target side to look for, built while walking src
+ for (auto sym : src) {
+ it = cur->next.find(sym);
+ if (it == cur->next.end()) { failed = true; break; }  // src is not a path in this trie
+ if (cdec::IsFrontier(sym)) trg.push_back(-trg.size());  // appends 0, -1, -2, ... one per frontier symbol, in source order
+ cur = &it->second;
+ }
+ if (failed) continue;
+ // TODO check for destination states in t2t
+ for (auto r : cur->rules)  // rules stored at the node reached by the full src path
+ if (r->e_ == trg) return true;  // a rule with exactly this target side already exists
+ }
+ return false;
+ }
+
void CreatePassThroughRules(const cdec::TreeFragment& tree) {
static const int kFIDlex = FD::Convert("PassThrough_Lexical");
static const int kFIDabs = FD::Convert("PassThrough_Abstract");
@@ -231,6 +258,9 @@ struct Tree2StringTranslatorImpl {
Tree2StringGrammarNode* cur = root.back().get();
// do we need all transducer states here??? a list??? no pass through rules???
unsigned transducer_state = 0;
+ const bool abstract_rule = (has_nt && !has_lex);
+ // the following reduces ambiguity quite a lot
+ if (abstract_rule && DoesAbstractPassThroughRuleExist(transducer_state, rule_src)) continue;
cur = &cur->next[transducer_state];
for (auto sym : rule_src)
cur = &cur->next[sym];