diff options
| author | Patrick Simianer <p@simianer.de> | 2014-06-27 15:47:54 +0200 | 
|---|---|---|
| committer | Patrick Simianer <p@simianer.de> | 2014-06-27 15:47:54 +0200 | 
| commit | 9351b82d65713cc076c8097427f21f1c7ad4f5d2 (patch) | |
| tree | c744a034ad661e0b132f0c3a215baf8d403357ec | |
| parent | 7c19e97d2b1216daf95055291c8bbda9b5705dd6 (diff) | |
| parent | 9a9abc5f6e9b3f26daf5f276434c1fd7f0c83da2 (diff) | |
Merge remote-tracking branch 'upstream/master'
| -rw-r--r-- | decoder/tree2string_translator.cc | 30 | 
1 file changed, 30 insertions, 0 deletions
diff --git a/decoder/tree2string_translator.cc b/decoder/tree2string_translator.cc
index d61b9aba..c9c91a37 100644
--- a/decoder/tree2string_translator.cc
+++ b/decoder/tree2string_translator.cc
@@ -29,6 +29,7 @@ static void ReadTree2StringGrammar(istream* in, Tree2StringGrammarNode* root, bo
   int lc = 0;
   while(getline(*in, line)) {
     ++lc;
+    if (line.size() == 0 || line[0] == '#') continue;
     std::vector<StringPiece> fields = TokenizeMultisep(line, " ||| ");
     if (has_multiple_states && fields.size() != 4) {
       cerr << "Expected 4 fields in rule file but line " << lc << " is:\n" << line << endl;
@@ -178,6 +179,32 @@ struct Tree2StringTranslatorImpl {
     ReadTree2StringGrammar(rf.stream(), root.back().get(), has_multiple_states);
   }
 
+  // src must be fully abstract
+  bool DoesAbstractPassThroughRuleExist(unsigned state, const cdec::TreeFragment& src) const {
+    unsigned len = root.size();
+    if (len <= 1) return false;
+    --len;
+    for (unsigned i = 0; i < len; ++i) {
+      const Tree2StringGrammarNode* cur = &*root[i];
+      auto it = cur->next.find(state);
+      if (it == cur->next.end()) continue;
+      cur = &it->second;
+      bool failed = false;
+      vector<int> trg;
+      for (auto sym : src) {
+        it = cur->next.find(sym);
+        if (it == cur->next.end()) { failed = true; break; }
+        if (cdec::IsFrontier(sym)) trg.push_back(-trg.size());
+        cur = &it->second;
+      }
+      if (failed) continue;
+      // TODO check for destination states in t2t
+      for (auto r : cur->rules)
+        if (r->e_ == trg) return true;
+    }
+    return false;
+  }
+
   void CreatePassThroughRules(const cdec::TreeFragment& tree) {
     static const int kFIDlex = FD::Convert("PassThrough_Lexical");
     static const int kFIDabs = FD::Convert("PassThrough_Abstract");
@@ -231,6 +258,9 @@ struct Tree2StringTranslatorImpl {
       Tree2StringGrammarNode* cur = root.back().get();
       // do we need all transducer states here??? a list??? no pass through rules???
       unsigned transducer_state = 0;
+      const bool abstract_rule = (has_nt && !has_lex);
+      // the following reduces ambiguity quite a lot
+      if (abstract_rule && DoesAbstractPassThroughRuleExist(transducer_state, rule_src)) continue;
       cur = &cur->next[transducer_state];
       for (auto sym : rule_src)
         cur = &cur->next[sym];
