diff options
author | Chris Dyer <redpony@gmail.com> | 2014-06-13 00:25:00 -0400 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2014-06-13 00:25:00 -0400 |
commit | d4b7953245ede0ee7484136d33c58464c066475e (patch) | |
tree | dafbcae2b8b8adb762c7f2511592f5ab6b838e48 /decoder | |
parent | 095998d0fb627808f546388491891d0ca0154787 (diff) |
stringpiece based parser for tree-to-string rules (foundation for t2t rules)
Diffstat (limited to 'decoder')
-rw-r--r-- | decoder/tree2string_translator.cc | 34 |
1 file changed, 21 insertions, 13 deletions
diff --git a/decoder/tree2string_translator.cc b/decoder/tree2string_translator.cc index b5b47d5d..d61b9aba 100644 --- a/decoder/tree2string_translator.cc +++ b/decoder/tree2string_translator.cc @@ -26,14 +26,21 @@ struct Tree2StringGrammarNode { // use a lexer probably static void ReadTree2StringGrammar(istream* in, Tree2StringGrammarNode* root, bool has_multiple_states) { string line; + int lc = 0; while(getline(*in, line)) { - size_t pos = line.find("|||"); - assert(pos != string::npos); - assert(pos > 3); - unsigned xc = 0; - while (line[pos - 1] == ' ') { --pos; xc++; } - cdec::TreeFragment rule_src(line.substr(0, pos), true); - // TODO transducer_state should (optionally?) be read from input + ++lc; + std::vector<StringPiece> fields = TokenizeMultisep(line, " ||| "); + if (has_multiple_states && fields.size() != 4) { + cerr << "Expected 4 fields in rule file but line " << lc << " is:\n" << line << endl; + abort(); + } + if (!has_multiple_states && fields.size() != 3) { + cerr << "Expected 3 fields in rule file but line " << lc << " is:\n" << line << endl; + abort(); + } + + cdec::TreeFragment rule_src(fields[has_multiple_states ? 
1 : 0], true); + // TODO transducer_state should be read from input const unsigned transducer_state = 0; Tree2StringGrammarNode* cur = &root->next[transducer_state]; ostringstream os; @@ -59,12 +66,13 @@ static void ReadTree2StringGrammar(istream* in, Tree2StringGrammarNode* root, bo else os << TD::Convert(x); } - pos += 3 + xc; - while(line[pos] == ' ') { ++pos; } - os << " ||| " << line.substr(pos); - TRulePtr rule(new TRule(os.str())); - // TODO the transducer_state you end up in after using this rule (for each NT) - // needs to be read and encoded somehow in the rule (for use XXX) + TRulePtr rule; + if (has_multiple_states) { + cerr << "Not implemented...\n"; abort(); // TODO read in states + } else { + os << " ||| " << fields[1] << " ||| " << fields[2]; + rule.reset(new TRule(os.str())); + } cur->rules.push_back(rule); //cerr << "RULE: " << rule->AsString() << "\n\n"; } |