diff options
| author | Patrick Simianer <p@simianer.de> | 2014-06-12 13:56:42 +0200 | 
|---|---|---|
| committer | Patrick Simianer <p@simianer.de> | 2014-06-12 13:56:42 +0200 | 
| commit | 244971287003d079e46193b8a209c28955f90134 (patch) | |
| tree | 8beaae6b12b913acb213fc7f2415fd63886192f9 /decoder/trule.cc | |
| parent | 5250fd67a4b8f242068cff87f0a6a4211f8b0fcf (diff) | |
| parent | b66e838ed52decc0be1eb5817b2a77c3840db2c5 (diff) | |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'decoder/trule.cc')
| -rw-r--r-- | decoder/trule.cc | 204 | 
1 files changed, 21 insertions, 183 deletions
| diff --git a/decoder/trule.cc b/decoder/trule.cc index c22baae3..bee211d5 100644 --- a/decoder/trule.cc +++ b/decoder/trule.cc @@ -17,73 +17,16 @@ bool TRule::IsGoal() const {    return GetLHS() == kGOAL;  } -static WordID ConvertTrgString(const string& w) { -  const unsigned len = w.size(); -  WordID id = 0; -  // [X,0] or [0] -  // for target rules, we ignore the category, just keep the index -  if (len > 2 && w[0]=='[' && w[len-1]==']' && w[len-2] > '0' && w[len-2] <= '9' && -      (len == 3 || (len > 4 && w[len-3] == ','))) { -    id = w[len-2] - '0'; -    id = 1 - id; -  } else { -    id = TD::Convert(w); -  } -  return id; -} - -static WordID ConvertSrcString(const string& w, bool mono = false) { -  const unsigned len = w.size(); -  // [X,0] -  // for source rules, we keep the category and ignore the index (source rules are -  // always numbered 1, 2, 3... -  if (mono) { -    if (len > 2 && w[0]=='[' && w[len-1]==']') { -      if (len > 4 && w[len-3] == ',') { -        cerr << "[ERROR] Monolingual rules mut not have non-terminal indices:\n  " -             << w << endl; -        exit(1); -      } -      // TODO check that source indices go 1,2,3,etc. -      return TD::Convert(w.substr(1, len-2)) * -1; -    } else { -      return TD::Convert(w); -    } -  } else { -    if (len > 4 && w[0]=='[' && w[len-1]==']' && w[len-3] == ',' && w[len-2] > '0' && w[len-2] <= '9') { -      return TD::Convert(w.substr(1, len-4)) * -1; -    } else { -      return TD::Convert(w); -    } -  } -} - -static WordID ConvertLHS(const string& w) { -  if (w[0] == '[') { -    const unsigned len = w.size(); -    if (len < 3) { cerr << "Format error: " << w << endl; exit(1); } -    return TD::Convert(w.substr(1, len-2)) * -1; -  } else { -    return TD::Convert(w) * -1; -  } -} -  TRule* TRule::CreateRuleSynchronous(const string& rule) {    TRule* res = new TRule; -  if (res->ReadFromString(rule, true, false)) return res; +  if (res->ReadFromString(rule)) return res;    cerr << "[ERROR] Failed to creating rule from: " << rule << endl;    delete res;    return NULL;  }  TRule* TRule::CreateRulePhrasetable(const string& rule) { -  // TODO make this faster -  // TODO add configuration for default NT type -  if (rule[0] == '[') { -    cerr << "Phrasetable rules shouldn't have a LHS / non-terminals:\n  " << rule << endl; -    return NULL; -  } -  TRule* res = new TRule("[X] ||| " + rule, true, false); +  TRule* res = new TRule("[X] ||| " + rule);    if (res->Arity() != 0) {      cerr << "Phrasetable rules should have arity 0:\n  " << rule << endl;      delete res; @@ -93,138 +36,33 @@ TRule* TRule::CreateRulePhrasetable(const string& rule) {  }  TRule* TRule::CreateRuleMonolingual(const string& rule) { -  return new TRule(rule, false, true); +  return new TRule(rule, true);  }  namespace { -// callback for lexer +// callback for single rule lexer  int n_assigned=0; -void assign_trule(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra) { -  (void) ctf_level; -  (void) coarse_rule; -  TRule *assignto=(TRule *)extra; -  *assignto=*new_rule; -  ++n_assigned; -} - -} - -bool TRule::ReadFromString(const string& line, bool strict, bool mono) { -  if (!is_single_line_stripped(line)) -    cerr<<"\nWARNING: building rule from multi-line string "<<line<<".\n"; -  // backed off of this: it's failing to parse TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] ["+ default_nt + ",2] ||| [1] [2] ||| Glue=1")); thinks [1] is the features! -  if (false && !(mono||strict)) { -    // use lexer -    istringstream il(line); -    n_assigned=0; -    RuleLexer::ReadRules(&il,assign_trule,"STRING",this); -    if (n_assigned>1) -      cerr<<"\nWARNING: more than one rule parsed from multi-line string; kept last: "<<line<<".\n"; -    return n_assigned; +  void assign_trule(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra) { +    (void) ctf_level; +    (void) coarse_rule; +    *static_cast<TRule*>(extra) = *new_rule; +    ++n_assigned;    } +} -  e_.clear(); -  f_.clear(); -  scores_.clear(); - -  string w; -  istringstream is(line); -  int format = CountSubstrings(line, "|||"); -  if (strict && format < 2) { -    cerr << "Bad rule format in strict mode:\n" << line << endl; -    return false; -  } -  if (format >= 2 || (mono && format == 1)) { -    while(is>>w && w!="|||") { lhs_ = ConvertLHS(w); } -    while(is>>w && w!="|||") { f_.push_back(ConvertSrcString(w, mono)); } -    if (!mono) { -      while(is>>w && w!="|||") { e_.push_back(ConvertTrgString(w)); } -    } -    int fv = 0; -    if (is) { -      string ss; -      getline(is, ss); -      //cerr << "L: " << ss << endl; -      unsigned start = 0; -      unsigned len = ss.size(); -      const size_t ppos = ss.find(" |||"); -      if (ppos != string::npos) { len = ppos; } -      while (start < len) { -        while(start < len && (ss[start] == ' ' || ss[start] == ';')) -          ++start; -        if (start == len) break; -        unsigned end = start + 1; -        while(end < len && (ss[end] != '=' && ss[end] != ' ' && ss[end] != ';')) -          ++end; -        if (end == len || ss[end] == ' ' || ss[end] == ';') { -          //cerr << "PROC: '" << ss.substr(start, end - start) << "'\n"; -          // non-named features -          if (end != len) { ss[end] = 0; } -          string fname = "PhraseModel_X"; -          if (fv > 9) { cerr << "Too many phrasetable scores - used named format\n"; abort(); } -          fname[12]='0' + fv; -          ++fv; -          // if the feature set is frozen, this may return zero, indicating an -          // undefined feature -          const int fid = FD::Convert(fname); -          if (fid) -            scores_.set_value(fid, atof(&ss[start])); -          //cerr << "F: " << fname << " VAL=" << scores_.value(FD::Convert(fname)) << endl; -        } else { -          const int fid = FD::Convert(ss.substr(start, end - start)); -          start = end + 1; -          end = start + 1; -          while(end < len && (ss[end] != ' ' && ss[end] != ';')) -            ++end; -          if (end < len) { ss[end] = 0; } -	  assert(start < len); -          if (fid) -            scores_.set_value(fid, atof(&ss[start])); -          //cerr << "F: " << FD::Convert(fid) << " VAL=" << scores_.value(fid) << endl; -        } -        start = end + 1; -      } -    } -  } else if (format == 1) { -    while(is>>w && w!="|||") { lhs_ = ConvertLHS(w); } -    while(is>>w && w!="|||") { e_.push_back(ConvertTrgString(w)); } -    f_ = e_; -    int x = ConvertLHS("[X]"); -    for (unsigned i = 0; i < f_.size(); ++i) -      if (f_[i] <= 0) { f_[i] = x; } -  } else { -    cerr << "F: " << format << endl; -    cerr << "[ERROR] Don't know how to read:\n" << line << endl; -  } +bool TRule::ReadFromString(const string& line, bool mono) { +  n_assigned = 0; +  //cerr << "LINE: " << line << "  -- mono=" << mono << endl; +  RuleLexer::ReadRule(line + '\n', assign_trule, mono, this); +  if (n_assigned > 1) +    cerr<<"\nWARNING: more than one rule parsed from multi-line string; kept last: "<<line<<".\n";    if (mono) {      e_ = f_; -    int ci = 0; -    for (unsigned i = 0; i < e_.size(); ++i) -      if (e_[i] < 0) -        e_[i] = ci--; -  } -  ComputeArity(); -  return SanityCheck(); -} - -bool TRule::SanityCheck() const { -  vector<int> used(f_.size(), 0); -  int ac = 0; -  for (unsigned i = 0; i < e_.size(); ++i) { -    int ind = e_[i]; -    if (ind > 0) continue; -    ind = -ind; -    if ((++used[ind]) != 1) { -      cerr << "[ERROR] e-side variable index " << (ind+1) << " used more than once!\n"; -      return false; -    } -    ac++; -  } -  if (ac != Arity()) { -    cerr << "[ERROR] e-side arity mismatches f-side\n"; -    return false; +    int ntc = 0; +    for (auto& i : e_) +      if (i < 0) i = -ntc++;    } -  return true; +  return n_assigned;  }  void TRule::ComputeArity() { @@ -245,7 +83,7 @@ string TRule::AsString(bool verbose) const {      if (w < 0) {        int wi = w * -1;        ++idx; -      os << " [" << TD::Convert(wi) << ',' << idx << ']'; +      os << " [" << TD::Convert(wi) << ']';      } else {        os << ' ' << TD::Convert(w);      } | 
