diff options
Diffstat (limited to 'decoder')
| -rw-r--r-- | decoder/ff_spans.cc | 39 | 
1 files changed, 25 insertions, 14 deletions
diff --git a/decoder/ff_spans.cc b/decoder/ff_spans.cc index e1da088d..bc23974d 100644 --- a/decoder/ff_spans.cc +++ b/decoder/ff_spans.cc @@ -13,6 +13,17 @@  using namespace std; +namespace { +  string Escape(const string& x) { +    string y = x; +    for (int i = 0; i < y.size(); ++i) { +      if (y[i] == '=') y[i]='_'; +      if (y[i] == ';') y[i]='_'; +    } +    return y; +  } +} +  // log transform to make long spans cluster together  // but preserve differences  int SpanSizeTransform(unsigned span_size) { @@ -140,19 +151,19 @@ void SpanFeatures::PrepareForInput(const SentenceMetadata& smeta) {      word = MapIfNecessary(word);      ostringstream sfid;      sfid << "ES:" << TD::Convert(word); -    end_span_ids_[i] = FD::Convert(sfid.str()); +    end_span_ids_[i] = FD::Convert(Escape(sfid.str()));      ostringstream esbiid;      esbiid << "EBI:" << TD::Convert(bword) << "_" << TD::Convert(word); -    end_bigram_ids_[i] = FD::Convert(esbiid.str()); +    end_bigram_ids_[i] = FD::Convert(Escape(esbiid.str()));      ostringstream bsbiid;      bsbiid << "BBI:" << TD::Convert(bword) << "_" << TD::Convert(word); -    beg_bigram_ids_[i] = FD::Convert(bsbiid.str()); +    beg_bigram_ids_[i] = FD::Convert(Escape(bsbiid.str()));      ostringstream bfid;      bfid << "BS:" << TD::Convert(bword); -    beg_span_ids_[i] = FD::Convert(bfid.str()); +    beg_span_ids_[i] = FD::Convert(Escape(bfid.str()));      if (use_collapsed_features_) { -      end_span_vals_[i] = feat2val_[sfid.str()] + feat2val_[esbiid.str()]; -      beg_span_vals_[i] = feat2val_[bfid.str()] + feat2val_[bsbiid.str()]; +      end_span_vals_[i] = feat2val_[Escape(sfid.str())] + feat2val_[Escape(esbiid.str())]; +      beg_span_vals_[i] = feat2val_[Escape(bfid.str())] + feat2val_[Escape(bsbiid.str())];      }    }    for (int i = 0; i <= lattice.size(); ++i) { @@ -167,16 +178,16 @@ void SpanFeatures::PrepareForInput(const SentenceMetadata& smeta) {        word = MapIfNecessary(word);        ostringstream pf;        pf << "S:" << TD::Convert(bword) << "_" << TD::Convert(word); -      span_feats_(i,j).first = FD::Convert(pf.str()); -      span_feats_(i,j).second = FD::Convert("S_" + pf.str()); +      span_feats_(i,j).first = FD::Convert(Escape(pf.str())); +      span_feats_(i,j).second = FD::Convert(Escape("S_" + pf.str()));        ostringstream lf;        const unsigned span_size = (i < j ? j - i : i - j);        lf << "LS:" << SpanSizeTransform(span_size) << "_" << TD::Convert(bword) << "_" << TD::Convert(word); -      len_span_feats_(i,j).first = FD::Convert(lf.str()); -      len_span_feats_(i,j).second = FD::Convert("S_" + lf.str()); +      len_span_feats_(i,j).first = FD::Convert(Escape(lf.str())); +      len_span_feats_(i,j).second = FD::Convert(Escape("S_" + lf.str()));        if (use_collapsed_features_) { -        span_vals_(i,j).first = feat2val_[pf.str()] + feat2val_[lf.str()]; -        span_vals_(i,j).second = feat2val_["S_" + pf.str()] + feat2val_["S_" + lf.str()]; +        span_vals_(i,j).first = feat2val_[Escape(pf.str())] + feat2val_[Escape(lf.str())]; +        span_vals_(i,j).second = feat2val_[Escape("S_" + pf.str())] + feat2val_[Escape("S_" + lf.str())];        }      }    }  @@ -209,14 +220,14 @@ void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,        const string& cur = TD::Convert(w);        ostringstream os;        os << "RB:" << prev << '_' << cur; -      const int fid = FD::Convert(os.str()); +      const int fid = FD::Convert(Escape(os.str()));        if (fid <= 0) return;        f.add_value(fid, 1.0);        prev = cur;      }      ostringstream os;      os << "RB:" << prev << '_' << "</r>"; -    f.set_value(FD::Convert(os.str()), 1.0); +    f.set_value(FD::Convert(Escape(os.str())), 1.0);    }    (*features) += it->second;  }  | 
