diff options
| author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-09-29 20:45:48 +0000 | 
|---|---|---|
| committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-09-29 20:45:48 +0000 | 
| commit | f412aaab3d10fb82b20a2190f2cb1424959c599a (patch) | |
| tree | 1942e2a05777694cc81724f3206c8972813b4224 /decoder/ff_wordalign.cc | |
| parent | 7f56dd65ee706683444b012d0afcfff3e376bfff (diff) | |
another feature, another POS
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@664 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/ff_wordalign.cc')
| -rw-r--r-- | decoder/ff_wordalign.cc | 66 | 
1 files changed, 66 insertions, 0 deletions
diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc index a1968159..da86b714 100644 --- a/decoder/ff_wordalign.cc +++ b/decoder/ff_wordalign.cc @@ -266,6 +266,72 @@ void MarkovJump::TraversalFeaturesImpl(const SentenceMetadata& smeta,    }  } +// state: src word used, number of trg words generated +SourceBigram::SourceBigram(const std::string& param) : +    FeatureFunction(sizeof(WordID) + sizeof(int)) { +} + +void SourceBigram::FinalTraversalFeatures(const void* context, +                                      SparseVector<double>* features) const { +  WordID left = *static_cast<const WordID*>(context); +  int left_wc = *(static_cast<const int*>(context) + 1); +  if (left_wc == 1) +    FireFeature(-1, left, features); +  FireFeature(left, -1, features); +} + +void SourceBigram::FireFeature(WordID left, +                   WordID right, +                   SparseVector<double>* features) const { +  int& fid = fmap_[left][right]; +  // TODO important important !!! escape strings !!! +  if (!fid) { +    ostringstream os; +    os << "SB:"; +    if (left < 0) { os << "BOS"; } else { os << TD::Convert(left); } +    os << '_'; +    if (right < 0) { os << "EOS"; } else { os << TD::Convert(right); } +    fid = FD::Convert(os.str()); +    if (fid == 0) fid = -1; +  } +  if (fid > 0) features->set_value(fid, 1.0); +  int& ufid = ufmap_[left]; +  if (!ufid) { +    ostringstream os; +    os << "SU:"; +    if (left < 0) { os << "BOS"; } else { os << TD::Convert(left); } +    ufid = FD::Convert(os.str()); +    if (ufid == 0) fid = -1; +  } +  if (ufid > 0) features->set_value(ufid, 1.0); +} + +void SourceBigram::TraversalFeaturesImpl(const SentenceMetadata& smeta, +                                     const Hypergraph::Edge& edge, +                                     const std::vector<const void*>& ant_contexts, +                                     SparseVector<double>* features, +                                            SparseVector<double>* /* estimated_features */, +                                     void* context) const { +  WordID& out_context = *static_cast<WordID*>(context); +  int& out_word_count = *(static_cast<int*>(context) + 1); +  const int arity = edge.Arity(); +  if (arity == 0) { +    out_context = edge.rule_->f()[0]; +    out_word_count = edge.rule_->EWords(); +    assert(out_word_count == 1); // this is only defined for lex translation! +    // revisit this if you want to translate into null words +  } else if (arity == 2) { +    WordID left = *static_cast<const WordID*>(ant_contexts[0]); +    WordID right = *static_cast<const WordID*>(ant_contexts[1]); +    int left_wc = *(static_cast<const int*>(ant_contexts[0]) + 1); +    int right_wc = *(static_cast<const int*>(ant_contexts[0]) + 1); +    if (left_wc == 1 && right_wc == 1) +      FireFeature(-1, left, features); +    FireFeature(left, right, features); +    out_word_count = left_wc + right_wc; +    out_context = right; +  } +}  // state: POS of src word used, number of trg words generated  SourcePOSBigram::SourcePOSBigram(const std::string& param) :      FeatureFunction(sizeof(WordID) + sizeof(int)) {  | 
