summary | refs | log | tree | commit | diff
path: root/decoder/ff_tagger.cc
diff options
context:
space:
mode:
author redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-10-27 13:55:23 +0000
committer redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-10-27 13:55:23 +0000
commit ecde8cb600b24c31b062f8f53d57641e3fa23379 (patch)
tree 71dbc397a8a6aac6209bff294623b3230223fcd2 /decoder/ff_tagger.cc
parent 4e66b377ebb4b73d470c0efc573f5bda773b2972 (diff)
factored lexicon
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@692 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/ff_tagger.cc')
-rw-r--r-- decoder/ff_tagger.cc 60
1 files changed, 36 insertions, 24 deletions
diff --git a/decoder/ff_tagger.cc b/decoder/ff_tagger.cc
index 05de8ba3..21d0f812 100644
--- a/decoder/ff_tagger.cc
+++ b/decoder/ff_tagger.cc
@@ -1,9 +1,10 @@
#include "ff_tagger.h"
+#include <sstream>
+
#include "tdict.h"
#include "sentence_metadata.h"
-
-#include <sstream>
+#include "stringlib.h"
using namespace std;
@@ -52,23 +53,36 @@ void Tagger_BigramIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta,
}
}
-LexicalPairIdentity::LexicalPairIdentity(const std::string& param) {}
+void LexicalPairIdentity::PrepareForInput(const SentenceMetadata& smeta) {  // per-input hook: only delegates to lexmap_
+ lexmap_->PrepareForInput(smeta);  // lexmap_ is the FactoredLexiconHelper set up in the constructor
+}
+
+LexicalPairIdentity::LexicalPairIdentity(const std::string& param) {  // parses the optional parameter string and creates lexmap_
+ name_ = "Id";  // default feature-name prefix; kept when param is empty
+ if (param.size()) {
+ // expected param format (whitespace-separated): <name> <corpus.src.txt> <trgmap.txt>
+ vector<string> params;
+ SplitOnWhitespace(param, &params);
+ if (params.size() != 3) {  // wrong number of fields: print usage plus the received string, then terminate
+ cerr << "LexicalPairIdentity takes 3 parameters: <name> <corpus.src.txt> <trgmap.txt>\n";
+ cerr << " * may be used for corpus.src.txt or trgmap.txt to use surface forms\n";
+ cerr << " Received: " << param << endl;
+ abort();
+ }
+ name_ = params[0];  // first field replaces the default "Id" prefix
+ lexmap_.reset(new FactoredLexiconHelper(params[1], params[2]));  // second and third fields go to the helper unchanged
+ } else {
+ lexmap_.reset(new FactoredLexiconHelper);  // no parameters: default-constructed helper
+ }
+}
void LexicalPairIdentity::FireFeature(WordID src,
- WordID trg,
- SparseVector<double>* features) const {
+ WordID trg,
+ SparseVector<double>* features) const {
int& fid = fmap_[src][trg];
if (!fid) {
- static map<WordID, WordID> escape;
- if (escape.empty()) {
- escape[TD::Convert("=")] = TD::Convert("__EQ");
- escape[TD::Convert(";")] = TD::Convert("__SC");
- escape[TD::Convert(",")] = TD::Convert("__CO");
- }
- if (escape.count(src)) src = escape[src];
- if (escape.count(trg)) trg = escape[trg];
ostringstream os;
- os << "Id:" << TD::Convert(src) << ':' << TD::Convert(trg);
+ os << name_ << ':' << TD::Convert(src) << ':' << TD::Convert(trg);
fid = FD::Convert(os.str());
}
features->set_value(fid, 1.0);
@@ -80,16 +94,14 @@ void LexicalPairIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
void* context) const {
- const vector<WordID>& ew = edge.rule_->e_;
- const vector<WordID>& fw = edge.rule_->f_;
- for (int i = 0; i < ew.size(); ++i) {
- const WordID& e = ew[i];
- if (e <= 0) continue;
- for (int j = 0; j < fw.size(); ++j) {
- const WordID& f = fw[j];
- if (f <= 0) continue;
- FireFeature(f, e, features);
- }
+ // inline WordID SourceWordAtPosition(const int i);
+ // inline WordID CoarsenedTargetWordForTarget(const WordID surface_target);
+ if (edge.Arity() == 0) {
+ const WordID src = lexmap_->SourceWordAtPosition(edge.i_);
+ const vector<WordID>& ew = edge.rule_->e_;
+ assert(ew.size() == 1);
+ const WordID trg = lexmap_->CoarsenedTargetWordForTarget(ew[0]);
+ FireFeature(src, trg, features);
}
}