summaryrefslogtreecommitdiff
path: root/src/phrasetable_fst.cc
diff options
context:
space:
mode:
authorChris Dyer <redpony@gmail.com>2009-12-14 20:35:11 -0500
committerChris Dyer <redpony@gmail.com>2009-12-14 20:35:11 -0500
commit851e389dffdd6996ea32d70defb8906de80b9edc (patch)
tree8c68ee77205badc056b8ab5b332e67e3e98017df /src/phrasetable_fst.cc
parentdc6930c00b4b276883280cff1ed6dcd9ddef03c7 (diff)
few small fixes of alignment tools, add new orthographic similarity feature for word aligner, final naming of directories, libraries in cdec
Diffstat (limited to 'src/phrasetable_fst.cc')
-rw-r--r--src/phrasetable_fst.cc141
1 files changed, 0 insertions, 141 deletions
diff --git a/src/phrasetable_fst.cc b/src/phrasetable_fst.cc
deleted file mode 100644
index f421e941..00000000
--- a/src/phrasetable_fst.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-#include "phrasetable_fst.h"
-
-#include <cassert>
-#include <iostream>
-#include <map>
-
-#include <boost/shared_ptr.hpp>
-
-#include "filelib.h"
-#include "tdict.h"
-
-using boost::shared_ptr;
-using namespace std;
-
-TargetPhraseSet::~TargetPhraseSet() {}
-FSTNode::~FSTNode() {}
-
-class TextTargetPhraseSet : public TargetPhraseSet {
- public:
- void AddRule(TRulePtr rule) {
- rules_.push_back(rule);
- }
- const vector<TRulePtr>& GetRules() const {
- return rules_;
- }
-
- private:
- // all rules must have arity 0
- vector<TRulePtr> rules_;
-};
-
-class TextFSTNode : public FSTNode {
- public:
- const TargetPhraseSet* GetTranslations() const { return data.get(); }
- bool HasData() const { return (bool)data; }
- bool HasOutgoingNonEpsilonEdges() const { return !ptr.empty(); }
- const FSTNode* Extend(const WordID& t) const {
- map<WordID, TextFSTNode>::const_iterator it = ptr.find(t);
- if (it == ptr.end()) return NULL;
- return &it->second;
- }
-
- void AddPhrase(const string& phrase);
-
- void AddPassThroughTranslation(const WordID& w, const SparseVector<double>& feats);
- void ClearPassThroughTranslations();
- private:
- vector<WordID> passthroughs;
- shared_ptr<TargetPhraseSet> data;
- map<WordID, TextFSTNode> ptr;
-};
-
-#ifdef DEBUG_CHART_PARSER
-static string TrimRule(const string& r) {
- size_t start = r.find(" |||") + 5;
- size_t end = r.rfind(" |||");
- return r.substr(start, end - start);
-}
-#endif
-
-void TextFSTNode::AddPhrase(const string& phrase) {
- vector<WordID> words;
- TRulePtr rule(TRule::CreateRulePhrasetable(phrase));
- if (!rule) {
- static int err = 0;
- ++err;
- if (err > 2) { cerr << "TOO MANY PHRASETABLE ERRORS\n"; exit(1); }
- return;
- }
-
- TextFSTNode* fsa = this;
- for (int i = 0; i < rule->FLength(); ++i)
- fsa = &fsa->ptr[rule->f_[i]];
-
- if (!fsa->data)
- fsa->data.reset(new TextTargetPhraseSet);
- static_cast<TextTargetPhraseSet*>(fsa->data.get())->AddRule(rule);
-}
-
-void TextFSTNode::AddPassThroughTranslation(const WordID& w, const SparseVector<double>& feats) {
- TextFSTNode* next = &ptr[w];
- // current, rules are only added if the symbol is completely missing as a
- // word starting the phrase. As a result, it is possible that some sentences
- // won't parse. If this becomes a problem, fix it here.
- if (!next->data) {
- TextTargetPhraseSet* tps = new TextTargetPhraseSet;
- next->data.reset(tps);
- TRule* rule = new TRule;
- rule->e_.resize(1, w);
- rule->f_.resize(1, w);
- rule->lhs_ = TD::Convert("___PHRASE") * -1;
- rule->scores_ = feats;
- rule->arity_ = 0;
- tps->AddRule(TRulePtr(rule));
- passthroughs.push_back(w);
- }
-}
-
-void TextFSTNode::ClearPassThroughTranslations() {
- for (int i = 0; i < passthroughs.size(); ++i)
- ptr.erase(passthroughs[i]);
- passthroughs.clear();
-}
-
-static void AddPhrasetableToFST(istream* in, TextFSTNode* fst) {
- int lc = 0;
- bool flag = false;
- while(*in) {
- string line;
- getline(*in, line);
- if (line.empty()) continue;
- ++lc;
- fst->AddPhrase(line);
- if (lc % 10000 == 0) { flag = true; cerr << '.' << flush; }
- if (lc % 500000 == 0) { flag = false; cerr << " [" << lc << ']' << endl << flush; }
- }
- if (flag) cerr << endl;
- cerr << "Loaded " << lc << " source phrases\n";
-}
-
-FSTNode* LoadTextPhrasetable(istream* in) {
- TextFSTNode *fst = new TextFSTNode;
- AddPhrasetableToFST(in, fst);
- return fst;
-}
-
-FSTNode* LoadTextPhrasetable(const vector<string>& filenames) {
- TextFSTNode* fst = new TextFSTNode;
- for (int i = 0; i < filenames.size(); ++i) {
- ReadFile rf(filenames[i]);
- cerr << "Reading phrase from " << filenames[i] << endl;
- AddPhrasetableToFST(rf.stream(), fst);
- }
- return fst;
-}
-
-FSTNode* LoadBinaryPhrasetable(const string& fname_prefix) {
- (void) fname_prefix;
- assert(!"not implemented yet");
-}
-