From 2d3948b98bb9e8c7bad60f1acd99ff0b42b3ae30 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 10 Nov 2013 00:58:44 -0500 Subject: guard against direct includes of tr1 --- decoder/ff_wordset.cc | 52 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-) (limited to 'decoder/ff_wordset.cc') diff --git a/decoder/ff_wordset.cc b/decoder/ff_wordset.cc index 70cea7de..9be6f2e0 100644 --- a/decoder/ff_wordset.cc +++ b/decoder/ff_wordset.cc @@ -2,21 +2,67 @@ #include "hg.h" #include "fdict.h" +#include "filelib.h" +#include #include #include using namespace std; +void WordSet::parseArgs(const string& args, string* featName, string* vocabFile, bool* oovMode) { + vector toks(10); + boost::split(toks, args, boost::is_any_of(" ")); + + *oovMode = false; + + // skip initial feature name + for(vector::const_iterator it = toks.begin(); it != toks.end(); ++it) { + if(*it == "-v") { + *vocabFile = *++it; // copy + + } else if(*it == "-N") { + *featName = *++it; + } else if(*it == "--oov") { + *oovMode = true; + } else { + cerr << "Unrecognized argument: " << *it << endl; + exit(1); + } + } + + if(*featName == "") { + cerr << "featName (-N) not specified for WordSet" << endl; + exit(1); + } + if(*vocabFile == "") { + cerr << "vocabFile (-v) not specified for WordSet" << endl; + exit(1); + } +} + +void WordSet::loadVocab(const string& vocabFile, unordered_set* vocab) { + ReadFile rf(vocabFile); + if (!rf) { + cerr << "Unable to open file: " << vocabFile; + abort(); + } + string line; + while (getline(*rf.stream(), line)) { + boost::trim(line); + if(line.empty()) continue; + WordID vocabId = TD::Convert(line); + vocab->insert(vocabId); + } +} + void WordSet::TraversalFeaturesImpl(const SentenceMetadata& /*smeta*/ , const Hypergraph::Edge& edge, const vector& /* ant_contexts */, SparseVector* features, SparseVector* /* estimated_features */, void* /* context */) const { - double addScore = 0.0; - for(std::vector::const_iterator it = edge.rule_->e_.begin(); it != edge.rule_->e_.end(); ++it) { - + for(vector::const_iterator it = edge.rule_->e_.begin(); it != edge.rule_->e_.end(); ++it) { bool inVocab = (vocab_.find(*it) != vocab_.end()); if(oovMode_ && !inVocab) { addScore += 1.0; -- cgit v1.2.3