diff options
author | Patrick Simianer <p@simianer.de> | 2013-11-13 18:12:10 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2013-11-13 18:12:10 +0100 |
commit | d6e6babf2cfe49fed040b651624b7e34d1a9b507 (patch) | |
tree | 2a00ab18f10a7f93e7e172551c01b48cc9f20b8c /decoder/ff_wordset.h | |
parent | 2d2d5eced93d58bc77894d8c328195cd9950b96d (diff) | |
parent | 8a24bb77bc2e9fd17a6f6529a2942cde96a6af49 (diff) |
merge w/ upstream
Diffstat (limited to 'decoder/ff_wordset.h')
-rw-r--r-- | decoder/ff_wordset.h | 73 |
1 files changed, 10 insertions, 63 deletions
```diff
diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h
index 639e1514..e78cd2fb 100644
--- a/decoder/ff_wordset.h
+++ b/decoder/ff_wordset.h
@@ -4,14 +4,18 @@
 #include "ff.h"
 #include "tdict.h"
 
-#include <tr1/unordered_set>
-#include <boost/algorithm/string.hpp>
-
 #include <vector>
 #include <string>
 #include <iostream>
 #include <fstream>
 
+#ifndef HAVE_OLD_CPP
+# include <unordered_set>
+#else
+# include <tr1/unordered_set>
+namespace std { using std::tr1::unordered_set; }
+#endif
+
 class WordSet : public FeatureFunction {
  public:
   // we depend on the order of the initializer list
@@ -42,69 +46,12 @@ class WordSet : public FeatureFunction {
                                      void* context) const;
 
  private:
-  static void loadVocab(const std::string& vocabFile, std::tr1::unordered_set<WordID>* vocab) {
-
-    std::ifstream file;
-    std::string line;
-
-    file.open(vocabFile.c_str(), std::fstream::in);
-    if (file.is_open()) {
-      unsigned lineNum = 0;
-      while (!file.eof()) {
-        ++lineNum;
-        getline(file, line);
-        boost::trim(line);
-        if(line.empty()) {
-          continue;
-        }
-
-        WordID vocabId = TD::Convert(line);
-        vocab->insert(vocabId);
-      }
-      file.close();
-    } else {
-      std::cerr << "Unable to open file: " << vocabFile;
-      exit(1);
-    }
-  }
-
-  static void parseArgs(const std::string& args, std::string* featName, std::string* vocabFile, bool* oovMode) {
-
-    std::vector<std::string> toks(10);
-    boost::split(toks, args, boost::is_any_of(" "));
-
-    *oovMode = false;
-
-    // skip initial feature name
-    for(std::vector<std::string>::const_iterator it = toks.begin(); it != toks.end(); ++it) {
-      if(*it == "-v") {
-        *vocabFile = *++it; // copy
-
-      } else if(*it == "-N") {
-        *featName = *++it;
-
-      } else if(*it == "--oov") {
-        *oovMode = true;
-
-      } else {
-        std::cerr << "Unrecognized argument: " << *it << std::endl;
-        exit(1);
-      }
-    }
-
-    if(*featName == "") {
-      std::cerr << "featName (-N) not specified for WordSet" << std::endl;
-      exit(1);
-    }
-    if(*vocabFile == "") {
-      std::cerr << "vocabFile (-v) not specified for WordSet" << std::endl;
-      exit(1);
-    }
-  }
+  static void parseArgs(const std::string& args, std::string* featName, std::string* vocabFile, bool* oovMode);
+  static void loadVocab(const std::string& vocabFile, std::unordered_set<WordID>* vocab);
 
   int fid_;
   bool oovMode_;
-  std::tr1::unordered_set<WordID> vocab_;
+  std::unordered_set<WordID> vocab_;
 };
 
 #endif
```