diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-11-23 17:33:47 +0000 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-11-23 17:33:47 +0000 |
commit | cc6313b23cac25eb05976b6cf64f96faf1ed4163 (patch) | |
tree | 3dc28060ad25b43773e875bea7388ab1cefcd927 /decoder/ff_wordset.h | |
parent | 7990c750829af93f0a1e0fc14534582f52ee9e8c (diff) | |
parent | f2fb69b10a897e8beb4e6e6d6cbb4327096235ef (diff) |
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'decoder/ff_wordset.h')
-rw-r--r-- | decoder/ff_wordset.h | 73 |
1 files changed, 10 insertions, 63 deletions
diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h
index 639e1514..e78cd2fb 100644
--- a/decoder/ff_wordset.h
+++ b/decoder/ff_wordset.h
@@ -4,14 +4,18 @@
 #include "ff.h"
 #include "tdict.h"
 
-#include <tr1/unordered_set>
-#include <boost/algorithm/string.hpp>
-
 #include <vector>
 #include <string>
 #include <iostream>
 #include <fstream>
 
+#ifndef HAVE_OLD_CPP
+# include <unordered_set>
+#else
+# include <tr1/unordered_set>
+namespace std { using std::tr1::unordered_set; }
+#endif
+
 class WordSet : public FeatureFunction {
  public:
   // we depend on the order of the initializer list
@@ -42,69 +46,12 @@ class WordSet : public FeatureFunction {
                                      void* context) const;
 
  private:
-  static void loadVocab(const std::string& vocabFile, std::tr1::unordered_set<WordID>* vocab) {
-
-    std::ifstream file;
-    std::string line;
-
-    file.open(vocabFile.c_str(), std::fstream::in);
-    if (file.is_open()) {
-      unsigned lineNum = 0;
-      while (!file.eof()) {
-        ++lineNum;
-        getline(file, line);
-        boost::trim(line);
-        if(line.empty()) {
-          continue;
-        }
-
-        WordID vocabId = TD::Convert(line);
-        vocab->insert(vocabId);
-      }
-      file.close();
-    } else {
-      std::cerr << "Unable to open file: " << vocabFile;
-      exit(1);
-    }
-  }
-
-  static void parseArgs(const std::string& args, std::string* featName, std::string* vocabFile, bool* oovMode) {
-
-    std::vector<std::string> toks(10);
-    boost::split(toks, args, boost::is_any_of(" "));
-
-    *oovMode = false;
-
-    // skip initial feature name
-    for(std::vector<std::string>::const_iterator it = toks.begin(); it != toks.end(); ++it) {
-      if(*it == "-v") {
-        *vocabFile = *++it; // copy
-
-      } else if(*it == "-N") {
-        *featName = *++it;
-
-      } else if(*it == "--oov") {
-        *oovMode = true;
-
-      } else {
-        std::cerr << "Unrecognized argument: " << *it << std::endl;
-        exit(1);
-      }
-    }
-
-    if(*featName == "") {
-      std::cerr << "featName (-N) not specified for WordSet" << std::endl;
-      exit(1);
-    }
-    if(*vocabFile == "") {
-      std::cerr << "vocabFile (-v) not specified for WordSet" << std::endl;
-      exit(1);
-    }
-  }
+  static void parseArgs(const std::string& args, std::string* featName, std::string* vocabFile, bool* oovMode);
+  static void loadVocab(const std::string& vocabFile, std::unordered_set<WordID>* vocab);
 
   int fid_;
   bool oovMode_;
-  std::tr1::unordered_set<WordID> vocab_;
+  std::unordered_set<WordID> vocab_;
 };
 
 #endif