summary | refs | log | tree | commit | diff
path: root/decoder/ff_wordset.h
diff options
context:
space:
mode:
author Paul Baltescu <pauldb89@gmail.com> 2013-11-23 17:33:47 +0000
committer Paul Baltescu <pauldb89@gmail.com> 2013-11-23 17:33:47 +0000
commit cc6313b23cac25eb05976b6cf64f96faf1ed4163 (patch)
tree 3dc28060ad25b43773e875bea7388ab1cefcd927 /decoder/ff_wordset.h
parent 7990c750829af93f0a1e0fc14534582f52ee9e8c (diff)
parent f2fb69b10a897e8beb4e6e6d6cbb4327096235ef (diff)
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'decoder/ff_wordset.h')
-rw-r--r-- decoder/ff_wordset.h 73
1 files changed, 10 insertions, 63 deletions
diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h
index 639e1514..e78cd2fb 100644
--- a/decoder/ff_wordset.h
+++ b/decoder/ff_wordset.h
@@ -4,14 +4,18 @@
#include "ff.h"
#include "tdict.h"
-#include <tr1/unordered_set>
-#include <boost/algorithm/string.hpp>
-
#include <vector>
#include <string>
#include <iostream>
#include <fstream>
+#ifndef HAVE_OLD_CPP
+# include <unordered_set>
+#else
+# include <tr1/unordered_set>
+namespace std { using std::tr1::unordered_set; }
+#endif
+
class WordSet : public FeatureFunction {
public:
// we depend on the order of the initializer list
@@ -42,69 +46,12 @@ class WordSet : public FeatureFunction {
void* context) const;
private:
- static void loadVocab(const std::string& vocabFile, std::tr1::unordered_set<WordID>* vocab) {
-
- std::ifstream file;
- std::string line;
-
- file.open(vocabFile.c_str(), std::fstream::in);
- if (file.is_open()) {
- unsigned lineNum = 0;
- while (!file.eof()) {
- ++lineNum;
- getline(file, line);
- boost::trim(line);
- if(line.empty()) {
- continue;
- }
-
- WordID vocabId = TD::Convert(line);
- vocab->insert(vocabId);
- }
- file.close();
- } else {
- std::cerr << "Unable to open file: " << vocabFile;
- exit(1);
- }
- }
-
- static void parseArgs(const std::string& args, std::string* featName, std::string* vocabFile, bool* oovMode) {
-
- std::vector<std::string> toks(10);
- boost::split(toks, args, boost::is_any_of(" "));
-
- *oovMode = false;
-
- // skip initial feature name
- for(std::vector<std::string>::const_iterator it = toks.begin(); it != toks.end(); ++it) {
- if(*it == "-v") {
- *vocabFile = *++it; // copy
-
- } else if(*it == "-N") {
- *featName = *++it;
-
- } else if(*it == "--oov") {
- *oovMode = true;
-
- } else {
- std::cerr << "Unrecognized argument: " << *it << std::endl;
- exit(1);
- }
- }
-
- if(*featName == "") {
- std::cerr << "featName (-N) not specified for WordSet" << std::endl;
- exit(1);
- }
- if(*vocabFile == "") {
- std::cerr << "vocabFile (-v) not specified for WordSet" << std::endl;
- exit(1);
- }
- }
+ static void parseArgs(const std::string& args, std::string* featName, std::string* vocabFile, bool* oovMode);
+ static void loadVocab(const std::string& vocabFile, std::unordered_set<WordID>* vocab);
int fid_;
bool oovMode_;
- std::tr1::unordered_set<WordID> vocab_;
+ std::unordered_set<WordID> vocab_;
};
#endif