summary refs log tree commit diff
path: root/extractor/data_array.cc
diff options
context:
space:
mode:
Diffstat (limited to 'extractor/data_array.cc')
-rw-r--r-- extractor/data_array.cc 21
1 files changed, 13 insertions, 8 deletions
diff --git a/extractor/data_array.cc b/extractor/data_array.cc
index 2e4bdafb..9612aa8a 100644
--- a/extractor/data_array.cc
+++ b/extractor/data_array.cc
@@ -5,9 +5,6 @@
#include <sstream>
#include <string>
-#include <boost/filesystem.hpp>
-
-namespace fs = boost::filesystem;
using namespace std;
namespace extractor {
@@ -81,7 +78,7 @@ void DataArray::CreateDataArray(const vector<string>& lines) {
DataArray::~DataArray() {}
-const vector<int>& DataArray::GetData() const {
+vector<int> DataArray::GetData() const {
return data;
}
@@ -93,6 +90,18 @@ string DataArray::GetWordAtIndex(int index) const {
return id2word[data[index]];
}
+vector<int> DataArray::GetWordIds(int index, int size) const {
+ return vector<int>(data.begin() + index, data.begin() + index + size);
+}
+
+vector<string> DataArray::GetWords(int start_index, int size) const {
+ vector<string> words;
+ for (int word_id: GetWordIds(start_index, size)) {
+ words.push_back(id2word[word_id]);
+ }
+ return words;
+}
+
int DataArray::GetSize() const {
return data.size();
}
@@ -118,10 +127,6 @@ int DataArray::GetSentenceId(int position) const {
return sentence_id[position];
}
-bool DataArray::HasWord(const string& word) const {
- return word2id.count(word);
-}
-
int DataArray::GetWordId(const string& word) const {
auto result = word2id.find(word);
return result == word2id.end() ? -1 : result->second;