summary | refs | log | tree | commit | diff
path: root/extractor/data_array.cc
diff options
context:
space:
mode:
author: Paul Baltescu <pauldb89@gmail.com>, 2013-11-24 13:19:28 +0000
committer: Paul Baltescu <pauldb89@gmail.com>, 2013-11-24 16:26:49 +0000
commit: f528ac27dab11770f01595b043675dba2947a263 (patch)
tree: 0f340a74879772fc8a5d8c4484df27457f25f11e /extractor/data_array.cc
parent: 79206291f78fba893fda6a61ff0ae9264d00bb82 (diff)
Reduce memory overhead for constructing the intersector.
Diffstat (limited to 'extractor/data_array.cc')
-rw-r--r-- extractor/data_array.cc | 14
1 file changed, 13 insertions, 1 deletion
diff --git a/extractor/data_array.cc b/extractor/data_array.cc
index 82efcd51..dacc4283 100644
--- a/extractor/data_array.cc
+++ b/extractor/data_array.cc
@@ -78,7 +78,7 @@ void DataArray::CreateDataArray(const vector<string>& lines) {
DataArray::~DataArray() {}
-const vector<int>& DataArray::GetData() const {
+vector<int> DataArray::GetData() const {
return data;
}
@@ -90,6 +90,18 @@ string DataArray::GetWordAtIndex(int index) const {
return id2word[data[index]];
}
+vector<int> DataArray::GetWordIds(int index, int size) const {
+ return vector<int>(data.begin() + index, data.begin() + index + size);
+}
+
+vector<string> DataArray::GetWords(int start_index, int size) const {
+ vector<string> words;
+ for (int word_id: GetWordIds(start_index, size)) {
+ words.push_back(id2word[word_id]);
+ }
+ return words;
+}
+
int DataArray::GetSize() const {
return data.size();
}