diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-11-24 13:19:28 +0000 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-11-24 16:26:49 +0000 |
commit | f528ac27dab11770f01595b043675dba2947a263 (patch) | |
tree | 0f340a74879772fc8a5d8c4484df27457f25f11e /extractor/data_array.cc | |
parent | 79206291f78fba893fda6a61ff0ae9264d00bb82 (diff) |
Reduce memory overhead for constructing the intersector.
Diffstat (limited to 'extractor/data_array.cc')
-rw-r--r-- | extractor/data_array.cc | 14 |
1 files changed, 13 insertions, 1 deletions
diff --git a/extractor/data_array.cc b/extractor/data_array.cc
index 82efcd51..dacc4283 100644
--- a/extractor/data_array.cc
+++ b/extractor/data_array.cc
@@ -78,7 +78,7 @@ void DataArray::CreateDataArray(const vector<string>& lines) {
 
 DataArray::~DataArray() {}
 
-const vector<int>& DataArray::GetData() const {
+vector<int> DataArray::GetData() const {
   return data;
 }
 
@@ -90,6 +90,18 @@ string DataArray::GetWordAtIndex(int index) const {
   return id2word[data[index]];
 }
 
+vector<int> DataArray::GetWordIds(int index, int size) const {
+  return vector<int>(data.begin() + index, data.begin() + index + size);
+}
+
+vector<string> DataArray::GetWords(int start_index, int size) const {
+  vector<string> words;
+  for (int word_id: GetWordIds(start_index, size)) {
+    words.push_back(id2word[word_id]);
+  }
+  return words;
+}
+
 int DataArray::GetSize() const {
   return data.size();
 }