summaryrefslogtreecommitdiff
path: root/extractor/suffix_array.cc
diff options
context:
space:
mode:
authorPaul Baltescu <pauldb89@gmail.com>2013-02-14 23:17:15 +0000
committerPaul Baltescu <pauldb89@gmail.com>2013-02-14 23:17:15 +0000
commit63b30ed9c8510da8c8e2f6a456576424fddacc0e (patch)
tree1b5278fb5a4480b7f7a965bb6de8f6f9e9c4d333 /extractor/suffix_array.cc
parent0a53f7eca74c165b5ce1c238f1999ddf1febea55 (diff)
Working version of the grammar extractor.
Diffstat (limited to 'extractor/suffix_array.cc')
-rw-r--r--extractor/suffix_array.cc9
1 files changed, 5 insertions, 4 deletions
diff --git a/extractor/suffix_array.cc b/extractor/suffix_array.cc
index d13eacd5..9815996f 100644
--- a/extractor/suffix_array.cc
+++ b/extractor/suffix_array.cc
@@ -22,9 +22,9 @@ SuffixArray::~SuffixArray() {}
void SuffixArray::BuildSuffixArray() {
vector<int> groups = data_array->GetData();
groups.reserve(groups.size() + 1);
- groups.push_back(data_array->GetVocabularySize());
+ groups.push_back(DataArray::NULL_WORD);
suffix_array.resize(groups.size());
- word_start.resize(data_array->GetVocabularySize() + 2);
+ word_start.resize(data_array->GetVocabularySize() + 1);
InitialBucketSort(groups);
@@ -112,6 +112,8 @@ void SuffixArray::TernaryQuicksort(int left, int right, int step,
}
}
+ TernaryQuicksort(left, mid_left - 1, step, groups);
+
if (mid_left == mid_right) {
groups[suffix_array[mid_left]] = mid_left;
suffix_array[mid_left] = -1;
@@ -121,7 +123,6 @@ void SuffixArray::TernaryQuicksort(int left, int right, int step,
}
}
- TernaryQuicksort(left, mid_left - 1, step, groups);
TernaryQuicksort(mid_right + 1, right, step, groups);
}
@@ -201,7 +202,7 @@ int SuffixArray::LookupRangeStart(int low, int high, int word_id,
int result = high;
while (low < high) {
int middle = low + (high - low) / 2;
- if (suffix_array[middle] + offset < data_array->GetSize() &&
+ if (suffix_array[middle] + offset >= data_array->GetSize() ||
data_array->AtIndex(suffix_array[middle] + offset) < word_id) {
low = middle + 1;
} else {