diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-02-14 23:17:15 +0000 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-02-14 23:17:15 +0000 |
commit | 63b30ed9c8510da8c8e2f6a456576424fddacc0e (patch) | |
tree | 1b5278fb5a4480b7f7a965bb6de8f6f9e9c4d333 /extractor/suffix_array.cc | |
parent | 0a53f7eca74c165b5ce1c238f1999ddf1febea55 (diff) |
Working version of the grammar extractor.
Diffstat (limited to 'extractor/suffix_array.cc')
-rw-r--r-- | extractor/suffix_array.cc | 9 |
1 files changed, 5 insertions, 4 deletions
diff --git a/extractor/suffix_array.cc b/extractor/suffix_array.cc index d13eacd5..9815996f 100644 --- a/extractor/suffix_array.cc +++ b/extractor/suffix_array.cc @@ -22,9 +22,9 @@ SuffixArray::~SuffixArray() {} void SuffixArray::BuildSuffixArray() { vector<int> groups = data_array->GetData(); groups.reserve(groups.size() + 1); - groups.push_back(data_array->GetVocabularySize()); + groups.push_back(DataArray::NULL_WORD); suffix_array.resize(groups.size()); - word_start.resize(data_array->GetVocabularySize() + 2); + word_start.resize(data_array->GetVocabularySize() + 1); InitialBucketSort(groups); @@ -112,6 +112,8 @@ void SuffixArray::TernaryQuicksort(int left, int right, int step, } } + TernaryQuicksort(left, mid_left - 1, step, groups); + if (mid_left == mid_right) { groups[suffix_array[mid_left]] = mid_left; suffix_array[mid_left] = -1; @@ -121,7 +123,6 @@ void SuffixArray::TernaryQuicksort(int left, int right, int step, } } - TernaryQuicksort(left, mid_left - 1, step, groups); TernaryQuicksort(mid_right + 1, right, step, groups); } @@ -201,7 +202,7 @@ int SuffixArray::LookupRangeStart(int low, int high, int word_id, int result = high; while (low < high) { int middle = low + (high - low) / 2; - if (suffix_array[middle] + offset < data_array->GetSize() && + if (suffix_array[middle] + offset >= data_array->GetSize() || data_array->AtIndex(suffix_array[middle] + offset) < word_id) { low = middle + 1; } else { |