From 63b30ed9c8510da8c8e2f6a456576424fddacc0e Mon Sep 17 00:00:00 2001 From: Paul Baltescu Date: Thu, 14 Feb 2013 23:17:15 +0000 Subject: Working version of the grammar extractor. --- extractor/suffix_array.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'extractor/suffix_array.cc') diff --git a/extractor/suffix_array.cc b/extractor/suffix_array.cc index d13eacd5..9815996f 100644 --- a/extractor/suffix_array.cc +++ b/extractor/suffix_array.cc @@ -22,9 +22,9 @@ SuffixArray::~SuffixArray() {} void SuffixArray::BuildSuffixArray() { vector groups = data_array->GetData(); groups.reserve(groups.size() + 1); - groups.push_back(data_array->GetVocabularySize()); + groups.push_back(DataArray::NULL_WORD); suffix_array.resize(groups.size()); - word_start.resize(data_array->GetVocabularySize() + 2); + word_start.resize(data_array->GetVocabularySize() + 1); InitialBucketSort(groups); @@ -112,6 +112,8 @@ void SuffixArray::TernaryQuicksort(int left, int right, int step, } } + TernaryQuicksort(left, mid_left - 1, step, groups); + if (mid_left == mid_right) { groups[suffix_array[mid_left]] = mid_left; suffix_array[mid_left] = -1; @@ -121,7 +123,6 @@ void SuffixArray::TernaryQuicksort(int left, int right, int step, } } - TernaryQuicksort(left, mid_left - 1, step, groups); TernaryQuicksort(mid_right + 1, right, step, groups); } @@ -201,7 +202,7 @@ int SuffixArray::LookupRangeStart(int low, int high, int word_id, int result = high; while (low < high) { int middle = low + (high - low) / 2; - if (suffix_array[middle] + offset < data_array->GetSize() && + if (suffix_array[middle] + offset >= data_array->GetSize() || data_array->AtIndex(suffix_array[middle] + offset) < word_id) { low = middle + 1; } else { -- cgit v1.2.3