summaryrefslogtreecommitdiff
path: root/extractor/precomputation.cc
diff options
context:
space:
mode:
authorPaul Baltescu <pauldb89@gmail.com>2013-02-14 23:17:15 +0000
committerPaul Baltescu <pauldb89@gmail.com>2013-02-14 23:17:15 +0000
commit63b30ed9c8510da8c8e2f6a456576424fddacc0e (patch)
tree1b5278fb5a4480b7f7a965bb6de8f6f9e9c4d333 /extractor/precomputation.cc
parent0a53f7eca74c165b5ce1c238f1999ddf1febea55 (diff)
Working version of the grammar extractor.
Diffstat (limited to 'extractor/precomputation.cc')
-rw-r--r--extractor/precomputation.cc8
1 files changed, 5 insertions, 3 deletions
diff --git a/extractor/precomputation.cc b/extractor/precomputation.cc
index 9a167976..8a76beb1 100644
--- a/extractor/precomputation.cc
+++ b/extractor/precomputation.cc
@@ -7,7 +7,6 @@
#include "suffix_array.h"
using namespace std;
-using namespace tr1;
int Precomputation::NON_TERMINAL = -1;
@@ -79,13 +78,16 @@ vector<vector<int> > Precomputation::FindMostFrequentPatterns(
}
vector<vector<int> > frequent_patterns;
- for (size_t i = 0; i < num_frequent_patterns && !heap.empty(); ++i) {
+ while (frequent_patterns.size() < num_frequent_patterns && !heap.empty()) {
int start = heap.top().second.first;
int len = heap.top().second.second;
heap.pop();
vector<int> pattern(data.begin() + start, data.begin() + start + len);
- frequent_patterns.push_back(pattern);
+ if (find(pattern.begin(), pattern.end(), DataArray::END_OF_LINE) ==
+ pattern.end()) {
+ frequent_patterns.push_back(pattern);
+ }
}
return frequent_patterns;
}