summaryrefslogtreecommitdiff
path: root/extractor/precomputation.cc
diff options
context:
space:
mode:
authorPaul Baltescu <pauldb89@gmail.com>2013-02-14 23:17:15 +0000
committerPaul Baltescu <pauldb89@gmail.com>2013-02-14 23:17:15 +0000
commit9a026ba2db8fa7723374109e6a4a8dcaff8733cd (patch)
tree34a60703a53ada76e7213da5940e86d6f476f1e4 /extractor/precomputation.cc
parent252fb164c208ec8f3005f8a652eb3b48c0644e3d (diff)
Working version of the grammar extractor.
Diffstat (limited to 'extractor/precomputation.cc')
-rw-r--r--extractor/precomputation.cc8
1 files changed, 5 insertions, 3 deletions
diff --git a/extractor/precomputation.cc b/extractor/precomputation.cc
index 9a167976..8a76beb1 100644
--- a/extractor/precomputation.cc
+++ b/extractor/precomputation.cc
@@ -7,7 +7,6 @@
#include "suffix_array.h"
using namespace std;
-using namespace tr1;
int Precomputation::NON_TERMINAL = -1;
@@ -79,13 +78,16 @@ vector<vector<int> > Precomputation::FindMostFrequentPatterns(
}
vector<vector<int> > frequent_patterns;
- for (size_t i = 0; i < num_frequent_patterns && !heap.empty(); ++i) {
+ while (frequent_patterns.size() < num_frequent_patterns && !heap.empty()) {
int start = heap.top().second.first;
int len = heap.top().second.second;
heap.pop();
vector<int> pattern(data.begin() + start, data.begin() + start + len);
- frequent_patterns.push_back(pattern);
+ if (find(pattern.begin(), pattern.end(), DataArray::END_OF_LINE) ==
+ pattern.end()) {
+ frequent_patterns.push_back(pattern);
+ }
}
return frequent_patterns;
}