summaryrefslogtreecommitdiff
path: root/extractor/binary_search_merger.h
diff options
context:
space:
mode:
authorPaul Baltescu <pauldb89@gmail.com>2013-01-28 11:56:31 +0000
committerPaul Baltescu <pauldb89@gmail.com>2013-01-28 11:56:31 +0000
commit4ab84a0be28fdb6c0c421fe5ba5e09cfa298f2d1 (patch)
tree61a9790298659944650e16121c28dc04397b07ba /extractor/binary_search_merger.h
parentae1bd3257aafba586f874c55e7e51e8776879434 (diff)
Initial working commit.
Diffstat (limited to 'extractor/binary_search_merger.h')
-rw-r--r--extractor/binary_search_merger.h68
1 files changed, 68 insertions, 0 deletions
diff --git a/extractor/binary_search_merger.h b/extractor/binary_search_merger.h
new file mode 100644
index 00000000..0e229b3b
--- /dev/null
+++ b/extractor/binary_search_merger.h
@@ -0,0 +1,68 @@
+#ifndef _BINARY_SEARCH_MERGER_H_
+#define _BINARY_SEARCH_MERGER_H_
+
+#include <memory>
+#include <vector>
+
+using namespace std;
+
+class DataArray;
+class LinearMerger;
+class MatchingComparator;
+class Phrase;
+class Vocabulary;
+
+class BinarySearchMerger {
+ public:
+ BinarySearchMerger(shared_ptr<Vocabulary> vocabulary,
+ shared_ptr<LinearMerger> linear_merger,
+ shared_ptr<DataArray> data_array,
+ shared_ptr<MatchingComparator> comparator,
+ bool force_binary_search_merge = false);
+
+ void Merge(
+ vector<int>& locations, const Phrase& phrase, const Phrase& suffix,
+ vector<int>::iterator prefix_start, vector<int>::iterator prefix_end,
+ vector<int>::iterator suffix_start, vector<int>::iterator suffix_end,
+ int prefix_subpatterns, int suffix_subpatterns) const;
+
+ static double BAEZA_YATES_FACTOR;
+
+ private:
+ bool IsIntersectionVoid(
+ vector<int>::iterator prefix_start, vector<int>::iterator prefix_end,
+ vector<int>::iterator suffix_start, vector<int>::iterator suffix_end,
+ int prefix_subpatterns, int suffix_subpatterns,
+ const Phrase& suffix) const;
+
+ bool ShouldUseLinearMerge(int prefix_set_size, int suffix_set_size) const;
+
+ vector<int>::iterator GetMiddle(vector<int>::iterator low,
+ vector<int>::iterator high,
+ int num_subpatterns) const;
+
+ void GetComparableMatchings(
+ const vector<int>::iterator& prefix_start,
+ const vector<int>::iterator& prefix_end,
+ const vector<int>::iterator& prefix_mid,
+ int num_subpatterns,
+ vector<int>::iterator& prefix_low,
+ vector<int>::iterator& prefix_high) const;
+
+ int CompareMatchingsSet(
+ const vector<int>::iterator& prefix_low,
+ const vector<int>::iterator& prefix_high,
+ const vector<int>::iterator& suffix_mid,
+ int prefix_subpatterns,
+ int suffix_subpatterns,
+ const Phrase& suffix) const;
+
+ shared_ptr<Vocabulary> vocabulary;
+ shared_ptr<LinearMerger> linear_merger;
+ shared_ptr<DataArray> data_array;
+ shared_ptr<MatchingComparator> comparator;
+ // Should be true only for testing.
+ bool force_binary_search_merge;
+};
+
+#endif