From 4ab84a0be28fdb6c0c421fe5ba5e09cfa298f2d1 Mon Sep 17 00:00:00 2001
From: Paul Baltescu
Date: Mon, 28 Jan 2013 11:56:31 +0000
Subject: Initial working commit.

---
 extractor/binary_search_merger.h | 68 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 extractor/binary_search_merger.h

(limited to 'extractor/binary_search_merger.h')

diff --git a/extractor/binary_search_merger.h b/extractor/binary_search_merger.h
new file mode 100644
index 00000000..0e229b3b
--- /dev/null
+++ b/extractor/binary_search_merger.h
@@ -0,0 +1,68 @@
+#ifndef _BINARY_SEARCH_MERGER_H_  // NOTE(review): a leading underscore followed by a capital letter is a reserved identifier form.
+#define _BINARY_SEARCH_MERGER_H_
+// NOTE(review): both include targets below are blank in this copy; shared_ptr and vector are used further down, so presumably the memory and vector standard headers - confirm against the repository.
+#include
+#include
+// NOTE(review): a file-scope using-directive in a header applies to every file that includes it.
+using namespace std;
+// Forward declarations of types defined elsewhere; of these, only Phrase is visibly used below (always by const reference).
+class DataArray;  // presumably one of the stripped shared_ptr template arguments - TODO confirm
+class LinearMerger;  // presumably one of the stripped shared_ptr template arguments - TODO confirm
+class MatchingComparator;  // presumably one of the stripped shared_ptr template arguments - TODO confirm
+class Phrase;
+class Vocabulary;  // presumably one of the stripped shared_ptr template arguments - TODO confirm
+// BinarySearchMerger: declaration only; no member function bodies appear in this header.
+class BinarySearchMerger {  // NOTE(review): shared_ptr and vector carry no template arguments anywhere in this copy (likely stripped); confirm the element types against the repository.
+ public:  // Constructor: takes four shared_ptr collaborators plus one flag.
+  BinarySearchMerger(shared_ptr vocabulary,
+                     shared_ptr linear_merger,
+                     shared_ptr data_array,
+                     shared_ptr comparator,
+                     bool force_binary_search_merge = false);  // the flag is optional and defaults to false
+  // Merge: const member returning void; locations is its only non-const reference parameter (presumably where results are written - TODO confirm).
+  void Merge(
+      vector& locations, const Phrase& phrase, const Phrase& suffix,
+      vector::iterator prefix_start, vector::iterator prefix_end,
+      vector::iterator suffix_start, vector::iterator suffix_end,
+      int prefix_subpatterns, int suffix_subpatterns) const;
+  // Public static data member, not const; it is only declared here, so its definition must live in a source file.
+  static double BAEZA_YATES_FACTOR;  // presumably the cutoff consulted by ShouldUseLinearMerge - TODO confirm
+  // Everything below is private: helper declarations first, then the data members.
+ private:  // IsIntersectionVoid: const predicate over a prefix iterator pair, a suffix iterator pair, both subpattern counts and the suffix phrase.
+  bool IsIntersectionVoid(
+      vector::iterator prefix_start, vector::iterator prefix_end,
+      vector::iterator suffix_start, vector::iterator suffix_end,
+      int prefix_subpatterns, int suffix_subpatterns,
+      const Phrase& suffix) const;
+  // ShouldUseLinearMerge: const predicate taking the two set sizes as ints.
+  bool ShouldUseLinearMerge(int prefix_set_size, int suffix_set_size) const;
+  // GetMiddle: const; returns an iterator, given the iterators low and high and a subpattern count.
+  vector::iterator GetMiddle(vector::iterator low,
+                             vector::iterator high,
+                             int num_subpatterns) const;
+  // GetComparableMatchings: const, returns void; prefix_low and prefix_high are non-const references (presumably outputs - TODO confirm).
+  void GetComparableMatchings(
+      const vector::iterator& prefix_start,
+      const vector::iterator& prefix_end,
+      const vector::iterator& prefix_mid,
+      int num_subpatterns,
+      vector::iterator& prefix_low,
+      vector::iterator& prefix_high) const;
+  // CompareMatchingsSet: const; returns an int (presumably a three-way comparison result - TODO confirm).
+  int CompareMatchingsSet(
+      const vector::iterator& prefix_low,
+      const vector::iterator& prefix_high,
+      const vector::iterator& suffix_mid,
+      int prefix_subpatterns,
+      int suffix_subpatterns,
+      const Phrase& suffix) const;
+  // Data members; their names mirror the constructor parameters.
+  shared_ptr vocabulary;
+  shared_ptr linear_merger;
+  shared_ptr data_array;
+  shared_ptr comparator;
+  // Should be true only for testing.
+  bool force_binary_search_merge;
+};
+
+#endif  // _BINARY_SEARCH_MERGER_H_
-- 
cgit v1.2.3