summaryrefslogtreecommitdiff
path: root/extractor/binary_search_merger.h
blob: 0e229b3b06691ba36d394f7d538db74bb0d6a1a3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#ifndef _BINARY_SEARCH_MERGER_H_
#define _BINARY_SEARCH_MERGER_H_

#include <memory>
#include <vector>

using namespace std;

class DataArray;
class LinearMerger;
class MatchingComparator;
class Phrase;
class Vocabulary;

class BinarySearchMerger {
 public:
  BinarySearchMerger(shared_ptr<Vocabulary> vocabulary,
                     shared_ptr<LinearMerger> linear_merger,
                     shared_ptr<DataArray> data_array,
                     shared_ptr<MatchingComparator> comparator,
                     bool force_binary_search_merge = false);

  void Merge(
      vector<int>& locations, const Phrase& phrase, const Phrase& suffix,
      vector<int>::iterator prefix_start, vector<int>::iterator prefix_end,
      vector<int>::iterator suffix_start, vector<int>::iterator suffix_end,
      int prefix_subpatterns, int suffix_subpatterns) const;

  static double BAEZA_YATES_FACTOR;

 private:
  bool IsIntersectionVoid(
      vector<int>::iterator prefix_start, vector<int>::iterator prefix_end,
      vector<int>::iterator suffix_start, vector<int>::iterator suffix_end,
      int prefix_subpatterns, int suffix_subpatterns,
      const Phrase& suffix) const;

  bool ShouldUseLinearMerge(int prefix_set_size, int suffix_set_size) const;

  vector<int>::iterator GetMiddle(vector<int>::iterator low,
                                  vector<int>::iterator high,
                                  int num_subpatterns) const;

  void GetComparableMatchings(
      const vector<int>::iterator& prefix_start,
      const vector<int>::iterator& prefix_end,
      const vector<int>::iterator& prefix_mid,
      int num_subpatterns,
      vector<int>::iterator& prefix_low,
      vector<int>::iterator& prefix_high) const;

  int CompareMatchingsSet(
      const vector<int>::iterator& prefix_low,
      const vector<int>::iterator& prefix_high,
      const vector<int>::iterator& suffix_mid,
      int prefix_subpatterns,
      int suffix_subpatterns,
      const Phrase& suffix) const;

  shared_ptr<Vocabulary> vocabulary;
  shared_ptr<LinearMerger> linear_merger;
  shared_ptr<DataArray> data_array;
  shared_ptr<MatchingComparator> comparator;
  // Should be true only for testing.
  bool force_binary_search_merge;
};

#endif