summaryrefslogtreecommitdiff
path: root/extractor/linear_merger.cc
blob: e7a32788dc024c9c7c7da93742035fa5cd2bcdbf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#include "linear_merger.h"

#include <cmath>

#include "data_array.h"
#include "matching.h"
#include "matching_comparator.h"
#include "phrase.h"
#include "phrase_location.h"
#include "vocabulary.h"

LinearMerger::LinearMerger(shared_ptr<Vocabulary> vocabulary,
                           shared_ptr<DataArray> data_array,
                           shared_ptr<MatchingComparator> comparator) :
    vocabulary(vocabulary), data_array(data_array), comparator(comparator) {}

LinearMerger::LinearMerger() {}

LinearMerger::~LinearMerger() {}

void LinearMerger::Merge(
    vector<int>& locations, const Phrase& phrase, const Phrase& suffix,
    vector<int>::iterator prefix_start, vector<int>::iterator prefix_end,
    vector<int>::iterator suffix_start, vector<int>::iterator suffix_end,
    int prefix_subpatterns, int suffix_subpatterns) {
  int last_chunk_len = suffix.GetChunkLen(suffix.Arity());
  bool offset = !vocabulary->IsTerminal(suffix.GetSymbol(0));

  while (prefix_start != prefix_end) {
    Matching left(prefix_start, prefix_subpatterns,
        data_array->GetSentenceId(*prefix_start));

    while (suffix_start != suffix_end) {
      Matching right(suffix_start, suffix_subpatterns,
          data_array->GetSentenceId(*suffix_start));
      if (comparator->Compare(left, right, last_chunk_len, offset) > 0) {
        suffix_start += suffix_subpatterns;
      } else {
        break;
      }
    }

    int start_position = *prefix_start;
    vector<int> :: iterator i = suffix_start;
    while (prefix_start != prefix_end && *prefix_start == start_position) {
      Matching left(prefix_start, prefix_subpatterns,
          data_array->GetSentenceId(*prefix_start));

      while (i != suffix_end) {
        Matching right(i, suffix_subpatterns, data_array->GetSentenceId(*i));
        int comparison = comparator->Compare(left, right, last_chunk_len,
                                             offset);
        if (comparison == 0) {
          vector<int> merged = left.Merge(right, phrase.Arity() + 1);
          locations.insert(locations.end(), merged.begin(), merged.end());
        } else if (comparison < 0) {
          break;
        }
        i += suffix_subpatterns;
      }

      prefix_start += prefix_subpatterns;
    }
  }
}