diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-01-28 11:56:31 +0000 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-01-28 11:56:31 +0000 |
commit | 5530575ae0ad939e17f08d6bd49978acea388ab7 (patch) | |
tree | 4620a276c1c827d824e285148f4f4a5bf781ebfe /extractor/linear_merger.cc | |
parent | ce6937f136a38af93d9a5cd9628acc712da95543 (diff) |
Initial working commit.
Diffstat (limited to 'extractor/linear_merger.cc')
-rw-r--r-- | extractor/linear_merger.cc | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/extractor/linear_merger.cc b/extractor/linear_merger.cc new file mode 100644 index 00000000..59e5f34c --- /dev/null +++ b/extractor/linear_merger.cc @@ -0,0 +1,63 @@ +#include "linear_merger.h" + +#include <cmath> + +#include "data_array.h" +#include "matching.h" +#include "matching_comparator.h" +#include "phrase.h" +#include "phrase_location.h" +#include "vocabulary.h" + +LinearMerger::LinearMerger(shared_ptr<Vocabulary> vocabulary, + shared_ptr<DataArray> data_array, + shared_ptr<MatchingComparator> comparator) : + vocabulary(vocabulary), data_array(data_array), comparator(comparator) {} + +LinearMerger::~LinearMerger() {} + +void LinearMerger::Merge( + vector<int>& locations, const Phrase& phrase, const Phrase& suffix, + vector<int>::iterator prefix_start, vector<int>::iterator prefix_end, + vector<int>::iterator suffix_start, vector<int>::iterator suffix_end, + int prefix_subpatterns, int suffix_subpatterns) const { + int last_chunk_len = suffix.GetChunkLen(suffix.Arity()); + bool offset = !vocabulary->IsTerminal(suffix.GetSymbol(0)); + + while (prefix_start != prefix_end) { + Matching left(prefix_start, prefix_subpatterns, + data_array->GetSentenceId(*prefix_start)); + + while (suffix_start != suffix_end) { + Matching right(suffix_start, suffix_subpatterns, + data_array->GetSentenceId(*suffix_start)); + if (comparator->Compare(left, right, last_chunk_len, offset) > 0) { + suffix_start += suffix_subpatterns; + } else { + break; + } + } + + int start_position = *prefix_start; + vector<int> :: iterator i = suffix_start; + while (prefix_start != prefix_end && *prefix_start == start_position) { + Matching left(prefix_start, prefix_subpatterns, + data_array->GetSentenceId(*prefix_start)); + + while (i != suffix_end) { + Matching right(i, suffix_subpatterns, data_array->GetSentenceId(*i)); + int comparison = comparator->Compare(left, right, last_chunk_len, + offset); + if (comparison == 0) { + vector<int> merged = left.Merge(right, phrase.Arity() + 1); + locations.insert(locations.end(), merged.begin(), merged.end()); + } else if (comparison < 0) { + break; + } + i += suffix_subpatterns; + } + + prefix_start += prefix_subpatterns; + } + } +} |