diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-01-28 11:56:31 +0000 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-01-28 11:56:31 +0000 |
commit | 4ab84a0be28fdb6c0c421fe5ba5e09cfa298f2d1 (patch) | |
tree | 61a9790298659944650e16121c28dc04397b07ba /extractor/linear_merger.cc | |
parent | ae1bd3257aafba586f874c55e7e51e8776879434 (diff) |
Initial working commit.
Diffstat (limited to 'extractor/linear_merger.cc')
-rw-r--r-- | extractor/linear_merger.cc | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/extractor/linear_merger.cc b/extractor/linear_merger.cc new file mode 100644 index 00000000..59e5f34c --- /dev/null +++ b/extractor/linear_merger.cc @@ -0,0 +1,63 @@ +#include "linear_merger.h" + +#include <cmath> + +#include "data_array.h" +#include "matching.h" +#include "matching_comparator.h" +#include "phrase.h" +#include "phrase_location.h" +#include "vocabulary.h" + +LinearMerger::LinearMerger(shared_ptr<Vocabulary> vocabulary, + shared_ptr<DataArray> data_array, + shared_ptr<MatchingComparator> comparator) : + vocabulary(vocabulary), data_array(data_array), comparator(comparator) {} + +LinearMerger::~LinearMerger() {} + +void LinearMerger::Merge( + vector<int>& locations, const Phrase& phrase, const Phrase& suffix, + vector<int>::iterator prefix_start, vector<int>::iterator prefix_end, + vector<int>::iterator suffix_start, vector<int>::iterator suffix_end, + int prefix_subpatterns, int suffix_subpatterns) const { + int last_chunk_len = suffix.GetChunkLen(suffix.Arity()); + bool offset = !vocabulary->IsTerminal(suffix.GetSymbol(0)); + + while (prefix_start != prefix_end) { + Matching left(prefix_start, prefix_subpatterns, + data_array->GetSentenceId(*prefix_start)); + + while (suffix_start != suffix_end) { + Matching right(suffix_start, suffix_subpatterns, + data_array->GetSentenceId(*suffix_start)); + if (comparator->Compare(left, right, last_chunk_len, offset) > 0) { + suffix_start += suffix_subpatterns; + } else { + break; + } + } + + int start_position = *prefix_start; + vector<int> :: iterator i = suffix_start; + while (prefix_start != prefix_end && *prefix_start == start_position) { + Matching left(prefix_start, prefix_subpatterns, + data_array->GetSentenceId(*prefix_start)); + + while (i != suffix_end) { + Matching right(i, suffix_subpatterns, data_array->GetSentenceId(*i)); + int comparison = comparator->Compare(left, right, last_chunk_len, + offset); + if (comparison == 0) { + vector<int> merged = left.Merge(right, phrase.Arity() + 1); + locations.insert(locations.end(), merged.begin(), merged.end()); + } else if (comparison < 0) { + break; + } + i += suffix_subpatterns; + } + + prefix_start += prefix_subpatterns; + } + } +} |