summaryrefslogtreecommitdiff
path: root/extractor/linear_merger.cc
diff options
context:
space:
mode:
authorPaul Baltescu <pauldb89@gmail.com>2013-01-28 11:56:31 +0000
committerPaul Baltescu <pauldb89@gmail.com>2013-01-28 11:56:31 +0000
commit4ab84a0be28fdb6c0c421fe5ba5e09cfa298f2d1 (patch)
tree61a9790298659944650e16121c28dc04397b07ba /extractor/linear_merger.cc
parentae1bd3257aafba586f874c55e7e51e8776879434 (diff)
Initial working commit.
Diffstat (limited to 'extractor/linear_merger.cc')
-rw-r--r--extractor/linear_merger.cc63
1 files changed, 63 insertions, 0 deletions
diff --git a/extractor/linear_merger.cc b/extractor/linear_merger.cc
new file mode 100644
index 00000000..59e5f34c
--- /dev/null
+++ b/extractor/linear_merger.cc
@@ -0,0 +1,63 @@
+#include "linear_merger.h"
+
+#include <cmath>
+
+#include "data_array.h"
+#include "matching.h"
+#include "matching_comparator.h"
+#include "phrase.h"
+#include "phrase_location.h"
+#include "vocabulary.h"
+
+LinearMerger::LinearMerger(shared_ptr<Vocabulary> vocabulary,
+ shared_ptr<DataArray> data_array,
+ shared_ptr<MatchingComparator> comparator) :
+ vocabulary(vocabulary), data_array(data_array), comparator(comparator) {}
+
+LinearMerger::~LinearMerger() {}
+
+void LinearMerger::Merge(
+ vector<int>& locations, const Phrase& phrase, const Phrase& suffix,
+ vector<int>::iterator prefix_start, vector<int>::iterator prefix_end,
+ vector<int>::iterator suffix_start, vector<int>::iterator suffix_end,
+ int prefix_subpatterns, int suffix_subpatterns) const {
+ int last_chunk_len = suffix.GetChunkLen(suffix.Arity());
+ bool offset = !vocabulary->IsTerminal(suffix.GetSymbol(0));
+
+ while (prefix_start != prefix_end) {
+ Matching left(prefix_start, prefix_subpatterns,
+ data_array->GetSentenceId(*prefix_start));
+
+ while (suffix_start != suffix_end) {
+ Matching right(suffix_start, suffix_subpatterns,
+ data_array->GetSentenceId(*suffix_start));
+ if (comparator->Compare(left, right, last_chunk_len, offset) > 0) {
+ suffix_start += suffix_subpatterns;
+ } else {
+ break;
+ }
+ }
+
+ int start_position = *prefix_start;
+ vector<int> :: iterator i = suffix_start;
+ while (prefix_start != prefix_end && *prefix_start == start_position) {
+ Matching left(prefix_start, prefix_subpatterns,
+ data_array->GetSentenceId(*prefix_start));
+
+ while (i != suffix_end) {
+ Matching right(i, suffix_subpatterns, data_array->GetSentenceId(*i));
+ int comparison = comparator->Compare(left, right, last_chunk_len,
+ offset);
+ if (comparison == 0) {
+ vector<int> merged = left.Merge(right, phrase.Arity() + 1);
+ locations.insert(locations.end(), merged.begin(), merged.end());
+ } else if (comparison < 0) {
+ break;
+ }
+ i += suffix_subpatterns;
+ }
+
+ prefix_start += prefix_subpatterns;
+ }
+ }
+}