summaryrefslogtreecommitdiff
path: root/extractor/intersector.h
diff options
context:
space:
mode:
authorPaul Baltescu <pauldb89@gmail.com>2013-01-28 11:56:31 +0000
committerPaul Baltescu <pauldb89@gmail.com>2013-01-28 11:56:31 +0000
commit4ab84a0be28fdb6c0c421fe5ba5e09cfa298f2d1 (patch)
tree61a9790298659944650e16121c28dc04397b07ba /extractor/intersector.h
parentae1bd3257aafba586f874c55e7e51e8776879434 (diff)
Initial working commit.
Diffstat (limited to 'extractor/intersector.h')
-rw-r--r--extractor/intersector.h57
1 files changed, 57 insertions, 0 deletions
diff --git a/extractor/intersector.h b/extractor/intersector.h
new file mode 100644
index 00000000..874ffc1b
--- /dev/null
+++ b/extractor/intersector.h
@@ -0,0 +1,57 @@
+#ifndef _INTERSECTOR_H_
+#define _INTERSECTOR_H_
+
+#include <memory>
+#include <tr1/unordered_map>
+#include <vector>
+
+#include <boost/functional/hash.hpp>
+
+#include "binary_search_merger.h"
+#include "linear_merger.h"
+
+using namespace std;  // NOTE(review): using-directive at header scope; it applies to every file that includes this header.
+using namespace tr1;  // Lets TR1 names (e.g. unordered_map from <tr1/unordered_map>) be written unqualified below.
+
+typedef boost::hash<vector<int> > vector_hash;  // Hash functor for vector<int> keys, supplied by Boost.
+typedef unordered_map<vector<int>, vector<int>, vector_hash> Index;  // Hash map from vector<int> to vector<int>, hashed with vector_hash.
+
+class DataArray;  // Forward declarations: this header names these types only in function signatures and shared_ptr members.
+class MatchingComparator;
+class Phrase;
+class PhraseLocation;
+class Precomputation;
+class SuffixArray;
+class Vocabulary;
+
+class Intersector {  // Declaration only: every member function is defined outside this header.
+ public:
+ Intersector(  // Constructor; takes the collaborators listed below.
+ shared_ptr<Vocabulary> vocabulary,
+ const Precomputation& precomputaiton,  // NOTE(review): "precomputaiton" looks like a typo for "precomputation". No Precomputation member is stored; presumably it is consumed during construction - confirm in the definition.
+ shared_ptr<SuffixArray> source_suffix_array,
+ shared_ptr<MatchingComparator> comparator,  // Not stored as a member of this class; presumably handed to the mergers - confirm in the definition.
+ bool use_baeza_yates);  // Presumably initializes the member of the same name - TODO confirm.
+
+ PhraseLocation Intersect(  // Takes a prefix and a suffix (each with its location) plus the full phrase; returns a PhraseLocation by value.
+ const Phrase& prefix, PhraseLocation& prefix_location,  // Locations are non-const references, so the call may modify them.
+ const Phrase& suffix, PhraseLocation& suffix_location,
+ const Phrase& phrase);
+
+ private:
+ vector<int> Convert(const vector<int>& old_phrase,  // Private helper returning a vector<int>; presumably re-encodes old_phrase using the data array - confirm.
+ shared_ptr<DataArray> source_data_array);
+
+ void ExtendPhraseLocation(const Phrase& phrase,  // Private helper; phrase_location is a non-const reference, so it is presumably updated in place - confirm.
+ PhraseLocation& phrase_location);
+
+ shared_ptr<Vocabulary> vocabulary;
+ shared_ptr<SuffixArray> suffix_array;
+ shared_ptr<LinearMerger> linear_merger;  // Type comes from linear_merger.h.
+ shared_ptr<BinarySearchMerger> binary_search_merger;  // Type comes from binary_search_merger.h.
+ Index inverted_index;  // vector<int> -> vector<int> map (see the Index typedef).
+ Index collocations;  // vector<int> -> vector<int> map (see the Index typedef).
+ bool use_baeza_yates;  // NOTE(review): presumably selects between the two mergers - confirm in the definition.
+};
+
+#endif