diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-01-28 11:56:31 +0000 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-01-28 11:56:31 +0000 |
commit | 5530575ae0ad939e17f08d6bd49978acea388ab7 (patch) | |
tree | 4620a276c1c827d824e285148f4f4a5bf781ebfe /extractor/intersector.h | |
parent | ce6937f136a38af93d9a5cd9628acc712da95543 (diff) |
Initial working commit.
Diffstat (limited to 'extractor/intersector.h')
-rw-r--r-- | extractor/intersector.h | 57 |
1 files changed, 57 insertions, 0 deletions
diff --git a/extractor/intersector.h b/extractor/intersector.h new file mode 100644 index 00000000..874ffc1b --- /dev/null +++ b/extractor/intersector.h @@ -0,0 +1,57 @@ +#ifndef _INTERSECTOR_H_ +#define _INTERSECTOR_H_ + +#include <memory> +#include <tr1/unordered_map> +#include <vector> + +#include <boost/functional/hash.hpp> + +#include "binary_search_merger.h" +#include "linear_merger.h" + +using namespace std; +using namespace tr1; + +typedef boost::hash<vector<int> > vector_hash; +typedef unordered_map<vector<int>, vector<int>, vector_hash> Index; + +class DataArray; +class MatchingComparator; +class Phrase; +class PhraseLocation; +class Precomputation; +class SuffixArray; +class Vocabulary; + +class Intersector { + public: + Intersector( + shared_ptr<Vocabulary> vocabulary, + const Precomputation& precomputaiton, + shared_ptr<SuffixArray> source_suffix_array, + shared_ptr<MatchingComparator> comparator, + bool use_baeza_yates); + + PhraseLocation Intersect( + const Phrase& prefix, PhraseLocation& prefix_location, + const Phrase& suffix, PhraseLocation& suffix_location, + const Phrase& phrase); + + private: + vector<int> Convert(const vector<int>& old_phrase, + shared_ptr<DataArray> source_data_array); + + void ExtendPhraseLocation(const Phrase& phrase, + PhraseLocation& phrase_location); + + shared_ptr<Vocabulary> vocabulary; + shared_ptr<SuffixArray> suffix_array; + shared_ptr<LinearMerger> linear_merger; + shared_ptr<BinarySearchMerger> binary_search_merger; + Index inverted_index; + Index collocations; + bool use_baeza_yates; +}; + +#endif |