From 4ab84a0be28fdb6c0c421fe5ba5e09cfa298f2d1 Mon Sep 17 00:00:00 2001 From: Paul Baltescu Date: Mon, 28 Jan 2013 11:56:31 +0000 Subject: Initial working commit. --- extractor/intersector.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 extractor/intersector.h (limited to 'extractor/intersector.h') diff --git a/extractor/intersector.h b/extractor/intersector.h new file mode 100644 index 00000000..874ffc1b --- /dev/null +++ b/extractor/intersector.h @@ -0,0 +1,57 @@ +#ifndef _INTERSECTOR_H_ +#define _INTERSECTOR_H_ + +#include +#include +#include + +#include + +#include "binary_search_merger.h" +#include "linear_merger.h" + +using namespace std; +using namespace tr1; + +typedef boost::hash > vector_hash; +typedef unordered_map, vector, vector_hash> Index; + +class DataArray; +class MatchingComparator; +class Phrase; +class PhraseLocation; +class Precomputation; +class SuffixArray; +class Vocabulary; + +class Intersector { + public: + Intersector( + shared_ptr vocabulary, + const Precomputation& precomputaiton, + shared_ptr source_suffix_array, + shared_ptr comparator, + bool use_baeza_yates); + + PhraseLocation Intersect( + const Phrase& prefix, PhraseLocation& prefix_location, + const Phrase& suffix, PhraseLocation& suffix_location, + const Phrase& phrase); + + private: + vector Convert(const vector& old_phrase, + shared_ptr source_data_array); + + void ExtendPhraseLocation(const Phrase& phrase, + PhraseLocation& phrase_location); + + shared_ptr vocabulary; + shared_ptr suffix_array; + shared_ptr linear_merger; + shared_ptr binary_search_merger; + Index inverted_index; + Index collocations; + bool use_baeza_yates; +}; + +#endif -- cgit v1.2.3