From 54a1c0e2bde259e3acc9c0a8ec8da3c7704e80ca Mon Sep 17 00:00:00 2001 From: Paul Baltescu Date: Tue, 19 Feb 2013 21:23:48 +0000 Subject: Timing every part of the extractor. --- extractor/fast_intersector.h | 65 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 extractor/fast_intersector.h (limited to 'extractor/fast_intersector.h') diff --git a/extractor/fast_intersector.h b/extractor/fast_intersector.h new file mode 100644 index 00000000..785e428e --- /dev/null +++ b/extractor/fast_intersector.h @@ -0,0 +1,65 @@ +#ifndef _FAST_INTERSECTOR_H_ +#define _FAST_INTERSECTOR_H_ + +#include +#include +#include + +#include + +using namespace std; + +typedef boost::hash > VectorHash; +typedef unordered_map, vector, VectorHash> Index; + +class Phrase; +class PhraseLocation; +class Precomputation; +class SuffixArray; +class Vocabulary; + +class FastIntersector { + public: + FastIntersector(shared_ptr suffix_array, + shared_ptr precomputation, + shared_ptr vocabulary, + int max_rule_span, + int min_gap_size); + + virtual ~FastIntersector(); + + virtual PhraseLocation Intersect(PhraseLocation& prefix_location, + PhraseLocation& suffix_location, + const Phrase& phrase); + + protected: + FastIntersector(); + + private: + vector ConvertPhrase(const vector& old_phrase); + + int EstimateNumOperations(const PhraseLocation& phrase_location, + bool has_margin_x) const; + + PhraseLocation ExtendPrefixPhraseLocation(PhraseLocation& prefix_location, + const Phrase& phrase, + bool prefix_ends_with_x, + int next_symbol) const; + + PhraseLocation ExtendSuffixPhraseLocation(PhraseLocation& suffix_location, + const Phrase& phrase, + bool suffix_starts_with_x, + int prev_symbol) const; + + void ExtendPhraseLocation(PhraseLocation& location) const; + + pair GetSearchRange(bool has_marginal_x) const; + + shared_ptr suffix_array; + shared_ptr vocabulary; + int max_rule_span; + int min_gap_size; + Index collocations; +}; + +#endif -- cgit v1.2.3