summaryrefslogtreecommitdiff
path: root/extractor/fast_intersector.h
diff options
context:
space:
mode:
Diffstat (limited to 'extractor/fast_intersector.h')
-rw-r--r--extractor/fast_intersector.h65
1 files changed, 65 insertions, 0 deletions
diff --git a/extractor/fast_intersector.h b/extractor/fast_intersector.h
new file mode 100644
index 00000000..785e428e
--- /dev/null
+++ b/extractor/fast_intersector.h
@@ -0,0 +1,65 @@
+#ifndef _FAST_INTERSECTOR_H_
+#define _FAST_INTERSECTOR_H_
+
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+#include <boost/functional/hash.hpp>
+
+using namespace std;
+
+typedef boost::hash<vector<int> > VectorHash;
+typedef unordered_map<vector<int>, vector<int>, VectorHash> Index;
+
+class Phrase;
+class PhraseLocation;
+class Precomputation;
+class SuffixArray;
+class Vocabulary;
+
+class FastIntersector {
+ public:
+ FastIntersector(shared_ptr<SuffixArray> suffix_array,
+ shared_ptr<Precomputation> precomputation,
+ shared_ptr<Vocabulary> vocabulary,
+ int max_rule_span,
+ int min_gap_size);
+
+ virtual ~FastIntersector();
+
+ virtual PhraseLocation Intersect(PhraseLocation& prefix_location,
+ PhraseLocation& suffix_location,
+ const Phrase& phrase);
+
+ protected:
+ FastIntersector();
+
+ private:
+ vector<int> ConvertPhrase(const vector<int>& old_phrase);
+
+ int EstimateNumOperations(const PhraseLocation& phrase_location,
+ bool has_margin_x) const;
+
+ PhraseLocation ExtendPrefixPhraseLocation(PhraseLocation& prefix_location,
+ const Phrase& phrase,
+ bool prefix_ends_with_x,
+ int next_symbol) const;
+
+ PhraseLocation ExtendSuffixPhraseLocation(PhraseLocation& suffix_location,
+ const Phrase& phrase,
+ bool suffix_starts_with_x,
+ int prev_symbol) const;
+
+ void ExtendPhraseLocation(PhraseLocation& location) const;
+
+ pair<int, int> GetSearchRange(bool has_marginal_x) const;
+
+ shared_ptr<SuffixArray> suffix_array;
+ shared_ptr<Vocabulary> vocabulary;
+ int max_rule_span;
+ int min_gap_size;
+ Index collocations;
+};
+
+#endif