From 0a53f7eca74c165b5ce1c238f1999ddf1febea55 Mon Sep 17 00:00:00 2001
From: Paul Baltescu
Date: Fri, 1 Feb 2013 16:11:10 +0000
Subject: Second working commit.

---
 extractor/precomputation.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'extractor/precomputation.h')

diff --git a/extractor/precomputation.h b/extractor/precomputation.h
index 0d1b269f..428505d8 100644
--- a/extractor/precomputation.h
+++ b/extractor/precomputation.h
@@ -16,8 +16,8 @@ using namespace tr1;
 
 class SuffixArray;
 
-typedef boost::hash > vector_hash;
-typedef unordered_map, vector, vector_hash> Index;
+typedef boost::hash > VectorHash;
+typedef unordered_map, vector, VectorHash> Index;
 
 class Precomputation {
  public:
@@ -27,20 +27,25 @@ class Precomputation {
       int max_rule_symbols, int min_gap_size,
       int max_frequent_phrase_len, int min_frequency);
 
+  virtual ~Precomputation();
+
   void WriteBinary(const fs::path& filepath) const;
 
-  const Index& GetInvertedIndex() const;
-  const Index& GetCollocations() const;
+  virtual const Index& GetInvertedIndex() const;
+  virtual const Index& GetCollocations() const;
 
   static int NON_TERMINAL;
 
+ protected:
+  Precomputation();
+
  private:
   vector > FindMostFrequentPatterns(
       shared_ptr suffix_array, const vector& data,
       int num_frequent_patterns, int max_frequent_phrase_len,
       int min_frequency);
   void AddCollocations(
-      const vector >& matchings, const vector& data,
+      const vector >& matchings, const vector& data,
       int max_rule_span, int min_gap_size, int max_rule_symbols);
   void AddStartPositions(vector& positions, int pos1, int pos2);
   void AddStartPositions(vector& positions, int pos1, int pos2, int pos3);
-- 
cgit v1.2.3