#ifndef _PRECOMPUTATION_H_ #define _PRECOMPUTATION_H_ #include #include #include #include #include #include #include namespace fs = boost::filesystem; using namespace std; namespace extractor { typedef boost::hash > VectorHash; typedef unordered_map, vector, VectorHash> Index; class SuffixArray; class Precomputation { public: Precomputation( shared_ptr suffix_array, int num_frequent_patterns, int num_super_frequent_patterns, int max_rule_span, int max_rule_symbols, int min_gap_size, int max_frequent_phrase_len, int min_frequency); virtual ~Precomputation(); void WriteBinary(const fs::path& filepath) const; virtual const Index& GetCollocations() const; static int NON_TERMINAL; protected: Precomputation(); private: vector > FindMostFrequentPatterns( shared_ptr suffix_array, const vector& data, int num_frequent_patterns, int max_frequent_phrase_len, int min_frequency); void AddCollocations( const vector >& matchings, const vector& data, int max_rule_span, int min_gap_size, int max_rule_symbols); void AddStartPositions(vector& positions, int pos1, int pos2); void AddStartPositions(vector& positions, int pos1, int pos2, int pos3); Index collocations; }; } // namespace extractor #endif