From a6e6a369f40d8fb6a191fd7f74fc5efa8bfae2a0 Mon Sep 17 00:00:00 2001 From: Paul Baltescu Date: Wed, 27 Nov 2013 14:33:36 +0000 Subject: Unify sampling backoff strategy. --- extractor/suffix_array_sampler.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 extractor/suffix_array_sampler.h (limited to 'extractor/suffix_array_sampler.h') diff --git a/extractor/suffix_array_sampler.h b/extractor/suffix_array_sampler.h new file mode 100644 index 00000000..bb3c2653 --- /dev/null +++ b/extractor/suffix_array_sampler.h @@ -0,0 +1,34 @@ +#ifndef _SUFFIX_ARRAY_SAMPLER_H_ +#define _SUFFIX_ARRAY_SAMPLER_H_ + +#include "backoff_sampler.h" + +namespace extractor { + +class SuffixArray; + +class SuffixArrayRangeSampler : public BackoffSampler { + public: + SuffixArrayRangeSampler(shared_ptr suffix_array, + int max_samples); + + SuffixArrayRangeSampler(); + + private: + int GetNumSubpatterns(const PhraseLocation& location) const; + + int GetRangeLow(const PhraseLocation& location) const; + + int GetRangeHigh(const PhraseLocation& location) const; + + int GetPosition(const PhraseLocation& location, int index) const; + + void AppendMatching(vector& samples, int index, + const PhraseLocation& location) const; + + shared_ptr source_suffix_array; +}; + +} // namespace extractor + +#endif -- cgit v1.2.3