diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-04-23 19:35:18 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-04-23 19:35:18 -0400 |
commit | c164dc0ed8a32e4095ba1b36495e0f743b8cc1ea (patch) | |
tree | 78b81e4c63adfa67adb7b8f80c3e6be87b4a2b2a /extractor/sampler.h | |
parent | 0e46089cafa4e8e2f060e370d7afaceeda6b90a9 (diff) | |
parent | d467e14b28085809c31431be0478eb3d9322fe96 (diff) |
merge paul's extractor code
Diffstat (limited to 'extractor/sampler.h')
-rw-r--r-- | extractor/sampler.h | 38 |
1 files changed, 38 insertions, 0 deletions
// extractor/sampler.h
#ifndef EXTRACTOR_SAMPLER_H_
#define EXTRACTOR_SAMPLER_H_

#include <memory>

// NOTE(review): a using-directive at header scope leaks into every includer.
// It is retained here only because other translation units may rely on it;
// the declarations below no longer depend on it.
using namespace std;

namespace extractor {

class PhraseLocation;
class SuffixArray;

/**
 * Provides uniform sampling for a PhraseLocation.
 *
 * Both Sample() and the destructor are virtual, so the class can be used as a
 * polymorphic base.
 */
class Sampler {
 public:
  /**
   * Stores the suffix array handle and the sampling cap.
   *
   * @param suffix_array shared handle to the suffix array.
   * @param max_samples  upper bound on the number of phrase occurrences
   *                     returned by Sample().
   */
  Sampler(std::shared_ptr<SuffixArray> suffix_array, int max_samples);

  virtual ~Sampler();

  /**
   * Samples uniformly at most max_samples phrase occurrences.
   *
   * @param location the phrase location to sample from.
   * @return the sampled phrase location.
   */
  virtual PhraseLocation Sample(const PhraseLocation& location) const;

 protected:
  // Default constructor reachable only from derived classes.
  // NOTE(review): presumably intended for subclasses such as test doubles;
  // confirm against the implementation file.
  Sampler();

 private:
  // Round floating point number to the nearest integer.
  int Round(double x) const;

  std::shared_ptr<SuffixArray> suffix_array;
  int max_samples;
};

} // namespace extractor

#endif  // EXTRACTOR_SAMPLER_H_