summaryrefslogtreecommitdiff
path: root/extractor/sampler.h
diff options
context:
space:
mode:
author Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-04-23 19:35:18 -0400
committer Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-04-23 19:35:18 -0400
commit c164dc0ed8a32e4095ba1b36495e0f743b8cc1ea (patch)
tree 78b81e4c63adfa67adb7b8f80c3e6be87b4a2b2a /extractor/sampler.h
parent 0e46089cafa4e8e2f060e370d7afaceeda6b90a9 (diff)
parent d467e14b28085809c31431be0478eb3d9322fe96 (diff)
merge paul's extractor code
Diffstat (limited to 'extractor/sampler.h')
-rw-r--r-- extractor/sampler.h 38
1 files changed, 38 insertions, 0 deletions
diff --git a/extractor/sampler.h b/extractor/sampler.h
new file mode 100644
index 00000000..be4aa1bb
--- /dev/null
+++ b/extractor/sampler.h
@@ -0,0 +1,38 @@
+#ifndef _SAMPLER_H_
+#define _SAMPLER_H_
+
+#include <memory>
+
+using namespace std;  // NOTE(review): leaks std:: into every includer; the unqualified shared_ptr below relies on it.
+
+namespace extractor {
+
+class PhraseLocation;  // Forward declaration; Sampler::Sample takes and returns this type.
+class SuffixArray;  // Forward declaration; Sampler holds it through a shared_ptr.
+
+/**
+ * Provides uniform sampling for a PhraseLocation; Sample() and the destructor are virtual.
+ */
+class Sampler {
+ public:
+ Sampler(shared_ptr<SuffixArray> suffix_array, int max_samples);  // max_samples caps Sample().
+
+ virtual ~Sampler();  // Virtual: Sampler is a polymorphic base (Sample() is virtual).
+
+ // Samples uniformly at most max_samples phrase occurrences from location; returns them by value.
+ virtual PhraseLocation Sample(const PhraseLocation& location) const;
+
+ protected:
+ Sampler();  // Protected default constructor, reachable from subclasses (presumably mocks - confirm).
+
+ private:
+ // Rounds a floating point number to the nearest integer.
+ int Round(double x) const;
+
+ shared_ptr<SuffixArray> suffix_array;  // Shared handle to the suffix array passed at construction.
+ int max_samples;  // Upper bound on the number of occurrences Sample() returns.
+};
+
+} // namespace extractor
+
+#endif