summaryrefslogtreecommitdiff
path: root/extractor/sampler.h
blob: de450c4888127c8bf7a6c24e69b08b69f6a8a4ad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#ifndef _SAMPLER_H_
#define _SAMPLER_H_

#include <memory>
#include <unordered_set>

#include "data_array.h"

using namespace std;

namespace extractor {

class PhraseLocation;
class SuffixArray;

/**
 * Provides uniform sampling for a PhraseLocation.
 */
class Sampler {
 public:
  Sampler(shared_ptr<SuffixArray> suffix_array, int max_samples);

  virtual ~Sampler();

  // Samples uniformly at most max_samples phrase occurrences.
  virtual PhraseLocation Sample(const PhraseLocation& location, const unordered_set<int>& blacklisted_sentence_ids, const shared_ptr<DataArray> source_data_array) const;

 protected:
  Sampler();

 private:
  // Round floating point number to the nearest integer.
  int Round(double x) const;

  shared_ptr<SuffixArray> suffix_array;
  int max_samples;
};

} // namespace extractor

#endif