summaryrefslogtreecommitdiff
path: root/extractor/alignment.h
blob: dc5a8b558ed9d53445fba59b343cf9d673add434 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#ifndef _ALIGNMENT_H_
#define _ALIGNMENT_H_

#include <string>
#include <vector>

#include <boost/filesystem.hpp>
#include <boost/serialization/serialization.hpp>
#include <boost/serialization/split_member.hpp>
#include <boost/serialization/utility.hpp>
#include <boost/serialization/vector.hpp>

namespace fs = boost::filesystem;
using namespace std;

namespace extractor {

/**
 * Container for the word alignments of a parallel corpus. Alignments are
 * looked up by sentence index and exposed as lists of integer pairs.
 */
class Alignment {
 public:
  // Builds the alignment by reading the text file with the given name.
  Alignment(const std::string& filename);

  // Builds an alignment that holds no data.
  Alignment();

  // Fetches the alignment links belonging to the sentence at sentence_index.
  virtual std::vector<std::pair<int, int>> GetLinks(int sentence_index) const;

  virtual ~Alignment();

  // Equality comparison against another alignment.
  bool operator==(const Alignment& alignment) const;

 private:
  // Lets Boost.Serialization reach the private serialize() member below.
  friend class boost::serialization::access;

  // Boost.Serialization hook: hands the stored links to the archive. The
  // version argument is not used.
  template <typename Archive>
  void serialize(Archive& archive, const unsigned int /* version */) {
    archive & alignments;
  }

  // Alignment links as a list of lists of integer pairs (presumably one inner
  // list per sentence -- confirm against the implementation file).
  std::vector<std::vector<std::pair<int, int>>> alignments;
};

} // namespace extractor

#endif