summaryrefslogtreecommitdiff
path: root/extractor/alignment.cc
blob: 2fa0abac947a4c465452bbf947472383a49b9744 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#include "alignment.h"

#include <fcntl.h>
#include <unistd.h>

#include <cstdio>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/algorithm/string.hpp>
#include <boost/filesystem.hpp>

namespace fs = boost::filesystem;
using namespace std;

// Loads word alignments from the text file `filename`.
//
// Each line of the file describes one sentence as a sequence of links written
// as "<int>-<int>" and separated by whitespace (e.g. "0-0 1-2 2-1"). One entry
// is appended to `alignments` per input line, in file order, so an empty line
// yields an empty list of links and sentence indexes stay in sync with the
// line numbers of the file.
//
// If the file cannot be opened, no lines are read and no alignments are
// stored.
//
// Throws invalid_argument if a token is not an integer (raised by stoi) or if
// a line contains an unpaired index; stoi may also throw out_of_range.
Alignment::Alignment(const string& filename) {
  ifstream infile(filename.c_str());
  string line;
  while (getline(infile, line)) {
    // Tabs and carriage returns are treated as separators too, so files with
    // Windows line endings or tab-separated links parse cleanly.
    vector<string> items;
    boost::split(items, line, boost::is_any_of(" -\t\r"));

    // Empty tokens are produced by empty lines and by leading, trailing or
    // repeated separators; they carry no information and must not reach stoi.
    vector<int> values;
    values.reserve(items.size());
    for (const string& item: items) {
      if (!item.empty()) {
        values.push_back(stoi(item));
      }
    }

    // Every link needs two indexes; reading values[i + 1] for a dangling
    // index would run past the end of the vector.
    if (values.size() % 2 != 0) {
      throw invalid_argument("Malformed alignment line: " + line);
    }

    vector<pair<int, int> > alignment;
    alignment.reserve(values.size() / 2);
    for (size_t i = 0; i < values.size(); i += 2) {
      alignment.push_back(make_pair(values[i], values[i + 1]));
    }
    alignments.push_back(alignment);
  }
  // Note: shrink_to_fit was observed to do nothing for nested vectors on
  // g++ 4.6.3; the request is non-binding, so this is only a hint to release
  // the spare capacity left over from the repeated push_back calls.
  alignments.shrink_to_fit();
}

// Returns a read-only reference to the links stored for the sentence at
// position `sentence_index`. The index is used as given, without any bounds
// checking.
const vector<pair<int, int> >& Alignment::GetLinks(int sentence_index) const {
  const vector<pair<int, int> >& links = alignments[sentence_index];
  return links;
}

// Serializes all alignments to `filepath` in a raw binary layout:
//
//   int                number of sentences
//   for each sentence:
//     int              number of links
//     pair<int, int>[] the links, copied straight from memory
//
// Values are written with the in-memory representation of the machine running
// the code (sizeof(int), byte order, layout of pair<int, int>).
//
// Throws runtime_error if the file cannot be opened, or if writing or closing
// it fails.
void Alignment::WriteBinary(const fs::path& filepath) {
  // "wb" rather than "w": the payload is binary, and text mode would rewrite
  // newline bytes on platforms that distinguish the two modes.
  FILE* file = fopen(filepath.string().c_str(), "wb");
  if (file == NULL) {
    throw runtime_error("Cannot open " + filepath.string() + " for writing");
  }

  int size = alignments.size();
  bool ok = fwrite(&size, sizeof(int), 1, file) == 1;
  // Iterate by const reference so the inner vectors are not copied.
  for (const vector<pair<int, int> >& alignment: alignments) {
    if (!ok) {
      break;
    }
    size = alignment.size();
    ok = fwrite(&size, sizeof(int), 1, file) == 1 &&
         fwrite(alignment.data(), sizeof(pair<int, int>), size, file) ==
             static_cast<size_t>(size);
  }

  // Always close the stream, even after a failed write, so the handle is
  // released; fclose also flushes, so its result decides whether the data
  // actually reached the file.
  const bool closed = fclose(file) == 0;
  if (!ok || !closed) {
    throw runtime_error("Failed to write alignments to " + filepath.string());
  }
}