From 5530575ae0ad939e17f08d6bd49978acea388ab7 Mon Sep 17 00:00:00 2001 From: Paul Baltescu Date: Mon, 28 Jan 2013 11:56:31 +0000 Subject: Initial working commit. --- extractor/alignment.cc | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 extractor/alignment.cc (limited to 'extractor/alignment.cc') diff --git a/extractor/alignment.cc b/extractor/alignment.cc new file mode 100644 index 00000000..cad28a72 --- /dev/null +++ b/extractor/alignment.cc @@ -0,0 +1,47 @@ +#include "alignment.h" + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace fs = boost::filesystem; +using namespace std; + +Alignment::Alignment(const string& filename) { + ifstream infile(filename.c_str()); + string line; + while (getline(infile, line)) { + vector items; + boost::split(items, line, boost::is_any_of(" -")); + vector > alignment; + alignment.reserve(items.size() / 2); + for (size_t i = 0; i < items.size(); i += 2) { + alignment.push_back(make_pair(stoi(items[i]), stoi(items[i + 1]))); + } + alignments.push_back(alignment); + } + // Note: shrink_to_fit does nothing for vector > on g++ 4.6.3, + // but let's hope that the bug will be fixed in a newer version. + alignments.shrink_to_fit(); +} + +vector > Alignment::GetLinks(int sentence_index) const { + return alignments[sentence_index]; +} + +void Alignment::WriteBinary(const fs::path& filepath) { + FILE* file = fopen(filepath.string().c_str(), "w"); + int size = alignments.size(); + fwrite(&size, sizeof(int), 1, file); + for (vector > alignment: alignments) { + size = alignment.size(); + fwrite(&size, sizeof(int), 1, file); + fwrite(alignment.data(), sizeof(pair), size, file); + } +} -- cgit v1.2.3