summaryrefslogtreecommitdiff
path: root/extractor/alignment.cc
diff options
context:
space:
mode:
Diffstat (limited to 'extractor/alignment.cc')
-rw-r--r--extractor/alignment.cc47
1 files changed, 47 insertions, 0 deletions
diff --git a/extractor/alignment.cc b/extractor/alignment.cc
new file mode 100644
index 00000000..cad28a72
--- /dev/null
+++ b/extractor/alignment.cc
@@ -0,0 +1,47 @@
+#include "alignment.h"
+
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <fcntl.h>
+#include <unistd.h>
+#include <vector>
+
+#include <boost/algorithm/string.hpp>
+#include <boost/filesystem.hpp>
+
+namespace fs = boost::filesystem;
+using namespace std;
+
+Alignment::Alignment(const string& filename) {
+ ifstream infile(filename.c_str());
+ string line;
+ while (getline(infile, line)) {
+ vector<string> items;
+ boost::split(items, line, boost::is_any_of(" -"));
+ vector<pair<int, int> > alignment;
+ alignment.reserve(items.size() / 2);
+ for (size_t i = 0; i < items.size(); i += 2) {
+ alignment.push_back(make_pair(stoi(items[i]), stoi(items[i + 1])));
+ }
+ alignments.push_back(alignment);
+ }
+ // Note: shrink_to_fit does nothing for vector<vector<string> > on g++ 4.6.3,
+ // but let's hope that the bug will be fixed in a newer version.
+ alignments.shrink_to_fit();
+}
+
+vector<pair<int, int> > Alignment::GetLinks(int sentence_index) const {
+ return alignments[sentence_index];
+}
+
+void Alignment::WriteBinary(const fs::path& filepath) {
+ FILE* file = fopen(filepath.string().c_str(), "w");
+ int size = alignments.size();
+ fwrite(&size, sizeof(int), 1, file);
+ for (vector<pair<int, int> > alignment: alignments) {
+ size = alignment.size();
+ fwrite(&size, sizeof(int), 1, file);
+ fwrite(alignment.data(), sizeof(pair<int, int>), size, file);
+ }
+}