summary refs log tree commit diff
path: root/extractor/alignment.cc
diff options
context:
space:
mode:
author Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-04-23 19:35:18 -0400
committer Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-04-23 19:35:18 -0400
commit 6d347f1ce078dede3da0e1498f75e357351c6543 (patch)
tree 8e872b8747c530e741e55e25e9917c1bd8b32c5b /extractor/alignment.cc
parent d11b76def6899790161c47a73018146311356d8b (diff)
parent 5e9605b65202f4e5fc59843b197d88c4774f0ac8 (diff)
merge paul's extractor code
Diffstat (limited to 'extractor/alignment.cc')
-rw-r--r-- extractor/alignment.cc 53
1 files changed, 53 insertions, 0 deletions
diff --git a/extractor/alignment.cc b/extractor/alignment.cc
new file mode 100644
index 00000000..1aea34b3
--- /dev/null
+++ b/extractor/alignment.cc
@@ -0,0 +1,53 @@
+#include "alignment.h"
+
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <fcntl.h>
+#include <unistd.h>
+#include <vector>
+
+#include <boost/algorithm/string.hpp>
+#include <boost/filesystem.hpp>
+
+namespace fs = boost::filesystem;
+using namespace std;
+
+namespace extractor {
+
+// Loads word alignments from a text file, one sentence per line.
+//
+// Each line is a list of links written as "<int>-<int>" and separated by
+// spaces (e.g. "0-0 1-2 2-1"). Every line read, including a blank one, adds
+// exactly one entry to `alignments`, so entry i corresponds to line i of the
+// file. If the file cannot be opened, no entries are added.
+//
+// Tokens are converted with stoi, so a token that does not start with a
+// number raises std::invalid_argument (std::out_of_range if it does not fit
+// in an int).
+Alignment::Alignment(const string& filename) {
+  ifstream infile(filename.c_str());
+  string line;
+  while (getline(infile, line)) {
+    vector<string> items;
+    // Tabs and a trailing '\r' (CRLF files) are treated as separators too.
+    boost::split(items, line, boost::is_any_of(" -\t\r"));
+
+    // boost::split yields empty tokens for blank lines and for leading,
+    // trailing or repeated separators; skip them, since stoi("") throws.
+    vector<int> values;
+    values.reserve(items.size());
+    for (const string& item : items) {
+      if (!item.empty()) {
+        values.push_back(stoi(item));
+      }
+    }
+
+    // Build the sentence's links directly inside `alignments` to avoid
+    // copying a temporary vector.
+    alignments.emplace_back();
+    vector<pair<int, int> >& links = alignments.back();
+    links.reserve(values.size() / 2);
+    // `i + 1 < size` ensures a dangling, unpaired value is ignored instead
+    // of indexing past the end of the vector.
+    for (size_t i = 0; i + 1 < values.size(); i += 2) {
+      links.push_back(make_pair(values[i], values[i + 1]));
+    }
+  }
+  alignments.shrink_to_fit();
+}
+
+// Default constructor: the body is empty, so no file is read and
+// `alignments` is left default-constructed.
+Alignment::Alignment() {}
+
+// Destructor: performs no explicit cleanup; members are destroyed
+// automatically.
+Alignment::~Alignment() {}
+
+// Returns a copy of the links stored for the sentence at `sentence_index`.
+// The lookup uses plain operator[], so the index is not range-checked here
+// and must refer to an existing entry of `alignments`.
+vector<pair<int, int> > Alignment::GetLinks(int sentence_index) const {
+  const vector<pair<int, int> >& sentence_links = alignments[sentence_index];
+  return sentence_links;
+}
+
+// Serializes all alignments to `filepath` in a raw binary layout:
+//   int                      number of sentences
+//   then, for each sentence:
+//     int                    number of links
+//     pair<int, int>[count]  the links, copied as in-memory bytes
+//
+// Values are written exactly as they are represented in memory (sizeof(int),
+// byte order, pair layout), so the file is only readable by a compatible
+// build. If the file cannot be opened, an error is printed to stderr and
+// nothing is written.
+void Alignment::WriteBinary(const fs::path& filepath) {
+  const string path = filepath.string();
+  // "wb": binary mode, so no newline translation can alter the raw bytes.
+  FILE* file = fopen(path.c_str(), "wb");
+  if (file == nullptr) {
+    fprintf(stderr, "Alignment::WriteBinary: cannot open %s for writing\n",
+            path.c_str());
+    return;
+  }
+
+  int size = alignments.size();
+  fwrite(&size, sizeof(int), 1, file);
+  // Iterate by const reference; each element is only read.
+  for (const vector<pair<int, int> >& alignment : alignments) {
+    size = alignment.size();
+    fwrite(&size, sizeof(int), 1, file);
+    fwrite(alignment.data(), sizeof(pair<int, int>), size, file);
+  }
+
+  // Close the stream so buffered data is flushed and the handle is released.
+  fclose(file);
+}
+
+} // namespace extractor