diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-04-23 19:35:18 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-04-23 19:35:18 -0400 |
commit | 6d347f1ce078dede3da0e1498f75e357351c6543 (patch) | |
tree | 8e872b8747c530e741e55e25e9917c1bd8b32c5b /extractor/alignment.cc | |
parent | d11b76def6899790161c47a73018146311356d8b (diff) | |
parent | 5e9605b65202f4e5fc59843b197d88c4774f0ac8 (diff) |
merge paul's extractor code
Diffstat (limited to 'extractor/alignment.cc')
-rw-r--r-- | extractor/alignment.cc | 53 |
1 files changed, 53 insertions, 0 deletions
diff --git a/extractor/alignment.cc b/extractor/alignment.cc new file mode 100644 index 00000000..1aea34b3 --- /dev/null +++ b/extractor/alignment.cc @@ -0,0 +1,53 @@ +#include "alignment.h" + +#include <fstream> +#include <sstream> +#include <string> +#include <fcntl.h> +#include <unistd.h> +#include <vector> + +#include <boost/algorithm/string.hpp> +#include <boost/filesystem.hpp> + +namespace fs = boost::filesystem; +using namespace std; + +namespace extractor { + +Alignment::Alignment(const string& filename) { + ifstream infile(filename.c_str()); + string line; + while (getline(infile, line)) { + vector<string> items; + boost::split(items, line, boost::is_any_of(" -")); + vector<pair<int, int> > alignment; + alignment.reserve(items.size() / 2); + for (size_t i = 0; i < items.size(); i += 2) { + alignment.push_back(make_pair(stoi(items[i]), stoi(items[i + 1]))); + } + alignments.push_back(alignment); + } + alignments.shrink_to_fit(); +} + +Alignment::Alignment() {} + +Alignment::~Alignment() {} + +vector<pair<int, int> > Alignment::GetLinks(int sentence_index) const { + return alignments[sentence_index]; +} + +void Alignment::WriteBinary(const fs::path& filepath) { + FILE* file = fopen(filepath.string().c_str(), "w"); + int size = alignments.size(); + fwrite(&size, sizeof(int), 1, file); + for (vector<pair<int, int> > alignment: alignments) { + size = alignment.size(); + fwrite(&size, sizeof(int), 1, file); + fwrite(alignment.data(), sizeof(pair<int, int>), size, file); + } +} + +} // namespace extractor |