summaryrefslogtreecommitdiff
path: root/extractor/alignment.h
diff options
context:
space:
mode:
authorChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2013-04-23 19:35:18 -0400
committerChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2013-04-23 19:35:18 -0400
commit6d347f1ce078dede3da0e1498f75e357351c6543 (patch)
tree8e872b8747c530e741e55e25e9917c1bd8b32c5b /extractor/alignment.h
parentd11b76def6899790161c47a73018146311356d8b (diff)
parent5e9605b65202f4e5fc59843b197d88c4774f0ac8 (diff)
merge paul's extractor code
Diffstat (limited to 'extractor/alignment.h')
-rw-r--r--extractor/alignment.h39
1 files changed, 39 insertions, 0 deletions
diff --git a/extractor/alignment.h b/extractor/alignment.h
new file mode 100644
index 00000000..e9292121
--- /dev/null
+++ b/extractor/alignment.h
@@ -0,0 +1,39 @@
+#ifndef _ALIGNMENT_H_
+#define _ALIGNMENT_H_
+
+#include <string>
+#include <vector>
+
+#include <boost/filesystem.hpp>
+
+namespace fs = boost::filesystem;
+using namespace std;
+
+namespace extractor {
+
+/**
+ * Stores the word alignments of a parallel corpus, exposed per sentence as lists of (int, int) links.
+ */
+class Alignment {
+ public:
+ // Reads the alignment from the text file named by filename (the parsing is defined outside this header).
+ Alignment(const string& filename);
+
+ // Returns, by value, the links of sentence_index (pairs presumably hold aligned word positions; confirm).
+ virtual vector<pair<int, int> > GetLinks(int sentence_index) const;
+
+ // Writes the alignment to filepath in binary format; declared non-const, unlike GetLinks.
+ void WriteBinary(const fs::path& filepath);
+
+ virtual ~Alignment();  // Virtual: the class declares a virtual method and may be used as a base.
+
+ protected:
+ Alignment();  // Not part of the public interface; available to derived classes.
+
+ private:
+ vector<vector<pair<int, int> > > alignments;  // Presumably one inner vector per sentence; TODO confirm.
+};
+
+} // namespace extractor
+
+#endif