summaryrefslogtreecommitdiff
path: root/extools/sentence_pair.h
diff options
context:
space:
mode:
Diffstat (limited to 'extools/sentence_pair.h')
-rw-r--r--extools/sentence_pair.h39
1 files changed, 39 insertions, 0 deletions
diff --git a/extools/sentence_pair.h b/extools/sentence_pair.h
new file mode 100644
index 00000000..d78be359
--- /dev/null
+++ b/extools/sentence_pair.h
@@ -0,0 +1,39 @@
+#ifndef _SENTENCE_PAIR_H_
+#define _SENTENCE_PAIR_H_
+
+#include <utility>
+#include <vector>
+#include "wordid.h"
+#include "array2d.h"
+
+// represents a parallel sentence with a word alignment and category
+// annotations over subspans (currently in terms of f)
+// you should read one using ParseInputLine and then use the public
+// member variables to query things about it
+struct AnnotatedParallelSentence {
+ // read annotated parallel sentence from string
+ void ParseInputLine(const char* buf);
+
+ std::vector<WordID> f, e; // words in f and e
+
+ // word alignment information
+ std::vector<int> e_aligned, f_aligned; // counts the number of times column/row x is aligned
+ Array2D<bool> aligned;
+ std::vector<std::vector<std::pair<short, short> > > aligns_by_fword;
+
+ // span type information
+ Array2D<std::vector<WordID> > span_types; // span_types(i,j) is the list of category
+ // types for a span (i,j) in the TARGET language.
+
+ int f_len, e_len;
+
+ static int ReadAlignmentPoint(const char* buf, int start, int end, bool permit_col, short* a, short* b);
+
+ private:
+ void Reset();
+ void AllocateForAlignment();
+ void ParseAlignmentPoint(const char* buf, int start, int end);
+ void ParseSpanLabel(const char* buf, int start, int end);
+};
+
+#endif