diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-06-22 05:12:27 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-06-22 05:12:27 +0000 |
commit | 0172721855098ca02b207231a654dffa5e4eb1c9 (patch) | |
tree | 8069c3a62e2d72bd64a2cdeee9724b2679c8a56b /extools/sentence_pair.h | |
parent | 37728b8be4d0b3df9da81fdda2198ff55b4b2d91 (diff) |
initial checkin
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@2 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'extools/sentence_pair.h')
-rw-r--r-- | extools/sentence_pair.h | 39 |
1 files changed, 39 insertions, 0 deletions
diff --git a/extools/sentence_pair.h b/extools/sentence_pair.h new file mode 100644 index 00000000..d78be359 --- /dev/null +++ b/extools/sentence_pair.h @@ -0,0 +1,39 @@ +#ifndef _SENTENCE_PAIR_H_ +#define _SENTENCE_PAIR_H_ + +#include <utility> +#include <vector> +#include "wordid.h" +#include "array2d.h" + +// represents a parallel sentence with a word alignment and category +// annotations over subspans (currently in terms of f) +// you should read one using ParseInputLine and then use the public +// member variables to query things about it +struct AnnotatedParallelSentence { + // read annotated parallel sentence from string + void ParseInputLine(const char* buf); + + std::vector<WordID> f, e; // words in f and e + + // word alignment information + std::vector<int> e_aligned, f_aligned; // counts the number of times column/row x is aligned + Array2D<bool> aligned; + std::vector<std::vector<std::pair<short, short> > > aligns_by_fword; + + // span type information + Array2D<std::vector<WordID> > span_types; // span_types(i,j) is the list of category + // types for a span (i,j) in the TARGET language. + + int f_len, e_len; + + static int ReadAlignmentPoint(const char* buf, int start, int end, bool permit_col, short* a, short* b); + + private: + void Reset(); + void AllocateForAlignment(); + void ParseAlignmentPoint(const char* buf, int start, int end); + void ParseSpanLabel(const char* buf, int start, int end); +}; + +#endif |