#ifndef _SENTENCE_PAIR_H_ #define _SENTENCE_PAIR_H_ #include #include #include #include #include "wordid.h" #include "array2d.h" // represents a parallel sentence with a word alignment and category // annotations over subspans (currently in terms of f) // you should read one using ParseInputLine and then use the public // member variables to query things about it struct AnnotatedParallelSentence { // read annotated parallel sentence from string void ParseInputLine(const char* buf); std::vector f, e; // words in f and e // word alignment information std::vector e_aligned, f_aligned; // counts the number of times column/row x is aligned Array2D aligned; std::vector > > aligns_by_fword; // span type information std::map< boost::tuple, std::vector > span_types; // span_types(i,j,k,l) is the list of category span (i,j) in source and (k,l) in the target language. int f_len, e_len; void Align(const short a, const short b); void AllocateForAlignment(); static int ReadAlignmentPoint(const char* buf, int start, int end, bool permit_col, short* a, short* b, short* c, short* d); private: void Reset(); void ParseAlignmentPoint(const char* buf, int start, int end); void ParseSpanLabel(const char* buf, int start, int end); }; #endif