From 0172721855098ca02b207231a654dffa5e4eb1c9 Mon Sep 17 00:00:00 2001 From: redpony Date: Tue, 22 Jun 2010 05:12:27 +0000 Subject: initial checkin git-svn-id: https://ws10smt.googlecode.com/svn/trunk@2 ec762483-ff6d-05da-a07a-a48fb63a330f --- extools/sentence_pair.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 extools/sentence_pair.h (limited to 'extools/sentence_pair.h') diff --git a/extools/sentence_pair.h b/extools/sentence_pair.h new file mode 100644 index 00000000..d78be359 --- /dev/null +++ b/extools/sentence_pair.h @@ -0,0 +1,39 @@ +#ifndef _SENTENCE_PAIR_H_ +#define _SENTENCE_PAIR_H_ + +#include +#include +#include "wordid.h" +#include "array2d.h" + +// represents a parallel sentence with a word alignment and category +// annotations over subspans (currently in terms of f) +// you should read one using ParseInputLine and then use the public +// member variables to query things about it +struct AnnotatedParallelSentence { + // read annotated parallel sentence from string + void ParseInputLine(const char* buf); + + std::vector f, e; // words in f and e + + // word alignment information + std::vector e_aligned, f_aligned; // counts the number of times column/row x is aligned + Array2D aligned; + std::vector > > aligns_by_fword; + + // span type information + Array2D > span_types; // span_types(i,j) is the list of category + // types for a span (i,j) in the TARGET language. + + int f_len, e_len; + + static int ReadAlignmentPoint(const char* buf, int start, int end, bool permit_col, short* a, short* b); + + private: + void Reset(); + void AllocateForAlignment(); + void ParseAlignmentPoint(const char* buf, int start, int end); + void ParseSpanLabel(const char* buf, int start, int end); +}; + +#endif -- cgit v1.2.3