1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
#ifndef _SENTENCE_PAIR_H_
#define _SENTENCE_PAIR_H_
#include <utility>
#include <vector>
#include "wordid.h"
#include "array2d.h"
// represents a parallel sentence with a word alignment and category
// annotations over subspans (currently in terms of f)
// you should read one using ParseInputLine and then use the public
// member variables to query things about it
struct AnnotatedParallelSentence {
// read annotated parallel sentence from string
void ParseInputLine(const char* buf);
std::vector<WordID> f, e; // words in f and e
// word alignment information
std::vector<int> e_aligned, f_aligned; // counts the number of times column/row x is aligned
Array2D<bool> aligned;
std::vector<std::vector<std::pair<short, short> > > aligns_by_fword;
// span type information
Array2D<std::vector<WordID> > span_types; // span_types(i,j) is the list of category
// types for a span (i,j) in the TARGET language.
int f_len, e_len;
static int ReadAlignmentPoint(const char* buf, int start, int end, bool permit_col, short* a, short* b);
private:
void Reset();
void AllocateForAlignment();
void ParseAlignmentPoint(const char* buf, int start, int end);
void ParseSpanLabel(const char* buf, int start, int end);
};
#endif
|