diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-02-14 23:17:15 +0000 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-02-14 23:17:15 +0000 |
commit | 9a026ba2db8fa7723374109e6a4a8dcaff8733cd (patch) | |
tree | 34a60703a53ada76e7213da5940e86d6f476f1e4 /extractor/data_array.cc | |
parent | 252fb164c208ec8f3005f8a652eb3b48c0644e3d (diff) |
Working version of the grammar extractor.
Diffstat (limited to 'extractor/data_array.cc')
-rw-r--r-- | extractor/data_array.cc | 19 |
1 files changed, 14 insertions, 5 deletions
diff --git a/extractor/data_array.cc b/extractor/data_array.cc index 383b08a7..1097caf3 100644 --- a/extractor/data_array.cc +++ b/extractor/data_array.cc @@ -10,9 +10,9 @@ namespace fs = boost::filesystem; using namespace std; -int DataArray::END_OF_FILE = 0; +int DataArray::NULL_WORD = 0; int DataArray::END_OF_LINE = 1; -string DataArray::END_OF_FILE_STR = "__END_OF_FILE__"; +string DataArray::NULL_WORD_STR = "__NULL__"; string DataArray::END_OF_LINE_STR = "__END_OF_LINE__"; DataArray::DataArray() { @@ -47,9 +47,9 @@ DataArray::DataArray(const string& filename, const Side& side) { } void DataArray::InitializeDataArray() { - word2id[END_OF_FILE_STR] = END_OF_FILE; - id2word.push_back(END_OF_FILE_STR); - word2id[END_OF_LINE_STR] = END_OF_FILE; + word2id[NULL_WORD_STR] = NULL_WORD; + id2word.push_back(NULL_WORD_STR); + word2id[END_OF_LINE_STR] = END_OF_LINE; id2word.push_back(END_OF_LINE_STR); } @@ -87,6 +87,10 @@ int DataArray::AtIndex(int index) const { return data[index]; } +string DataArray::GetWordAtIndex(int index) const { + return id2word[data[index]]; +} + int DataArray::GetSize() const { return data.size(); } @@ -103,6 +107,11 @@ int DataArray::GetSentenceStart(int position) const { return sentence_start[position]; } +int DataArray::GetSentenceLength(int sentence_id) const { + // Ignore end of line markers. + return sentence_start[sentence_id + 1] - sentence_start[sentence_id] - 1; +} + int DataArray::GetSentenceId(int position) const { return sentence_id[position]; } |