summary | refs | log | tree | commit | diff
path: root/extractor/data_array.cc
diff options
context:
space:
mode:
author Paul Baltescu <pauldb89@gmail.com> 2013-02-14 23:17:15 +0000
committer Paul Baltescu <pauldb89@gmail.com> 2013-02-14 23:17:15 +0000
commit 63b30ed9c8510da8c8e2f6a456576424fddacc0e (patch)
tree 1b5278fb5a4480b7f7a965bb6de8f6f9e9c4d333 /extractor/data_array.cc
parent 0a53f7eca74c165b5ce1c238f1999ddf1febea55 (diff)
Working version of the grammar extractor.
Diffstat (limited to 'extractor/data_array.cc')
-rw-r--r-- extractor/data_array.cc 19
1 files changed, 14 insertions, 5 deletions
diff --git a/extractor/data_array.cc b/extractor/data_array.cc
index 383b08a7..1097caf3 100644
--- a/extractor/data_array.cc
+++ b/extractor/data_array.cc
@@ -10,9 +10,9 @@
namespace fs = boost::filesystem;
using namespace std;
-int DataArray::END_OF_FILE = 0;
+int DataArray::NULL_WORD = 0;  // Takes over the value 0 previously held by END_OF_FILE.
int DataArray::END_OF_LINE = 1;
-string DataArray::END_OF_FILE_STR = "__END_OF_FILE__";
+string DataArray::NULL_WORD_STR = "__NULL__";  // String counterpart of NULL_WORD; replaces END_OF_FILE_STR.
string DataArray::END_OF_LINE_STR = "__END_OF_LINE__";
DataArray::DataArray() {
@@ -47,9 +47,9 @@ DataArray::DataArray(const string& filename, const Side& side) {
}
void DataArray::InitializeDataArray() {
- word2id[END_OF_FILE_STR] = END_OF_FILE;
- id2word.push_back(END_OF_FILE_STR);
- word2id[END_OF_LINE_STR] = END_OF_FILE;
+ word2id[NULL_WORD_STR] = NULL_WORD;  // Map the null-word string to its id.
+ id2word.push_back(NULL_WORD_STR);  // Pushed before END_OF_LINE_STR; presumably id2word is empty here so positions match the ids -- confirm.
+ word2id[END_OF_LINE_STR] = END_OF_LINE;  // The removed line mapped this string to END_OF_FILE instead of END_OF_LINE.
id2word.push_back(END_OF_LINE_STR);
}
@@ -87,6 +87,10 @@ int DataArray::AtIndex(int index) const {
return data[index];
}
+string DataArray::GetWordAtIndex(int index) const {  // Returns the string from id2word for the value stored at data[index].
+ return id2word[data[index]];  // data[index] is used directly as an index into id2word; no range check is done here.
+}
+
int DataArray::GetSize() const {  // Returns the number of entries in data, as an int.
return data.size();
}
@@ -103,6 +107,11 @@ int DataArray::GetSentenceStart(int position) const {
return sentence_start[position];
}
+int DataArray::GetSentenceLength(int sentence_id) const {  // Returns the distance between two consecutive sentence_start entries, minus one.
+ // Ignore end of line markers.
+ return sentence_start[sentence_id + 1] - sentence_start[sentence_id] - 1;  // Reads sentence_start[sentence_id + 1]; assumes that entry exists for every valid id -- TODO confirm.
+}
+
int DataArray::GetSentenceId(int position) const {  // Returns the sentence_id entry stored for position.
return sentence_id[position];  // Direct index into sentence_id; no range check is done here.
}