summaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
authorChris Dyer <prguest11@taipan.cs>2012-02-29 07:00:49 +0000
committerChris Dyer <prguest11@taipan.cs>2012-02-29 07:00:49 +0000
commita872f46ce1212703b8bed562c894ea1a932c0746 (patch)
tree3922f1eab5d7f81f87c163aed0868900052be900 /utils
parentcc01f3b7c9b87928be91e8a89f233a07a183ac2e (diff)
mpi fixes
Diffstat (limited to 'utils')
-rw-r--r--utils/corpus_tools.cc16
1 files changed, 10 insertions, 6 deletions
diff --git a/utils/corpus_tools.cc b/utils/corpus_tools.cc
index a0542b6e..d17785af 100644
--- a/utils/corpus_tools.cc
+++ b/utils/corpus_tools.cc
@@ -33,10 +33,12 @@ void CorpusTools::ReadFromFile(const string& filename,
while(getline(in, line)) {
const bool skip = (lc % size != rank);
++lc;
- if (skip) continue;
TD::ConvertSentence(line, &tmp);
- src->push_back(vector<WordID>());
- vector<WordID>* d = &src->back();
+ vector<WordID>* d = NULL;
+ if (!skip) {
+ src->push_back(vector<WordID>());
+ d = &src->back();
+ }
set<WordID>* v = src_vocab;
int s = 0;
for (unsigned i = 0; i < tmp.size(); ++i) {
@@ -44,11 +46,13 @@ void CorpusTools::ReadFromFile(const string& filename,
++s;
if (s > 1) { cerr << "Unexpected format in line " << lc << ": " << line << endl; abort(); }
assert(trg);
- trg->push_back(vector<WordID>());
- d = &trg->back();
+ if (!skip) {
+ trg->push_back(vector<WordID>());
+ d = &trg->back();
+ }
v = trg_vocab;
} else {
- d->push_back(tmp[i]);
+ if (d) d->push_back(tmp[i]);
if (v) v->insert(tmp[i]);
}
}