summary | refs | log | tree | commit | diff
path: root/utils
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2012-03-01 23:09:41 -0500
committer Chris Dyer <cdyer@cs.cmu.edu> 2012-03-01 23:09:41 -0500
commit 0885db8a3c07927bd1220bb1c639a0fe8687d6f6 (patch)
tree 3e174cf6bfbc97da6f35e7351a0cdbfa800e969e /utils
parent 378ba2373374015c8de4b360a30227f072616c6c (diff)
parent e1a0c140e9f31461ab45ec7f9533ad98d2b9caa9 (diff)
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'utils')
-rw-r--r-- utils/corpus_tools.cc 16
1 files changed, 10 insertions, 6 deletions
diff --git a/utils/corpus_tools.cc b/utils/corpus_tools.cc
index a0542b6e..d17785af 100644
--- a/utils/corpus_tools.cc
+++ b/utils/corpus_tools.cc
@@ -33,10 +33,12 @@ void CorpusTools::ReadFromFile(const string& filename,
while(getline(in, line)) {
const bool skip = (lc % size != rank);
++lc;
- if (skip) continue;
TD::ConvertSentence(line, &tmp);
- src->push_back(vector<WordID>());
- vector<WordID>* d = &src->back();
+ vector<WordID>* d = NULL;
+ if (!skip) {
+ src->push_back(vector<WordID>());
+ d = &src->back();
+ }
set<WordID>* v = src_vocab;
int s = 0;
for (unsigned i = 0; i < tmp.size(); ++i) {
@@ -44,11 +46,13 @@ void CorpusTools::ReadFromFile(const string& filename,
++s;
if (s > 1) { cerr << "Unexpected format in line " << lc << ": " << line << endl; abort(); }
assert(trg);
- trg->push_back(vector<WordID>());
- d = &trg->back();
+ if (!skip) {
+ trg->push_back(vector<WordID>());
+ d = &trg->back();
+ }
v = trg_vocab;
} else {
- d->push_back(tmp[i]);
+ if (d) d->push_back(tmp[i]);
if (v) v->insert(tmp[i]);
}
}