summary | refs | log | tree | commit | diff
path: root/utils/corpus_tools.cc
diff options
context:
space:
mode:
author: Chris Dyer <cdyer@cs.cmu.edu> 2012-03-01 23:09:41 -0500
committer: Chris Dyer <cdyer@cs.cmu.edu> 2012-03-01 23:09:41 -0500
commit: 6db288215f1741ef1d5f29ea2a84d37e6929b683 (patch)
tree: 77145daed952f12b8c9a50d5803e31aaf8170ea4 /utils/corpus_tools.cc
parent: 113e71899b2d9d8737e9480165e41624e2a96a25 (diff)
parent: 61a1c37fb1357b85286c9aca6790042f8ca38f5f (diff)
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'utils/corpus_tools.cc')
-rw-r--r-- utils/corpus_tools.cc 16
1 files changed, 10 insertions, 6 deletions
diff --git a/utils/corpus_tools.cc b/utils/corpus_tools.cc
index a0542b6e..d17785af 100644
--- a/utils/corpus_tools.cc
+++ b/utils/corpus_tools.cc
@@ -33,10 +33,12 @@ void CorpusTools::ReadFromFile(const string& filename,
while(getline(in, line)) {
const bool skip = (lc % size != rank);
++lc;
- if (skip) continue;
TD::ConvertSentence(line, &tmp);
- src->push_back(vector<WordID>());
- vector<WordID>* d = &src->back();
+ vector<WordID>* d = NULL;
+ if (!skip) {
+ src->push_back(vector<WordID>());
+ d = &src->back();
+ }
set<WordID>* v = src_vocab;
int s = 0;
for (unsigned i = 0; i < tmp.size(); ++i) {
@@ -44,11 +46,13 @@ void CorpusTools::ReadFromFile(const string& filename,
++s;
if (s > 1) { cerr << "Unexpected format in line " << lc << ": " << line << endl; abort(); }
assert(trg);
- trg->push_back(vector<WordID>());
- d = &trg->back();
+ if (!skip) {
+ trg->push_back(vector<WordID>());
+ d = &trg->back();
+ }
v = trg_vocab;
} else {
- d->push_back(tmp[i]);
+ if (d) d->push_back(tmp[i]);
if (v) v->insert(tmp[i]);
}
}