diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-03-01 23:09:41 -0500 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-03-01 23:09:41 -0500 |
commit | 6db288215f1741ef1d5f29ea2a84d37e6929b683 (patch) | |
tree | 77145daed952f12b8c9a50d5803e31aaf8170ea4 /utils | |
parent | 113e71899b2d9d8737e9480165e41624e2a96a25 (diff) | |
parent | 61a1c37fb1357b85286c9aca6790042f8ca38f5f (diff) |
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'utils')
-rw-r--r-- | utils/corpus_tools.cc | 16 |
1 file changed, 10 insertions, 6 deletions
```diff
diff --git a/utils/corpus_tools.cc b/utils/corpus_tools.cc
index a0542b6e..d17785af 100644
--- a/utils/corpus_tools.cc
+++ b/utils/corpus_tools.cc
@@ -33,10 +33,12 @@ void CorpusTools::ReadFromFile(const string& filename,
   while(getline(in, line)) {
     const bool skip = (lc % size != rank);
     ++lc;
-    if (skip) continue;
     TD::ConvertSentence(line, &tmp);
-    src->push_back(vector<WordID>());
-    vector<WordID>* d = &src->back();
+    vector<WordID>* d = NULL;
+    if (!skip) {
+      src->push_back(vector<WordID>());
+      d = &src->back();
+    }
     set<WordID>* v = src_vocab;
     int s = 0;
     for (unsigned i = 0; i < tmp.size(); ++i) {
@@ -44,11 +46,13 @@ void CorpusTools::ReadFromFile(const string& filename,
         ++s;
         if (s > 1) { cerr << "Unexpected format in line " << lc << ": " << line << endl; abort(); }
         assert(trg);
-        trg->push_back(vector<WordID>());
-        d = &trg->back();
+        if (!skip) {
+          trg->push_back(vector<WordID>());
+          d = &trg->back();
+        }
         v = trg_vocab;
       } else {
-        d->push_back(tmp[i]);
+        if (d) d->push_back(tmp[i]);
         if (v) v->insert(tmp[i]);
       }
     }
```