diff options
author | Patrick Simianer <p@simianer.de> | 2012-03-13 09:24:47 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2012-03-13 09:24:47 +0100 |
commit | ef6085e558e26c8819f1735425761103021b6470 (patch) | |
tree | 5cf70e4c48c64d838e1326b5a505c8c4061bff4a /gi/pf/corpus.cc | |
parent | 10a232656a0c882b3b955d2bcfac138ce11e8a2e (diff) | |
parent | dfbc278c1057555fda9312291c8024049e00b7d8 (diff) |
merge with upstream
Diffstat (limited to 'gi/pf/corpus.cc')
-rw-r--r-- | gi/pf/corpus.cc | 15 |
1 files changed, 10 insertions, 5 deletions
diff --git a/gi/pf/corpus.cc b/gi/pf/corpus.cc index a408e7cf..cb6e4ed7 100644 --- a/gi/pf/corpus.cc +++ b/gi/pf/corpus.cc @@ -24,11 +24,11 @@ void ReadParallelCorpus(const string& filename, istream* in = rf.stream(); assert(*in); string line; + unsigned lc = 0; const WordID kDIV = TD::Convert("|||"); vector<WordID> tmp; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; + while(getline(*in, line)) { + ++lc; e->push_back(vector<int>()); f->push_back(vector<int>()); vector<int>& le = e->back(); @@ -39,12 +39,17 @@ void ReadParallelCorpus(const string& filename, for (unsigned i = 0; i < tmp.size(); ++i) { const int cur = tmp[i]; if (isf) { - if (kDIV == cur) { isf = false; } else { + if (kDIV == cur) { + isf = false; + } else { lf.push_back(cur); vocab_f->insert(cur); } } else { - assert(cur != kDIV); + if (cur == kDIV) { + cerr << "ERROR in " << lc << ": " << line << endl << endl; + abort(); + } le.push_back(cur); vocab_e->insert(cur); } |