diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-06-18 20:28:42 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-06-18 20:28:42 -0400 |
commit | 67456f9f7af754750faeea6f1e66b14b910d8751 (patch) | |
tree | d4c647f455e0a2b9fe102843fd0a060264867c44 /utils/corpus_tools.cc | |
parent | c3fddf01ebfa8f523ab2d6bb2db5e2be1a929ee2 (diff) |
add non-const iterators to sparse vector, speed up model1 code
Diffstat (limited to 'utils/corpus_tools.cc')
-rw-r--r-- | utils/corpus_tools.cc | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/utils/corpus_tools.cc b/utils/corpus_tools.cc
index d17785af..191153a2 100644
--- a/utils/corpus_tools.cc
+++ b/utils/corpus_tools.cc
@@ -8,6 +8,26 @@
 
 using namespace std;
 
+void CorpusTools::ReadLine(const string& line,    // in: text that is converted to word IDs and split at the first "|||" ID
+                           vector<WordID>* src,   // out: cleared, then filled with the IDs that precede the first "|||"
+                           vector<WordID>* trg) { // out: cleared, then filled with the IDs that follow the first "|||" (left empty if there is none)
+  static const WordID kDIV = TD::Convert("|||");  // ID of the "|||" separator; function-local static, so it is initialized only once
+  static vector<WordID> tmp;  // scratch buffer that persists across calls; NOTE(review): shared static state, concurrent calls would touch the same vector - confirm callers are single-threaded
+  src->clear();
+  trg->clear();
+  TD::ConvertSentence(line, &tmp);  // presumably replaces tmp's contents with the IDs of line's tokens - TODO confirm ConvertSentence clears its output
+  unsigned i = 0;
+  while(i < tmp.size() && tmp[i] != kDIV) {  // copy everything up to, but not including, the first separator
+    src->push_back(tmp[i]);
+    ++i;
+  }
+  if (i < tmp.size() && tmp[i] == kDIV) {  // the scan stopped on a separator rather than at the end of tmp
+    ++i;  // step over the separator itself
+    for (; i < tmp.size() ; ++i)  // every remaining ID goes to trg, including any later "|||" IDs
+      trg->push_back(tmp[i]);
+  }
+}
+
 void CorpusTools::ReadFromFile(const string& filename,
                                vector<vector<WordID> >* src,
                                set<WordID>* src_vocab, |