diff options
| author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-06-18 20:28:42 -0400 | 
|---|---|---|
| committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-06-18 20:28:42 -0400 | 
| commit | 67456f9f7af754750faeea6f1e66b14b910d8751 (patch) | |
| tree | d4c647f455e0a2b9fe102843fd0a060264867c44 /utils/corpus_tools.cc | |
| parent | c3fddf01ebfa8f523ab2d6bb2db5e2be1a929ee2 (diff) | |
add non-const iterators to sparse vector, speed up model1 code
Diffstat (limited to 'utils/corpus_tools.cc')
| -rw-r--r-- | utils/corpus_tools.cc | 20 | 
1 files changed, 20 insertions, 0 deletions
| diff --git a/utils/corpus_tools.cc b/utils/corpus_tools.cc index d17785af..191153a2 100644 --- a/utils/corpus_tools.cc +++ b/utils/corpus_tools.cc @@ -8,6 +8,26 @@  using namespace std; +void CorpusTools::ReadLine(const string& line, +                           vector<WordID>* src, +                           vector<WordID>* trg) { +  static const WordID kDIV = TD::Convert("|||"); +  static vector<WordID> tmp; +  src->clear(); +  trg->clear(); +  TD::ConvertSentence(line, &tmp); +  unsigned i = 0; +  while(i < tmp.size() && tmp[i] != kDIV) { +    src->push_back(tmp[i]); +    ++i; +  } +  if (i < tmp.size() && tmp[i] == kDIV) { +    ++i; +    for (; i < tmp.size() ; ++i) +      trg->push_back(tmp[i]); +  } +} +  void CorpusTools::ReadFromFile(const string& filename,                             vector<vector<WordID> >* src,                             set<WordID>* src_vocab, | 
