summary | refs | log | tree | commit | diff
path: root/utils/corpus_tools.cc
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2012-06-18 20:28:42 -0400
committer Chris Dyer <cdyer@cs.cmu.edu> 2012-06-18 20:28:42 -0400
commit 67456f9f7af754750faeea6f1e66b14b910d8751 (patch)
tree d4c647f455e0a2b9fe102843fd0a060264867c44 /utils/corpus_tools.cc
parent c3fddf01ebfa8f523ab2d6bb2db5e2be1a929ee2 (diff)
add non-const iterators to sparse vector, speed up model1 code
Diffstat (limited to 'utils/corpus_tools.cc')
-rw-r--r-- utils/corpus_tools.cc 20
1 files changed, 20 insertions, 0 deletions
diff --git a/utils/corpus_tools.cc b/utils/corpus_tools.cc
index d17785af..191153a2 100644
--- a/utils/corpus_tools.cc
+++ b/utils/corpus_tools.cc
@@ -8,6 +8,26 @@
using namespace std;
+// Splits `line` at its first "|||" token: ids before it are written to *src,
+// ids after it (including any later "|||") to *trg. Both outputs are emptied
+// first; *trg stays empty when the line contains no "|||".
+void CorpusTools::ReadLine(const string& line,
+                           vector<WordID>* src,
+                           vector<WordID>* trg) {
+  static const WordID kSeparator = TD::Convert("|||");
+  // Scratch buffer is a function-local static, so every call shares it.
+  static vector<WordID> tokens;
+  src->clear();
+  trg->clear();
+  TD::ConvertSentence(line, &tokens);
+  // Advance to the first separator, or to the end when there is none.
+  vector<WordID>::const_iterator split = tokens.begin();
+  while (split != tokens.end() && *split != kSeparator) ++split;
+  src->insert(src->end(), tokens.begin(), split);
+  if (split == tokens.end()) return;
+  trg->insert(trg->end(), split + 1, tokens.end());
+}
+
void CorpusTools::ReadFromFile(const string& filename,
vector<vector<WordID> >* src,
set<WordID>* src_vocab,