diff options
| author | Kenneth Heafield <github@kheafield.com> | 2012-10-22 12:07:20 +0100 | 
|---|---|---|
| committer | Kenneth Heafield <github@kheafield.com> | 2012-10-22 12:07:20 +0100 | 
| commit | ac586bc9b156b4ae687cd5961ba1fe7b20ec57d6 (patch) | |
| tree | 052473b46d7fa18d51f897cdb9e7c93a7186dafd /gi/pf/corpus.cc | |
| parent | 97b85c082b3e55c28a8b0c0eb762483ac84a1577 (diff) | |
| parent | ad6d4a1b2519896f2b16a282699ce4e64041fab8 (diff) | |
Merge remote branch 'upstream/master'
Conflicts:
	Jamroot
	bjam
	decoder/Jamfile
	decoder/cdec.cc
	dpmert/Jamfile
	jam-files/sanity.jam
	klm/lm/Jamfile
	klm/util/Jamfile
	mira/Jamfile
Diffstat (limited to 'gi/pf/corpus.cc')
| -rw-r--r-- | gi/pf/corpus.cc | 62 | 
1 files changed, 0 insertions, 62 deletions
```diff
diff --git a/gi/pf/corpus.cc b/gi/pf/corpus.cc
deleted file mode 100644
index cb6e4ed7..00000000
--- a/gi/pf/corpus.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-#include "corpus.h"
-
-#include <set>
-#include <vector>
-#include <string>
-
-#include "tdict.h"
-#include "filelib.h"
-
-using namespace std;
-
-namespace corpus {
-
-void ReadParallelCorpus(const string& filename,
-                vector<vector<WordID> >* f,
-                vector<vector<WordID> >* e,
-                set<WordID>* vocab_f,
-                set<WordID>* vocab_e) {
-  f->clear();
-  e->clear();
-  vocab_f->clear();
-  vocab_e->clear();
-  ReadFile rf(filename);
-  istream* in = rf.stream();
-  assert(*in);
-  string line;
-  unsigned lc = 0;
-  const WordID kDIV = TD::Convert("|||");
-  vector<WordID> tmp;
-  while(getline(*in, line)) {
-    ++lc;
-    e->push_back(vector<int>());
-    f->push_back(vector<int>());
-    vector<int>& le = e->back();
-    vector<int>& lf = f->back();
-    tmp.clear();
-    TD::ConvertSentence(line, &tmp);
-    bool isf = true;
-    for (unsigned i = 0; i < tmp.size(); ++i) {
-      const int cur = tmp[i];
-      if (isf) {
-        if (kDIV == cur) {
-          isf = false;
-        } else {
-          lf.push_back(cur);
-          vocab_f->insert(cur);
-        }
-      } else {
-        if (cur == kDIV) {
-          cerr << "ERROR in " << lc << ": " << line << endl << endl;
-          abort();
-        }
-        le.push_back(cur);
-        vocab_e->insert(cur);
-      }
-    }
-    assert(isf == false);
-  }
-}
-
-}
-
```
