summaryrefslogtreecommitdiff
path: root/gi/pf/corpus.cc
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2012-10-22 12:07:20 +0100
committerKenneth Heafield <github@kheafield.com>2012-10-22 12:07:20 +0100
commitac586bc9b156b4ae687cd5961ba1fe7b20ec57d6 (patch)
tree052473b46d7fa18d51f897cdb9e7c93a7186dafd /gi/pf/corpus.cc
parent97b85c082b3e55c28a8b0c0eb762483ac84a1577 (diff)
parentad6d4a1b2519896f2b16a282699ce4e64041fab8 (diff)
Merge remote branch 'upstream/master'
Conflicts: Jamroot bjam decoder/Jamfile decoder/cdec.cc dpmert/Jamfile jam-files/sanity.jam klm/lm/Jamfile klm/util/Jamfile mira/Jamfile
Diffstat (limited to 'gi/pf/corpus.cc')
-rw-r--r--gi/pf/corpus.cc62
1 files changed, 0 insertions, 62 deletions
diff --git a/gi/pf/corpus.cc b/gi/pf/corpus.cc
deleted file mode 100644
index cb6e4ed7..00000000
--- a/gi/pf/corpus.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-#include "corpus.h"
-
-#include <set>
-#include <vector>
-#include <string>
-
-#include "tdict.h"
-#include "filelib.h"
-
-using namespace std;
-
-namespace corpus {
-
-void ReadParallelCorpus(const string& filename,
- vector<vector<WordID> >* f,
- vector<vector<WordID> >* e,
- set<WordID>* vocab_f,
- set<WordID>* vocab_e) {
- f->clear();
- e->clear();
- vocab_f->clear();
- vocab_e->clear();
- ReadFile rf(filename);
- istream* in = rf.stream();
- assert(*in);
- string line;
- unsigned lc = 0;
- const WordID kDIV = TD::Convert("|||");
- vector<WordID> tmp;
- while(getline(*in, line)) {
- ++lc;
- e->push_back(vector<int>());
- f->push_back(vector<int>());
- vector<int>& le = e->back();
- vector<int>& lf = f->back();
- tmp.clear();
- TD::ConvertSentence(line, &tmp);
- bool isf = true;
- for (unsigned i = 0; i < tmp.size(); ++i) {
- const int cur = tmp[i];
- if (isf) {
- if (kDIV == cur) {
- isf = false;
- } else {
- lf.push_back(cur);
- vocab_f->insert(cur);
- }
- } else {
- if (cur == kDIV) {
- cerr << "ERROR in " << lc << ": " << line << endl << endl;
- abort();
- }
- le.push_back(cur);
- vocab_e->insert(cur);
- }
- }
- assert(isf == false);
- }
-}
-
-}
-