summaryrefslogtreecommitdiff
path: root/utils/corpus_tools.h
blob: f6699d87e6a86919046b62603284d5392fe140fe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#ifndef _CORPUS_TOOLS_H_
#define _CORPUS_TOOLS_H_

#include <cstddef>
#include <set>
#include <string>
#include <vector>

#include "wordid.h"

// Static helper functions for loading corpus text as sequences of WordID.
//
// Only the declarations live in this header. The behavioural notes below are
// derived from the signatures alone and are marked where they are assumptions
// that should be confirmed against the implementation file.
struct CorpusTools {
  // Parses a single line of text and stores the result in `src` and `trg`.
  //
  //   line - the text to parse.
  //   src  - output vector of WordIDs.
  //   trg  - output vector of WordIDs.
  //
  // NOTE(review): presumably `line` holds a source/target sentence pair --
  // confirm the expected separator, whether the output vectors are cleared
  // before being filled, and whether either pointer may be NULL.
  static void ReadLine(const std::string& line,
                       std::vector<WordID>* src,
                       std::vector<WordID>* trg);

  // Reads the corpus stored in `filename`.
  //
  //   filename  - path of the file to read.
  //   src       - output; one inner vector of WordIDs per entry
  //               (presumably one per input line -- TODO confirm).
  //   src_vocab - optional output set of WordIDs; defaults to NULL.
  //   trg       - optional output, same layout as `src`; defaults to NULL.
  //   trg_vocab - optional output set of WordIDs; defaults to NULL.
  //   rank      - defaults to 0.
  //   size      - defaults to 1.
  //
  // NOTE(review): presumably a NULL optional output means "do not collect
  // this" -- verify in the implementation.
  // NOTE(review): `rank`/`size` look like a shard index and shard count for
  // splitting the file across workers (the defaults would then mean "read
  // everything") -- confirm before relying on this.
  static void ReadFromFile(const std::string& filename,
                           std::vector<std::vector<WordID> >* src,
                           std::set<WordID>* src_vocab = NULL,
                           std::vector<std::vector<WordID> >* trg = NULL,
                           std::set<WordID>* trg_vocab = NULL,
                           int rank = 0,
                           int size = 1);
};

#endif