diff options
author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-16 01:57:08 +0000 |
---|---|---|
committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-16 01:57:08 +0000 |
commit | 538bc2149631e989e4806165632c5460c3514670 (patch) | |
tree | 85079e928800058790ec26abbb8a968126256c5d /decoder/sentences.h | |
parent | 1cfa8735f4cd7264e70cc6918bbd58c86a015ee4 (diff) |
oracle refactor, oracle vest directions, sparse_vector
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@280 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/sentences.h')
-rwxr-xr-x | decoder/sentences.h | 53 |
1 files changed, 53 insertions, 0 deletions
diff --git a/decoder/sentences.h b/decoder/sentences.h new file mode 100755 index 00000000..842072b9 --- /dev/null +++ b/decoder/sentences.h @@ -0,0 +1,53 @@ +#ifndef CDEC_SENTENCES_H +#define CDEC_SENTENCES_H + +#include <algorithm> +#include <vector> +#include <iostream> +#include "filelib.h" +#include "tdict.h" +#include "stringlib.h" +typedef std::vector<WordID> Sentence; + +inline void StringToSentence(std::string const& str,Sentence &s) { + using namespace std; + vector<string> ss=SplitOnWhitespace(str); + s.clear(); + transform(ss.begin(),ss.end(),back_inserter(s),ToTD()); +} + +inline Sentence StringToSentence(std::string const& str) { + Sentence s; + StringToSentence(str,s); + return s; +} + +inline std::istream& operator >> (std::istream &in,Sentence &s) { + using namespace std; + string str; + if (getline(in,str)) { + StringToSentence(str,s); + } + return in; +} + + +class Sentences : public std::vector<Sentence> { + typedef std::vector<Sentence> VS; +public: + Sentences() { } + Sentences(unsigned n,Sentence const& sentence) : VS(n,sentence) { } + Sentences(unsigned n,std::string const& sentence) : VS(n,StringToSentence(sentence)) { } + void Load(std::string file) { + ReadFile r(file); + Load(*r.stream()); + } + void Load(std::istream &in) { + this->push_back(Sentence()); + while(in>>this->back()) ; + this->pop_back(); + } +}; + + +#endif |