1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
#ifndef CDEC_SENTENCES_H
#define CDEC_SENTENCES_H
#include <algorithm>
#include <vector>
#include <iostream>
#include "filelib.h"
#include "tdict.h"
#include "stringlib.h"
#include <cstring>
// A sentence is stored as a std::vector of WordID values.
typedef std::vector<WordID> Sentence;
// These "iterators" are raw pointers into s and are invalidated if s is modified. Note: pointing into a std::vector like this is allowed by the standard (its storage is contiguous).
// Returns a read-only pointer to the first word of s, for use as an iterator.
// Uses data() instead of &*s.begin(): when s is empty, begin() equals end()
// and dereferencing it is undefined behavior, whereas data() may always be
// called. For an empty s the result must not be dereferenced.
// The pointer may be invalidated if s is modified.
inline WordID const* begin(Sentence const& s) {
  return s.data();
}
// Returns a read-only pointer one past the last word of s.
// Computed as data()+size() instead of &*s.end(): dereferencing the
// past-the-end iterator is undefined behavior even when the address is
// taken immediately. The result must never be dereferenced.
// The pointer may be invalidated if s is modified.
inline WordID const* end(Sentence const& s) {
  return s.data() + s.size();
}
// Returns a mutable pointer to the first word of s, for use as an iterator.
// Uses data() instead of &*s.begin(): when s is empty, begin() equals end()
// and dereferencing it is undefined behavior, whereas data() may always be
// called. For an empty s the result must not be dereferenced.
// The pointer may be invalidated if s is modified.
inline WordID * begin(Sentence & s) {
  return s.data();
}
// Returns a mutable pointer one past the last word of s.
// Computed as data()+size() instead of &*s.end(): dereferencing the
// past-the-end iterator is undefined behavior even when the address is
// taken immediately. The result must never be dereferenced.
// The pointer may be invalidated if s is modified.
inline WordID * end(Sentence & s) {
  return s.data() + s.size();
}
// Copies n words from src to dest using memcpy, so the two ranges must not
// overlap.
// A non-positive n copies nothing. Without this guard a negative n would be
// converted to a huge unsigned byte count, and memcpy would be invoked for
// n==0 even when the pointers come from an empty sentence (memcpy requires
// valid pointers regardless of the count).
inline void wordcpy(WordID *dest,WordID const* src,int n) {
  if (n <= 0)
    return;
  std::memcpy(dest, src, n * sizeof(*dest));
}
// Range form of wordcpy: copies the words in [src, src_end) to dest by
// forwarding the element count to the (dest, src, n) overload.
inline void wordcpy(WordID *dest,WordID const* src,WordID const* src_end) {
  int const count = src_end - src;  // pointer distance, narrowed to int as the callee expects
  wordcpy(dest, src, count);
}
// Writes the words of [src, src_end) to dest in reverse order (last source
// word first) and returns the position just past the last word written.
// If src_end is not greater than src nothing is written and dest is returned.
inline WordID *wordcpy_reverse(WordID *dest,WordID const* src,WordID const* src_end) {
  WordID const* cursor = src_end;
  while (cursor > src) {
    --cursor;          // step back to the next unread source word
    *dest = *cursor;
    ++dest;
  }
  return dest;
}
// Builds a sentence that contains exactly one word, t.
inline Sentence singleton_sentence(WordID t) {
  Sentence one_word(1, t);
  return one_word;
}
// Builds a one-word sentence from text: s is mapped to a word id with
// TD::Convert and the result is wrapped by the WordID overload.
inline Sentence singleton_sentence(std::string const& s) {
  WordID const id = TD::Convert(s);
  return singleton_sentence(id);
}
// Streams a sentence by writing whatever TD::GetString(s) produces for it.
// Returns out so that insertions can be chained.
inline std::ostream & operator<<(std::ostream &out,Sentence const& s) {
  out << TD::GetString(s);
  return out;
}
inline void StringToSentence(std::string const& str,Sentence &s) {
using namespace std;
s.clear();
TD::ConvertSentence(str,&s);
/* vector<string> ss=SplitOnWhitespace(str);
transform(ss.begin(),ss.end(),back_inserter(s),ToTD());
*/
}
// Value-returning form: converts str into a newly created sentence.
// The sentence starts out empty, so it is passed straight to
// TD::ConvertSentence without a separate clear().
inline Sentence StringToSentence(std::string const& str) {
  Sentence converted;
  TD::ConvertSentence(str, &converted);
  return converted;
}
// Extracts one sentence per line: reads a single line from in and, if the
// read succeeds, replaces s with the conversion of that line.
// When the read fails s is left untouched. Returns in, whose state reflects
// whether a line was obtained.
inline std::istream& operator >> (std::istream &in,Sentence &s) {
  std::string line;
  if (!std::getline(in, line))
    return in;
  StringToSentence(line, s);
  return in;
}
class Sentences : public std::vector<Sentence> {
typedef std::vector<Sentence> VS;
public:
Sentences() { }
Sentences(unsigned n,Sentence const& sentence) : VS(n,sentence) { }
Sentences(unsigned n,std::string const& sentence) : VS(n,StringToSentence(sentence)) { }
std::string filename;
void Load(std::string file) {
ReadFile r(file);
Load(r.get(),file);
}
void Load(std::istream &in,std::string filen="-") {
filename=filen;
do {
this->push_back(Sentence());
} while(in>>this->back());
this->pop_back();
}
void Print(std::ostream &out,int headn=0) const {
out << "[" << size()<< " sentences from "<<filename<<"]";
if (headn!=0) {
int i=0,e=this->size();
if (headn>0&&headn<e) {
e=headn;
out << " (first "<<headn<<")";
}
out << " :\n";
for (;i<e;++i)
out<<(*this)[i] << "\n";
}
}
friend inline std::ostream& operator<<(std::ostream &out,Sentences const& s) {
s.Print(out);
return out;
}
};
#endif
|