summaryrefslogtreecommitdiff
path: root/decoder/sentences.h
blob: 54b5ffb3a9cfdef4872bc78cb97e7bdb7ea49063 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#ifndef CDEC_SENTENCES_H
#define CDEC_SENTENCES_H

#include <algorithm>
#include <vector>
#include <iostream>
#include "filelib.h"
#include "tdict.h"
#include "stringlib.h"
#include <cstring>
// A sentence is stored as a vector of word ids.  WordID is not declared in this
// header; presumably it comes in through "tdict.h" -- confirm.
typedef std::vector<WordID> Sentence;

// these "iterators" are invalidated if s is modified.  note: this is allowed by std.
// Returns a read-only pointer to the first word of s, or a null pointer when s
// is empty.  The pointer is invalidated by any modification of s.
// Dereferencing s.begin() on an empty vector is undefined behaviour, so the
// empty case is handled explicitly instead of using &*s.begin().
inline WordID const* begin(Sentence const& s) {
  return s.empty() ? 0 : &s[0];
}
// Returns a read-only pointer one past the last word of s, or a null pointer
// when s is empty (so begin(s)==end(s) for an empty sentence).  The pointer is
// invalidated by any modification of s.
// s.end() must never be dereferenced, so the address is computed from the
// first element plus the size instead of using &*s.end().
inline WordID const* end(Sentence const& s) {
  return s.empty() ? 0 : (&s[0] + s.size());
}
// Returns a writable pointer to the first word of s, or a null pointer when s
// is empty.  The pointer is invalidated by any modification of s.
// Dereferencing s.begin() on an empty vector is undefined behaviour, so the
// empty case is handled explicitly instead of using &*s.begin().
inline WordID * begin(Sentence & s) {
  return s.empty() ? 0 : &s[0];
}
// Returns a pointer one past the last word of s, or a null pointer when s is
// empty (so begin(s)==end(s) for an empty sentence).  The pointer is
// invalidated by any modification of s.
// s.end() must never be dereferenced, so the address is computed from the
// first element plus the size instead of using &*s.end().
inline WordID * end(Sentence & s) {
  return s.empty() ? 0 : (&s[0] + s.size());
}
// Copies n words from src to dest.  The two ranges must not overlap (memcpy).
// A count of zero or less copies nothing: a negative n would otherwise turn
// into a huge unsigned byte count, and memcpy must not be handed the null
// pointers that an empty range may produce, even for a zero-length copy.
inline void wordcpy(WordID *dest,WordID const* src,int n) {
  if (n<=0)
    return;
  std::memcpy(dest,src,static_cast<std::size_t>(n)*sizeof(*dest));
}
// Range form of wordcpy: copies the words in [src,src_end) to dest by
// forwarding the element count to the (dest,src,n) overload.
inline void wordcpy(WordID *dest,WordID const* src,WordID const* src_end) {
  int const count=src_end-src;
  wordcpy(dest,src,count);
}
// Writes the words of [src,src_end) to dest in reverse order (last word
// first).  Returns the position in dest just past the last word written.
inline WordID *wordcpy_reverse(WordID *dest,WordID const* src,WordID const* src_end) {
  WordID *out=dest;
  WordID const* cursor=src_end;
  while (cursor>src) {
    --cursor;          // step back to the next unread source word
    *out=*cursor;
    ++out;
  }
  return out;
}
// Builds a sentence that contains exactly one word id, t.
inline Sentence singleton_sentence(WordID t) {
  Sentence one;
  one.push_back(t);
  return one;
}

// Builds a one-word sentence from s: s is mapped to an id with TD::Convert
// and that id is wrapped by the WordID overload of singleton_sentence.
inline Sentence singleton_sentence(std::string const& s) {
  WordID const id=TD::Convert(s);
  return singleton_sentence(id);
}


// Streams a sentence by writing the result of TD::GetString(s) to out.
// Returns out so that insertions can be chained.
inline std::ostream & operator<<(std::ostream &out,Sentence const& s) {
  out<<TD::GetString(s);
  return out;
}

inline void StringToSentence(std::string const& str,Sentence &s) {
  using namespace std;
  s.clear();
  TD::ConvertSentence(str,&s);
/*  vector<string> ss=SplitOnWhitespace(str);
  transform(ss.begin(),ss.end(),back_inserter(s),ToTD());
*/

}

// Value-returning convenience form: converts str with the two-argument
// StringToSentence and returns the resulting sentence.
inline Sentence StringToSentence(std::string const& str) {
  Sentence converted;
  StringToSentence(str,converted);
  return converted;
}

// Reads one line from in and converts it into s with StringToSentence.
// If no line could be read, s is left untouched.  Returns in, whose state
// tells the caller whether a line was read.
inline std::istream& operator >> (std::istream &in,Sentence &s) {
  std::string line;
  if (!std::getline(in,line))
    return in;
  StringToSentence(line,s);
  return in;
}


class Sentences : public std::vector<Sentence> {
  typedef std::vector<Sentence> VS;
public:
  Sentences() {  }
  Sentences(unsigned n,Sentence const& sentence) : VS(n,sentence) {  }
  Sentences(unsigned n,std::string const& sentence) : VS(n,StringToSentence(sentence)) {  }
  std::string filename;
  void Load(std::string file) {
    ReadFile r(file);
    Load(r.get(),file);
  }
  void Load(std::istream &in,std::string filen="-") {
    filename=filen;
    do {
      this->push_back(Sentence());
    } while(in>>this->back());
    this->pop_back();
  }
  void Print(std::ostream &out,int headn=0) const {
    out << "[" << size()<< " sentences from "<<filename<<"]";
    if (headn!=0) {
      int i=0,e=this->size();
      if (headn>0&&headn<e) {
        e=headn;
        out << " (first "<<headn<<")";
      }
      out << " :\n";
      for (;i<e;++i)
        out<<(*this)[i] << "\n";
    }
  }
  friend inline std::ostream& operator<<(std::ostream &out,Sentences const& s) {
    s.Print(out);
    return out;
  }
};


#endif