summary | refs | log | tree | commit | diff
path: root/training/dtrain/dtrain.h
diff options
context:
space:
mode:
Diffstat (limited to 'training/dtrain/dtrain.h')
-rw-r--r-- training/dtrain/dtrain.h 100
1 files changed, 16 insertions, 84 deletions
diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h
index e25c6f24..2b466930 100644
--- a/training/dtrain/dtrain.h
+++ b/training/dtrain/dtrain.h
@@ -15,7 +15,6 @@
#include "decoder.h"
#include "ff_register.h"
-#include "sampler.h"
#include "sentence_metadata.h"
#include "verbose.h"
#include "viterbi.h"
@@ -26,113 +25,46 @@ namespace po = boost::program_options;
namespace dtrain
{
-
-inline void register_and_convert(const vector<string>& strs, vector<WordID>& ids)
-{
- vector<string>::const_iterator it;
- for (it = strs.begin(); it < strs.end(); it++)
- ids.push_back(TD::Convert(*it));
-}
-
-inline string gettmpf(const string path, const string infix)
-{
- char fn[path.size() + infix.size() + 8];
- strcpy(fn, path.c_str());
- strcat(fn, "/");
- strcat(fn, infix.c_str());
- strcat(fn, "-XXXXXX");
- if (!mkstemp(fn)) {
- cerr << "Cannot make temp file in" << path << " , exiting." << endl;
- exit(1);
- }
- return string(fn);
-}
-
typedef double score_t;
struct ScoredHyp
{
vector<WordID> w;
- SparseVector<double> f;
- score_t model;
- score_t score;
+ SparseVector<weight_t> f;
+ score_t model, score;
unsigned rank;
};
-struct LocalScorer
+inline void
+RegisterAndConvert(const vector<string>& strs, vector<WordID>& ids)
{
- unsigned N_;
- vector<score_t> w_;
-
- virtual score_t
- Score(const vector<WordID>& hyp, const vector<vector<WordID> >& ref, const unsigned rank, const unsigned src_len)=0;
-
- virtual void Reset() {} // only for ApproxBleuScorer, LinearBleuScorer
-
- inline void
- Init(unsigned N, vector<score_t> weights)
- {
- assert(N > 0);
- N_ = N;
- if (weights.empty()) for (unsigned i = 0; i < N_; i++) w_.push_back(1./N_);
- else w_ = weights;
- }
-
- inline score_t
- brevity_penalty(const unsigned hyp_len, const unsigned ref_len)
- {
- if (hyp_len > ref_len) return 1;
- return exp(1 - (score_t)ref_len/hyp_len);
- }
-};
-
-struct HypSampler : public DecoderObserver
-{
- LocalScorer* scorer_;
- vector<vector<WordID> >* refs_;
- unsigned f_count_, sz_;
- virtual vector<ScoredHyp>* GetSamples()=0;
- inline void SetScorer(LocalScorer* scorer) { scorer_ = scorer; }
- inline void SetRef(vector<vector<WordID> >& refs) { refs_ = &refs; }
- inline unsigned get_f_count() { return f_count_; }
- inline unsigned get_sz() { return sz_; }
-};
+ // Appends TD::Convert(s) for each s in strs; existing contents of ids are kept.
+ for (const auto& s: strs)
+ ids.push_back(TD::Convert(s));
+}
-struct HSReporter
+inline void
+PrintWordIDVec(const vector<WordID>& v, ostream& os=cerr)
{
- string task_id_;
-
- HSReporter(string task_id) : task_id_(task_id) {}
-
- inline void update_counter(string name, unsigned amount) {
- cerr << "reporter:counter:" << task_id_ << "," << name << "," << amount << endl;
- }
- inline void update_gcounter(string name, unsigned amount) {
- cerr << "reporter:counter:Global," << name << "," << amount << endl;
+ for (unsigned i = 0; i < v.size(); i++) {
+ os << TD::Convert(v[i]);
+ if (i < v.size()-1) os << " ";
}
-};
+}
inline ostream& _np(ostream& out) { return out << resetiosflags(ios::showpos); }
inline ostream& _p(ostream& out) { return out << setiosflags(ios::showpos); }
inline ostream& _p2(ostream& out) { return out << setprecision(2); }
inline ostream& _p5(ostream& out) { return out << setprecision(5); }
-inline void printWordIDVec(vector<WordID>& v, ostream& os=cerr)
-{
- for (unsigned i = 0; i < v.size(); i++) {
- os << TD::Convert(v[i]);
- if (i < v.size()-1) os << " ";
- }
-}
-
template<typename T>
-inline T sign(T z)
+inline T
+sign(T z)
{
if (z == 0) return 0;
return z < 0 ? -1 : +1;
}
-
} // namespace
#endif