diff options
Diffstat (limited to 'training/dtrain/dtrain.h')
-rw-r--r-- | training/dtrain/dtrain.h | 100 |
1 files changed, 16 insertions, 84 deletions
diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h index e25c6f24..2b466930 100644 --- a/training/dtrain/dtrain.h +++ b/training/dtrain/dtrain.h @@ -15,7 +15,6 @@ #include "decoder.h" #include "ff_register.h" -#include "sampler.h" #include "sentence_metadata.h" #include "verbose.h" #include "viterbi.h" @@ -26,113 +25,46 @@ namespace po = boost::program_options; namespace dtrain { - -inline void register_and_convert(const vector<string>& strs, vector<WordID>& ids) -{ - vector<string>::const_iterator it; - for (it = strs.begin(); it < strs.end(); it++) - ids.push_back(TD::Convert(*it)); -} - -inline string gettmpf(const string path, const string infix) -{ - char fn[path.size() + infix.size() + 8]; - strcpy(fn, path.c_str()); - strcat(fn, "/"); - strcat(fn, infix.c_str()); - strcat(fn, "-XXXXXX"); - if (!mkstemp(fn)) { - cerr << "Cannot make temp file in" << path << " , exiting." << endl; - exit(1); - } - return string(fn); -} - typedef double score_t; struct ScoredHyp { vector<WordID> w; - SparseVector<double> f; - score_t model; - score_t score; + SparseVector<weight_t> f; + score_t model, score; unsigned rank; }; -struct LocalScorer +inline void +RegisterAndConvert(const vector<string>& strs, vector<WordID>& ids) { - unsigned N_; - vector<score_t> w_; - - virtual score_t - Score(const vector<WordID>& hyp, const vector<vector<WordID> >& ref, const unsigned rank, const unsigned src_len)=0; - - virtual void Reset() {} // only for ApproxBleuScorer, LinearBleuScorer - - inline void - Init(unsigned N, vector<score_t> weights) - { - assert(N > 0); - N_ = N; - if (weights.empty()) for (unsigned i = 0; i < N_; i++) w_.push_back(1./N_); - else w_ = weights; - } - - inline score_t - brevity_penalty(const unsigned hyp_len, const unsigned ref_len) - { - if (hyp_len > ref_len) return 1; - return exp(1 - (score_t)ref_len/hyp_len); - } -}; - -struct HypSampler : public DecoderObserver -{ - LocalScorer* scorer_; - vector<vector<WordID> >* refs_; - unsigned f_count_, sz_; - virtual vector<ScoredHyp>* GetSamples()=0; - inline void SetScorer(LocalScorer* scorer) { scorer_ = scorer; } - inline void SetRef(vector<vector<WordID> >& refs) { refs_ = &refs; } - inline unsigned get_f_count() { return f_count_; } - inline unsigned get_sz() { return sz_; } -}; + vector<string>::const_iterator it; + for (auto s: strs) + ids.push_back(TD::Convert(s)); +} -struct HSReporter +inline void +PrintWordIDVec(vector<WordID>& v, ostream& os=cerr) { - string task_id_; - - HSReporter(string task_id) : task_id_(task_id) {} - - inline void update_counter(string name, unsigned amount) { - cerr << "reporter:counter:" << task_id_ << "," << name << "," << amount << endl; - } - inline void update_gcounter(string name, unsigned amount) { - cerr << "reporter:counter:Global," << name << "," << amount << endl; + for (unsigned i = 0; i < v.size(); i++) { + os << TD::Convert(v[i]); + if (i < v.size()-1) os << " "; } -}; +} inline ostream& _np(ostream& out) { return out << resetiosflags(ios::showpos); } inline ostream& _p(ostream& out) { return out << setiosflags(ios::showpos); } inline ostream& _p2(ostream& out) { return out << setprecision(2); } inline ostream& _p5(ostream& out) { return out << setprecision(5); } -inline void printWordIDVec(vector<WordID>& v, ostream& os=cerr) -{ - for (unsigned i = 0; i < v.size(); i++) { - os << TD::Convert(v[i]); - if (i < v.size()-1) os << " "; - } -} - template<typename T> -inline T sign(T z) +inline T +sign(T z) { if (z == 0) return 0; return z < 0 ? -1 : +1; } - } // namespace #endif |