From 1b8181bf0d6e9137e6b9ccdbe414aec37377a1a9 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 18 Nov 2012 13:35:42 -0500 Subject: major restructure of the training code --- training/dtrain/dtrain.h | 97 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 training/dtrain/dtrain.h (limited to 'training/dtrain/dtrain.h') diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h new file mode 100644 index 00000000..4b6f415c --- /dev/null +++ b/training/dtrain/dtrain.h @@ -0,0 +1,97 @@ +#ifndef _DTRAIN_H_ +#define _DTRAIN_H_ + +#undef DTRAIN_FASTER_PERCEPTRON // only look at misranked pairs + // DO NOT USE WITH SVM! +//#define DTRAIN_LOCAL +#define DTRAIN_DOTS 10 // after how many inputs to display a '.' +#define DTRAIN_GRAMMAR_DELIM "########EOS########" +#define DTRAIN_SCALE 100000 + + +#include +#include +#include + +#include +#include + +#include "ksampler.h" +#include "pairsampling.h" + +#include "filelib.h" + + +using namespace std; +using namespace dtrain; +namespace po = boost::program_options; + +inline void register_and_convert(const vector& strs, vector& ids) +{ + vector::const_iterator it; + for (it = strs.begin(); it < strs.end(); it++) + ids.push_back(TD::Convert(*it)); +} + +inline string gettmpf(const string path, const string infix) +{ + char fn[path.size() + infix.size() + 8]; + strcpy(fn, path.c_str()); + strcat(fn, "/"); + strcat(fn, infix.c_str()); + strcat(fn, "-XXXXXX"); + if (!mkstemp(fn)) { + cerr << "Cannot make temp file in" << path << " , exiting." << endl; + exit(1); + } + return string(fn); +} + +inline void split_in(string& s, vector& parts) +{ + unsigned f = 0; + for(unsigned i = 0; i < 3; i++) { + unsigned e = f; + f = s.find("\t", f+1); + if (e != 0) parts.push_back(s.substr(e+1, f-e-1)); + else parts.push_back(s.substr(0, f)); + } + s.erase(0, f+1); +} + +struct HSReporter +{ + string task_id_; + + HSReporter(string task_id) : task_id_(task_id) {} + + inline void update_counter(string name, unsigned amount) { + cerr << "reporter:counter:" << task_id_ << "," << name << "," << amount << endl; + } + inline void update_gcounter(string name, unsigned amount) { + cerr << "reporter:counter:Global," << name << "," << amount << endl; + } +}; + +inline ostream& _np(ostream& out) { return out << resetiosflags(ios::showpos); } +inline ostream& _p(ostream& out) { return out << setiosflags(ios::showpos); } +inline ostream& _p2(ostream& out) { return out << setprecision(2); } +inline ostream& _p5(ostream& out) { return out << setprecision(5); } + +inline void printWordIDVec(vector& v) +{ + for (unsigned i = 0; i < v.size(); i++) { + cerr << TD::Convert(v[i]); + if (i < v.size()-1) cerr << " "; + } +} + +template +inline T sign(T z) +{ + if (z == 0) return 0; + return z < 0 ? -1 : +1; +} + +#endif + -- cgit v1.2.3