From b7568a8dad2720d5ea0418171e9b85229adbbcc5 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 31 Jul 2011 19:24:02 +0200 Subject: bugfixing, begin refactoring --- dtrain/learner.h | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 dtrain/learner.h (limited to 'dtrain/learner.h') diff --git a/dtrain/learner.h b/dtrain/learner.h new file mode 100644 index 00000000..a953284d --- /dev/null +++ b/dtrain/learner.h @@ -0,0 +1,71 @@ +/*class Learnerx +{ + public: + virtual void Init(const vector >& kbest, const Scores& scores) {}; + virtual void Update(SparseVector& lambdas); +};*/ + +class SofiaLearner //: public Learnerx FIXME +{ + // TODO bool invert_score + public: + void + Init( const size_t sid, const vector >& kbest, /*const*/ Scores& scores ) + { + assert( kbest.size() == scores.size() ); + ofstream o; + //unlink( "/tmp/sofia_ml_training_stupid" ); + o.open( "/tmp/sofia_ml_training_normalx", ios::trunc ); // TODO randomize, filename exists + int fid = 0; + map::iterator ff; + + for ( size_t k = 0; k < kbest.size(); ++k ) { + map m; + SparseVector::const_iterator it = kbest[k].begin(); + o << scores[k].GetScore(); + for ( ; it != kbest[k].end(); ++it) { + ff = fmap.find( it->first ); + if ( ff == fmap.end() ) { + fmap.insert( pair(it->first, fid) ); + fmap1.insert( pair(fid, it->first) ); + fid++; + } + m.insert(pair(fmap[it->first], it->second)); + } + map::iterator ti = m.begin(); + for ( ; ti != m.end(); ++ti ) { + o << " " << ti->first << ":" << ti->second; + } + o << endl; + } + o.close(); + } + + void + Update(SparseVector& lambdas) + { + string call = "./sofia-ml --training_file /tmp/sofia_ml_training_normalx --model_out /tmp/sofia_ml_model_normalx --loop_type stochastic --lambda 100 --dimensionality "; + std::stringstream out; + out << fmap.size(); + call += out.str(); + call += " &>/dev/null"; + system ( call.c_str() ); + ifstream i; + //unlink( "/tmp/sofia_ml_model_stupid" ); + i.open( "/tmp/sofia_ml_model_normalx", ios::in ); + string model; + getline( i, model ); + vector strs; + boost::split( strs, model, boost::is_any_of(" ") ); + int j = 0; + for ( vector::iterator it = strs.begin(); it != strs.end(); ++it ) { + lambdas.set_value(fmap1[j], atof( it->c_str() ) ); + j++; + } + } + + private: + map fmap; + map fmap1; +}; + -- cgit v1.2.3