diff options
| author | Patrick Simianer <p@simianer.de> | 2011-07-31 19:24:02 +0200 | 
|---|---|---|
| committer | Patrick Simianer <p@simianer.de> | 2011-09-23 19:13:57 +0200 | 
| commit | b7568a8dad2720d5ea0418171e9b85229adbbcc5 (patch) | |
| tree | 06cc0f72e35e062fe0841f055f2d7f5e5626d407 /dtrain/learner.h | |
| parent | b732e625ffcf59da8440db577183110488f5c4b7 (diff) | |
bugfixing, begin refactoring
Diffstat (limited to 'dtrain/learner.h')
| -rw-r--r-- | dtrain/learner.h | 71 | 
1 files changed, 71 insertions, 0 deletions
| diff --git a/dtrain/learner.h b/dtrain/learner.h new file mode 100644 index 00000000..a953284d --- /dev/null +++ b/dtrain/learner.h @@ -0,0 +1,71 @@ +/*class Learnerx +{ +  public: +    virtual void Init(const vector<SparseVector<double> >& kbest, const Scores& scores) {}; +    virtual void Update(SparseVector<double>& lambdas); +};*/ + +class SofiaLearner //: public Learnerx FIXME +{ +  // TODO bool invert_score +  public: +  void +  Init( const size_t sid, const vector<SparseVector<double> >& kbest, /*const*/ Scores& scores ) +  { +    assert( kbest.size() == scores.size() ); +    ofstream o; +    //unlink( "/tmp/sofia_ml_training_stupid" ); +    o.open( "/tmp/sofia_ml_training_normalx", ios::trunc ); // TODO randomize, filename exists +    int fid = 0; +    map<int,int>::iterator ff; + +    for ( size_t k = 0; k < kbest.size(); ++k ) { +      map<int,double> m; +      SparseVector<double>::const_iterator it = kbest[k].begin(); +      o << scores[k].GetScore(); +      for ( ; it != kbest[k].end(); ++it) { +        ff = fmap.find( it->first ); +        if ( ff == fmap.end() ) { +          fmap.insert( pair<int,int>(it->first, fid) ); +          fmap1.insert( pair<int,int>(fid, it->first) ); +          fid++; +        } +        m.insert(pair<int,double>(fmap[it->first], it->second)); +      } +      map<int,double>::iterator ti = m.begin(); +      for ( ; ti != m.end(); ++ti ) { +        o << " " << ti->first << ":" << ti->second; +      } +      o << endl; +    } +    o.close(); +  } + +  void +  Update(SparseVector<double>& lambdas) +  { +    string call = "./sofia-ml --training_file /tmp/sofia_ml_training_normalx --model_out /tmp/sofia_ml_model_normalx --loop_type stochastic --lambda 100 --dimensionality "; +    std::stringstream out; +    out << fmap.size(); +    call += out.str(); +    call += " &>/dev/null"; +    system ( call.c_str() ); +    ifstream i; +    //unlink( "/tmp/sofia_ml_model_stupid" ); +    i.open( "/tmp/sofia_ml_model_normalx", ios::in ); +    string model; +    getline( i, model ); +    vector<string> strs; +    boost::split( strs, model, boost::is_any_of(" ") ); +    int j = 0; +    for ( vector<string>::iterator it = strs.begin(); it != strs.end(); ++it ) { +      lambdas.set_value(fmap1[j], atof( it->c_str() ) ); +      j++; +    } +  } + +  private: +    map<int,int> fmap; +    map<int,int> fmap1; +}; + | 
