summaryrefslogtreecommitdiff
path: root/dtrain/updater.h
blob: 6307a00569de1af557843397500ea2816985e168 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#ifndef _DTRAIN_LEARNER_H_
#define _DTRAIN_LEARNER_H_

#include <unistd.h>

#include <cassert>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include "sparse_vector.h"
#include "score.h"


namespace dtrain
{


class Updater
{
  public:
    virtual void Init( const vector<SparseVector<double> >& kbest, const Scores& scores,
                       const bool invert_score = false ) {};
    virtual void Update( SparseVector<double>& lambdas ) {};
};


class SofiaUpdater : public Updater
{
  public:
    void
    Init( const size_t sid, const vector<SparseVector<double> >& kbest, /*const FIXME operator[]*/ Scores& scores,
          const bool invert_score = false )
    {
      assert( kbest.size() == scores.size() );
      ofstream o;
      char tmp[] = DTRAIN_TMP_DIR"/dtrain-sofia-data-XXXXXX";
      mkstemp( tmp );
      tmp_data_fn = tmp;
      o.open( tmp_data_fn.c_str(), ios::trunc );
      int fid = 0;
      map<int,int>::iterator ff;
      double score;
      for ( size_t k = 0; k < kbest.size(); ++k ) {
        map<int,double> m;
        SparseVector<double>::const_iterator it = kbest[k].begin();
        score = scores[k].GetScore();
        if ( invert_score ) score = -score;
        o << score;
        for ( ; it != kbest[k].end(); ++it ) {
          ff = fmap.find( it->first );
          if ( ff == fmap.end() ) {
            fmap.insert( pair<int,int>(it->first, fid) );
            fmap1.insert( pair<int,int>(fid, it->first) );
            fid++;
          }
          m.insert( pair<int,double>(fmap[it->first], it->second) );
        }
        map<int,double>::iterator ti = m.begin();
        for ( ; ti != m.end(); ++ti ) {
          o << " " << ti->first << ":" << ti->second;
        }
        o << endl;
      }
      o.close();
    }

    void
    Update(SparseVector<double>& lambdas)
    {
      char tmp[] = DTRAIN_TMP_DIR"/dtrain-sofia-model-XXXXXX";
      mkstemp(tmp);
      tmp_model_fn = tmp;
      string call = "./sofia-ml --training_file " + tmp_data_fn;
      call += " --model_out " + tmp_model_fn;
      call += " --loop_type stochastic --lambda 100 --dimensionality ";
      std::stringstream out;
      out << fmap.size();
      call += out.str();
      call += " &>/dev/null";
      system ( call.c_str() );
      ifstream i;
      i.open( tmp_model_fn.c_str(), ios::in );
      string model;
      getline( i, model );
      vector<string> strs;
      boost::split( strs, model, boost::is_any_of(" ") );
      int j = 0;
      for ( vector<string>::iterator it = strs.begin(); it != strs.end(); ++it ) {
        lambdas.set_value(fmap1[j], atof( it->c_str() ) );
        j++;
      }
      i.close();
      unlink( tmp_data_fn.c_str() );
      unlink( tmp_model_fn.c_str() );
    }

  private:
    string tmp_data_fn;
    string tmp_model_fn;
    map<int,int> fmap;
    map<int,int> fmap1;
};


} // namespace

#endif