1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
#ifndef _DTRAIN_LEARNER_H_
#define _DTRAIN_LEARNER_H_
#include <unistd.h>
#include <cassert>
#include <cstdlib>
#include <fstream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include "sparse_vector.h"
#include "score.h"
namespace dtrain
{
class Updater
{
public:
virtual void Init( const vector<SparseVector<double> >& kbest, const Scores& scores,
const bool invert_score = false ) {};
virtual void Update( SparseVector<double>& lambdas ) {};
};
class SofiaUpdater : public Updater
{
public:
void
Init( const size_t sid, const vector<SparseVector<double> >& kbest, /*const FIXME operator[]*/ Scores& scores,
const bool invert_score = false )
{
assert( kbest.size() == scores.size() );
ofstream o;
char tmp[] = DTRAIN_TMP_DIR"/dtrain-sofia-data-XXXXXX";
mkstemp( tmp );
tmp_data_fn = tmp;
o.open( tmp_data_fn.c_str(), ios::trunc );
int fid = 0;
map<int,int>::iterator ff;
double score;
for ( size_t k = 0; k < kbest.size(); ++k ) {
map<int,double> m;
SparseVector<double>::const_iterator it = kbest[k].begin();
score = scores[k].GetScore();
if ( invert_score ) score = -score;
o << score;
for ( ; it != kbest[k].end(); ++it ) {
ff = fmap.find( it->first );
if ( ff == fmap.end() ) {
fmap.insert( pair<int,int>(it->first, fid) );
fmap1.insert( pair<int,int>(fid, it->first) );
fid++;
}
m.insert( pair<int,double>(fmap[it->first], it->second) );
}
map<int,double>::iterator ti = m.begin();
for ( ; ti != m.end(); ++ti ) {
o << " " << ti->first << ":" << ti->second;
}
o << endl;
}
o.close();
}
void
Update(SparseVector<double>& lambdas)
{
char tmp[] = DTRAIN_TMP_DIR"/dtrain-sofia-model-XXXXXX";
mkstemp(tmp);
tmp_model_fn = tmp;
//--random_seed 123456789010
string call = "./sofia-ml --training_file " + tmp_data_fn;
call += " --model_out " + tmp_model_fn;
call += " --loop_type rank --lambda 100 --eta_type constant --dimensionality ";
std::stringstream out;
out << fmap.size();
call += out.str();
call += " &>/dev/null";
system ( call.c_str() );
ifstream i;
i.open( tmp_model_fn.c_str(), ios::in );
string model;
getline( i, model );
vector<string> strs;
boost::split( strs, model, boost::is_any_of(" ") );
int j = 0;
for ( vector<string>::iterator it = strs.begin(); it != strs.end(); ++it ) {
lambdas.set_value(fmap1[j], atof( it->c_str() ) );
j++;
}
i.close();
unlink( tmp_data_fn.c_str() );
unlink( tmp_model_fn.c_str() );
}
private:
string tmp_data_fn;
string tmp_model_fn;
map<int,int> fmap;
map<int,int> fmap1;
};
} // namespace
#endif
|