author | Patrick Simianer <p@simianer.de> | 2011-07-31 19:24:02 +0200
---|---|---
committer | Patrick Simianer <p@simianer.de> | 2011-09-23 19:13:57 +0200
commit | b7568a8dad2720d5ea0418171e9b85229adbbcc5 (patch) |
tree | 06cc0f72e35e062fe0841f055f2d7f5e5626d407 /dtrain/dcommon.h |
parent | b732e625ffcf59da8440db577183110488f5c4b7 (diff) |
bugfixing, begin refactoring
Diffstat (limited to 'dtrain/dcommon.h')
-rw-r--r-- | dtrain/dcommon.h | 71
1 file changed, 2 insertions, 69 deletions
diff --git a/dtrain/dcommon.h b/dtrain/dcommon.h
index ff796642..6df841bb 100644
--- a/dtrain/dcommon.h
+++ b/dtrain/dcommon.h
@@ -30,6 +30,8 @@ using namespace std;
 namespace po = boost::program_options;
 
+
+
 struct ScorePair
 {
   ScorePair(double modelscore, double score) :
     modelscore_(modelscore), score_(score) {}
@@ -139,72 +141,7 @@ struct NgramCounts
 };
 
 
-/*class Learnerx
-{
-  public:
-    virtual void Init(const vector<SparseVector<double> >& kbest, const Scores& scores) {};
-    virtual void Update(SparseVector<double>& lambdas);
-};*/
-
-class SofiaLearner //: public Learnerx FIXME
-{
-  // TODO bool invert_score
-  public:
-    void
-    Init( const size_t sid, const vector<SparseVector<double> >& kbest, /*const*/ Scores& scores )
-    {
-      assert( kbest.size() == scores.size() );
-      ofstream o;
-      unlink( "/tmo/sofia_ml_training" );
-      o.open( "/tmp/sofia_ml_training", ios::trunc ); // TODO randomize, filename exists
-      int fid = 0;
-      map<int,int>::iterator ff;
-      for ( size_t k = 0; k < kbest.size(); ++k ) {
-        SparseVector<double>::const_iterator it = kbest[k].begin();
-        o << scores[k].GetScore();
-        for ( ; it != kbest[k].end(); ++it) {
-          ff = fmap.find( it->first );
-          if ( ff == fmap.end() ) {
-            fmap.insert( pair<int,int>(it->first, fid) );
-            fmap1.insert( pair<int,int>(fid, it->first) );
-            fid++;
-          }
-          o << " "<< fmap[it->first] << ":" << it->second;
-        }
-        o << endl;
-      }
-      o.close();
-    }
-
-    void
-    Update(SparseVector<double>& lambdas)
-    {
-      string call = "./sofia-ml --training_file /tmp/sofia_ml_training --model_out /tmp/sofia_ml_model --loop_type stochastic --lambda 100 --dimensionality ";
-      std::stringstream out;
-      out << fmap.size();
-      call += out.str();
-      call += " &>/dev/null";
-      system ( call.c_str() );
-      ifstream i;
-      unlink( "/tmo/sofia_ml_model" );
-      i.open( "/tmp/sofia_ml_model", ios::in );
-      string model;
-      getline( i, model );
-      //cout << model << endl;
-      vector<string> strs;
-      boost::split( strs, model, boost::is_any_of(" ") );
-      int j = 0;
-      for ( vector<string>::iterator it = strs.begin(); it != strs.end(); ++it ) {
-        lambdas.set_value(fmap1[j], atof( it->c_str() ) );
-        j++;
-      }
-
-    }
-  private:
-    map<int,int> fmap;
-    map<int,int> fmap1;
-};
 
 typedef map<vector<WordID>, size_t> Ngrams;
 Ngrams make_ngrams( vector<WordID>& s, size_t N );
@@ -215,10 +152,6 @@ double stupid_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_
 double smooth_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, const size_t N, vector<float> weights = vector<float>() );
 double approx_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, const size_t N, vector<float> weights = vector<float>() );
 void register_and_convert(const vector<string>& strs, vector<WordID>& ids);
-
-
-
-
 void print_FD();
 void run_tests();
 void test_SetWeights();
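For context on what this commit removes: the deleted SofiaLearner wrote each k-best list to a temporary file in sofia-ml's SVM-light-like text format (remapping cdec's sparse feature ids to a dense 0..n-1 range via fmap/fmap1), shelled out to the external sofia-ml binary, and then read the resulting weight vector back to update the lambdas. The sketch below is a minimal, self-contained illustration of that round trip and is not code from the repository; the file names, toy data, and standalone data types are assumptions made for illustration, while the ./sofia-ml flags mirror the ones in the deleted Update() method.

```cpp
// Sketch (not part of the commit): dense feature remapping, training-file
// dump, external sofia-ml call, and weight read-back.
#include <cstdlib>
#include <fstream>
#include <map>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

typedef std::vector<std::pair<int, double> > FeatVec;  // (feature id, value)

int main() {
  // toy k-best list: one score per hypothesis plus its sparse features
  std::vector<double> scores;
  scores.push_back(0.7);
  scores.push_back(0.2);
  std::vector<FeatVec> kbest(2);
  kbest[0].push_back(std::make_pair(42, 1.0));
  kbest[1].push_back(std::make_pair(7, 2.5));

  std::map<int, int> fmap, fmap_inv;  // original id <-> dense id
  std::ofstream o("sofia_ml_training.txt");
  for (size_t k = 0; k < kbest.size(); ++k) {
    o << scores[k];
    for (size_t j = 0; j < kbest[k].size(); ++j) {
      int id = kbest[k][j].first;
      if (fmap.find(id) == fmap.end()) {  // assign the next dense id
        int next = (int)fmap.size();
        fmap[id] = next;
        fmap_inv[next] = id;
      }
      o << " " << fmap[id] << ":" << kbest[k][j].second;
    }
    o << "\n";
  }
  o.close();

  // train with the external learner (flags as in the removed Update())
  std::ostringstream call;
  call << "./sofia-ml --training_file sofia_ml_training.txt"
       << " --model_out sofia_ml_model.txt"
       << " --loop_type stochastic --lambda 100"
       << " --dimensionality " << fmap.size();
  std::system(call.str().c_str());

  // read the space-separated weight vector back, undoing the remapping
  std::ifstream in("sofia_ml_model.txt");
  std::string model;
  std::getline(in, model);
  std::istringstream iss(model);
  std::map<int, double> lambdas;  // weights keyed by original feature id
  double w;
  int dense = 0;
  while (iss >> w) lambdas[fmap_inv[dense++]] = w;
  return 0;
}
```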