From b7568a8dad2720d5ea0418171e9b85229adbbcc5 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 31 Jul 2011 19:24:02 +0200 Subject: bugfixing, begin refactoring --- dtrain/dcommon.h | 71 ++------------------------------------------------------ 1 file changed, 2 insertions(+), 69 deletions(-) (limited to 'dtrain/dcommon.h') diff --git a/dtrain/dcommon.h b/dtrain/dcommon.h index ff796642..6df841bb 100644 --- a/dtrain/dcommon.h +++ b/dtrain/dcommon.h @@ -30,6 +30,8 @@ using namespace std; namespace po = boost::program_options; + + struct ScorePair { ScorePair(double modelscore, double score) : modelscore_(modelscore), score_(score) {} @@ -139,72 +141,7 @@ struct NgramCounts }; -/*class Learnerx -{ - public: - virtual void Init(const vector >& kbest, const Scores& scores) {}; - virtual void Update(SparseVector& lambdas); -};*/ - -class SofiaLearner //: public Learnerx FIXME -{ - // TODO bool invert_score - public: - void - Init( const size_t sid, const vector >& kbest, /*const*/ Scores& scores ) - { - assert( kbest.size() == scores.size() ); - ofstream o; - unlink( "/tmo/sofia_ml_training" ); - o.open( "/tmp/sofia_ml_training", ios::trunc ); // TODO randomize, filename exists - int fid = 0; - map::iterator ff; - for ( size_t k = 0; k < kbest.size(); ++k ) { - SparseVector::const_iterator it = kbest[k].begin(); - o << scores[k].GetScore(); - for ( ; it != kbest[k].end(); ++it) { - ff = fmap.find( it->first ); - if ( ff == fmap.end() ) { - fmap.insert( pair(it->first, fid) ); - fmap1.insert( pair(fid, it->first) ); - fid++; - } - o << " "<< fmap[it->first] << ":" << it->second; - } - o << endl; - } - o.close(); - } - - void - Update(SparseVector& lambdas) - { - string call = "./sofia-ml --training_file /tmp/sofia_ml_training --model_out /tmp/sofia_ml_model --loop_type stochastic --lambda 100 --dimensionality "; - std::stringstream out; - out << fmap.size(); - call += out.str(); - call += " &>/dev/null"; - system ( call.c_str() ); - ifstream i; - unlink( "/tmo/sofia_ml_model" ); - i.open( "/tmp/sofia_ml_model", ios::in ); - string model; - getline( i, model ); - //cout << model << endl; - vector strs; - boost::split( strs, model, boost::is_any_of(" ") ); - int j = 0; - for ( vector::iterator it = strs.begin(); it != strs.end(); ++it ) { - lambdas.set_value(fmap1[j], atof( it->c_str() ) ); - j++; - } - - } - private: - map fmap; - map fmap1; -}; typedef map, size_t> Ngrams; Ngrams make_ngrams( vector& s, size_t N ); @@ -215,10 +152,6 @@ double stupid_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_ double smooth_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, const size_t N, vector weights = vector() ); double approx_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, const size_t N, vector weights = vector() ); void register_and_convert(const vector& strs, vector& ids); - - - - void print_FD(); void run_tests(); void test_SetWeights(); -- cgit v1.2.3