summaryrefslogtreecommitdiff
path: root/dtrain/dcommon.h
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2011-07-31 19:24:02 +0200
committerPatrick Simianer <p@simianer.de>2011-09-23 19:13:57 +0200
commitb7568a8dad2720d5ea0418171e9b85229adbbcc5 (patch)
tree06cc0f72e35e062fe0841f055f2d7f5e5626d407 /dtrain/dcommon.h
parentb732e625ffcf59da8440db577183110488f5c4b7 (diff)
bugfixing, begin refactoring
Diffstat (limited to 'dtrain/dcommon.h')
-rw-r--r--dtrain/dcommon.h71
1 files changed, 2 insertions, 69 deletions
diff --git a/dtrain/dcommon.h b/dtrain/dcommon.h
index ff796642..6df841bb 100644
--- a/dtrain/dcommon.h
+++ b/dtrain/dcommon.h
@@ -30,6 +30,8 @@ using namespace std;
namespace po = boost::program_options;
+
+
struct ScorePair
{
ScorePair(double modelscore, double score) : modelscore_(modelscore), score_(score) {}
@@ -139,72 +141,7 @@ struct NgramCounts
};
-/*class Learnerx
-{
- public:
- virtual void Init(const vector<SparseVector<double> >& kbest, const Scores& scores) {};
- virtual void Update(SparseVector<double>& lambdas);
-};*/
-
-class SofiaLearner //: public Learnerx FIXME
-{
- // TODO bool invert_score
- public:
- void
- Init( const size_t sid, const vector<SparseVector<double> >& kbest, /*const*/ Scores& scores )
- {
- assert( kbest.size() == scores.size() );
- ofstream o;
- unlink( "/tmo/sofia_ml_training" );
- o.open( "/tmp/sofia_ml_training", ios::trunc ); // TODO randomize, filename exists
- int fid = 0;
- map<int,int>::iterator ff;
- for ( size_t k = 0; k < kbest.size(); ++k ) {
- SparseVector<double>::const_iterator it = kbest[k].begin();
- o << scores[k].GetScore();
- for ( ; it != kbest[k].end(); ++it) {
- ff = fmap.find( it->first );
- if ( ff == fmap.end() ) {
- fmap.insert( pair<int,int>(it->first, fid) );
- fmap1.insert( pair<int,int>(fid, it->first) );
- fid++;
- }
- o << " "<< fmap[it->first] << ":" << it->second;
- }
- o << endl;
- }
- o.close();
- }
-
- void
- Update(SparseVector<double>& lambdas)
- {
- string call = "./sofia-ml --training_file /tmp/sofia_ml_training --model_out /tmp/sofia_ml_model --loop_type stochastic --lambda 100 --dimensionality ";
- std::stringstream out;
- out << fmap.size();
- call += out.str();
- call += " &>/dev/null";
- system ( call.c_str() );
- ifstream i;
- unlink( "/tmo/sofia_ml_model" );
- i.open( "/tmp/sofia_ml_model", ios::in );
- string model;
- getline( i, model );
- //cout << model << endl;
- vector<string> strs;
- boost::split( strs, model, boost::is_any_of(" ") );
- int j = 0;
- for ( vector<string>::iterator it = strs.begin(); it != strs.end(); ++it ) {
- lambdas.set_value(fmap1[j], atof( it->c_str() ) );
- j++;
- }
-
- }
- private:
- map<int,int> fmap;
- map<int,int> fmap1;
-};
typedef map<vector<WordID>, size_t> Ngrams;
Ngrams make_ngrams( vector<WordID>& s, size_t N );
@@ -215,10 +152,6 @@ double stupid_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_
double smooth_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, const size_t N, vector<float> weights = vector<float>() );
double approx_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, const size_t N, vector<float> weights = vector<float>() );
void register_and_convert(const vector<string>& strs, vector<WordID>& ids);
-
-
-
-
void print_FD();
void run_tests();
void test_SetWeights();