diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2011-09-11 13:17:33 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2011-09-23 19:13:59 +0200 |
commit | 4433886ac335e6db7ded081b5ef673490ee27718 (patch) | |
tree | e03158fc89fc50c1ede07d67e3849d7c7a02b3ef /dtrain/dtrain.cc | |
parent | edb0cc0cbae1e75e4aeedb6360eab325effe6573 (diff) |
latest version from mtm6
Diffstat (limited to 'dtrain/dtrain.cc')
-rw-r--r-- | dtrain/dtrain.cc | 27 |
1 file changed, 23 insertions, 4 deletions
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 35996d6d..f005008e 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -13,6 +13,8 @@
 //#include <boost/iostreams/filter/bzip2.hpp>
 using namespace boost::iostreams;
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/lexical_cast.hpp>
 #ifdef DTRAIN_DEBUG
 #include "tests.h"
@@ -311,7 +313,7 @@ main( int argc, char** argv )
 }
 if ( broken_grammar ) continue;
 grammar_str = boost::replace_all_copy( in_split[3], " __NEXT__RULE__ ", "\n" ) + "\n"; // FIXME copy, __
- grammar_buf << grammar_str << DTRAIN_GRAMMAR_DELIM << endl;
+ grammar_buf << grammar_str << DTRAIN_GRAMMAR_DELIM << " " << in_split[0] << endl;
 decoder.SetSentenceGrammarFromString( grammar_str );
 // decode, kbest
 src_str_buf.push_back( in_split[1] );
@@ -323,7 +325,8 @@ main( int argc, char** argv )
 while ( true ) {
 string g;
 getline( grammar_buf_in, g );
- if ( g == DTRAIN_GRAMMAR_DELIM ) break;
+ //if ( g == DTRAIN_GRAMMAR_DELIM ) break;
+ if (boost::starts_with(g, DTRAIN_GRAMMAR_DELIM)) break;
 grammar_str += g+"\n";
 i += 1;
 }
@@ -430,6 +433,7 @@ main( int argc, char** argv )
 }
 ++sid;
+ cerr << "reporter:counter:dtrain,sent," << sid << endl;
 } // input loop
@@ -446,6 +450,7 @@ main( int argc, char** argv )
 avg_1best_score_diff = avg_1best_score;
 avg_1best_model_diff = avg_1best_model;
 }
+ if ( !quiet ) {
 cout << _prec5 << _pos << "WEIGHTS" << endl;
 for (vector<string>::iterator it = wprint.begin(); it != wprint.end(); it++) {
 cout << setw(16) << *it << " = " << dense_weights[FD::Convert( *it )] << endl;
@@ -456,6 +461,7 @@ main( int argc, char** argv )
 cout << _pos << " (" << avg_1best_score_diff << ")" << endl;
 cout << _nopos << "avg model score: " << avg_1best_model;
 cout << _pos << " (" << avg_1best_model_diff << ")" << endl;
+ }
 vector<double> remember_scores;
 remember_scores.push_back( avg_1best_score );
 remember_scores.push_back( avg_1best_model );
@@ -478,7 +484,7 @@ main( int argc, char** argv )
 cout << time_dif/(double)in_sz<< " s/S)" << endl;
 }
- if ( t+1 != T ) cout << endl;
+ if ( t+1 != T && !quiet ) cout << endl;
 if ( noup ) break;
@@ -486,8 +492,21 @@ main( int argc, char** argv )
 unlink( grammar_buf_tmp_fn );
 if ( !noup ) {
+ // TODO BEST ITER
 if ( !quiet ) cout << endl << "writing weights file '" << cfg["output"].as<string>() << "' ...";
- weights.WriteToFile( cfg["output"].as<string>(), true );
+ if ( cfg["output"].as<string>() == "-" ) {
+ for ( SparseVector<double>::const_iterator ti = lambdas.begin();
+ ti != lambdas.end(); ++ti ) {
+ if ( ti->second == 0 ) continue;
+ //if ( ti->first == "__bias" ) continue;
+ cout << setprecision(9);
+ cout << _nopos << FD::Convert(ti->first) << "\t" << ti->second << endl;
+ //cout << "__SHARD_COUNT__\t1" << endl;
+ }
+ } else {
+ weights.InitFromVector( lambdas );
+ weights.WriteToFile( cfg["output"].as<string>(), true );
+ }
 if ( !quiet ) cout << "done" << endl;
 }
|