#include "common.h"
#include "kbestget.h"
#include "util.h"


/*
 * init
 *
 * Parses the command line into *conf.
 *
 * Options:
 *   -c, --decoder-config  configuration file for cdec (required)
 *   -w, --weights         weights file (required)
 *   -n, --ngrams          N for n-grams (int, defaults to DTRAIN_DEFAULT_N)
 *   -q, --quiet           do not output translations (bool, defaults to true)
 *
 * Returns true when both required options were given. Otherwise the option
 * summary is written to stderr and false is returned.
 */
bool
init(int argc, char** argv, po::variables_map* conf)
{
  po::options_description opts( "Options" );
  opts.add_options()
    ( "decoder-config,c", po::value<string>(),                               "configuration file for cdec" )
    ( "weights,w",        po::value<string>(),                               "weights file" )
    ( "ngrams,n",         po::value<int>()->default_value(DTRAIN_DEFAULT_N), "N for Ngrams (default 5)" )
    ( "quiet,q",          po::value<bool>()->default_value(true),            "do not output translations" );
  po::options_description cmdline_options;
  cmdline_options.add(opts);
  po::store( parse_command_line(argc, argv, cmdline_options), *conf );
  po::notify( *conf );
  // main() reads both values unconditionally, so BOTH must be present;
  // accepting just one of them would make the later .as<string>() throw.
  if ( ! (conf->count("decoder-config") && conf->count("weights")) ) {
    cerr << cmdline_options << endl;
    return false;
  }
  return true;
}


/*
 * main
 *
 * Reads tab-separated lines from stdin. Per line:
 *   field 0 is handed to the decoder,
 *   field 1 is the reference, split on single spaces,
 *   field 2 is the per-sentence grammar, rules separated by " __NEXT_RULE__ ".
 * The first entry of the k-best list (k = 1) is scored against the reference
 * with smooth_bleu, stupid_bleu and bleu; the three averages over all
 * sentences are written to stderr at the end.
 *
 * Returns 0 on success, 1 on bad options or malformed input.
 */
int
main(int argc, char** argv)
{
  SetSilent(true);
  po::variables_map conf;
  if (!init(argc, argv, &conf)) return 1;
  register_feature_functions();
  size_t k = 1;
  ReadFile ini_rf(conf["decoder-config"].as<string>());
  Decoder decoder(ini_rf.stream());
  KBestGetter observer(k);

  // The option is stored as int; reject values that would wrap when
  // converted to size_t.
  const int ngrams_opt = conf["ngrams"].as<int>();
  if ( ngrams_opt < 1 ) {
    cerr << "ngrams must be >= 1" << endl;
    return 1;
  }
  size_t N = static_cast<size_t>(ngrams_opt);
  bool quiet = conf["quiet"].as<bool>();

  Weights weights;
  weights.InitFromFile(conf["weights"].as<string>());
  // NOTE(review): element type assumed to be double -- confirm against
  // Weights::InitVector / Decoder::SetWeights.
  vector<double> w;
  weights.InitVector(&w);
  decoder.SetWeights(w);

  vector<string> strs, ref_strs;
  // NOTE(review): element type assumed to be WordID -- confirm against
  // register_and_convert.
  vector<WordID> ref_ids;
  string in, psg;
  size_t sn = 0;          // number of sentences scored so far
  double overall  = 0.0;  // running sum of smooth_bleu scores
  double overall1 = 0.0;  // running sum of stupid_bleu scores
  double overall2 = 0.0;  // running sum of bleu scores

  cerr << "(A dot represents " << DTRAIN_DOTOUT << " lines of input.)" << endl;
  while ( getline(cin, in) ) {
    // progress output
    if ( (sn+1) % DTRAIN_DOTOUT == 0 ) {
      cerr << ".";
      if ( (sn+1) % (20*DTRAIN_DOTOUT) == 0 ) cerr << " " << sn+1 << endl;
    }

    strs.clear();
    boost::split( strs, in, boost::is_any_of("\t") );
    // Fields 0..2 are indexed below; refuse short lines instead of reading
    // past the end of the vector.
    if ( strs.size() < 3 ) {
      cerr << endl << "Malformed input at line " << sn+1
           << ": expected 3 tab-separated fields, got " << strs.size() << endl;
      return 1;
    }

    // grammar
    psg = boost::replace_all_copy( strs[2], " __NEXT_RULE__ ", "\n" );
    psg += "\n";
    decoder.SetSentenceGrammar( psg );
    decoder.Decode( strs[0], &observer );
    // NOTE(review): kb->sents[0] is used unchecked below; presumably the
    // observer always holds at least one entry after Decode -- confirm.
    KBestList* kb = observer.GetKBest();

    // reference
    ref_strs.clear();
    ref_ids.clear();
    boost::split( ref_strs, strs[1], boost::is_any_of(" ") );
    register_and_convert( ref_strs, ref_ids );

    // scoring kbest
    // NOTE(review): counts are collected with a fixed order of 4 while the
    // scorers receive N -- verify this is intended when N != 4.
    NgramCounts counts = make_ngram_counts( ref_ids, kb->sents[0], 4 );
    double score  = smooth_bleu( counts, ref_ids.size(), kb->sents[0].size(), N );
    double score1 = stupid_bleu( counts, ref_ids.size(), kb->sents[0].size(), N );
    double score2 = bleu( counts, ref_ids.size(), kb->sents[0].size(), N );

    if ( ! quiet ) cout << TD::GetString( kb->sents[0] ) << endl;

    overall  += score;
    overall1 += score1;
    overall2 += score2;
    sn += 1;
  }

  // sn already equals the number of scored sentences. Fall back to 1 for
  // empty input so the averages print as 0 instead of dividing by zero.
  const double denom = sn > 0 ? static_cast<double>(sn) : 1.0;
  cerr << "Average score (smooth) : " << overall/denom << endl;
  cerr << "Average score (stupid) : " << overall1/denom << endl;
  cerr << "Average score (vanilla): " << overall2/denom << endl;
  cerr << endl;

  return 0;
}