#ifndef _DTRAIN_H_ #define _DTRAIN_H_ #include #include #include #include #include #include #include #include "decoder.h" #include "ff_register.h" #include "sentence_metadata.h" #include "verbose.h" #include "viterbi.h" using namespace std; namespace po = boost::program_options; namespace dtrain { struct ScoredHyp { vector w; SparseVector f; weight_t model, gold; size_t rank; }; inline void PrintWordIDVec(vector& v, ostream& os=cerr) { for (size_t i = 0; i < v.size(); i++) { os << TD::Convert(v[i]); if (i < v.size()-1) os << " "; } } inline ostream& _np(ostream& out) { return out << resetiosflags(ios::showpos); } inline ostream& _p(ostream& out) { return out << setiosflags(ios::showpos); } inline ostream& _p4(ostream& out) { return out << setprecision(4); } bool dtrain_init(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration File Options"); opts.add_options() ("bitext,b", po::value(), "bitext") ("decoder_conf,C", po::value(), "configuration file for decoder") ("iterations,T", po::value()->default_value(15), "number of iterations T (per shard)") ("k", po::value()->default_value(100), "size of kbest list") ("learning_rate,l", po::value()->default_value(0.00001), "learning rate") ("l1_reg,r", po::value()->default_value(0.), "l1 regularization strength") ("margin,m", po::value()->default_value(1.0), "margin for margin perceptron") ("score,s", po::value()->default_value("chiang"), "per-sentence BLEU approx.") ("N", po::value()->default_value(4), "N for BLEU approximation") ("input_weights,w", po::value(), "input weights file") ("average,a", po::bool_switch()->default_value(true), "output average weights") ("keep,K", po::bool_switch()->default_value(false), "output a weight file per iteration") ("struct,S", po::bool_switch()->default_value(false), "structured SGD with hope/fear") ("output,o", po::value()->default_value("-"), "output weights file, '-' for STDOUT") ("disable_learning,X", po::bool_switch()->default_value(false), "disable learning") ("output_data,D", po::value()->default_value(""), "output data to STDOUT; arg. is 'kbest', 'default' or 'all'") ("print_weights,P", po::value()->default_value("EgivenFCoherent SampleCountF CountEF MaxLexFgivenE MaxLexEgivenF IsSingletonF IsSingletonFE Glue WordPenalty PassThrough LanguageModel LanguageModel_OOV"), "list of weights to print after each iteration"); po::options_description clopts("Command Line Options"); clopts.add_options() ("conf,c", po::value(), "dtrain configuration file") ("help,h", po::bool_switch(), "display options"); opts.add(clopts); po::store(parse_command_line(argc, argv, opts), *conf); cerr << "dtrain" << endl << endl; if ((*conf)["help"].as()) { cerr << opts << endl; return false; } if (conf->count("conf")) { ifstream f((*conf)["conf"].as().c_str()); po::store(po::parse_config_file(f, opts), *conf); } po::notify(*conf); if (!conf->count("decoder_conf")) { cerr << "Missing decoder configuration." << endl; cerr << opts << endl; return false; } if (!conf->count("bitext")) { cerr << "No input given." << endl; cerr << opts << endl; return false; } if ((*conf)["output_data"].as() != "") { if ((*conf)["output_data"].as() != "kbest" && (*conf)["output_data"].as() != "default" && (*conf)["output_data"].as() != "all") { cerr << "Wrong 'output_data' argument: "; cerr << (*conf)["output_data"].as() << endl; return false; } } return true; } } // namespace #endif