From 4933c202b3877a607e7ff6f266f0cdf3cc16dc62 Mon Sep 17 00:00:00 2001 From: graehl Date: Fri, 2 Jul 2010 22:04:35 +0000 Subject: cdec --prelm_weights wf: rescore forest with 0-state features before prelm_beam_prune git-svn-id: https://ws10smt.googlecode.com/svn/trunk@123 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/cdec.cc | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/decoder/cdec.cc b/decoder/cdec.cc index 9e40ae8a..0aa46be5 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -61,6 +61,9 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { ("input,i",po::value()->default_value("-"),"Source file") ("grammar,g",po::value >()->composing(),"Either SCFG grammar file(s) or phrase tables file(s)") ("weights,w",po::value(),"Feature weights file") + ("prelm_weights",po::value(),"Feature weights file for prelm_beam_prune. Requires --weights.") + ("prelm_copy_weights","use --weights as value for --prelm_weights.") + ("no_freeze_feature_set,Z", "Do not freeze feature set after reading feature weights file") ("feature_function,F",po::value >()->composing(), "Additional feature function(s) (-L for list)") ("list_feature_functions,L","List available feature functions") @@ -244,6 +247,20 @@ bool beam_param(po::variables_map const& conf,char const* name,double *val,bool return false; } +bool prelm_weights_string(po::variables_map const& conf,string &s) +{ + if (conf.count("prelm_weights")) { + s=conf["prelm_weights"].as(); + return true; + } + if (conf.count("prelm_copy_weights")) { + s=conf["weights"].as(); + return true; + } + return false; +} + + int main(int argc, char** argv) { global_ff_registry.reset(new FFRegistry); register_feature_functions(); @@ -275,12 +292,20 @@ int main(int argc, char** argv) { } // load feature weights (and possibly freeze feature set) - vector feature_weights; - Weights w; + vector feature_weights,prelm_feature_weights; + Weights w,prelm_w; + bool has_prelm_models = false; if (conf.count("weights")) { w.InitFromFile(conf["weights"].as()); feature_weights.resize(FD::NumFeats()); w.InitVector(&feature_weights); + string plmw; + if (prelm_weights_string(conf,plmw)) { + has_prelm_models = true; + prelm_w.InitFromFile(plmw); + prelm_feature_weights.resize(FD::NumFeats()); + prelm_w.InitVector(&prelm_feature_weights); + } if (!conf.count("no_freeze_feature_set")) { cerr << "Freezing feature set (use --no_freeze_feature_set to change)." << endl; FD::Freeze(); @@ -307,7 +332,8 @@ int main(int argc, char** argv) { // set up additional scoring features vector > pffs; - vector late_ffs; + + vector late_ffs,prelm_ffs; if (conf.count("feature_function") > 0) { const vector& add_ffs = conf["feature_function"].as >(); for (int i = 0; i < add_ffs.size(); ++i) { @@ -317,13 +343,17 @@ int main(int argc, char** argv) { if (param.size() > 0) cerr << " (with config parameters '" << param << "')\n"; else cerr << " (no config parameters)\n"; shared_ptr pff = global_ff_registry->Create(ff, param); - if (!pff) { exit(1); } + FeatureFunction const* p=pff.get(); + if (!p) { exit(1); } // TODO check that multiple features aren't trying to set the same fid pffs.push_back(pff); - late_ffs.push_back(pff.get()); + late_ffs.push_back(p); + if (p->NumBytesContext()==0) + prelm_ffs.push_back(p); } } ModelSet late_models(feature_weights, late_ffs); + int palg = 1; if (LowercaseString(conf["intersection_strategy"].as()) == "full") { palg = 0; @@ -417,6 +447,22 @@ int main(int argc, char** argv) { if (extract_file) ExtractRulesDedupe(forest, extract_file->stream()); + if (has_prelm_models) { + ModelSet prelm_models(prelm_feature_weights, prelm_ffs); + Timer t("prelm rescoring"); + forest.Reweight(prelm_feature_weights); + forest.SortInEdgesByEdgeWeights(); + Hypergraph prelm_forest; + ApplyModelSet(forest, + smeta, + prelm_models, + inter_conf, + &prelm_forest); + forest.swap(prelm_forest); + forest.Reweight(prelm_feature_weights); + cerr << viterbi_stats(forest," prelm forest",true,show_tree_structure); + } + double prelm_beam_prune; if (beam_param(conf,"prelm_beam_prune",&prelm_beam_prune,scale_prune_srclen,srclen)) { double presize=forest.edges_.size(); -- cgit v1.2.3