From 538bc2149631e989e4806165632c5460c3514670 Mon Sep 17 00:00:00 2001 From: graehl Date: Fri, 16 Jul 2010 01:57:08 +0000 Subject: oracle refactor, oracle vest directions, sparse_vector git-svn-id: https://ws10smt.googlecode.com/svn/trunk@280 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/apply_models.h | 2 +- decoder/cdec.cc | 18 ++++++++---- decoder/oracle_bleu.h | 28 +++++++++--------- decoder/sentences.h | 53 +++++++++++++++++++++++++++++++++++ decoder/sparse_vector.h | 20 ++++++++++++- decoder/stringlib.h | 5 ++-- vest/Makefile.am | 2 +- vest/mr_vest_generate_mapper_input.cc | 20 +++++++++---- 8 files changed, 118 insertions(+), 30 deletions(-) create mode 100755 decoder/sentences.h diff --git a/decoder/apply_models.h b/decoder/apply_models.h index 5c220afd..61a5b8f7 100644 --- a/decoder/apply_models.h +++ b/decoder/apply_models.h @@ -11,7 +11,7 @@ struct IntersectionConfiguration { const int algorithm; // 0 = full intersection, 1 = cube pruning const int pop_limit; // max number of pops off the heap at each node IntersectionConfiguration(int alg, int k) : algorithm(alg), pop_limit(k) {} - IntersectionConfiguration(exhaustive_t t) : algorithm(0), pop_limit() {(void)t;} + IntersectionConfiguration(exhaustive_t /* t */) : algorithm(0), pop_limit() {} }; void ApplyModelSet(const Hypergraph& in, diff --git a/decoder/cdec.cc b/decoder/cdec.cc index 77179948..8827cce3 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -323,6 +323,12 @@ void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_featur } } +void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,DenseWeightVector const& feature_weights) { + WeightVector fw(feature_weights); + forest_stats(forest,name,show_tree,show_features,&fw); +} + + void maybe_prune(Hypergraph &forest,po::variables_map const& conf,string nbeam,string ndensity,string forestname,double srclen) { double beam_prune=0,density_prune=0; bool use_beam_prune=beam_param(conf,nbeam,&beam_prune,conf.count("scale_prune_srclen"),srclen); @@ -390,9 +396,9 @@ int main(int argc, char** argv) { prelm_w.InitFromFile(plmw); prelm_feature_weights.resize(FD::NumFeats()); prelm_w.InitVector(&prelm_feature_weights); -// cerr << "prelm_weights: " << FeatureVector(prelm_feature_weights)< res = Inside, @@ -574,7 +580,7 @@ int main(int argc, char** argv) { &prelm_forest); forest.swap(prelm_forest); forest.Reweight(prelm_feature_weights); - forest_stats(forest," prelm forest",show_tree_structure,show_features,&prelm_feature_weights); + forest_stats(forest," prelm forest",show_tree_structure,show_features,prelm_feature_weights); } maybe_prune(forest,conf,"prelm_beam_prune","prelm_density_prune","-LM",srclen); @@ -593,7 +599,7 @@ int main(int argc, char** argv) { &lm_forest); forest.swap(lm_forest); forest.Reweight(feature_weights); - forest_stats(forest," +LM forest",show_tree_structure,show_features,&feature_weights); + forest_stats(forest," +LM forest",show_tree_structure,show_features,feature_weights); } maybe_prune(forest,conf,"beam_prune","density_prune","+LM",srclen); @@ -604,7 +610,7 @@ int main(int argc, char** argv) { /*Oracle Rescoring*/ if(get_oracle_forest) { - Oracles o=oracles.ComputeOracles(smeta,&forest,feature_weights,&cerr,10,conf["forest_output"].as()); + Oracle o=oracle.ComputeOracle(smeta,&forest,FeatureVector(feature_weights),&cerr,10,conf["forest_output"].as()); cerr << " +Oracle BLEU forest (nodes/edges): " << forest.nodes_.size() << '/' << forest.edges_.size() << endl; cerr << " +Oracle BLEU (paths): " << forest.NumberOfPaths() << endl; o.hope.Print(cerr," +Oracle BLEU"); diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h index b58117c1..cc19fbca 100755 --- a/decoder/oracle_bleu.h +++ b/decoder/oracle_bleu.h @@ -17,6 +17,7 @@ #include "apply_models.h" #include "kbest.h" #include "timing_stats.h" +#include "sentences.h" //TODO: put function impls into .cc //TODO: disentangle @@ -44,7 +45,7 @@ struct Translation { }; -struct Oracles { +struct Oracle { bool is_null() { return model.is_null() /* && fear.is_null() && hope.is_null() */; } @@ -52,13 +53,13 @@ struct Oracles { Translation model,fear,hope; // feature 0 will be the error rate in fear and hope // move toward hope - FeatureVector ModelHopeGradient() { + FeatureVector ModelHopeGradient() const { FeatureVector r=hope.features-model.features; r.set_value(0,0); return r; } // move toward hope from fear - FeatureVector FearHopeGradient() { + FeatureVector FearHopeGradient() const { FeatureVector r=hope.features-fear.features; r.set_value(0,0); return r; @@ -150,9 +151,9 @@ struct OracleBleu { } // destroys forest (replaces it w/ rescored oracle one) - Oracles ComputeOracles(SentenceMetadata & smeta,Hypergraph *forest_in_out,WeightVector const& feature_weights,std::ostream *log=0,unsigned kbest=0,std::string const& forest_output="") { + Oracle ComputeOracle(SentenceMetadata const& smeta,Hypergraph *forest_in_out,WeightVector const& feature_weights,std::ostream *log=0,unsigned kbest=0,std::string const& forest_output="") { Hypergraph &forest=*forest_in_out; - Oracles r; + Oracle r; int sent_id=smeta.GetSentenceID(); r.model=Translation(forest); if (kbest) DumpKBest("model",sent_id, forest, kbest, true, forest_output); @@ -169,23 +170,24 @@ struct OracleBleu { if (kbest) DumpKBest("negative",sent_id, forest, kbest, true, forest_output); return r; } - typedef std::vector Sentence; - void Rescore(SentenceMetadata & smeta,Hypergraph const& forest,Hypergraph *dest_forest,WeightVector const& feature_weights,double bleu_weight=1.0,std::ostream *log=&std::cerr) { + void Rescore(SentenceMetadata const& smeta,Hypergraph const& forest,Hypergraph *dest_forest,WeightVector const& feature_weights,double bleu_weight=1.0,std::ostream *log=&std::cerr) { // the sentence bleu stats will get added to doc only if you call IncludeLastScore sentscore=GetScore(forest,smeta.GetSentenceID()); if (!doc_score) { doc_score.reset(sentscore->GetOne()); } tmp_src_length = smeta.GetSourceLength(); //TODO: where does this come from? using namespace std; - ModelSet oracle_models(WeightVector(bleu_weight,1),vector(1,pff.get())); - const IntersectionConfiguration inter_conf_oracle(0, 0); + DenseWeightVector w; + feature_weights_=feature_weights; + feature_weights_.set_value(0,bleu_weight); + feature_weights.init_vector(&w); + ModelSet oracle_models(w,vector(1,pff.get())); if (log) *log << "Going to call Apply Model " << endl; ApplyModelSet(forest, smeta, oracle_models, - inter_conf_oracle, + IntersectionConfiguration(exhaustive_t()), dest_forest); - feature_weights_=feature_weights; ReweightBleu(dest_forest,bleu_weight); } @@ -202,7 +204,7 @@ struct OracleBleu { } void ReweightBleu(Hypergraph *dest_forest,double bleu_weight=-1.) { - feature_weights_[0]=bleu_weight; + feature_weights_.set_value(0,bleu_weight); dest_forest->Reweight(feature_weights_); // dest_forest->SortInEdgesByEdgeWeights(); } @@ -227,7 +229,7 @@ struct OracleBleu { kbest.LazyKthBest(forest.nodes_.size() - 1, i); if (!d) break; //calculate score in context of psuedo-doc - Score* sentscore = GetScore(d->yield,sent_id); + ScoreP sentscore = GetScore(d->yield,sent_id); sentscore->PlusEquals(*doc_score,float(1)); float bleu = curr_src_length * sentscore->ComputeScore(); kbest_out << sent_id << " ||| " << TD::GetString(d->yield) << " ||| " diff --git a/decoder/sentences.h b/decoder/sentences.h new file mode 100755 index 00000000..842072b9 --- /dev/null +++ b/decoder/sentences.h @@ -0,0 +1,53 @@ +#ifndef CDEC_SENTENCES_H +#define CDEC_SENTENCES_H + +#include +#include +#include +#include "filelib.h" +#include "tdict.h" +#include "stringlib.h" +typedef std::vector Sentence; + +inline void StringToSentence(std::string const& str,Sentence &s) { + using namespace std; + vector ss=SplitOnWhitespace(str); + s.clear(); + transform(ss.begin(),ss.end(),back_inserter(s),ToTD()); +} + +inline Sentence StringToSentence(std::string const& str) { + Sentence s; + StringToSentence(str,s); + return s; +} + +inline std::istream& operator >> (std::istream &in,Sentence &s) { + using namespace std; + string str; + if (getline(in,str)) { + StringToSentence(str,s); + } + return in; +} + + +class Sentences : public std::vector { + typedef std::vector VS; +public: + Sentences() { } + Sentences(unsigned n,Sentence const& sentence) : VS(n,sentence) { } + Sentences(unsigned n,std::string const& sentence) : VS(n,StringToSentence(sentence)) { } + void Load(std::string file) { + ReadFile r(file); + Load(*r.stream()); + } + void Load(std::istream &in) { + this->push_back(Sentence()); + while(in>>this->back()) ; + this->pop_back(); + } +}; + + +#endif diff --git a/decoder/sparse_vector.h b/decoder/sparse_vector.h index 9c7c9c79..43880014 100644 --- a/decoder/sparse_vector.h +++ b/decoder/sparse_vector.h @@ -12,6 +12,13 @@ #include "fdict.h" +template +inline T & extend_vector(std::vector &v,int i) { + if (i>=v.size()) + v.resize(i+1); + return v[i]; +} + template class SparseVector { public: @@ -29,6 +36,17 @@ public: } + void init_vector(std::vector *vp) const { + init_vector(*vp); + } + + void init_vector(std::vector &v) const { + v.clear(); + for (const_iterator i=values_.begin(),e=values_.end();i!=e;++i) + extend_vector(v,i->first)=i->second; + } + + void set_new_value(int index, T const& val) { assert(values_.find(index)==values_.end()); values_[index]=val; @@ -312,7 +330,7 @@ private: typedef SparseVector FeatureVector; typedef SparseVector WeightVector; - +typedef std::vector DenseWeightVector; template SparseVector operator+(const SparseVector& a, const SparseVector& b) { SparseVector result = a; diff --git a/decoder/stringlib.h b/decoder/stringlib.h index eac1dce6..6bb8cff0 100644 --- a/decoder/stringlib.h +++ b/decoder/stringlib.h @@ -1,4 +1,5 @@ -#ifndef _STRINGLIB_H_ +#ifndef CDEC_STRINGLIB_H_ +#define CDEC_STRINGLIB_H_ #include #include @@ -14,7 +15,7 @@ void ParseTranslatorInput(const std::string& line, std::string* input, std::stri struct Lattice; void ParseTranslatorInputLattice(const std::string& line, std::string* input, Lattice* ref); -inline const std::string Trim(const std::string& str, const std::string& dropChars = " \t") { +inline std::string Trim(const std::string& str, const std::string& dropChars = " \t") { std::string res = str; res.erase(str.find_last_not_of(dropChars)+1); return res.erase(0, res.find_first_not_of(dropChars)); diff --git a/vest/Makefile.am b/vest/Makefile.am index 99bd6430..1c797d50 100644 --- a/vest/Makefile.am +++ b/vest/Makefile.am @@ -23,7 +23,7 @@ mbr_kbest_LDADD = $(top_srcdir)/decoder/libcdec.a -lz fast_score_SOURCES = fast_score.cc ter.cc comb_scorer.cc aer_scorer.cc scorer.cc viterbi_envelope.cc fast_score_LDADD = $(top_srcdir)/decoder/libcdec.a -lz -mr_vest_generate_mapper_input_SOURCES = mr_vest_generate_mapper_input.cc line_optimizer.cc +mr_vest_generate_mapper_input_SOURCES = mr_vest_generate_mapper_input.cc line_optimizer.cc timing_stats.cc mr_vest_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a -lz mr_vest_map_SOURCES = viterbi_envelope.cc error_surface.cc aer_scorer.cc mr_vest_map.cc scorer.cc ter.cc comb_scorer.cc line_optimizer.cc diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc index cbda78c5..01e93f61 100644 --- a/vest/mr_vest_generate_mapper_input.cc +++ b/vest/mr_vest_generate_mapper_input.cc @@ -62,6 +62,7 @@ struct oracle_directions { bool start_random; bool include_primary; + bool old_to_hope; bool fear_to_hope; unsigned n_random; void AddPrimaryAndRandomDirections() { @@ -87,14 +88,15 @@ struct oracle_directions { ("forest_repository,r",po::value(&forest_repository),"[REQD] Path to forest repository") ("weights,w",po::value(&weights_file),"[REQD] Current feature weights file") ("optimize_feature,o",po::value >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)") - ("random_directions,d",po::value(&random_directions)->default_value(10),"Number of random directions to run the line optimizer in") + ("random_directions,d",po::value(&n_random)->default_value(10),"Number of random directions to run the line optimizer in") ("no_primary,n","don't use the primary (orthogonal each feature alone) directions") ("oracle_directions,O",po::value(&n_oracle)->default_value(0),"read the forests and choose this many directions based on heading toward a hope max (bleu+modelscore) translation.") ("oracle_start_random",po::bool_switch(&start_random),"sample random subsets of dev set for ALL oracle directions, not just those after a sequential run through it") ("oracle_batch,b",po::value(&oracle_batch)->default_value(10),"to produce each oracle direction, sum the 'gradient' over this many sentences") ("max_similarity,m",po::value(&max_similarity)->default_value(0),"remove directions that are too similar (Tanimoto coeff. less than (1-this)). 0 means don't filter, 1 means only 1 direction allowed?") ("fear_to_hope,f",po::bool_switch(&fear_to_hope),"for each of the oracle_directions, also include a direction from fear to hope (as well as origin to hope)") - ("decoder_translations",po::value(&decoder_translations)->default_value(""),"one per line decoder 1best translations for computing document BLEU vs. sentences-seen-so-far BLEU") + ("no_old_to_hope,n","don't emit the usual old -> hope oracle") + ("decoder_translations",po::value(&decoder_translations_file)->default_value(""),"one per line decoder 1best translations for computing document BLEU vs. sentences-seen-so-far BLEU") ("help,h", "Help"); po::options_description dcmdline_options; dcmdline_options.add(opts); @@ -173,7 +175,10 @@ struct oracle_directions { oracle_directions() { } + Sentences model_hyps; void Init() { + if (!decoder_translations_file.empty()) + model_hyps.Load(decoder_translations_file); start_random=false; assert(DirectoryExists(forest_repository)); vector features; @@ -206,7 +211,7 @@ struct oracle_directions { Timer t("Loading forest from JSON "+forest_file(i)); HypergraphIO::ReadFromJSON(rf.stream(), &hg); } - o=oracle.ComputeOracles(MakeMetadata(hg,i),&hg,origin,&cerr); + o=oracle.ComputeOracle(oracle.MakeMetadata(hg,i),&hg,origin,&cerr); } return o; } @@ -221,13 +226,16 @@ struct oracle_directions { for (unsigned j=0;j=dev_set_size) ? rsg() : b); - o2hope+=o.ModelHopeGradient(); + if (old_to_hope) + o2hope+=o.ModelHopeGradient(); if (fear_to_hope) fear2hope+=o.FearHopeGradient(); } double N=(double)oracle_batch; - o2hope/=N; - directions.push_back(o2hope); + if (old_to_hope) { + o2hope/=N; + directions.push_back(o2hope); + } if (fear_to_hope) { fear2hope/=N; directions.push_back(fear2hope); -- cgit v1.2.3