diff options
author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-16 01:56:34 +0000 |
---|---|---|
committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-16 01:56:34 +0000 |
commit | d7d59c4bb81262f1dfece384ec68fa2c25096843 (patch) | |
tree | 5521dc624dc23adeb3bc9d9c8f8fecc7feb57724 | |
parent | ff323448416bbfa691a9697ddf3b30a0398fa08a (diff) |
oracle directions
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@276 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r-- | decoder/cdec.cc | 29 | ||||
-rwxr-xr-x | decoder/ff_fsa.h | 5 | ||||
-rw-r--r-- | decoder/logval.h | 6 | ||||
-rwxr-xr-x | decoder/oracle_bleu.h | 79 | ||||
-rw-r--r-- | decoder/sparse_vector.h | 39 | ||||
-rwxr-xr-x | decoder/value_array.h | 12 | ||||
-rw-r--r-- | decoder/viterbi.cc | 2 | ||||
-rw-r--r-- | decoder/viterbi.h | 2 | ||||
-rw-r--r-- | vest/mr_vest_generate_mapper_input.cc | 75 |
9 files changed, 174 insertions, 75 deletions
diff --git a/decoder/cdec.cc b/decoder/cdec.cc index e616f1bb..75c907b1 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -308,7 +308,7 @@ bool prelm_weights_string(po::variables_map const& conf,string &s) } -void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,FeatureWeights *weights=0) { +void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,WeightVector *weights=0) { cerr << viterbi_stats(forest,name,true,show_tree); if (show_features) { cerr << name<<" features: "; @@ -601,33 +601,14 @@ int main(int argc, char** argv) { vector<WordID> trans; ViterbiESentence(forest, &trans); + /*Oracle Rescoring*/ if(get_oracle_forest) { - Timer t("Forest Oracle rescoring:"); - - oracle.DumpKBest(conf,"model",sent_id, forest, 10, true); - - Translation best(forest); - { - Hypergraph oracle_forest; - oracle.Rescore(smeta,forest,&oracle_forest,feature_weights,1.0); - forest.swap(oracle_forest); - } - Translation oracle_trans(forest); - + Oracles o=oracles.ComputeOracles(smeta,forest,feature_weights,&cerr,10,conf["forest_output"].as<std::string>()); cerr << " +Oracle BLEU forest (nodes/edges): " << forest.nodes_.size() << '/' << forest.edges_.size() << endl; cerr << " +Oracle BLEU (paths): " << forest.NumberOfPaths() << endl; - oracle_trans.Print(cerr," +Oracle BLEU"); - //compute kbest for oracle - oracle.DumpKBest(conf,"oracle",sent_id, forest, 10, true); - - //reweight the model with -1 for the BLEU feature to compute k-best list for negative examples - oracle.ReweightBleu(&forest,-1.0); - Translation neg_trans(forest); - neg_trans.Print(cerr," -Oracle BLEU"); - //compute kbest for negative - oracle.DumpKBest(conf,"negative",sent_id, forest, 10, true); - + o.hope.Print(cerr," +Oracle BLEU"); + o.fear.Print(cerr," -Oracle BLEU"); //Add 1-best translation (trans) to psuedo-doc vectors oracle.IncludeLastScore(&cerr); } diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h index cd56f1a5..2ffd6ef8 100755 --- 
a/decoder/ff_fsa.h +++ b/decoder/ff_fsa.h @@ -1,9 +1,14 @@ #ifndef FF_FSA_H #define FF_FSA_H +#include <stdint.h> //C99 #include <string> #include "ff.h" #include "sparse_vector.h" +#include "value_array.h" + +typedef ValueArray<uint8_t> Bytes; + /* */ diff --git a/decoder/logval.h b/decoder/logval.h index 9aaba557..c8c342a3 100644 --- a/decoder/logval.h +++ b/decoder/logval.h @@ -58,6 +58,12 @@ class LogVal { return *this += b; } + // LogVal(fabs(log(x)),x.s_) + friend LogVal abslog(LogVal x) { + if (x.v_<0) x.v_=-x.v_; + return x; + } + LogVal& poweq(const T& power) { #if LOGVAL_CHECK_NEG if (s_) { diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h index 5fef53fd..550f438f 100755 --- a/decoder/oracle_bleu.h +++ b/decoder/oracle_bleu.h @@ -37,7 +37,31 @@ struct Translation { out<<pre<<"Viterbi: "<<TD::GetString(sentence)<<"\n"; out<<pre<<"features: "<<features<<std::endl; } + bool is_null() { + return features.size()==0 /* && sentence.size()==0 */; + } + +}; + +struct Oracles { + bool is_null() { + return model.is_null() /* && fear.is_null() && hope.is_null() */; + } + Translation model,fear,hope; + // feature 0 will be the error rate in fear and hope + // move toward hope + WeightVector ModelHopeGradient() { + WeightVector r=hope-model; + r[0]=0; + return r; + } + // move toward hope from fear + WeightVector FearHopeGradient() { + WeightVector r=hope-fear; + r[0]=0; + return r; + } }; @@ -53,6 +77,7 @@ struct OracleBleu { opts->add_options() ("references,R", value<Refs >(), "Translation reference files") ("oracle_loss", value<string>(), "IBM_BLEU_3 (default), IBM_BLEU etc") + ("bleu_weight", value<double>()->default_value(1.), "weight to give the hope/fear loss function vs. 
model score") ; } int order; @@ -66,17 +91,20 @@ struct OracleBleu { double doc_src_length; void set_oracle_doc_size(int size) { oracle_doc_size=size; - scale_oracle= 1-1./oracle_doc_size;\ + scale_oracle= 1-1./oracle_doc_size; doc_src_length=0; } OracleBleu(int doc_size=10) { set_oracle_doc_size(doc_size); } - boost::shared_ptr<Score> doc_score,sentscore; // made from factory, so we delete them + typedef boost::shared_ptr<Score> ScoreP; + ScoreP doc_score,sentscore; // made from factory, so we delete them + double bleu_weight; void UseConf(boost::program_options::variables_map const& conf) { using namespace std; + bleu_weight=conf["bleu_weight"].as<double>(); set_loss(conf["oracle_loss"].as<string>()); set_refs(conf["references"].as<Refs>()); } @@ -108,21 +136,48 @@ struct OracleBleu { ViterbiFSentence(forest,&srcsent); SentenceMetadata sm(sent_id,Lattice()); //TODO: make reference from refs? sm.SetSourceLength(srcsent.size()); + smeta.SetScore(doc_score.get()); + smeta.SetDocScorer(&ds); + smeta.SetDocLen(doc_src_length); return sm; } - void Rescore(SentenceMetadata & smeta,Hypergraph const& forest,Hypergraph *dest_forest,WeightVector const& feature_weights,double bleu_weight=1.0) { - Translation model_trans(forest); - sentscore.reset(ds[smeta.GetSentenceID()]->ScoreCandidate(model_trans.sentence)); + Oracles ComputeOracles(SentenceMetadata & smeta,Hypergraph const& forest,WeightVector const& feature_weights,std::ostream *log=0,unsigned kbest=0,std::string const& forest_output="") { + Oracles r; + int sent_id=smeta.GetSentenceID(); + r.model=Translation(forest); + + if (kbest) DumpKBest("model",sent_id, forest, kbest, true, forest_output); + { + Timer t("Forest Oracle rescoring:"); + Hypergraph oracle_forest; + Rescore(smeta,forest,&oracle_forest,feature_weights,bleu_weight,log); + forest.swap(oracle_forest); + } + r.hope=Translation(forest); + if (kbest) DumpKBest("oracle",sent_id, forest, kbest, true, forest_output); + ReweightBleu(&forest,-bleu_weight); 
+ r.fear=Translation(forest); + if (kbest) DumpKBest("negative",sent_id, forest, kbest, true, forest_output); + return r; + } + + ScoreP Score(Sentence const& sentence,int sent_id) { + return ds[sent_id]->ScoreCandidate(sentence); + } + ScoreP Score(Hypergraph const& forest,int sent_id) { + return Score(Translation(forest).sentence,sent_id); + } + + void Rescore(SentenceMetadata & smeta,Hypergraph const& forest,Hypergraph *dest_forest,WeightVector const& feature_weights,double bleu_weight=1.0,std::ostream *log=&std::cerr) { + // the sentence bleu stats will get added to doc only if you call IncludeLastScore + sentscore=Score(forest,smeta.GetSentenceID()); if (!doc_score) { doc_score.reset(sentscore->GetOne()); } tmp_src_length = smeta.GetSourceLength(); //TODO: where does this come from? - smeta.SetScore(doc_score.get()); - smeta.SetDocLen(doc_src_length); - smeta.SetDocScorer(&ds); using namespace std; - ModelSet oracle_models(FeatureWeights(bleu_weight,1),vector<FeatureFunction const*>(1,pff.get())); + ModelSet oracle_models(WeightVector(bleu_weight,1),vector<FeatureFunction const*>(1,pff.get())); const IntersectionConfiguration inter_conf_oracle(0, 0); - cerr << "Going to call Apply Model " << endl; + if (log) *log << "Going to call Apply Model " << endl; ApplyModelSet(forest, smeta, oracle_models, @@ -190,10 +245,10 @@ struct OracleBleu { } } - void DumpKBest(boost::program_options::variables_map const& conf,std::string const& suffix,const int sent_id, const Hypergraph& forest, const int k, const bool unique) +void DumpKBest(boost::program_options::variables_map const& conf,std::string const& suffix,const int sent_id, const Hypergraph& forest, const int k, const bool unique, std::string const& forest_output) { std::ostringstream kbest_string_stream; - kbest_string_stream << conf["forest_output"].as<std::string>() << "/kbest_"<<suffix<< "." + kbest_string_stream << forest_output << "/kbest_"<<suffix<< "." 
<< sent_id; DumpKBest(sent_id, forest, k, unique, kbest_string_stream.str()); } diff --git a/decoder/sparse_vector.h b/decoder/sparse_vector.h index bda10974..c6c57150 100644 --- a/decoder/sparse_vector.h +++ b/decoder/sparse_vector.h @@ -20,10 +20,21 @@ public: SparseVector() {} explicit SparseVector(std::vector<T> const& v) { typename MapType::iterator p=values_.begin(); - for (unsigned i=0;i<v.size();++i) - p=values_.insert(p,typename MapType::value_type(i,v[i])); //faster + const T z=T(0); + for (unsigned i=0;i<v.size();++i) { + T const& t=v[i]; + if (t!=z) + p=values_.insert(p,typename MapType::value_type(i,t)); //hint makes insertion faster + } + + } + + void set_new_value(int index, T const& val) { + assert(values_.find(index)==values_.end()); + values_[index]=val; } + const T operator[](int index) const { typename MapType::const_iterator found = values_.find(index); if (found == values_.end()) @@ -265,9 +276,29 @@ private: MapType values_; }; +// doesn't support fast indexing directly +template <class T> +class SparseVectorList { + typedef std::vector<std::pair<int,T> > ListType; + typedef typename ListType::value_type pair_type; + typedef typename ListType::const_iterator const_iterator; + SparseVectorList() { } + explicit SparseVectorList(std::vector<T> const& v) { + const T z=T(0); + for (unsigned i=0;i<v.size();++i) { + T const& t=v[i]; + if (t!=z) + p.push_back(pair_type(i,t)); + } + p.resize(p.size()); + } +private: + ListType p; +}; + + typedef SparseVector<double> FeatureVector; -typedef std::vector<double> FeatureWeights; -typedef FeatureWeights WeightVector; +typedef SparseVector<double> WeightVector; template <typename T> SparseVector<T> operator+(const SparseVector<T>& a, const SparseVector<T>& b) { diff --git a/decoder/value_array.h b/decoder/value_array.h index bfdd1155..7401938a 100755 --- a/decoder/value_array.h +++ b/decoder/value_array.h @@ -1,12 +1,12 @@ #ifndef VALUE_ARRAY_H #define VALUE_ARRAY_H -# include <cstdlib> -# include <algorithm> -# 
include <new> -# include <boost/range.hpp> -# include <boost/utility/enable_if.hpp> -# include <boost/type_traits.hpp> +#include <cstdlib> +#include <algorithm> +#include <new> +#include <boost/range.hpp> +#include <boost/utility/enable_if.hpp> +#include <boost/type_traits.hpp> #ifdef USE_BOOST_SERIALIZE # include <boost/serialization/split_member.hpp> # include <boost/serialization/access.hpp> diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc index f11b77ec..7719de32 100644 --- a/decoder/viterbi.cc +++ b/decoder/viterbi.cc @@ -116,7 +116,7 @@ inline bool close_enough(double a,double b,double epsilon) return diff<=epsilon*fabs(a) || diff<=epsilon*fabs(b); } -FeatureVector ViterbiFeatures(Hypergraph const& hg,FeatureWeights const* weights,bool fatal_dotprod_disagreement) { +FeatureVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights,bool fatal_dotprod_disagreement) { FeatureVector r; const prob_t p = Viterbi<FeatureVectorTraversal>(hg, &r); if (weights) { diff --git a/decoder/viterbi.h b/decoder/viterbi.h index 4697590b..388bff3c 100644 --- a/decoder/viterbi.h +++ b/decoder/viterbi.h @@ -205,6 +205,6 @@ int ViterbiELength(const Hypergraph& hg); int ViterbiPathLength(const Hypergraph& hg); /// if weights supplied, assert viterbi prob = features.dot(*weights) (exception if fatal, cerr warn if not). 
return features (sum over all edges in viterbi derivation) -FeatureVector ViterbiFeatures(Hypergraph const& hg,FeatureWeights const* weights=0,bool fatal_dotprod_disagreement=false); +FeatureVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights=0,bool fatal_dotprod_disagreement=false); #endif diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc index e9a5650b..677c0497 100644 --- a/vest/mr_vest_generate_mapper_input.cc +++ b/vest/mr_vest_generate_mapper_input.cc @@ -84,16 +84,16 @@ struct oracle_directions { OracleBleu::AddOptions(&opts); opts.add_options() ("dev_set_size,s",po::value<unsigned>(&dev_set_size),"[REQD] Development set size (# of parallel sentences)") - ("forest_repository,r",po::value<string>(),"[REQD] Path to forest repository") - ("weights,w",po::value<string>(),"[REQD] Current feature weights file") + ("forest_repository,r",po::value<string>(&forest_repository),"[REQD] Path to forest repository") + ("weights,w",po::value<string>(&weights_file),"[REQD] Current feature weights file") ("optimize_feature,o",po::value<vector<string> >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)") - ("random_directions,d",po::value<unsigned int>()->default_value(20),"Number of random directions to run the line optimizer in") + ("random_directions,d",po::value<unsigned>(&random_directions)->default_value(10),"Number of random directions to run the line optimizer in") ("no_primary,n","don't use the primary (orthogonal each feature alone) directions") - ("oracle_directions,O",po::value<unsigned>()->default_value(0),"read the forests and choose this many directions based on heading toward a hope max (bleu+modelscore) translation.") + ("oracle_directions,O",po::value<unsigned>(&n_oracle)->default_value(0),"read the forests and choose this many directions based on heading toward a hope max (bleu+modelscore) translation.") 
("oracle_start_random",po::bool_switch(&start_random),"sample random subsets of dev set for ALL oracle directions, not just those after a sequential run through it") - ("oracle_batch,b",po::value<unsigned>()->default_value(10),"to produce each oracle direction, sum the 'gradient' over this many sentences") - ("max_similarity,m",po::value<double>()->default_value(0),"remove directions that are too similar (Tanimoto coeff. less than (1-this)). 0 means don't filter, 1 means only 1 direction allowed?") - ("fear_to_hope,f","for each of the oracle_directions, also include a direction from fear to hope (as well as origin to hope)") + ("oracle_batch,b",po::value<unsigned>(&oracle_batch)->default_value(10),"to produce each oracle direction, sum the 'gradient' over this many sentences") + ("max_similarity,m",po::value<double>(&max_similarity)->default_value(0),"remove directions that are too similar (Tanimoto coeff. less than (1-this)). 0 means don't filter, 1 means only 1 direction allowed?") + ("fear_to_hope,f",po::bool_switch(&fear_to_hope),"for each of the oracle_directions, also include a direction from fear to hope (as well as origin to hope)") ("help,h", "Help"); po::options_description dcmdline_options; dcmdline_options.add(opts); @@ -139,16 +139,20 @@ struct oracle_directions { oracle.UseConf(conf); include_primary=!conf.count("no_primary"); + old_to_hope=!conf.count("no_old_to_hope"); + if (conf.count("optimize_feature") > 0) optimize_features=conf["optimize_feature"].as<vector<string> >(); - fear_to_hope=conf.count("fear_to_hope"); - n_random=conf["random_directions"].as<unsigned int>(); - forest_repository=conf["forest_repository"].as<string>(); + + // po::value<X>(&var) takes care of below: +// fear_to_hope=conf.count("fear_to_hope"); +// n_random=conf["random_directions"].as<unsigned int>(); +// forest_repository=conf["forest_repository"].as<string>(); // dev_set_size=conf["dev_set_size"].as<unsigned int>(); - n_oracle=conf["oracle_directions"].as<unsigned>(); 
- oracle_batch=conf["oracle_batch"].as<unsigned>(); - max_similarity=conf["max_similarity"].as<double>(); - weights_file=conf["weights"].as<string>(); +// n_oracle=conf["oracle_directions"].as<unsigned>(); +// oracle_batch=conf["oracle_batch"].as<unsigned>(); +// max_similarity=conf["max_similarity"].as<double>(); +// weights_file=conf["weights"].as<string>(); Init(); } @@ -158,7 +162,7 @@ struct oracle_directions { unsigned n_oracle, oracle_batch; string forest_repository; unsigned dev_set_size; - vector<Dir> dirs; //best_to_hope_dirs + vector<Oracle> oracles; vector<int> fids; string forest_file(unsigned i) const { ostringstream o; @@ -178,6 +182,7 @@ struct oracle_directions { weights.InitSparseVector(&origin); fids.clear(); AddFeatureIds(features); + oracles.resize(dev_set_size); } Weights weights; @@ -189,26 +194,42 @@ struct oracle_directions { } - Dir const& operator[](unsigned i) { - Dir &dir=dirs[i]; - if (dir.empty()) { + //TODO: is it worthwhile to get a complete document bleu first? would take a list of 1best translations one per line from the decoders, rather than loading all the forests (expensive) + Oracle const& ComputeOracle(unsigned i) { + Oracle &o=oracles[i]; + if (o.is_null()) { ReadFile rf(forest_file(i)); - FeatureVector fear,hope,best; - //TODO: get hope/oracle from vlad. random for now. - LineOptimizer::RandomUnitVector(fids,&dir,&rng); + Hypergraph hg; + { + Timer t("Loading forest from JSON "+forest_file(i)); + HypergraphIO::ReadFromJSON(rf.stream(), &hg); + } + o=oracle.ComputeOracles(MakeMetadata(hg,i),hg,origin,&cerr); } - return dir; + return o; } + // if start_random is true, immediately sample w/ replacement from src sentences; otherwise, consume them sequentially until exhausted, then random. 
oracle vectors are summed void AddOracleDirections() { MT19937::IntRNG rsg=rng.inclusive(0,dev_set_size-1); unsigned b=0; for(unsigned i=0;i<n_oracle;++i) { - directions.push_back(Dir()); - Dir &d=directions.back(); - for (unsigned j=0;j<oracle_batch;++j,++b) - d+=(*this)[(start_random || b>=dev_set_size)?rsg():b]; - d/=(double)oracle_batch; + Dir o2hope; + Dir fear2hope; + for (unsigned j=0;j<oracle_batch;++j,++b) { + Oracle const& o=ComputeOracle((start_random||b>=dev_set_size) ? rsg() : b); + + o2hope+=o.ModelHopeGradient(); + if (fear_to_hope) + fear2hope+=o.FearHopeGradient(); + } + double N=(double)oracle_batch; + o2hope/=N; + directions.push_back(o2hope); + if (fear_to_hope) { + fear2hope/=N; + directions.push_back(fear2hope); + } } } }; |