author     graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>  2010-07-16 01:56:34 +0000
committer  graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>  2010-07-16 01:56:34 +0000
commit     d7d59c4bb81262f1dfece384ec68fa2c25096843 (patch)
tree       5521dc624dc23adeb3bc9d9c8f8fecc7feb57724
parent     ff323448416bbfa691a9697ddf3b30a0398fa08a (diff)
oracle directions
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@276 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r--  decoder/cdec.cc                        |  29
-rwxr-xr-x  decoder/ff_fsa.h                       |   5
-rw-r--r--  decoder/logval.h                       |   6
-rwxr-xr-x  decoder/oracle_bleu.h                  |  79
-rw-r--r--  decoder/sparse_vector.h                |  39
-rwxr-xr-x  decoder/value_array.h                  |  12
-rw-r--r--  decoder/viterbi.cc                     |   2
-rw-r--r--  decoder/viterbi.h                      |   2
-rw-r--r--  vest/mr_vest_generate_mapper_input.cc  |  75
9 files changed, 174 insertions(+), 75 deletions(-)
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index e616f1bb..75c907b1 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -308,7 +308,7 @@ bool prelm_weights_string(po::variables_map const& conf,string &s)
}
-void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,FeatureWeights *weights=0) {
+void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_features,WeightVector *weights=0) {
cerr << viterbi_stats(forest,name,true,show_tree);
if (show_features) {
cerr << name<<" features: ";
@@ -601,33 +601,14 @@ int main(int argc, char** argv) {
vector<WordID> trans;
ViterbiESentence(forest, &trans);
+
/*Oracle Rescoring*/
if(get_oracle_forest) {
- Timer t("Forest Oracle rescoring:");
-
- oracle.DumpKBest(conf,"model",sent_id, forest, 10, true);
-
- Translation best(forest);
- {
- Hypergraph oracle_forest;
- oracle.Rescore(smeta,forest,&oracle_forest,feature_weights,1.0);
- forest.swap(oracle_forest);
- }
- Translation oracle_trans(forest);
-
+    Oracles o=oracle.ComputeOracles(smeta,forest,feature_weights,&cerr,10,conf["forest_output"].as<std::string>());
cerr << " +Oracle BLEU forest (nodes/edges): " << forest.nodes_.size() << '/' << forest.edges_.size() << endl;
cerr << " +Oracle BLEU (paths): " << forest.NumberOfPaths() << endl;
- oracle_trans.Print(cerr," +Oracle BLEU");
- //compute kbest for oracle
- oracle.DumpKBest(conf,"oracle",sent_id, forest, 10, true);
-
- //reweight the model with -1 for the BLEU feature to compute k-best list for negative examples
- oracle.ReweightBleu(&forest,-1.0);
- Translation neg_trans(forest);
- neg_trans.Print(cerr," -Oracle BLEU");
- //compute kbest for negative
- oracle.DumpKBest(conf,"negative",sent_id, forest, 10, true);
-
+ o.hope.Print(cerr," +Oracle BLEU");
+ o.fear.Print(cerr," -Oracle BLEU");
//Add 1-best translation (trans) to pseudo-doc vectors
oracle.IncludeLastScore(&cerr);
}
diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h
index cd56f1a5..2ffd6ef8 100755
--- a/decoder/ff_fsa.h
+++ b/decoder/ff_fsa.h
@@ -1,9 +1,14 @@
#ifndef FF_FSA_H
#define FF_FSA_H
+#include <stdint.h> //C99
#include <string>
#include "ff.h"
#include "sparse_vector.h"
+#include "value_array.h"
+
+typedef ValueArray<uint8_t> Bytes;
+
/*
*/
diff --git a/decoder/logval.h b/decoder/logval.h
index 9aaba557..c8c342a3 100644
--- a/decoder/logval.h
+++ b/decoder/logval.h
@@ -58,6 +58,12 @@ class LogVal {
return *this += b;
}
+ // LogVal(fabs(log(x)),x.s_)
+ friend LogVal abslog(LogVal x) {
+ if (x.v_<0) x.v_=-x.v_;
+ return x;
+ }
+
LogVal& poweq(const T& power) {
#if LOGVAL_CHECK_NEG
if (s_) {
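
Note on the abslog helper above: LogVal stores log|x| in v_ and the sign separately in s_, so taking the absolute value of the log only requires flipping a negative v_. A minimal standalone sketch of the same idea (SignedLog and its fields are illustrative stand-ins, not the real LogVal):

    #include <cmath>
    #include <cassert>

    struct SignedLog {   // simplified stand-in for LogVal<double>
      double v;          // log of the magnitude, like LogVal::v_
      bool s;            // sign flag, like LogVal::s_
    };

    // same effect as the abslog in the hunk: LogVal(fabs(log(x)), x.s_)
    SignedLog abslog(SignedLog x) {
      if (x.v < 0) x.v = -x.v;  // fabs on the stored log; sign flag untouched
      return x;
    }

    int main() {
      SignedLog half = { std::log(0.5), false };  // v < 0 since 0.5 < 1
      assert(abslog(half).v == -std::log(0.5));
      return 0;
    }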
diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h
index 5fef53fd..550f438f 100755
--- a/decoder/oracle_bleu.h
+++ b/decoder/oracle_bleu.h
@@ -37,7 +37,31 @@ struct Translation {
out<<pre<<"Viterbi: "<<TD::GetString(sentence)<<"\n";
out<<pre<<"features: "<<features<<std::endl;
}
+ bool is_null() {
+ return features.size()==0 /* && sentence.size()==0 */;
+ }
+
+};
+
+struct Oracles {
+ bool is_null() {
+ return model.is_null() /* && fear.is_null() && hope.is_null() */;
+ }
+ Translation model,fear,hope;
+ // feature 0 will be the error rate in fear and hope
+ // move toward hope
+ WeightVector ModelHopeGradient() {
+    WeightVector r=hope.features-model.features;
+ r[0]=0;
+ return r;
+ }
+ // move toward hope from fear
+ WeightVector FearHopeGradient() {
+    WeightVector r=hope.features-fear.features;
+ r[0]=0;
+ return r;
+ }
};
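
Note on the two gradients above: both are plain sparse-vector differences with feature 0 (the BLEU/error-rate pseudo-feature) zeroed, so a line search along the resulting direction never moves along the loss dimension itself. A compact sketch with std::map standing in for SparseVector (illustrative, not the real class):

    #include <map>

    typedef std::map<int, double> Vec;  // stand-in for SparseVector<double>

    // direction from b toward a, excluding the loss pseudo-feature
    Vec diff_no_loss(Vec const& a, Vec const& b) {
      Vec r(a);
      for (Vec::const_iterator i = b.begin(); i != b.end(); ++i)
        r[i->first] -= i->second;
      r[0] = 0;  // feature 0 carries the BLEU term; don't optimize along it
      return r;
    }
    // ModelHopeGradient ~ diff_no_loss(hope.features, model.features)
    // FearHopeGradient  ~ diff_no_loss(hope.features, fear.features)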
@@ -53,6 +77,7 @@ struct OracleBleu {
opts->add_options()
("references,R", value<Refs >(), "Translation reference files")
("oracle_loss", value<string>(), "IBM_BLEU_3 (default), IBM_BLEU etc")
+ ("bleu_weight", value<double>()->default_value(1.), "weight to give the hope/fear loss function vs. model score")
;
}
int order;
@@ -66,17 +91,20 @@ struct OracleBleu {
double doc_src_length;
void set_oracle_doc_size(int size) {
oracle_doc_size=size;
- scale_oracle= 1-1./oracle_doc_size;\
+ scale_oracle= 1-1./oracle_doc_size;
doc_src_length=0;
}
OracleBleu(int doc_size=10) {
set_oracle_doc_size(doc_size);
}
- boost::shared_ptr<Score> doc_score,sentscore; // made from factory, so we delete them
+ typedef boost::shared_ptr<Score> ScoreP;
+ ScoreP doc_score,sentscore; // made from factory, so we delete them
+ double bleu_weight;
void UseConf(boost::program_options::variables_map const& conf) {
using namespace std;
+ bleu_weight=conf["bleu_weight"].as<double>();
set_loss(conf["oracle_loss"].as<string>());
set_refs(conf["references"].as<Refs>());
}
@@ -108,21 +136,48 @@ struct OracleBleu {
ViterbiFSentence(forest,&srcsent);
SentenceMetadata sm(sent_id,Lattice()); //TODO: make reference from refs?
sm.SetSourceLength(srcsent.size());
+    sm.SetScore(doc_score.get());
+    sm.SetDocScorer(&ds);
+    sm.SetDocLen(doc_src_length);
return sm;
}
- void Rescore(SentenceMetadata & smeta,Hypergraph const& forest,Hypergraph *dest_forest,WeightVector const& feature_weights,double bleu_weight=1.0) {
- Translation model_trans(forest);
- sentscore.reset(ds[smeta.GetSentenceID()]->ScoreCandidate(model_trans.sentence));
+ Oracles ComputeOracles(SentenceMetadata & smeta,Hypergraph const& forest,WeightVector const& feature_weights,std::ostream *log=0,unsigned kbest=0,std::string const& forest_output="") {
+ Oracles r;
+ int sent_id=smeta.GetSentenceID();
+ r.model=Translation(forest);
+
+ if (kbest) DumpKBest("model",sent_id, forest, kbest, true, forest_output);
+ {
+ Timer t("Forest Oracle rescoring:");
+ Hypergraph oracle_forest;
+      Rescore(smeta,forest,&oracle_forest,feature_weights,bleu_weight,log);
+ forest.swap(oracle_forest);
+ }
+ r.hope=Translation(forest);
+ if (kbest) DumpKBest("oracle",sent_id, forest, kbest, true, forest_output);
+    ReweightBleu(&forest,-bleu_weight);
+ r.fear=Translation(forest);
+ if (kbest) DumpKBest("negative",sent_id, forest, kbest, true, forest_output);
+ return r;
+ }
+
+ ScoreP Score(Sentence const& sentence,int sent_id) {
+ return ds[sent_id]->ScoreCandidate(sentence);
+ }
+ ScoreP Score(Hypergraph const& forest,int sent_id) {
+    return Score(Translation(forest).sentence,sent_id);
+ }
+
+ void Rescore(SentenceMetadata & smeta,Hypergraph const& forest,Hypergraph *dest_forest,WeightVector const& feature_weights,double bleu_weight=1.0,std::ostream *log=&std::cerr) {
+ // the sentence bleu stats will get added to doc only if you call IncludeLastScore
+ sentscore=Score(forest,smeta.GetSentenceID());
if (!doc_score) { doc_score.reset(sentscore->GetOne()); }
tmp_src_length = smeta.GetSourceLength(); //TODO: where does this come from?
- smeta.SetScore(doc_score.get());
- smeta.SetDocLen(doc_src_length);
- smeta.SetDocScorer(&ds);
using namespace std;
- ModelSet oracle_models(FeatureWeights(bleu_weight,1),vector<FeatureFunction const*>(1,pff.get()));
+ ModelSet oracle_models(WeightVector(bleu_weight,1),vector<FeatureFunction const*>(1,pff.get()));
const IntersectionConfiguration inter_conf_oracle(0, 0);
- cerr << "Going to call Apply Model " << endl;
+ if (log) *log << "Going to call Apply Model " << endl;
ApplyModelSet(forest,
smeta,
oracle_models,
@@ -190,10 +245,10 @@ struct OracleBleu {
}
}
- void DumpKBest(boost::program_options::variables_map const& conf,std::string const& suffix,const int sent_id, const Hypergraph& forest, const int k, const bool unique)
+  void DumpKBest(std::string const& suffix,const int sent_id, const Hypergraph& forest, const int k, const bool unique, std::string const& forest_output)
{
std::ostringstream kbest_string_stream;
- kbest_string_stream << conf["forest_output"].as<std::string>() << "/kbest_"<<suffix<< "." << sent_id;
+ kbest_string_stream << forest_output << "/kbest_"<<suffix<< "." << sent_id;
DumpKBest(sent_id, forest, k, unique, kbest_string_stream.str());
}
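
Taken together with the cdec.cc hunk, the refactor replaces the hand-scripted Rescore / ReweightBleu / DumpKBest sequence with one ComputeOracles call. A sketch of the new calling convention (variable names as in the cdec.cc hunk; no API beyond what this diff introduces is assumed):

    // hope = best under model score + bleu_weight * BLEU,
    // fear = the same forest reweighted by -bleu_weight on the BLEU feature
    Oracles o = oracle.ComputeOracles(smeta, forest, feature_weights,
                                      &std::cerr, /*kbest=*/10,
                                      conf["forest_output"].as<std::string>());
    o.hope.Print(std::cerr, " +Oracle BLEU");
    o.fear.Print(std::cerr, " -Oracle BLEU");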
diff --git a/decoder/sparse_vector.h b/decoder/sparse_vector.h
index bda10974..c6c57150 100644
--- a/decoder/sparse_vector.h
+++ b/decoder/sparse_vector.h
@@ -20,10 +20,21 @@ public:
SparseVector() {}
explicit SparseVector(std::vector<T> const& v) {
typename MapType::iterator p=values_.begin();
- for (unsigned i=0;i<v.size();++i)
- p=values_.insert(p,typename MapType::value_type(i,v[i])); //faster
+ const T z=T(0);
+ for (unsigned i=0;i<v.size();++i) {
+ T const& t=v[i];
+ if (t!=z)
+ p=values_.insert(p,typename MapType::value_type(i,t)); //hint makes insertion faster
+ }
+
+ }
+
+ void set_new_value(int index, T const& val) {
+ assert(values_.find(index)==values_.end());
+ values_[index]=val;
}
+
const T operator[](int index) const {
typename MapType::const_iterator found = values_.find(index);
if (found == values_.end())
@@ -265,9 +276,29 @@ private:
MapType values_;
};
+// doesn't support fast indexing directly
+template <class T>
+class SparseVectorList {
+  typedef std::vector<std::pair<int,T> > ListType;
+  typedef typename ListType::value_type pair_type;
+  typedef typename ListType::const_iterator const_iterator;
+ public:
+  SparseVectorList() {  }
+ explicit SparseVectorList(std::vector<T> const& v) {
+ const T z=T(0);
+ for (unsigned i=0;i<v.size();++i) {
+ T const& t=v[i];
+ if (t!=z)
+ p.push_back(pair_type(i,t));
+ }
+    p.resize(p.size()); // note: resizing to the current size won't shrink capacity; ListType(p).swap(p) would
+ }
+private:
+ ListType p;
+};
+
+
typedef SparseVector<double> FeatureVector;
-typedef std::vector<double> FeatureWeights;
-typedef FeatureWeights WeightVector;
+typedef SparseVector<double> WeightVector;
template <typename T>
SparseVector<T> operator+(const SparseVector<T>& a, const SparseVector<T>& b) {
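
Note on SparseVectorList: a sorted (index,value) pair list drops std::map's per-node overhead and gives cache-friendly iteration, at the cost of no random access, which fits the gradient-summing use here where vectors are built once and then only scanned. A self-contained sketch of the same construction (names illustrative):

    #include <vector>
    #include <utility>

    template <class T>
    struct PairList {                     // same idea as SparseVectorList
      std::vector<std::pair<int, T> > p;  // (index, value), indices ascending
      PairList() {}
      explicit PairList(std::vector<T> const& v) {
        const T z = T(0);
        for (unsigned i = 0; i < v.size(); ++i)
          if (v[i] != z)                  // store nonzeros only, as in the diff
            p.push_back(std::make_pair((int)i, v[i]));
      }
    };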
diff --git a/decoder/value_array.h b/decoder/value_array.h
index bfdd1155..7401938a 100755
--- a/decoder/value_array.h
+++ b/decoder/value_array.h
@@ -1,12 +1,12 @@
#ifndef VALUE_ARRAY_H
#define VALUE_ARRAY_H
-# include <cstdlib>
-# include <algorithm>
-# include <new>
-# include <boost/range.hpp>
-# include <boost/utility/enable_if.hpp>
-# include <boost/type_traits.hpp>
+#include <cstdlib>
+#include <algorithm>
+#include <new>
+#include <boost/range.hpp>
+#include <boost/utility/enable_if.hpp>
+#include <boost/type_traits.hpp>
#ifdef USE_BOOST_SERIALIZE
# include <boost/serialization/split_member.hpp>
# include <boost/serialization/access.hpp>
diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc
index f11b77ec..7719de32 100644
--- a/decoder/viterbi.cc
+++ b/decoder/viterbi.cc
@@ -116,7 +116,7 @@ inline bool close_enough(double a,double b,double epsilon)
return diff<=epsilon*fabs(a) || diff<=epsilon*fabs(b);
}
-FeatureVector ViterbiFeatures(Hypergraph const& hg,FeatureWeights const* weights,bool fatal_dotprod_disagreement) {
+FeatureVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights,bool fatal_dotprod_disagreement) {
FeatureVector r;
const prob_t p = Viterbi<FeatureVectorTraversal>(hg, &r);
if (weights) {
diff --git a/decoder/viterbi.h b/decoder/viterbi.h
index 4697590b..388bff3c 100644
--- a/decoder/viterbi.h
+++ b/decoder/viterbi.h
@@ -205,6 +205,6 @@ int ViterbiELength(const Hypergraph& hg);
int ViterbiPathLength(const Hypergraph& hg);
/// if weights supplied, assert viterbi prob = features.dot(*weights) (exception if fatal, cerr warn if not). return features (sum over all edges in viterbi derivation)
-FeatureVector ViterbiFeatures(Hypergraph const& hg,FeatureWeights const* weights=0,bool fatal_dotprod_disagreement=false);
+FeatureVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights=0,bool fatal_dotprod_disagreement=false);
#endif
diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc
index e9a5650b..677c0497 100644
--- a/vest/mr_vest_generate_mapper_input.cc
+++ b/vest/mr_vest_generate_mapper_input.cc
@@ -84,16 +84,16 @@ struct oracle_directions {
OracleBleu::AddOptions(&opts);
opts.add_options()
("dev_set_size,s",po::value<unsigned>(&dev_set_size),"[REQD] Development set size (# of parallel sentences)")
- ("forest_repository,r",po::value<string>(),"[REQD] Path to forest repository")
- ("weights,w",po::value<string>(),"[REQD] Current feature weights file")
+ ("forest_repository,r",po::value<string>(&forest_repository),"[REQD] Path to forest repository")
+ ("weights,w",po::value<string>(&weights_file),"[REQD] Current feature weights file")
("optimize_feature,o",po::value<vector<string> >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)")
- ("random_directions,d",po::value<unsigned int>()->default_value(20),"Number of random directions to run the line optimizer in")
+ ("random_directions,d",po::value<unsigned>(&random_directions)->default_value(10),"Number of random directions to run the line optimizer in")
("no_primary,n","don't use the primary (orthogonal each feature alone) directions")
- ("oracle_directions,O",po::value<unsigned>()->default_value(0),"read the forests and choose this many directions based on heading toward a hope max (bleu+modelscore) translation.")
+ ("oracle_directions,O",po::value<unsigned>(&n_oracle)->default_value(0),"read the forests and choose this many directions based on heading toward a hope max (bleu+modelscore) translation.")
("oracle_start_random",po::bool_switch(&start_random),"sample random subsets of dev set for ALL oracle directions, not just those after a sequential run through it")
- ("oracle_batch,b",po::value<unsigned>()->default_value(10),"to produce each oracle direction, sum the 'gradient' over this many sentences")
- ("max_similarity,m",po::value<double>()->default_value(0),"remove directions that are too similar (Tanimoto coeff. less than (1-this)). 0 means don't filter, 1 means only 1 direction allowed?")
- ("fear_to_hope,f","for each of the oracle_directions, also include a direction from fear to hope (as well as origin to hope)")
+ ("oracle_batch,b",po::value<unsigned>(&oracle_batch)->default_value(10),"to produce each oracle direction, sum the 'gradient' over this many sentences")
+ ("max_similarity,m",po::value<double>(&max_similarity)->default_value(0),"remove directions that are too similar (Tanimoto coeff. less than (1-this)). 0 means don't filter, 1 means only 1 direction allowed?")
+ ("fear_to_hope,f",po::bool_switch(&fear_to_hope),"for each of the oracle_directions, also include a direction from fear to hope (as well as origin to hope)")
("help,h", "Help");
po::options_description dcmdline_options;
dcmdline_options.add(opts);
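
These option changes lean on boost::program_options' variable binding: po::value<T>(&var) stores the parsed (or default) value directly into var when po::notify runs, which is what makes the manual conf[...].as<T>() extraction below removable. A minimal standalone illustration:

    #include <boost/program_options.hpp>
    #include <iostream>
    namespace po = boost::program_options;

    int main(int argc, char** argv) {
      unsigned random_directions;
      po::options_description opts("demo");
      opts.add_options()
        ("random_directions,d",
         po::value<unsigned>(&random_directions)->default_value(10),
         "bound option: assigned during po::notify");
      po::variables_map vm;
      po::store(po::parse_command_line(argc, argv, opts), vm);
      po::notify(vm);  // writes 10 (or -d's argument) into random_directions
      std::cout << random_directions << '\n';
      return 0;
    }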
@@ -139,16 +139,20 @@ struct oracle_directions {
oracle.UseConf(conf);
include_primary=!conf.count("no_primary");
+ old_to_hope=!conf.count("no_old_to_hope");
+
if (conf.count("optimize_feature") > 0)
optimize_features=conf["optimize_feature"].as<vector<string> >();
- fear_to_hope=conf.count("fear_to_hope");
- n_random=conf["random_directions"].as<unsigned int>();
- forest_repository=conf["forest_repository"].as<string>();
+
+ // po::value<X>(&var) takes care of below:
+// fear_to_hope=conf.count("fear_to_hope");
+// n_random=conf["random_directions"].as<unsigned int>();
+// forest_repository=conf["forest_repository"].as<string>();
// dev_set_size=conf["dev_set_size"].as<unsigned int>();
- n_oracle=conf["oracle_directions"].as<unsigned>();
- oracle_batch=conf["oracle_batch"].as<unsigned>();
- max_similarity=conf["max_similarity"].as<double>();
- weights_file=conf["weights"].as<string>();
+// n_oracle=conf["oracle_directions"].as<unsigned>();
+// oracle_batch=conf["oracle_batch"].as<unsigned>();
+// max_similarity=conf["max_similarity"].as<double>();
+// weights_file=conf["weights"].as<string>();
Init();
}
@@ -158,7 +162,7 @@ struct oracle_directions {
unsigned n_oracle, oracle_batch;
string forest_repository;
unsigned dev_set_size;
- vector<Dir> dirs; //best_to_hope_dirs
+  vector<Oracles> oracles;
vector<int> fids;
string forest_file(unsigned i) const {
ostringstream o;
@@ -178,6 +182,7 @@ struct oracle_directions {
weights.InitSparseVector(&origin);
fids.clear();
AddFeatureIds(features);
+ oracles.resize(dev_set_size);
}
Weights weights;
@@ -189,26 +194,42 @@ struct oracle_directions {
}
- Dir const& operator[](unsigned i) {
- Dir &dir=dirs[i];
- if (dir.empty()) {
+ //TODO: is it worthwhile to get a complete document bleu first? would take a list of 1best translations one per line from the decoders, rather than loading all the forests (expensive)
+  Oracles const& ComputeOracle(unsigned i) {
+    Oracles &o=oracles[i];
+ if (o.is_null()) {
ReadFile rf(forest_file(i));
- FeatureVector fear,hope,best;
- //TODO: get hope/oracle from vlad. random for now.
- LineOptimizer::RandomUnitVector(fids,&dir,&rng);
+ Hypergraph hg;
+ {
+ Timer t("Loading forest from JSON "+forest_file(i));
+ HypergraphIO::ReadFromJSON(rf.stream(), &hg);
+ }
+      SentenceMetadata sm=MakeMetadata(hg,i);
+      o=oracle.ComputeOracles(sm,hg,origin,&cerr);
}
- return dir;
+ return o;
}
+
// if start_random is true, immediately sample w/ replacement from src sentences; otherwise, consume them sequentially until exhausted, then random. oracle vectors are summed
void AddOracleDirections() {
MT19937::IntRNG rsg=rng.inclusive(0,dev_set_size-1);
unsigned b=0;
for(unsigned i=0;i<n_oracle;++i) {
- directions.push_back(Dir());
- Dir &d=directions.back();
- for (unsigned j=0;j<oracle_batch;++j,++b)
- d+=(*this)[(start_random || b>=dev_set_size)?rsg():b];
- d/=(double)oracle_batch;
+ Dir o2hope;
+ Dir fear2hope;
+ for (unsigned j=0;j<oracle_batch;++j,++b) {
+ Oracle const& o=ComputeOracle((start_random||b>=dev_set_size) ? rsg() : b);
+
+ o2hope+=o.ModelHopeGradient();
+ if (fear_to_hope)
+ fear2hope+=o.FearHopeGradient();
+ }
+ double N=(double)oracle_batch;
+ o2hope/=N;
+ directions.push_back(o2hope);
+ if (fear_to_hope) {
+ fear2hope/=N;
+ directions.push_back(fear2hope);
+ }
}
}
};
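
Note on AddOracleDirections: each emitted direction is the mean of oracle_batch per-sentence gradients, and the batch index schedule walks the dev set sequentially until it is exhausted (or immediately, with --oracle_start_random), then samples with replacement. The schedule in isolation (sample() stands in for the rsg() draw above):

    // sequential first pass, then random with replacement
    unsigned next_index(unsigned b, unsigned dev_set_size,
                        bool start_random, unsigned (*sample)()) {
      return (start_random || b >= dev_set_size) ? sample() : b;
    }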