summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- decoder/cdec.cc 11
-rw-r--r-- decoder/ff_bleu.cc 4
-rwxr-xr-x decoder/oracle_bleu.h 37
-rw-r--r-- vest/mr_vest_generate_mapper_input.cc 37
-rw-r--r-- vest/scorer.cc 2
5 files changed, 56 insertions, 35 deletions
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index 8827cce3..a9c1cb3b 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -91,7 +91,7 @@ void print_options(std::ostream &out,po::options_description const& opts) {
}
-void InitCommandLine(int argc, char** argv, po::variables_map* confp) {
+void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* confp) {
po::variables_map &conf=*confp;
po::options_description opts("Configuration options");
opts.add_options()
@@ -151,7 +151,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* confp) {
("combine_size,C",po::value<int>()->default_value(1), "When option -G is used, process this many sentence pairs before writing the gradient (1=emit after every sentence pair)")
("forest_output,O",po::value<string>(),"Directory to write forests to")
("minimal_forests,m","Write minimal forests (excludes Rule information). Such forests can be used for ML/MAP training, but not rescoring, etc.");
- OracleBleu::AddOptions(&opts);
+ ob.AddOptions(&opts);
po::options_description clo("Command line options");
clo.add_options()
("config,c", po::value<string>(), "Configuration file")
@@ -206,6 +206,8 @@ void InitCommandLine(int argc, char** argv, po::variables_map* confp) {
cerr << dcmdline_options << endl;
exit(1);
}
+ po::notify(conf);
+
}
// TODO move out of cdec into some sampling decoder file
@@ -358,7 +360,9 @@ int main(int argc, char** argv) {
global_ff_registry.reset(new FFRegistry);
register_feature_functions();
po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
+ OracleBleu oracle;
+
+ InitCommandLine(argc, argv, oracle, &conf);
const bool write_gradient = conf.count("cll_gradient");
const bool feature_expectations = conf.count("feature_expectations");
if (write_gradient && feature_expectations) {
@@ -488,7 +492,6 @@ int main(int argc, char** argv) {
const bool crf_uniform_empirical = conf.count("crf_uniform_empirical");
const bool get_oracle_forest = conf.count("get_oracle_forest");
- OracleBleu oracle;
if (get_oracle_forest)
oracle.UseConf(conf);
diff --git a/decoder/ff_bleu.cc b/decoder/ff_bleu.cc
index ab61ed10..f8d62aa2 100644
--- a/decoder/ff_bleu.cc
+++ b/decoder/ff_bleu.cc
@@ -243,8 +243,8 @@ BLEUModel::BLEUModel(const string& param) :
//loop over argv and load all references into vector of NgramMaps
if (argc >= 1) {
- if (argv[1] != "-o" || argc<2) {
- cerr<<bleu_usage_name<<" specification should be: "<<bleu_usage_short<<"; you provided: "<<param<<endl<<bleu_usage_verbose<<endl;
+ if (argv[0] != "-o" || argc<2) {
+ cerr<<bleu_usage_name<<" specification should be: "<<bleu_usage_short<<"; you provided: "<<param<<endl<<argv[0]<<endl<<bleu_usage_verbose<<endl;
abort();
} else
order=boost::lexical_cast<int>(argv[1]);
diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h
index cc19fbca..07d83b7f 100755
--- a/decoder/oracle_bleu.h
+++ b/decoder/oracle_bleu.h
@@ -73,13 +73,13 @@ struct OracleBleu {
WeightVector feature_weights_;
DocScorer ds;
- static void AddOptions(boost::program_options::options_description *opts) {
+ void AddOptions(boost::program_options::options_description *opts) {
using namespace boost::program_options;
using namespace std;
opts->add_options()
- ("references,R", value<Refs >(), "Translation reference files")
- ("oracle_loss", value<string>(), "IBM_BLEU_3 (default), IBM_BLEU etc")
- ("bleu_weight", value<double>()->default_value(1.), "weight to give the hope/fear loss function vs. model score")
+ ("references,R", value<Refs >(&refs), "Translation reference files")
+ ("oracle_loss", value<string>(&loss_name)->default_value("IBM_BLEU_3"), "IBM_BLEU_3 (default), IBM_BLEU etc")
+ ("bleu_weight", value<double>(&bleu_weight)->default_value(1.), "weight to give the hope/fear loss function vs. model score")
;
}
int order;
@@ -110,28 +110,41 @@ struct OracleBleu {
}
double bleu_weight;
- void UseConf(boost::program_options::variables_map const& conf) {
+ // you have to call notify(conf) yourself, once, in main or similar
+ void UseConf(boost::program_options::variables_map const& /* conf */) {
using namespace std;
- bleu_weight=conf["bleu_weight"].as<double>();
- set_loss(conf["oracle_loss"].as<string>());
- set_refs(conf["references"].as<Refs>());
+ // bleu_weight=conf["bleu_weight"].as<double>();
+ //set_loss(conf["oracle_loss"].as<string>());
+ //set_refs(conf["references"].as<Refs>());
+ init_loss();
+ init_refs();
}
ScoreType loss;
-// std::string loss_name;
+ std::string loss_name;
boost::shared_ptr<FeatureFunction> pff;
- void set_loss(std::string const& lossd="IBM_BLEU_3") {
-// loss_name=lossd;
- loss=ScoreTypeFromString(lossd);
+ void set_loss(std::string const& lossd) {
+ loss_name=lossd;
+ init_loss();
+ }
+ void init_loss() {
+ loss=ScoreTypeFromString(loss_name);
order=(loss==IBM_BLEU_3)?3:4;
std::ostringstream param;
param<<"-o "<<order;
pff=global_ff_registry->Create("BLEUModel",param.str());
}
+ bool is_null() const {
+ return refs.empty();
+ }
void set_refs(Refs const& r) {
refs=r;
+ init_refs();
+ }
+ void init_refs() {
+ if (is_null()) return;
assert(refs.size());
ds=DocScorer(loss,refs);
doc_score.reset();
diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc
index 50e620f8..ac826ecf 100644
--- a/vest/mr_vest_generate_mapper_input.cc
+++ b/vest/mr_vest_generate_mapper_input.cc
@@ -33,13 +33,16 @@ typedef SparseVector<double> Dir;
typedef Dir Point;
-void compress_similar(vector<Dir> &dirs,double min_dist,ostream *log=&cerr,bool avg=true) {
+void compress_similar(vector<Dir> &dirs,double min_dist,ostream *log=&cerr,bool avg=true,bool verbose=true) {
+ // return; //TODO: debug
if (min_dist<=0) return;
double max_s=1.-min_dist;
+ if (log&&verbose) *log<<"max allowed S="<<max_s<<' ';
unsigned N=dirs.size();
for (int i=0;i<N;++i) {
for (int j=i+1;j<N;++j) {
double s=dirs[i].tanimoto_coef(dirs[j]);
+ if (log&&verbose) *log<<"S["<<i<<","<<j<<"]="<<s<<' ';
if (s>max_s) {
if (log) *log << "Collapsing similar directions (T="<<s<<" > "<<max_s<<"). dirs["<<i<<"]="<<dirs[i]<<" dirs["<<j<<"]";
if (avg) {
@@ -51,6 +54,8 @@ void compress_similar(vector<Dir> &dirs,double min_dist,ostream *log=&cerr,bool
swap(dirs[j],dirs[--N]);
}
}
+ if (log&&verbose) *log<<endl;
+
}
dirs.resize(N);
}
@@ -118,10 +123,14 @@ struct oracle_directions {
cerr << dcmdline_options << endl;
exit(1);
}
+ po::notify(*conf);
+
+ if (0) {
dev_set_size = (*conf)["dev_set_size"].as<unsigned>();
forest_repository = (*conf)["forest_repository"].as<string>();
weights_file = (*conf)["weights"].as<string>();
n_random = (*conf)["random_directions"].as<unsigned>();
+ }
}
int main(int argc, char *argv[]) {
@@ -134,8 +143,8 @@ struct oracle_directions {
void Run() {
AddPrimaryAndRandomDirections();
- //AddOracleDirections();
- //compress_similar(directions,max_similarity);
+ AddOracleDirections();
+ compress_similar(directions,max_similarity);
Print();
}
@@ -143,25 +152,21 @@ struct oracle_directions {
Point origin; // old weights that gave model 1best.
vector<string> optimize_features;
void UseConf(po::variables_map const& conf) {
-#if 0
oracle.UseConf(conf);
-
+ // po::value<X>(&var) takes care of below:
+ // fear_to_hope=conf.count("fear_to_hope");
+ // n_random=conf["random_directions"].as<unsigned int>();
+ // forest_repository=conf["forest_repository"].as<string>();
+ // dev_set_size=conf["dev_set_size"].as<unsigned int>();
+ // n_oracle=conf["oracle_directions"].as<unsigned>();
+ // oracle_batch=conf["oracle_batch"].as<unsigned>();
+ // max_similarity=conf["max_similarity"].as<double>();
+ // weights_file=conf["weights"].as<string>();
include_primary=!conf.count("no_primary");
old_to_hope=!conf.count("no_old_to_hope");
if (conf.count("optimize_feature") > 0)
optimize_features=conf["optimize_feature"].as<vector<string> >();
-
- // po::value<X>(&var) takes care of below:
-// fear_to_hope=conf.count("fear_to_hope");
-// n_random=conf["random_directions"].as<unsigned int>();
-// forest_repository=conf["forest_repository"].as<string>();
-// dev_set_size=conf["dev_set_size"].as<unsigned int>();
-// n_oracle=conf["oracle_directions"].as<unsigned>();
-// oracle_batch=conf["oracle_batch"].as<unsigned>();
-// max_similarity=conf["max_similarity"].as<double>();
-// weights_file=conf["weights"].as<string>();
-#endif
Init();
}
diff --git a/vest/scorer.cc b/vest/scorer.cc
index 8f981af6..30015bb2 100644
--- a/vest/scorer.cc
+++ b/vest/scorer.cc
@@ -44,7 +44,7 @@ ScoreType ScoreTypeFromString(const string& st) {
return Koehn_BLEU;
if (sl == "combi")
return BLEU_minus_TER_over_2;
- cerr << "Don't understand score type '" << sl << "', defaulting to ibm_bleu.\n";
+ cerr << "Don't understand score type '" << st << "', defaulting to ibm_bleu.\n";
return IBM_BLEU;
}