Diffstat (limited to 'dtrain')
-rw-r--r--  dtrain/Makefile.am              |   2
-rw-r--r--  dtrain/dtrain.cc                |  26
-rw-r--r--  dtrain/hgsampler.cc             |  75
-rw-r--r--  dtrain/hgsampler.h              |  30
-rw-r--r--  dtrain/kbestget.h               |   4
-rw-r--r--  dtrain/ksampler.h               |   6
-rw-r--r--  dtrain/test/example/dtrain.ini  |   6
7 files changed, 28 insertions(+), 121 deletions(-)
diff --git a/dtrain/Makefile.am b/dtrain/Makefile.am
index baf6883a..471977e1 100644
--- a/dtrain/Makefile.am
+++ b/dtrain/Makefile.am
@@ -1,6 +1,6 @@
 bin_PROGRAMS = dtrain
-dtrain_SOURCES = dtrain.cc score.cc hgsampler.cc
+dtrain_SOURCES = dtrain.cc score.cc
 dtrain_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
 AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -O3
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 5c95c7f1..79047fd9 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -19,12 +19,13 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
     ("scorer", po::value<string>()->default_value("stupid_bleu"), "scoring: bleu, stupid_*, smooth_*, approx_*")
     ("stop_after", po::value<unsigned>()->default_value(0), "stop after X input sentences")
     ("print_weights", po::value<string>(), "weights to print on each iteration")
-    ("hstreaming", po::value<string>()->default_value("N/A"), "run in hadoop streaming mode, arg is a task id")
+    ("hstreaming", po::value<string>(), "run in hadoop streaming mode, arg is a task id")
     ("learning_rate", po::value<weight_t>()->default_value(0.0005), "learning rate")
     ("gamma", po::value<weight_t>()->default_value(0), "gamma for SVM (0 for perceptron)")
     ("tmp", po::value<string>()->default_value("/tmp"), "temp dir to use")
     ("select_weights", po::value<string>()->default_value("last"), "output 'best' or 'last' weights ('VOID' to throw away)")
     ("keep_w", po::value<bool>()->zero_tokens(), "protocol weights for each iteration")
+    ("unit_weight_vector", po::value<bool>()->zero_tokens(), "Rescale weight vector after each input")
 #ifdef DTRAIN_LOCAL
     ("refs,r", po::value<string>(), "references in local mode")
 #endif
@@ -46,7 +47,7 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
     return false;
   }
   if (cfg->count("hstreaming") && (*cfg)["output"].as<string>() != "-") {
-    cerr << "When using 'hstreaming' the 'output' param should be '-'.";
+    cerr << "When using 'hstreaming' the 'output' param should be '-'." << endl;
     return false;
   }
 #ifdef DTRAIN_LOCAL
@@ -98,6 +99,8 @@ main(int argc, char** argv)
     task_id = cfg["hstreaming"].as<string>();
     cerr.precision(17);
   }
+  bool unit_weight_vector = false;
+  if (cfg.count("unit_weight_vector")) unit_weight_vector = true;
   HSReporter rep(task_id);
   bool keep_w = false;
   if (cfg.count("keep_w")) keep_w = true;
@@ -226,7 +229,7 @@ main(int argc, char** argv)
 #endif
   score_t score_sum = 0.;
   score_t model_sum(0);
-  unsigned ii = 0, nup = 0, npairs = 0;
+  unsigned ii = 0, rank_errors = 0, margin_violations = 0, npairs = 0;
   if (!quiet)
     cerr << "Iteration #" << t+1 << " of " << T << "." << endl;
 
   while(true)
@@ -369,21 +372,25 @@ main(int argc, char** argv)
           if (rank_error > 0) {
             SparseVector<weight_t> diff_vec = it->second.f - it->first.f;
             lambdas.plus_eq_v_times_s(diff_vec, eta);
-            nup++;
+            rank_errors++;
           }
+          if (margin < 1) margin_violations++;
         } else {
           // SVM
           score_t margin = it->first.model - it->second.model;
           if (rank_error > 0 || margin < 1) {
             SparseVector<weight_t> diff_vec = it->second.f - it->first.f;
             lambdas.plus_eq_v_times_s(diff_vec, eta);
-            nup++;
+            if (rank_error > 0) rank_errors++;
+            if (margin < 1) margin_violations++;
           }
           // regularization
           lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./npairs));
         }
       }
     }
+
+    if (unit_weight_vector && sample_from == "forest") lambdas /= lambdas.l2norm();
 
     ++ii;
@@ -437,15 +444,18 @@ main(int argc, char** argv)
       cerr << _p << " (" << model_diff << ")" << endl;
       cerr << " avg #pairs: ";
       cerr << _np << npairs/(float)in_sz << endl;
-      cerr << " avg #up: ";
-      cerr << nup/(float)in_sz << endl;
+      cerr << " avg #rank err: ";
+      cerr << rank_errors/(float)in_sz << endl;
+      cerr << " avg #margin viol: ";
+      cerr << margin_violations/float(in_sz) << endl;
     }
     if (hstreaming) {
       rep.update_counter("Score 1best avg #"+boost::lexical_cast<string>(t+1), score_avg);
       rep.update_counter("Model 1best avg #"+boost::lexical_cast<string>(t+1), model_avg);
       rep.update_counter("Pairs avg #"+boost::lexical_cast<string>(t+1), npairs/(weight_t)in_sz);
-      rep.update_counter("Updates avg #"+boost::lexical_cast<string>(t+1), nup/(weight_t)in_sz);
+      rep.update_counter("Rank errors avg #"+boost::lexical_cast<string>(t+1), rank_errors/(weight_t)in_sz);
+      rep.update_counter("Margin violations avg #"+boost::lexical_cast<string>(t+1), margin_violations/(weight_t)in_sz);
       unsigned nonz = (unsigned)lambdas.size_nonzero();
       rep.update_counter("Non zero feature count #"+boost::lexical_cast<string>(t+1), nonz);
       rep.update_gcounter("Non zero feature count #"+boost::lexical_cast<string>(t+1), nonz);
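The new unit_weight_vector flag simply rescales the weight vector to unit L2 length after each input (lambdas /= lambdas.l2norm() above). A minimal standalone sketch of that rescaling, using a plain std::map as a hypothetical stand-in for cdec's SparseVector<weight_t>:

    #include <cmath>
    #include <map>
    #include <string>

    typedef std::map<std::string, double> WeightVector; // stand-in for SparseVector<weight_t>

    // L2 norm of a sparse weight vector, the quantity lambdas.l2norm() returns
    double l2norm(const WeightVector& w) {
      double sum = 0;
      for (WeightVector::const_iterator it = w.begin(); it != w.end(); ++it)
        sum += it->second * it->second;
      return std::sqrt(sum);
    }

    // rescale to unit length after each input, as unit_weight_vector does
    void rescale(WeightVector* w) {
      const double norm = l2norm(*w);
      if (norm > 0)
        for (WeightVector::iterator it = w->begin(); it != w->end(); ++it)
          it->second /= norm;
    }

Scaling the whole vector by a positive constant leaves the ranking of hypotheses by w*f unchanged; it only keeps the effective step size of the fixed learning rate comparable from one input to the next.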
diff --git a/dtrain/hgsampler.cc b/dtrain/hgsampler.cc
deleted file mode 100644
index ad28b162..00000000
--- a/dtrain/hgsampler.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-// Chris Dyer
-#include "hgsampler.h"
-
-#include <queue>
-
-#include "viterbi.h"
-#include "inside_outside.h"
-
-using namespace std;
-
-struct SampledDerivationWeightFunction {
-  typedef double Weight;
-  explicit SampledDerivationWeightFunction(const vector<bool>& sampled) : sampled_edges(sampled) {}
-  double operator()(const Hypergraph::Edge& e) const {
-    return static_cast<double>(sampled_edges[e.id_]);
-  }
-  const vector<bool>& sampled_edges;
-};
-
-void HypergraphSampler::sample_hypotheses(const Hypergraph& hg,
-                                          unsigned n,
-                                          MT19937* rng,
-                                          vector<Hypothesis>* hypos) {
-  hypos->clear();
-  hypos->resize(n);
-
-  // compute inside probabilities
-  vector<prob_t> node_probs;
-  Inside<prob_t, EdgeProb>(hg, &node_probs, EdgeProb());
-
-  vector<bool> sampled_edges(hg.edges_.size());
-  queue<unsigned> q;
-  SampleSet<prob_t> ss;
-  for (unsigned i = 0; i < n; ++i) {
-    fill(sampled_edges.begin(), sampled_edges.end(), false);
-    // sample derivation top down
-    assert(q.empty());
-    Hypothesis& hyp = (*hypos)[i];
-    SparseVector<double>& deriv_features = hyp.fmap;
-    q.push(hg.nodes_.size() - 1);
-    prob_t& model_score = hyp.model_score;
-    model_score = prob_t::One();
-    while(!q.empty()) {
-      unsigned cur_node_id = q.front();
-      q.pop();
-      const Hypergraph::Node& node = hg.nodes_[cur_node_id];
-      const unsigned num_in_edges = node.in_edges_.size();
-      unsigned sampled_edge_idx = 0;
-      if (num_in_edges == 1) {
-        sampled_edge_idx = node.in_edges_[0];
-      } else {
-        assert(num_in_edges > 1);
-        ss.clear();
-        for (unsigned j = 0; j < num_in_edges; ++j) {
-          const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]];
-          prob_t p = edge.edge_prob_; // edge weight
-          for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k)
-            p *= node_probs[edge.tail_nodes_[k]]; // tail node inside weight
-          ss.add(p);
-        }
-        sampled_edge_idx = node.in_edges_[rng->SelectSample(ss)];
-      }
-      sampled_edges[sampled_edge_idx] = true;
-      const Hypergraph::Edge& sampled_edge = hg.edges_[sampled_edge_idx];
-      deriv_features += sampled_edge.feature_values_;
-      model_score *= sampled_edge.edge_prob_;
-      //sampled_deriv->push_back(sampled_edge_idx);
-      for (unsigned j = 0; j < sampled_edge.tail_nodes_.size(); ++j) {
-        q.push(sampled_edge.tail_nodes_[j]);
-      }
-    }
-    Viterbi(hg, &hyp.words, ESentenceTraversal(), SampledDerivationWeightFunction(sampled_edges));
-  }
-}
-
diff --git a/dtrain/hgsampler.h b/dtrain/hgsampler.h
deleted file mode 100644
index 45c5b8f2..00000000
--- a/dtrain/hgsampler.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// Chris Dyer
-#ifndef _DTRAIN_HGSAMPLER_H_
-#define _DTRAIN_HGSAMPLER_H_
-
-
-#include <vector>
-#include "sparse_vector.h"
-#include "sampler.h"
-#include "wordid.h"
-
-class Hypergraph;
-
-struct HypergraphSampler {
-
-  struct Hypothesis {
-    std::vector<WordID> words;
-    SparseVector<double> fmap;
-    prob_t model_score;
-  };
-
-  static void
-  sample_hypotheses(const Hypergraph& hg,
-                    unsigned n,
-                    MT19937* rng,
-                    std::vector<Hypothesis>* hypos);
-};
-
-
-#endif
-
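Both files above are deleted because the sampler now ships with cdec itself as hg_sampler.h (see the ksampler.h hunk below). Its core operation is drawing an incoming edge of a node with probability proportional to the edge weight times the inside scores of the edge's tail nodes. A rough sketch of that weighted draw, with a toy RNG standing in for cdec's MT19937/SampleSet:

    #include <cstdlib>
    #include <vector>

    // pick index i with probability weights[i] / sum(weights), the role
    // SampleSet + MT19937::SelectSample play in the deleted sampler;
    // weights[j] would be edge_prob_ times the inside scores of edge j's
    // tail nodes (assumes non-empty weights)
    unsigned select_sample(const std::vector<double>& weights) {
      double sum = 0;
      for (unsigned i = 0; i < weights.size(); ++i) sum += weights[i];
      double r = sum * (std::rand() / (RAND_MAX + 1.0)); // toy RNG, not MT19937
      for (unsigned i = 0; i < weights.size(); ++i) {
        if (r < weights[i]) return i;
        r -= weights[i];
      }
      return weights.size() - 1; // guard against floating-point rounding
    }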
diff --git a/dtrain/kbestget.h b/dtrain/kbestget.h
index abe657d0..88f8bc17 100644
--- a/dtrain/kbestget.h
+++ b/dtrain/kbestget.h
@@ -78,13 +78,13 @@ struct KBestGetter : public HypSampler
   virtual void
   NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg)
   {
-    KBest(*hg);
+    KBestScored(*hg);
   }
   vector<ScoredHyp>* GetSamples() { return &s_; }
 
   void
-  KBest(const Hypergraph& forest)
+  KBestScored(const Hypergraph& forest)
   {
     if (filter_type_ == "unique") {
       KBestUnique(forest);
diff --git a/dtrain/ksampler.h b/dtrain/ksampler.h
index 276f2cc9..8b1c09f2 100644
--- a/dtrain/ksampler.h
+++ b/dtrain/ksampler.h
@@ -1,7 +1,7 @@
 #ifndef _DTRAIN_KSAMPLER_H_
 #define _DTRAIN_KSAMPLER_H_
 
-#include "hgsampler.h"
+#include "hg_sampler.h" // cdec
 #include "kbestget.h"
 #include "score.h"
@@ -22,12 +22,12 @@ struct KSampler : public HypSampler
   virtual void
   NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg)
   {
-    Sample(*hg);
+    ScoredSamples(*hg);
   }
   vector<ScoredHyp>* GetSamples() { return &s_; }
 
-  void Sample(const Hypergraph& forest) {
+  void ScoredSamples(const Hypergraph& forest) {
     s_.clear();
     std::vector<HypergraphSampler::Hypothesis> samples;
     HypergraphSampler::sample_hypotheses(forest, k_, prng_, &samples);
diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini
index e8a20759..95eeb8e5 100644
--- a/dtrain/test/example/dtrain.ini
+++ b/dtrain/test/example/dtrain.ini
@@ -1,14 +1,16 @@
 decoder_config=test/example/cdec.ini
 k=100
 N=3
-gamma=0
+gamma=0.001
 epochs=20
 input=test/example/nc-1k-tabs.gz
 scorer=stupid_bleu
 output=weights.gz
 stop_after=10
-sample_from=kbest
+sample_from=forest
 pair_sampling=108010
 select_weights=VOID
 print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough
 tmp=/tmp
+unit_weight_vector=true
+keep_w=true
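For reference, the counters introduced in dtrain.cc track two distinct events per sampled pair: a rank error (the model prefers the hypothesis with the worse metric score) and a margin violation (the model-score gap between the two hypotheses is below 1). A condensed, standalone sketch of that bookkeeping and the resulting update, with a simplified ScoredPair type standing in for dtrain's pairs of ScoredHyps (the SVM branch's regularization step is omitted):

    #include <map>
    #include <string>

    typedef std::map<std::string, double> FeatVec; // stand-in for SparseVector<weight_t>

    struct ScoredPair {
      FeatVec f_hi, f_lo;        // features of the metric-better / metric-worse hypothesis
      double model_hi, model_lo; // their model scores
    };

    // one pairwise update, mirroring the diff's rank_errors / margin_violations
    // counters; simplified ordering convention, not dtrain's actual pair layout
    void update(const ScoredPair& p, FeatVec* lambdas, double eta, double gamma,
                unsigned* rank_errors, unsigned* margin_violations) {
      const bool rank_error = p.model_lo >= p.model_hi; // model prefers the worse hypothesis
      const double margin = p.model_hi - p.model_lo;
      if (rank_error) ++*rank_errors;
      if (margin < 1) ++*margin_violations;
      // perceptron (gamma == 0) updates on rank errors only; the SVM-style
      // variant (gamma > 0) also updates on margin violations
      if (rank_error || (gamma > 0 && margin < 1)) {
        for (FeatVec::const_iterator it = p.f_hi.begin(); it != p.f_hi.end(); ++it)
          (*lambdas)[it->first] += eta * it->second; // move toward the better hypothesis
        for (FeatVec::const_iterator it = p.f_lo.begin(); it != p.f_lo.end(); ++it)
          (*lambdas)[it->first] -= eta * it->second; // and away from the worse one
      }
    }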