summary refs log tree commit diff
path: root/dtrain
diff options
context:
space:
mode:
Diffstat (limited to 'dtrain')
-rw-r--r--  dtrain/Makefile.am            |  2
-rw-r--r--  dtrain/dtrain.cc              | 26
-rw-r--r--  dtrain/hgsampler.cc           | 75
-rw-r--r--  dtrain/hgsampler.h            | 30
-rw-r--r--  dtrain/kbestget.h             |  4
-rw-r--r--  dtrain/ksampler.h             |  6
-rw-r--r--  dtrain/test/example/dtrain.ini |  6
7 files changed, 28 insertions, 121 deletions
diff --git a/dtrain/Makefile.am b/dtrain/Makefile.am
index baf6883a..471977e1 100644
--- a/dtrain/Makefile.am
+++ b/dtrain/Makefile.am
@@ -1,6 +1,6 @@
bin_PROGRAMS = dtrain
-dtrain_SOURCES = dtrain.cc score.cc hgsampler.cc
+dtrain_SOURCES = dtrain.cc score.cc
dtrain_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -O3
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 5c95c7f1..79047fd9 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -19,12 +19,13 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
("scorer", po::value<string>()->default_value("stupid_bleu"), "scoring: bleu, stupid_*, smooth_*, approx_*")
("stop_after", po::value<unsigned>()->default_value(0), "stop after X input sentences")
("print_weights", po::value<string>(), "weights to print on each iteration")
- ("hstreaming", po::value<string>()->default_value("N/A"), "run in hadoop streaming mode, arg is a task id")
+ ("hstreaming", po::value<string>(), "run in hadoop streaming mode, arg is a task id")
("learning_rate", po::value<weight_t>()->default_value(0.0005), "learning rate")
("gamma", po::value<weight_t>()->default_value(0), "gamma for SVM (0 for perceptron)")
("tmp", po::value<string>()->default_value("/tmp"), "temp dir to use")
("select_weights", po::value<string>()->default_value("last"), "output 'best' or 'last' weights ('VOID' to throw away)")
("keep_w", po::value<bool>()->zero_tokens(), "protocol weights for each iteration")
+ ("unit_weight_vector", po::value<bool>()->zero_tokens(), "Rescale weight vector after each input")
#ifdef DTRAIN_LOCAL
("refs,r", po::value<string>(), "references in local mode")
#endif
@@ -46,7 +47,7 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
return false;
}
if (cfg->count("hstreaming") && (*cfg)["output"].as<string>() != "-") {
- cerr << "When using 'hstreaming' the 'output' param should be '-'.";
+ cerr << "When using 'hstreaming' the 'output' param should be '-'." << endl;
return false;
}
#ifdef DTRAIN_LOCAL
@@ -98,6 +99,8 @@ main(int argc, char** argv)
task_id = cfg["hstreaming"].as<string>();
cerr.precision(17);
}
+ bool unit_weight_vector = false;
+ if (cfg.count("unit_weight_vector")) unit_weight_vector = true;
HSReporter rep(task_id);
bool keep_w = false;
if (cfg.count("keep_w")) keep_w = true;
@@ -226,7 +229,7 @@ main(int argc, char** argv)
#endif
score_t score_sum = 0.;
score_t model_sum(0);
- unsigned ii = 0, nup = 0, npairs = 0;
+ unsigned ii = 0, rank_errors = 0, margin_violations = 0, npairs = 0;
if (!quiet) cerr << "Iteration #" << t+1 << " of " << T << "." << endl;
while(true)
@@ -369,21 +372,25 @@ main(int argc, char** argv)
if (rank_error > 0) {
SparseVector<weight_t> diff_vec = it->second.f - it->first.f;
lambdas.plus_eq_v_times_s(diff_vec, eta);
- nup++;
+ rank_errors++;
}
+ if (margin < 1) margin_violations++;
} else {
// SVM
score_t margin = it->first.model - it->second.model;
if (rank_error > 0 || margin < 1) {
SparseVector<weight_t> diff_vec = it->second.f - it->first.f;
lambdas.plus_eq_v_times_s(diff_vec, eta);
- nup++;
+ if (rank_error > 0) rank_errors++;
+ if (margin < 1) margin_violations++;
}
// regularization
lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./npairs));
}
}
}
+
+ if (unit_weight_vector && sample_from == "forest") lambdas /= lambdas.l2norm();
++ii;
@@ -437,15 +444,18 @@ main(int argc, char** argv)
cerr << _p << " (" << model_diff << ")" << endl;
cerr << " avg #pairs: ";
cerr << _np << npairs/(float)in_sz << endl;
- cerr << " avg #up: ";
- cerr << nup/(float)in_sz << endl;
+ cerr << " avg #rank err: ";
+ cerr << rank_errors/(float)in_sz << endl;
+ cerr << " avg #margin viol: ";
+ cerr << margin_violations/float(in_sz) << endl;
}
if (hstreaming) {
rep.update_counter("Score 1best avg #"+boost::lexical_cast<string>(t+1), score_avg);
rep.update_counter("Model 1best avg #"+boost::lexical_cast<string>(t+1), model_avg);
rep.update_counter("Pairs avg #"+boost::lexical_cast<string>(t+1), npairs/(weight_t)in_sz);
- rep.update_counter("Updates avg #"+boost::lexical_cast<string>(t+1), nup/(weight_t)in_sz);
+ rep.update_counter("Rank errors avg #"+boost::lexical_cast<string>(t+1), rank_errors/(weight_t)in_sz);
+ rep.update_counter("Margin violations avg #"+boost::lexical_cast<string>(t+1), margin_violations/(weight_t)in_sz);
unsigned nonz = (unsigned)lambdas.size_nonzero();
rep.update_counter("Non zero feature count #"+boost::lexical_cast<string>(t+1), nonz);
rep.update_gcounter("Non zero feature count #"+boost::lexical_cast<string>(t+1), nonz);
diff --git a/dtrain/hgsampler.cc b/dtrain/hgsampler.cc
deleted file mode 100644
index ad28b162..00000000
--- a/dtrain/hgsampler.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-// Chris Dyer
-#include "hgsampler.h"
-
-#include <queue>
-
-#include "viterbi.h"
-#include "inside_outside.h"
-
-using namespace std;
-
-struct SampledDerivationWeightFunction {
- typedef double Weight;
- explicit SampledDerivationWeightFunction(const vector<bool>& sampled) : sampled_edges(sampled) {}
- double operator()(const Hypergraph::Edge& e) const {
- return static_cast<double>(sampled_edges[e.id_]);
- }
- const vector<bool>& sampled_edges;
-};
-
-void HypergraphSampler::sample_hypotheses(const Hypergraph& hg,
- unsigned n,
- MT19937* rng,
- vector<Hypothesis>* hypos) {
- hypos->clear();
- hypos->resize(n);
-
- // compute inside probabilities
- vector<prob_t> node_probs;
- Inside<prob_t, EdgeProb>(hg, &node_probs, EdgeProb());
-
- vector<bool> sampled_edges(hg.edges_.size());
- queue<unsigned> q;
- SampleSet<prob_t> ss;
- for (unsigned i = 0; i < n; ++i) {
- fill(sampled_edges.begin(), sampled_edges.end(), false);
- // sample derivation top down
- assert(q.empty());
- Hypothesis& hyp = (*hypos)[i];
- SparseVector<double>& deriv_features = hyp.fmap;
- q.push(hg.nodes_.size() - 1);
- prob_t& model_score = hyp.model_score;
- model_score = prob_t::One();
- while(!q.empty()) {
- unsigned cur_node_id = q.front();
- q.pop();
- const Hypergraph::Node& node = hg.nodes_[cur_node_id];
- const unsigned num_in_edges = node.in_edges_.size();
- unsigned sampled_edge_idx = 0;
- if (num_in_edges == 1) {
- sampled_edge_idx = node.in_edges_[0];
- } else {
- assert(num_in_edges > 1);
- ss.clear();
- for (unsigned j = 0; j < num_in_edges; ++j) {
- const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]];
- prob_t p = edge.edge_prob_; // edge weight
- for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k)
- p *= node_probs[edge.tail_nodes_[k]]; // tail node inside weight
- ss.add(p);
- }
- sampled_edge_idx = node.in_edges_[rng->SelectSample(ss)];
- }
- sampled_edges[sampled_edge_idx] = true;
- const Hypergraph::Edge& sampled_edge = hg.edges_[sampled_edge_idx];
- deriv_features += sampled_edge.feature_values_;
- model_score *= sampled_edge.edge_prob_;
- //sampled_deriv->push_back(sampled_edge_idx);
- for (unsigned j = 0; j < sampled_edge.tail_nodes_.size(); ++j) {
- q.push(sampled_edge.tail_nodes_[j]);
- }
- }
- Viterbi(hg, &hyp.words, ESentenceTraversal(), SampledDerivationWeightFunction(sampled_edges));
- }
-}
-
diff --git a/dtrain/hgsampler.h b/dtrain/hgsampler.h
deleted file mode 100644
index 45c5b8f2..00000000
--- a/dtrain/hgsampler.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// Chris Dyer
-#ifndef _DTRAIN_HGSAMPLER_H_
-#define _DTRAIN_HGSAMPLER_H_
-
-
-#include <vector>
-#include "sparse_vector.h"
-#include "sampler.h"
-#include "wordid.h"
-
-class Hypergraph;
-
-struct HypergraphSampler {
-
- struct Hypothesis {
- std::vector<WordID> words;
- SparseVector<double> fmap;
- prob_t model_score;
- };
-
- static void
- sample_hypotheses(const Hypergraph& hg,
- unsigned n,
- MT19937* rng,
- std::vector<Hypothesis>* hypos);
-};
-
-
-#endif
-
diff --git a/dtrain/kbestget.h b/dtrain/kbestget.h
index abe657d0..88f8bc17 100644
--- a/dtrain/kbestget.h
+++ b/dtrain/kbestget.h
@@ -78,13 +78,13 @@ struct KBestGetter : public HypSampler
virtual void
NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg)
{
- KBest(*hg);
+ KBestScored(*hg);
}
vector<ScoredHyp>* GetSamples() { return &s_; }
void
- KBest(const Hypergraph& forest)
+ KBestScored(const Hypergraph& forest)
{
if (filter_type_ == "unique") {
KBestUnique(forest);
diff --git a/dtrain/ksampler.h b/dtrain/ksampler.h
index 276f2cc9..8b1c09f2 100644
--- a/dtrain/ksampler.h
+++ b/dtrain/ksampler.h
@@ -1,7 +1,7 @@
#ifndef _DTRAIN_KSAMPLER_H_
#define _DTRAIN_KSAMPLER_H_
-#include "hgsampler.h"
+#include "hg_sampler.h" // cdec
#include "kbestget.h"
#include "score.h"
@@ -22,12 +22,12 @@ struct KSampler : public HypSampler
virtual void
NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg)
{
- Sample(*hg);
+ ScoredSamples(*hg);
}
vector<ScoredHyp>* GetSamples() { return &s_; }
- void Sample(const Hypergraph& forest) {
+ void ScoredSamples(const Hypergraph& forest) {
s_.clear();
std::vector<HypergraphSampler::Hypothesis> samples;
HypergraphSampler::sample_hypotheses(forest, k_, prng_, &samples);
diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini
index e8a20759..95eeb8e5 100644
--- a/dtrain/test/example/dtrain.ini
+++ b/dtrain/test/example/dtrain.ini
@@ -1,14 +1,16 @@
decoder_config=test/example/cdec.ini
k=100
N=3
-gamma=0
+gamma=0.001
epochs=20
input=test/example/nc-1k-tabs.gz
scorer=stupid_bleu
output=weights.gz
stop_after=10
-sample_from=kbest
+sample_from=forest
pair_sampling=108010
select_weights=VOID
print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough
tmp=/tmp
+unit_weight_vector=true
+keep_w=true