summary | refs | log | tree | commit | diff
path: root/dtrain
diff options
context:
space:
mode:
Diffstat (limited to 'dtrain')
-rw-r--r-- dtrain/README.md 2
-rw-r--r-- dtrain/dtrain.cc 70
-rw-r--r-- dtrain/dtrain.h 2
-rw-r--r-- dtrain/hgsampler.cc 1
-rw-r--r-- dtrain/hgsampler.h 1
-rw-r--r-- dtrain/kbestget.h 1
-rw-r--r-- dtrain/test/example/dtrain.ini 4
7 files changed, 52 insertions, 29 deletions
diff --git a/dtrain/README.md b/dtrain/README.md
index 1ee3823e..b453c649 100644
--- a/dtrain/README.md
+++ b/dtrain/README.md
@@ -43,7 +43,7 @@ Uncertain, known bugs, problems
FIXME
-----
-* merge with cdec master
+none
Data
----
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 0a94f7aa..05c3728d 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -20,8 +20,8 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
("stop_after", po::value<unsigned>()->default_value(0), "stop after X input sentences")
("print_weights", po::value<string>(), "weights to print on each iteration")
("hstreaming", po::value<bool>()->zero_tokens(), "run in hadoop streaming mode")
- ("learning_rate", po::value<double>()->default_value(0.0005), "learning rate")
- ("gamma", po::value<double>()->default_value(0), "gamma for SVM (0 for perceptron)")
+ ("learning_rate", po::value<weight_t>()->default_value(0.0005), "learning rate")
+ ("gamma", po::value<weight_t>()->default_value(0), "gamma for SVM (0 for perceptron)")
("tmp", po::value<string>()->default_value("/tmp"), "temp dir to use")
("select_weights", po::value<string>()->default_value("last"), "output 'best' or 'last' weights ('VOID' to throw away)")
("noup", po::value<bool>()->zero_tokens(), "do not update weights");
@@ -134,15 +134,14 @@ main(int argc, char** argv)
observer->SetScorer(scorer);
// init weights
- Weights weights;
- if (cfg.count("input_weights")) weights.InitFromFile(cfg["input_weights"].as<string>());
- SparseVector<double> lambdas;
- weights.InitSparseVector(&lambdas);
- vector<double> dense_weights;
+ vector<weight_t>& dense_weights = decoder.CurrentWeightVector();
+ SparseVector<weight_t> lambdas;
+ if (cfg.count("input_weights")) Weights::InitFromFile(cfg["input_weights"].as<string>(), &dense_weights);
+ Weights::InitSparseVector(dense_weights, &lambdas);
// meta params for perceptron, SVM
- double eta = cfg["learning_rate"].as<double>();
- double gamma = cfg["gamma"].as<double>();
+ weight_t eta = cfg["learning_rate"].as<weight_t>();
+ weight_t gamma = cfg["gamma"].as<weight_t>();
WordID __bias = FD::Convert("__bias");
lambdas.add_value(__bias, 0);
@@ -160,7 +159,7 @@ main(int argc, char** argv)
grammar_buf_out.open(grammar_buf_fn.c_str());
unsigned in_sz = 999999999; // input index, input size
- vector<pair<score_t,score_t> > all_scores;
+ vector<pair<score_t, score_t> > all_scores;
score_t max_score = 0.;
unsigned best_it = 0;
float overall_time = 0.;
@@ -196,7 +195,8 @@ main(int argc, char** argv)
time(&start);
igzstream grammar_buf_in;
if (t > 0) grammar_buf_in.open(grammar_buf_fn.c_str());
- score_t score_sum = 0., model_sum = 0.;
+ score_t score_sum = 0.;
+ score_t model_sum(0);
unsigned ii = 0, nup = 0, npairs = 0;
if (!quiet) cerr << "Iteration #" << t+1 << " of " << T << "." << endl;
@@ -238,10 +238,7 @@ main(int argc, char** argv)
if (next || stop) break;
// weights
- dense_weights.clear();
- weights.InitFromVector(lambdas);
- weights.InitVector(&dense_weights);
- decoder.SetWeights(dense_weights);
+ lambdas.init_vector(&dense_weights);
// getting input
vector<string> in_split; // input: sid\tsrc\tref\tpsg
@@ -289,15 +286,24 @@ main(int argc, char** argv)
// get (scored) samples
vector<ScoredHyp>* samples = observer->GetSamples();
- if (verbose) {
+ // FIXME
+ /*if (verbose) {
cout << "[ref: '";
- if (t > 0) cout << ref_ids_buf[ii];
+ if (t > 0) cout << ref_ids_buf[ii]; <---
else cout << ref_ids;
cout << endl;
cout << _p5 << _np << "1best: " << "'" << (*samples)[0].w << "'" << endl;
cout << "SCORE=" << (*samples)[0].score << ",model="<< (*samples)[0].model << endl;
cout << "F{" << (*samples)[0].f << "} ]" << endl << endl;
- }
+ }*/
+ /*cout << lambdas.get(FD::Convert("PhraseModel_0")) << endl;
+ cout << (*samples)[0].model << endl;
+ cout << "1best: ";
+ for (unsigned u = 0; u < (*samples)[0].w.size(); u++) cout << TD::Convert((*samples)[0].w[u]) << " ";
+ cout << endl;
+ cout << (*samples)[0].f << endl;
+ cout << "___" << endl;*/
+
score_sum += (*samples)[0].score;
model_sum += (*samples)[0].model;
@@ -317,21 +323,21 @@ main(int argc, char** argv)
if (!gamma) {
// perceptron
if (it->first.score - it->second.score < 0) { // rank error
- SparseVector<double> dv = it->second.f - it->first.f;
+ SparseVector<weight_t> dv = it->second.f - it->first.f;
dv.add_value(__bias, -1);
lambdas.plus_eq_v_times_s(dv, eta);
nup++;
}
} else {
// SVM
- double rank_error = it->second.score - it->first.score;
+ score_t rank_error = it->second.score - it->first.score;
if (rank_error > 0) {
- SparseVector<double> dv = it->second.f - it->first.f;
+ SparseVector<weight_t> dv = it->second.f - it->first.f;
dv.add_value(__bias, -1);
lambdas.plus_eq_v_times_s(dv, eta);
}
// regularization
- double margin = it->first.model - it->second.model;
+ score_t margin = it->first.model - it->second.model;
if (rank_error || margin < 1) {
lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta); // reg /= #EXAMPLES or #UPDATES ?
nup++;
@@ -339,6 +345,17 @@ main(int argc, char** argv)
}
}
}
+
+ // DEBUG
+ vector<weight_t> x;
+ lambdas.init_vector(&x);
+ //cout << "[" << ii << "]" << endl;
+ for (int jj = 0; jj < x.size(); jj++) {
+ //if (x[jj] != 0)
+ //cout << FD::Convert(jj) << " " << x[jj] << endl;
+ }
+ //cout << " --- " << endl;
+ // /DEBUG
++ii;
@@ -358,7 +375,8 @@ main(int argc, char** argv)
// print some stats
score_t score_avg = score_sum/(score_t)in_sz;
score_t model_avg = model_sum/(score_t)in_sz;
- score_t score_diff, model_diff;
+ score_t score_diff;
+ score_t model_diff;
if (t > 0) {
score_diff = score_avg - all_scores[t-1].first;
model_diff = model_avg - all_scores[t-1].second;
@@ -402,10 +420,10 @@ main(int argc, char** argv)
// write weights to file
if (select_weights == "best") {
- weights.InitFromVector(lambdas);
string infix = "dtrain-weights-" + boost::lexical_cast<string>(t);
+ lambdas.init_vector(&dense_weights);
string w_fn = gettmpf(tmp_path, infix, "gz");
- weights.WriteToFile(w_fn, true);
+ Weights::WriteToFile(w_fn, dense_weights, true);
weights_files.push_back(w_fn);
}
@@ -420,7 +438,7 @@ main(int argc, char** argv)
ostream& o = *of.stream();
o.precision(17);
o << _np;
- for (SparseVector<double>::const_iterator it = lambdas.begin(); it != lambdas.end(); ++it) {
+ for (SparseVector<weight_t>::const_iterator it = lambdas.begin(); it != lambdas.end(); ++it) {
if (it->second == 0) continue;
o << FD::Convert(it->first) << '\t' << it->second << endl;
}
diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h
index e98ef470..7c1509e4 100644
--- a/dtrain/dtrain.h
+++ b/dtrain/dtrain.h
@@ -11,6 +11,8 @@
#include "ksampler.h"
#include "pairsampling.h"
+#include "filelib.h"
+
#define DTRAIN_DOTS 100 // when to display a '.'
#define DTRAIN_GRAMMAR_DELIM "########EOS########"
diff --git a/dtrain/hgsampler.cc b/dtrain/hgsampler.cc
index 7a00a3d3..ad28b162 100644
--- a/dtrain/hgsampler.cc
+++ b/dtrain/hgsampler.cc
@@ -1,3 +1,4 @@
+// Chris Dyer
#include "hgsampler.h"
#include <queue>
diff --git a/dtrain/hgsampler.h b/dtrain/hgsampler.h
index b840c07f..45c5b8f2 100644
--- a/dtrain/hgsampler.h
+++ b/dtrain/hgsampler.h
@@ -1,3 +1,4 @@
+// Chris Dyer
#ifndef _DTRAIN_HGSAMPLER_H_
#define _DTRAIN_HGSAMPLER_H_
diff --git a/dtrain/kbestget.h b/dtrain/kbestget.h
index d141da60..abe657d0 100644
--- a/dtrain/kbestget.h
+++ b/dtrain/kbestget.h
@@ -7,6 +7,7 @@
#include "ff_register.h"
#include "decoder.h"
#include "weights.h"
+#include "logval.h"
using namespace std;
diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini
index 8a793a7a..185d6d90 100644
--- a/dtrain/test/example/dtrain.ini
+++ b/dtrain/test/example/dtrain.ini
@@ -1,14 +1,14 @@
decoder_config=test/example/cdec.ini
k=100
N=3
-gamma=0 #.00001
+gamma=0
epochs=5
input=test/example/nc-1k-tabs.gz
scorer=stupid_bleu
output=-
stop_after=100
sample_from=kbest
-pair_sampling=all #108010
+pair_sampling=all
select_weights=VOID
print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough
tmp=/tmp