From 74d3ac177d70b77646f6a0b3b4095d725f893a36 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 8 Jun 2011 00:39:09 -0400 Subject: external MT evaluator client code. most logic in place, needs to be integrated. actually, the whole evaluation architecture needs to be trashed and rewritten from scratch. what a disaster it is --- mteval/Makefile.am | 2 +- mteval/external_scorer.cc | 150 ++++++++++++++++++++++++++++++++++++++++++++++ mteval/external_scorer.h | 35 +++++++++++ 3 files changed, 186 insertions(+), 1 deletion(-) create mode 100644 mteval/external_scorer.cc create mode 100644 mteval/external_scorer.h (limited to 'mteval') diff --git a/mteval/Makefile.am b/mteval/Makefile.am index f9277779..95845090 100644 --- a/mteval/Makefile.am +++ b/mteval/Makefile.am @@ -10,7 +10,7 @@ endif noinst_LIBRARIES = libmteval.a -libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc +libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc fast_score_SOURCES = fast_score.cc fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz diff --git a/mteval/external_scorer.cc b/mteval/external_scorer.cc new file mode 100644 index 00000000..4327ce9b --- /dev/null +++ b/mteval/external_scorer.cc @@ -0,0 +1,150 @@ +#include "external_scorer.h" + +#include // popen +#include +#include +#include +#include + +#include "tdict.h" + +using namespace std; + +ScoreServer::ScoreServer(const string& cmd) : pipe_() { + cerr << "Invoking " << cmd << " ..." << endl; + pipe_ = popen(cmd.c_str(), "r+"); + assert(pipe_); + string dummy; + RequestResponse("EVAL ||| Reference initialization string . ||| Testing initialization string .\n", &dummy); + assert(dummy.size() > 0); + cerr << "Connection established.\n"; +} + +ScoreServer::~ScoreServer() { + pclose(pipe_); +} + +double ScoreServer::ComputeScore(const vector& fields) { + ostringstream os; + os << "EVAL"; + for (unsigned i = 0; i < fields.size(); ++i) + os << ' ' << fields[i]; + os << endl; + string sres; + RequestResponse(os.str(), &sres); + return strtod(sres.c_str(), NULL); +} + +void ScoreServer::Evaluate(const vector >& refs, const vector& hyp, vector* fields) { + ostringstream os; + os << "SCORE"; + for (unsigned i = 0; i < refs.size(); ++i) { + os << " |||"; + for (unsigned j = 0; j < refs[i].size(); ++j) { + os << ' ' << TD::Convert(refs[i][j]); + } + } + os << " |||"; + for (unsigned i = 0; i < hyp.size(); ++i) { + os << ' ' << TD::Convert(hyp[i]); + } + os << endl; + string sres; + RequestResponse(os.str(), &sres); + istringstream is(sres); + double val; + fields->clear(); + while(is >> val) { + fields->push_back(val); + } +} + +#define MAX_BUF 16000 + +void ScoreServer::RequestResponse(const string& request, string* response) { + fprintf(pipe_, "%s", request.c_str()); + fflush(pipe_); + char buf[MAX_BUF]; + size_t cr = fread(buf, 1, MAX_BUF, pipe_); + if (cr == 0) { + cerr << "Read error. Request: " << request << endl; + abort(); + } + while (buf[cr-1] != '\n') { + size_t n = fread(&buf[cr], 1, MAX_BUF-cr, pipe_); + assert(n > 0); + cr += n; + assert(cr < MAX_BUF); + } + buf[cr - 1] = 0; + *response = buf; +} + +struct ExternalScore : public ScoreBase { + ExternalScore() : score_server() {} + explicit ExternalScore(const ScoreServer* s) : score_server(s), fields() {} + ExternalScore(const ScoreServer* s, const vector& f) : score_server(s), fields(f) {} + float ComputePartialScore() const { return 0.0;} + float ComputeScore() const { + // TODO make EVAL call + assert(!"not implemented"); + } + void ScoreDetails(string* details) const { + ostringstream os; + os << "EXT=" << ComputeScore() << " <"; + for (unsigned i = 0; i < fields.size(); ++i) + os << (i ? " " : "") << fields[i]; + os << '>'; + *details = os.str(); + } + void PlusPartialEquals(const Score&, int, int, int){ + assert(!"not implemented"); // no idea + } + void PlusEquals(const Score& delta, const float scale) { + assert(!"not implemented"); // don't even know what this is + } + void PlusEquals(const Score& delta) { + if (static_cast(delta).score_server) score_server = static_cast(delta).score_server; + if (fields.size() != static_cast(delta).fields.size()) + fields.resize(max(fields.size(), static_cast(delta).fields.size())); + for (unsigned i = 0; i < static_cast(delta).fields.size(); ++i) + fields[i] += static_cast(delta).fields[i]; + } + ScoreP GetZero() const { + return ScoreP(new ExternalScore(score_server)); + } + ScoreP GetOne() const { + return ScoreP(new ExternalScore(score_server)); + } + void Subtract(const Score& rhs, Score* res) const { + static_cast(res)->score_server = score_server; + vector& rf = static_cast(res)->fields; + rf.resize(max(fields.size(), static_cast(rhs).fields.size())); + for (unsigned i = 0; i < rf.size(); ++i) { + rf[i] = (i < fields.size() ? fields[i] : 0.0f) - + (i < static_cast(rhs).fields.size() ? static_cast(rhs).fields[i] : 0.0f); + } + } + void Encode(string* out) const { + ostringstream os; + } + bool IsAdditiveIdentity() const { + for (int i = 0; i < fields.size(); ++i) + if (fields[i]) return false; + return true; + } + + const ScoreServer* score_server; + vector fields; +}; + +ScoreP ExternalSentenceScorer::ScoreCandidate(const Sentence& hyp) const { + ExternalScore* res = new ExternalScore(eval_server); + eval_server->Evaluate(refs, hyp, &res->fields); + return ScoreP(res); +} + +ScoreP ExternalSentenceScorer::ScoreCCandidate(const Sentence& hyp) const { + assert(!"not implemented"); +} + diff --git a/mteval/external_scorer.h b/mteval/external_scorer.h new file mode 100644 index 00000000..a2c91960 --- /dev/null +++ b/mteval/external_scorer.h @@ -0,0 +1,35 @@ +#ifndef _EXTERNAL_SCORER_H_ +#define _EXTERNAL_SCORER_H_ + +#include +#include + +#include "scorer.h" + +class ScoreServer { + public: + explicit ScoreServer(const std::string& cmd); + virtual ~ScoreServer(); + + double ComputeScore(const std::vector& fields); + void Evaluate(const std::vector >& refs, const std::vector& hyp, std::vector* fields); + + private: + void RequestResponse(const std::string& request, std::string* response); + FILE* pipe_; +}; + +class ExternalSentenceScorer : public SentenceScorer { + public: + virtual ScoreP ScoreCandidate(const Sentence& hyp) const = 0; + virtual ScoreP ScoreCCandidate(const Sentence& hyp) const =0; + protected: + ScoreServer* eval_server; +}; + +class METEORServer : public ScoreServer { + public: + METEORServer() : ScoreServer("java -Xmx1024m -jar meteor-1.3.jar - - -mira -lower") {} +}; + +#endif -- cgit v1.2.3 From c456e5b4470a244de811bf8c070532f8012f5731 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 8 Jun 2011 22:35:06 -0400 Subject: rudimentary support for meteor via an external process. still needs configuration of path, but it should work --- mteval/external_scorer.cc | 79 ++++++++++++++++++++++++++++++++--------------- mteval/external_scorer.h | 28 +++++++++++------ mteval/scorer.cc | 10 ++++-- mteval/scorer.h | 4 +-- vest/dist-vest.pl | 2 ++ 5 files changed, 85 insertions(+), 38 deletions(-) (limited to 'mteval') diff --git a/mteval/external_scorer.cc b/mteval/external_scorer.cc index 4327ce9b..3757064b 100644 --- a/mteval/external_scorer.cc +++ b/mteval/external_scorer.cc @@ -2,20 +2,42 @@ #include // popen #include +#include #include #include #include +#include "stringlib.h" #include "tdict.h" using namespace std; +map > ScoreServerManager::servers_; + +class METEORServer : public ScoreServer { + public: + METEORServer() : ScoreServer("java -Xmx1024m -jar /Users/cdyer/software/meteor/meteor-1.3.jar - - -mira -lower -t tune -l en") {} +}; + +ScoreServer* ScoreServerManager::Instance(const string& score_type) { + boost::shared_ptr& s = servers_[score_type]; + if (!s) { + if (score_type == "meteor") { + s.reset(new METEORServer); + } else { + cerr << "Don't know how to create score server for type '" << score_type << "'\n"; + abort(); + } + } + return s.get(); +} + ScoreServer::ScoreServer(const string& cmd) : pipe_() { cerr << "Invoking " << cmd << " ..." << endl; pipe_ = popen(cmd.c_str(), "r+"); - assert(pipe_); + if (!pipe_) { perror("popen"); abort(); } string dummy; - RequestResponse("EVAL ||| Reference initialization string . ||| Testing initialization string .\n", &dummy); + RequestResponse("SCORE ||| Reference initialization string . ||| Testing initialization string .", &dummy); assert(dummy.size() > 0); cerr << "Connection established.\n"; } @@ -24,12 +46,11 @@ ScoreServer::~ScoreServer() { pclose(pipe_); } -double ScoreServer::ComputeScore(const vector& fields) { +float ScoreServer::ComputeScore(const vector& fields) { ostringstream os; - os << "EVAL"; + os << "EVAL |||"; for (unsigned i = 0; i < fields.size(); ++i) os << ' ' << fields[i]; - os << endl; string sres; RequestResponse(os.str(), &sres); return strtod(sres.c_str(), NULL); @@ -48,46 +69,42 @@ void ScoreServer::Evaluate(const vector >& refs, const vectorclear(); - while(is >> val) { + while(is >> val) fields->push_back(val); - } } #define MAX_BUF 16000 void ScoreServer::RequestResponse(const string& request, string* response) { - fprintf(pipe_, "%s", request.c_str()); + //cerr << "@SERVER: " << request << endl; + fputs(request.c_str(), pipe_); + fputc('\n', pipe_); fflush(pipe_); char buf[MAX_BUF]; - size_t cr = fread(buf, 1, MAX_BUF, pipe_); - if (cr == 0) { + if (NULL == fgets(buf, MAX_BUF, pipe_)) { cerr << "Read error. Request: " << request << endl; abort(); } - while (buf[cr-1] != '\n') { - size_t n = fread(&buf[cr], 1, MAX_BUF-cr, pipe_); - assert(n > 0); - cr += n; - assert(cr < MAX_BUF); + size_t len = strlen(buf); + if (len < 2) { + cerr << "Malformed response: " << buf << endl; } - buf[cr - 1] = 0; - *response = buf; + *response = Trim(buf, " \t\n"); + //cerr << "@RESPONSE: '" << *response << "'\n"; } struct ExternalScore : public ScoreBase { ExternalScore() : score_server() {} - explicit ExternalScore(const ScoreServer* s) : score_server(s), fields() {} - ExternalScore(const ScoreServer* s, const vector& f) : score_server(s), fields(f) {} + explicit ExternalScore(ScoreServer* s) : score_server(s), fields() {} + ExternalScore(ScoreServer* s, const vector& f) : score_server(s), fields(f) {} float ComputePartialScore() const { return 0.0;} float ComputeScore() const { - // TODO make EVAL call - assert(!"not implemented"); + return score_server->ComputeScore(fields); } void ScoreDetails(string* details) const { ostringstream os; @@ -127,14 +144,17 @@ struct ExternalScore : public ScoreBase { } void Encode(string* out) const { ostringstream os; + for (unsigned i = 0; i < fields.size(); ++i) + os << (i == 0 ? "" : " ") << fields[i]; + *out = os.str(); } bool IsAdditiveIdentity() const { - for (int i = 0; i < fields.size(); ++i) + for (unsigned i = 0; i < fields.size(); ++i) if (fields[i]) return false; return true; } - const ScoreServer* score_server; + mutable ScoreServer* score_server; vector fields; }; @@ -148,3 +168,12 @@ ScoreP ExternalSentenceScorer::ScoreCCandidate(const Sentence& hyp) const { assert(!"not implemented"); } +ScoreP ExternalSentenceScorer::ScoreFromString(ScoreServer* s, const string& data) { + istringstream is(data); + vector fields; + float val; + while(is >> val) + fields.push_back(val); + return ScoreP(new ExternalScore(s, fields)); +} + diff --git a/mteval/external_scorer.h b/mteval/external_scorer.h index a2c91960..59ece269 100644 --- a/mteval/external_scorer.h +++ b/mteval/external_scorer.h @@ -3,15 +3,20 @@ #include #include +#include +#include +#include #include "scorer.h" class ScoreServer { - public: + friend class ScoreServerManager; + protected: explicit ScoreServer(const std::string& cmd); virtual ~ScoreServer(); - double ComputeScore(const std::vector& fields); + public: + float ComputeScore(const std::vector& fields); void Evaluate(const std::vector >& refs, const std::vector& hyp, std::vector* fields); private: @@ -19,17 +24,22 @@ class ScoreServer { FILE* pipe_; }; +struct ScoreServerManager { + static ScoreServer* Instance(const std::string& score_type); + private: + static std::map > servers_; +}; + class ExternalSentenceScorer : public SentenceScorer { public: - virtual ScoreP ScoreCandidate(const Sentence& hyp) const = 0; - virtual ScoreP ScoreCCandidate(const Sentence& hyp) const =0; + ExternalSentenceScorer(ScoreServer* server, const std::vector >& r) : + SentenceScorer("External", r), eval_server(server) {} + virtual ScoreP ScoreCandidate(const Sentence& hyp) const; + virtual ScoreP ScoreCCandidate(const Sentence& hyp) const; + static ScoreP ScoreFromString(ScoreServer* s, const std::string& data); + protected: ScoreServer* eval_server; }; -class METEORServer : public ScoreServer { - public: - METEORServer() : ScoreServer("java -Xmx1024m -jar meteor-1.3.jar - - -mira -lower") {} -}; - #endif diff --git a/mteval/scorer.cc b/mteval/scorer.cc index 64ce63af..2daa0daa 100644 --- a/mteval/scorer.cc +++ b/mteval/scorer.cc @@ -17,11 +17,12 @@ #include "comb_scorer.h" #include "tdict.h" #include "stringlib.h" +#include "external_scorer.h" using boost::shared_ptr; using namespace std; -void Score::TimesEquals(float scale) { +void Score::TimesEquals(float /*scale*/) { cerr<<"UNIMPLEMENTED except for BLEU (for MIRA): Score::TimesEquals"< Sentences; std::string desc; Sentences refs; - SentenceScorer(std::string desc="SentenceScorer_unknown", Sentences const& refs=Sentences()) : desc(desc),refs(refs) { } + explicit SentenceScorer(std::string desc="SentenceScorer_unknown", Sentences const& refs=Sentences()) : desc(desc),refs(refs) { } std::string verbose_desc() const; virtual float ComputeRefLength(const Sentence& hyp) const; // default: avg of refs.length virtual ~SentenceScorer(); diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 789b5b14..b7a862c4 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -118,6 +118,8 @@ if ($usefork) { $usefork = "--use-fork"; } else { $usefork = ''; } if ($metric =~ /^(combi|ter)$/i) { $lines_per_mapper = 40; +} elsif ($metric =~ /^meteor$/i) { + $lines_per_mapper = 2000; # start up time is really high } ($iniFile) = @ARGV; -- cgit v1.2.3 From 9366fc1ce04385290722bd703933bf0c1c166671 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 10 Jun 2011 16:20:17 -0400 Subject: proper use of pipes --- mteval/external_scorer.cc | 52 ++++++++++++++++++++++++++++++++--------------- mteval/external_scorer.h | 4 ++-- 2 files changed, 38 insertions(+), 18 deletions(-) (limited to 'mteval') diff --git a/mteval/external_scorer.cc b/mteval/external_scorer.cc index 3757064b..1c09c2a1 100644 --- a/mteval/external_scorer.cc +++ b/mteval/external_scorer.cc @@ -3,6 +3,7 @@ #include // popen #include #include +#include #include #include #include @@ -16,7 +17,7 @@ map > ScoreServerManager::servers_; class METEORServer : public ScoreServer { public: - METEORServer() : ScoreServer("java -Xmx1024m -jar /Users/cdyer/software/meteor/meteor-1.3.jar - - -mira -lower -t tune -l en") {} + METEORServer() : ScoreServer("java -Xmx1024m -jar /usr0/cdyer/meteor/meteor-1.3.jar - - -mira -lower -t tune -l en") {} }; ScoreServer* ScoreServerManager::Instance(const string& score_type) { @@ -32,10 +33,30 @@ ScoreServer* ScoreServerManager::Instance(const string& score_type) { return s.get(); } -ScoreServer::ScoreServer(const string& cmd) : pipe_() { +ScoreServer::ScoreServer(const string& cmd) { cerr << "Invoking " << cmd << " ..." << endl; - pipe_ = popen(cmd.c_str(), "r+"); - if (!pipe_) { perror("popen"); abort(); } + if (pipe(p2c) < 0) { perror("pipe"); exit(1); } + if (pipe(c2p) < 0) { perror("pipe"); exit(1); } + pid_t cpid = fork(); + if (cpid < 0) { perror("fork"); exit(1); } + if (cpid == 0) { // child + close(p2c[1]); + close(c2p[0]); + dup2(p2c[0], 0); + close(p2c[0]); + dup2(c2p[1], 1); + close(c2p[1]); + cerr << "Exec'ing from child " << cmd << endl; + vector vargs; + SplitOnWhitespace(cmd, &vargs); + const char** cargv = static_cast(malloc(sizeof(const char*) * vargs.size())); + for (unsigned i = 1; i < vargs.size(); ++i) cargv[i-1] = vargs[i].c_str(); + cargv[vargs.size() - 1] = NULL; + execvp(vargs[0].c_str(), (char* const*)cargv); + } else { // parent + close(c2p[1]); + close(p2c[0]); + } string dummy; RequestResponse("SCORE ||| Reference initialization string . ||| Testing initialization string .", &dummy); assert(dummy.size() > 0); @@ -43,7 +64,7 @@ ScoreServer::ScoreServer(const string& cmd) : pipe_() { } ScoreServer::~ScoreServer() { - pclose(pipe_); + // TODO close stuff, join stuff } float ScoreServer::ComputeScore(const vector& fields) { @@ -81,21 +102,20 @@ void ScoreServer::Evaluate(const vector >& refs, const vector { diff --git a/mteval/external_scorer.h b/mteval/external_scorer.h index 59ece269..a28fb920 100644 --- a/mteval/external_scorer.h +++ b/mteval/external_scorer.h @@ -2,7 +2,6 @@ #define _EXTERNAL_SCORER_H_ #include -#include #include #include #include @@ -21,7 +20,8 @@ class ScoreServer { private: void RequestResponse(const std::string& request, std::string* response); - FILE* pipe_; + int p2c[2]; + int c2p[2]; }; struct ScoreServerManager { -- cgit v1.2.3 From c3828b0a2deb42de5c7378e93f93f5e69efb304c Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 16 Jul 2011 19:13:21 -0400 Subject: tune regularizer --- mteval/scorer.cc | 12 +++- pro-train/dist-pro.pl | 139 ++++++++++++++++++++++++++------------------- pro-train/mr_pro_reduce.cc | 128 ++++++++++++++++++++++++++++++----------- 3 files changed, 185 insertions(+), 94 deletions(-) (limited to 'mteval') diff --git a/mteval/scorer.cc b/mteval/scorer.cc index 2daa0daa..a83b9e2f 100644 --- a/mteval/scorer.cc +++ b/mteval/scorer.cc @@ -430,6 +430,7 @@ float BLEUScore::ComputeScore(vector* precs, float* bp) const { float log_bleu = 0; if (precs) precs->clear(); int count = 0; + vector total_precs(N()); for (int i = 0; i < N(); ++i) { if (hyp_ngram_counts[i] > 0) { float cor_count = correct_ngram_hit_counts[i]; @@ -440,14 +441,21 @@ float BLEUScore::ComputeScore(vector* precs, float* bp) const { log_bleu += lprec; ++count; } + total_precs[i] = log_bleu; } - log_bleu /= static_cast(count); + vector bleus(N()); float lbp = 0.0; if (hyp_len < ref_len) lbp = (hyp_len - ref_len) / hyp_len; log_bleu += lbp; if (bp) *bp = exp(lbp); - return exp(log_bleu); + float wb = 0; + for (int i = 0; i < N(); ++i) { + bleus[i] = exp(total_precs[i] / (i+1) + lbp); + wb += bleus[i] / pow(2.0, 4.0 - i); + } + //return wb; + return bleus.back(); } diff --git a/pro-train/dist-pro.pl b/pro-train/dist-pro.pl index c42e3876..dbfa329a 100755 --- a/pro-train/dist-pro.pl +++ b/pro-train/dist-pro.pl @@ -37,42 +37,36 @@ die "Can't find decoder in $cdec" unless -x $cdec; die "Can't find $parallelize" unless -x $parallelize; die "Can't find $libcall" unless -e $libcall; my $decoder = $cdec; -my $lines_per_mapper = 100; +my $lines_per_mapper = 30; my $iteration = 1; my $run_local = 0; my $best_weights; -my $max_iterations = 15; -my $optimization_iters = 6; +my $max_iterations = 30; my $decode_nodes = 15; # number of decode nodes -my $pmem = "9g"; +my $pmem = "4g"; my $disable_clean = 0; my %seen_weights; -my $normalize; my $help = 0; my $epsilon = 0.0001; -my $interval = 5; my $dryrun = 0; my $last_score = -10000000; my $metric = "ibm_bleu"; my $dir; my $iniFile; my $weights; -my $decoderOpt; -my $noprimary; -my $maxsim=0; -my $oraclen=0; -my $oracleb=20; -my $bleu_weight=1; -my $use_make; # use make to parallelize line search -my $dirargs=''; +my $use_make; # use make to parallelize my $usefork; my $initial_weights; my $pass_suffix = ''; my $cpbin=1; + +# regularization strength +my $tune_regularizer = 0; +my $reg = 1e-2; + # Process command-line options Getopt::Long::Configure("no_auto_abbrev"); if (GetOptions( - "decoder=s" => \$decoderOpt, "decode-nodes=i" => \$decode_nodes, "dont-clean" => \$disable_clean, "pass-suffix=s" => \$pass_suffix, @@ -81,21 +75,13 @@ if (GetOptions( "epsilon=s" => \$epsilon, "help" => \$help, "weights=s" => \$initial_weights, - "interval" => \$interval, - "iteration=i" => \$iteration, + "tune-regularizer" => \$tune_regularizer, + "reg=f" => \$reg, "local" => \$run_local, "use-make=i" => \$use_make, "max-iterations=i" => \$max_iterations, - "normalize=s" => \$normalize, "pmem=s" => \$pmem, "cpbin!" => \$cpbin, - "bleu_weight=s" => \$bleu_weight, - "no-primary!" => \$noprimary, - "max-similarity=s" => \$maxsim, - "oracle-directions=i" => \$oraclen, - "n-oracle=i" => \$oraclen, - "oracle-batch=i" => \$oracleb, - "directions-args=s" => \$dirargs, "ref-files=s" => \$refFiles, "metric=s" => \$metric, "source-file=s" => \$srcFile, @@ -108,9 +94,7 @@ if (GetOptions( if ($usefork) { $usefork = "--use-fork"; } else { $usefork = ''; } if ($metric =~ /^(combi|ter)$/i) { - $lines_per_mapper = 40; -} elsif ($metric =~ /^meteor$/i) { - $lines_per_mapper = 2000; # start up time is really high + $lines_per_mapper = 5; } ($iniFile) = @ARGV; @@ -144,8 +128,6 @@ unless ($dir =~ /^\//){ # convert relative path to absolute path $dir = "$basedir/$dir"; } -if ($decoderOpt){ $decoder = $decoderOpt; } - # Initializations and helper functions srand; @@ -378,6 +360,22 @@ while (1){ else {$joblist = $joblist . "\|" . $jobid; } } } + my @dev_outs = (); + my @devtest_outs = (); + if ($tune_regularizer) { + for (my $i = 0; $i < scalar @mapoutputs; $i++) { + if ($i % 3 == 1) { + push @devtest_outs, $mapoutputs[$i]; + } else { + push @dev_outs, $mapoutputs[$i]; + } + } + if (scalar @devtest_outs == 0) { + die "Not enough training instances for regularization tuning! Rerun without --tune-regularizer\n"; + } + } else { + @dev_outs = @mapoutputs; + } if ($run_local) { print STDERR "\nCompleted extraction of training exemplars.\n"; } elsif ($use_make) { @@ -399,7 +397,13 @@ while (1){ } my $tol = 0; my $til = 0; - print STDERR "MO: @mapoutputs\n"; + my $dev_test_file = "$dir/splag.$im1/devtest.gz"; + if ($tune_regularizer) { + my $cmd = "cat @devtest_outs | gzip > $dev_test_file"; + check_bash_call($cmd); + die "Can't find file $dev_test_file" unless -f $dev_test_file; + } + #print STDERR "MO: @mapoutputs\n"; for my $mo (@mapoutputs) { #my $olines = get_lines($mo); #my $ilines = get_lines($o2i{$mo}); @@ -407,10 +411,24 @@ while (1){ } print STDERR "\nRUNNING CLASSIFIER (REDUCER)\n"; print STDERR unchecked_output("date"); - $cmd="cat @mapoutputs | $REDUCER -w $dir/weights.$im1 > $dir/weights.$iteration"; + $cmd="cat @dev_outs | $REDUCER -w $dir/weights.$im1 -s $reg"; + if ($tune_regularizer) { + $cmd .= " -T -t $dev_test_file"; + } + $cmd .= " > $dir/weights.$iteration"; print STDERR "COMMAND:\n$cmd\n"; check_bash_call($cmd); $lastWeightsFile = "$dir/weights.$iteration"; + if ($tune_regularizer) { + open W, "<$lastWeightsFile" or die "Can't read $lastWeightsFile: $!"; + my $line = ; + close W; + my ($sharp, $label, $nreg) = split /\s|=/, $line; + print STDERR "REGULARIZATION STRENGTH ($label) IS $nreg\n"; + $reg = $nreg; + # only tune regularizer on first iteration? + $tune_regularizer = 0; + } $lastPScore = $score; $iteration++; print STDERR "\n==========\n"; @@ -473,7 +491,6 @@ sub write_config { print $fh "SOURCE (DEV): $srcFile\n"; print $fh "REFS (DEV): $refFiles\n"; print $fh "EVAL METRIC: $metric\n"; - print $fh "START ITERATION: $iteration\n"; print $fh "MAX ITERATIONS: $max_iterations\n"; print $fh "DECODE NODES: $decode_nodes\n"; print $fh "HEAD NODE: $host\n"; @@ -535,31 +552,38 @@ Usage: $executable [options] based on certain conventions. For details, refer to descriptions of the options --decoder, --weights, and --workdir. -Options: +Required: + + --ref-files + Dev set ref files. This option takes only a single string argument. + To use multiple files (including file globbing), this argument should + be quoted. + + --source-file + Dev set source file. + + --weights + Initial weights file (use empty file to start from 0) + +General options: --local Run the decoder and optimizer locally with a single thread. - --use-make - Use make -j to run the optimizer commands (useful on large - shared-memory machines where qsub is unavailable). - --decode-nodes Number of decoder processes to run in parallel. [default=15] - --decoder - Decoder binary to use. - --help Print this message and exit. - --iteration - Starting iteration number. If not specified, defaults to 1. - --max-iterations Maximum number of iterations to run. If not specified, defaults to 10. + --metric + Metric to optimize. + Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi + --pass-suffix If the decoder is doing multi-pass decoding, the pass suffix "2", "3", etc., is used to control what iteration of weights is set. @@ -567,21 +591,9 @@ Options: --pmem Amount of physical memory requested for parallel decoding jobs. - --ref-files - Dev set ref files. This option takes only a single string argument. - To use multiple files (including file globbing), this argument should - be quoted. - - --metric - Metric to optimize. - Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi - - --normalize - After each iteration, rescale all feature weights such that feature- - name has a weight of 1.0. - - --source-file - Dev set source file. + --use-make + Use make -j to run the optimizer commands (useful on large + shared-memory machines where qsub is unavailable). --workdir Directory for intermediate and output files. If not specified, the @@ -591,6 +603,14 @@ Options: the filename. E.g. an ini file named decoder.foo.ini would have a default working directory name foo. +Regularization options: + + --tune-regularizer + Hold out one third of the tuning data and used this to tune the + regularization parameter. + + --reg + Help } @@ -606,7 +626,6 @@ sub convert { } - sub cmdline { return join ' ',($0,@ORIG_ARGV); } diff --git a/pro-train/mr_pro_reduce.cc b/pro-train/mr_pro_reduce.cc index 491ceb3a..9b422f33 100644 --- a/pro-train/mr_pro_reduce.cc +++ b/pro-train/mr_pro_reduce.cc @@ -16,7 +16,7 @@ using namespace std; namespace po = boost::program_options; // since this is a ranking model, there should be equal numbers of -// positive and negative examples so the bias should be 0 +// positive and negative examples, so the bias should be 0 static const double MAX_BIAS = 1e-10; void InitCommandLine(int argc, char** argv, po::variables_map* conf) { @@ -25,8 +25,11 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { ("weights,w", po::value(), "Weights from previous iteration (used as initialization and interpolation") ("interpolation,p",po::value()->default_value(0.9), "Output weights are p*w + (1-p)*w_prev") ("memory_buffers,m",po::value()->default_value(200), "Number of memory buffers (LBFGS)") - ("sigma_squared,s",po::value()->default_value(1.0), "Sigma squared for Gaussian prior") - ("testset,t",po::value(), "Optional held-out test set to tune regularizer") + ("sigma_squared,s",po::value()->default_value(0.1), "Sigma squared for Gaussian prior") + ("min_reg,r",po::value()->default_value(1e-8), "When tuning (-T) regularization strength, minimum regularization strenght") + ("max_reg,R",po::value()->default_value(10.0), "When tuning (-T) regularization strength, maximum regularization strenght") + ("testset,t",po::value(), "Optional held-out test set") + ("tune_regularizer,T", "Use the held out test set (-t) to tune the regularization strength") ("help,h", "Help"); po::options_description dcmdline_options; dcmdline_options.add(opts); @@ -95,8 +98,6 @@ void GradAdd(const SparseVector& v, const double scale, vector* double TrainingInference(const vector& x, const vector > >& corpus, vector* g = NULL) { - if (g) fill(g->begin(), g->end(), 0.0); - double cll = 0; for (int i = 0; i < corpus.size(); ++i) { const double dotprod = corpus[i].second.dot(x) + x[0]; // x[0] is bias @@ -130,39 +131,23 @@ double TrainingInference(const vector& x, return cll; } -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - string line; - vector > > training, testing; - SparseVector old_weights; - const double psi = conf["interpolation"].as(); - if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; } - if (conf.count("weights")) { - Weights w; - w.InitFromFile(conf["weights"].as()); - w.InitSparseVector(&old_weights); - } - ReadCorpus(&cin, &training); - if (conf.count("testset")) { - ReadFile rf(conf["testset"].as()); - ReadCorpus(rf.stream(), &testing); - } - - cerr << "Number of features: " << FD::NumFeats() << endl; - vector x(FD::NumFeats(), 0.0); // x[0] is bias - for (SparseVector::const_iterator it = old_weights.begin(); - it != old_weights.end(); ++it) - x[it->first] = it->second; +// return held-out log likelihood +double LearnParameters(const vector > >& training, + const vector > >& testing, + const double sigsq, + const unsigned memory_buffers, + vector* px) { + vector& x = *px; vector vg(FD::NumFeats(), 0.0); bool converged = false; - LBFGSOptimizer opt(FD::NumFeats(), conf["memory_buffers"].as()); + LBFGSOptimizer opt(FD::NumFeats(), memory_buffers); + double tppl = 0.0; while(!converged) { + fill(vg.begin(), vg.end(), 0.0); double cll = TrainingInference(x, training, &vg); double ppl = cll / log(2); ppl /= training.size(); ppl = pow(2.0, ppl); - double tppl = 0.0; // evaluate optional held-out test set if (testing.size()) { @@ -173,7 +158,6 @@ int main(int argc, char** argv) { // handle regularizer #if 1 - const double sigsq = conf["sigma_squared"].as(); double norm = 0; for (int i = 1; i < x.size(); ++i) { const double mean_i = 0.0; @@ -202,11 +186,91 @@ int main(int argc, char** argv) { cerr << " BIAS: " << x[0] << endl; } } + return tppl; +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + string line; + vector > > training, testing; + SparseVector old_weights; + const bool tune_regularizer = conf.count("tune_regularizer"); + if (tune_regularizer && !conf.count("testset")) { + cerr << "--tune_regularizer requires --testset to be set\n"; + return 1; + } + const double min_reg = conf["min_reg"].as(); + const double max_reg = conf["max_reg"].as(); + double sigsq = conf["sigma_squared"].as(); + assert(sigsq > 0.0); + assert(min_reg > 0.0); + assert(max_reg > 0.0); + assert(max_reg > min_reg); + const double psi = conf["interpolation"].as(); + if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; } + if (conf.count("weights")) { + Weights w; + w.InitFromFile(conf["weights"].as()); + w.InitSparseVector(&old_weights); + } + ReadCorpus(&cin, &training); + if (conf.count("testset")) { + ReadFile rf(conf["testset"].as()); + ReadCorpus(rf.stream(), &testing); + } + cerr << "Number of features: " << FD::NumFeats() << endl; + vector x(FD::NumFeats(), 0.0); // x[0] is bias + for (SparseVector::const_iterator it = old_weights.begin(); + it != old_weights.end(); ++it) + x[it->first] = it->second; + double tppl = 0.0; + vector > sp; + vector smoothed; + if (tune_regularizer) { + sigsq = min_reg; + const double steps = 18; + double sweep_factor = exp((log(max_reg) - log(min_reg)) / steps); + cerr << "SWEEP FACTOR: " << sweep_factor << endl; + while(sigsq < max_reg) { + tppl = LearnParameters(training, testing, sigsq, conf["memory_buffers"].as(), &x); + sp.push_back(make_pair(sigsq, tppl)); + sigsq *= sweep_factor; + } + smoothed.resize(sp.size(), 0); + smoothed[0] = sp[0].second; + smoothed.back() = sp.back().second; + for (int i = 1; i < sp.size()-1; ++i) { + double prev = sp[i-1].second; + double next = sp[i+1].second; + double cur = sp[i].second; + smoothed[i] = (prev*0.2) + cur * 0.6 + (0.2*next); + } + double best_ppl = 9999999; + unsigned best_i = 0; + for (unsigned i = 0; i < sp.size(); ++i) { + if (smoothed[i] < best_ppl) { + best_ppl = smoothed[i]; + best_i = i; + } + } + sigsq = sp[best_i].first; + tppl = LearnParameters(training, testing, sigsq, conf["memory_buffers"].as(), &x); + } Weights w; if (conf.count("weights")) { for (int i = 1; i < x.size(); ++i) x[i] = (x[i] * psi) + old_weights.get(i) * (1.0 - psi); } + cout.precision(15); + cout << "# sigma^2=" << sigsq << "\theld out perplexity="; + if (tppl) { cout << tppl << endl; } else { cout << "N/A\n"; } + if (sp.size()) { + cout << "# Parameter sweep:\n"; + for (int i = 0; i < sp.size(); ++i) { + cout << "# " << sp[i].first << "\t" << sp[i].second << "\t" << smoothed[i] << endl; + } + } w.InitFromVector(x); w.WriteToFile("-"); return 0; -- cgit v1.2.3 From 0af7d663194beddcde420349bbd91430e0b2e423 Mon Sep 17 00:00:00 2001 From: Guest_account Guest_account prguest11 Date: Tue, 11 Oct 2011 16:16:53 +0100 Subject: remove implicit conversion-to-double operator from LogVal that caused overflow errors, clean up some pf code --- decoder/aligner.cc | 2 +- decoder/cfg.cc | 2 +- decoder/cfg_format.h | 2 +- decoder/decoder.cc | 10 ++++---- decoder/hg.cc | 4 ++-- decoder/rule_lexer.l | 2 ++ decoder/trule.h | 15 +++++++++++- gi/pf/brat.cc | 11 --------- gi/pf/cbgi.cc | 10 -------- gi/pf/dpnaive.cc | 12 ---------- gi/pf/itg.cc | 11 --------- gi/pf/pfbrat.cc | 11 --------- gi/pf/pfdist.cc | 11 --------- gi/pf/pfnaive.cc | 11 --------- mteval/mbr_kbest.cc | 4 ++-- phrasinator/ccrp_nt.h | 24 +++++++++++++++---- training/mpi_batch_optimize.cc | 2 +- training/mpi_compute_cllh.cc | 51 +++++++++++++++++++---------------------- training/mpi_online_optimize.cc | 4 ++-- utils/logval.h | 10 ++++---- 20 files changed, 78 insertions(+), 131 deletions(-) (limited to 'mteval') diff --git a/decoder/aligner.cc b/decoder/aligner.cc index 292ee123..53e059fb 100644 --- a/decoder/aligner.cc +++ b/decoder/aligner.cc @@ -165,7 +165,7 @@ inline void WriteProbGrid(const Array2D& m, ostream* pos) { if (m(i,j) == prob_t::Zero()) { os << "\t---X---"; } else { - snprintf(b, 1024, "%0.5f", static_cast(m(i,j))); + snprintf(b, 1024, "%0.5f", m(i,j).as_float()); os << '\t' << b; } } diff --git a/decoder/cfg.cc b/decoder/cfg.cc index 651978d2..cd7e66e9 100755 --- a/decoder/cfg.cc +++ b/decoder/cfg.cc @@ -639,7 +639,7 @@ void CFG::Print(std::ostream &o,CFGFormat const& f) const { o << '['<& src, SparseVector* trg) { for (SparseVector::const_iterator it = src.begin(); it != src.end(); ++it) - trg->set_value(it->first, it->second); + trg->set_value(it->first, it->second.as_float()); } }; @@ -788,10 +788,10 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { const bool show_tree_structure=conf.count("show_tree_structure"); if (!SILENT) forest_stats(forest," Init. forest",show_tree_structure,oracle.show_derivation); if (conf.count("show_expected_length")) { - const PRPair res = - Inside, - PRWeightFunction >(forest); - cerr << " Expected length (words): " << res.r / res.p << "\t" << res << endl; + const PRPair res = + Inside, + PRWeightFunction >(forest); + cerr << " Expected length (words): " << (res.r / res.p).as_float() << "\t" << res << endl; } if (conf.count("show_partition")) { diff --git a/decoder/hg.cc b/decoder/hg.cc index 3ad17f1a..180986d7 100644 --- a/decoder/hg.cc +++ b/decoder/hg.cc @@ -157,14 +157,14 @@ prob_t Hypergraph::ComputeEdgePosteriors(double scale, vector* posts) co const ScaledEdgeProb weight(scale); const ScaledTransitionEventWeightFunction w2(scale); SparseVector pv; - const double inside = InsideOutside, ScaledTransitionEventWeightFunction>(*this, &pv, weight, w2); posts->resize(edges_.size()); for (int i = 0; i < edges_.size(); ++i) (*posts)[i] = prob_t(pv.value(i)); - return prob_t(inside); + return inside; } prob_t Hypergraph::ComputeBestPathThroughEdges(vector* post) const { diff --git a/decoder/rule_lexer.l b/decoder/rule_lexer.l index 9331d8ed..083a5bb1 100644 --- a/decoder/rule_lexer.l +++ b/decoder/rule_lexer.l @@ -220,6 +220,8 @@ NT [^\t \[\],]+ std::cerr << "Line " << lex_line << ": LHS and RHS arity mismatch!\n"; abort(); } + // const bool ignore_grammar_features = false; + // if (ignore_grammar_features) scfglex_num_feats = 0; TRulePtr rp(new TRule(scfglex_lhs, scfglex_src_rhs, scfglex_src_rhs_size, scfglex_trg_rhs, scfglex_trg_rhs_size, scfglex_feat_ids, scfglex_feat_vals, scfglex_num_feats, scfglex_src_arity, scfglex_als, scfglex_num_als)); check_and_update_ctf_stack(rp); TRulePtr coarse_rp = ((ctf_level == 0) ? TRulePtr() : ctf_rule_stack.top()); diff --git a/decoder/trule.h b/decoder/trule.h index 4df4ec90..8eb2a059 100644 --- a/decoder/trule.h +++ b/decoder/trule.h @@ -5,7 +5,9 @@ #include #include #include -#include + +#include "boost/shared_ptr.hpp" +#include "boost/functional/hash.hpp" #include "sparse_vector.h" #include "wordid.h" @@ -162,4 +164,15 @@ class TRule { bool SanityCheck() const; }; +inline size_t hash_value(const TRule& r) { + size_t h = boost::hash_value(r.e_); + boost::hash_combine(h, -r.lhs_); + boost::hash_combine(h, boost::hash_value(r.f_)); + return h; +} + +inline bool operator==(const TRule& a, const TRule& b) { + return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); +} + #endif diff --git a/gi/pf/brat.cc b/gi/pf/brat.cc index 4c6ba3ef..7b60ef23 100644 --- a/gi/pf/brat.cc +++ b/gi/pf/brat.cc @@ -25,17 +25,6 @@ static unsigned kMAX_SRC_PHRASE; static unsigned kMAX_TRG_PHRASE; struct FSTState; -size_t hash_value(const TRule& r) { - size_t h = 2 - r.lhs_; - boost::hash_combine(h, boost::hash_value(r.e_)); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - double log_poisson(unsigned x, const double& lambda) { assert(lambda > 0.0); return log(lambda) * x - lgamma(x + 1) - lambda; diff --git a/gi/pf/cbgi.cc b/gi/pf/cbgi.cc index 20204e8a..97f1ba34 100644 --- a/gi/pf/cbgi.cc +++ b/gi/pf/cbgi.cc @@ -27,16 +27,6 @@ double log_decay(unsigned x, const double& b) { return log(b - 1) - x * log(b); } -size_t hash_value(const TRule& r) { - // TODO fix hash function - size_t h = boost::hash_value(r.e_) * boost::hash_value(r.f_) * r.lhs_; - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - struct SimpleBase { SimpleBase(unsigned esize, unsigned fsize, unsigned ntsize = 144) : uniform_e(-log(esize)), diff --git a/gi/pf/dpnaive.cc b/gi/pf/dpnaive.cc index 582d1be7..608f73d5 100644 --- a/gi/pf/dpnaive.cc +++ b/gi/pf/dpnaive.cc @@ -20,18 +20,6 @@ namespace po = boost::program_options; static unsigned kMAX_SRC_PHRASE; static unsigned kMAX_TRG_PHRASE; -struct FSTState; - -size_t hash_value(const TRule& r) { - size_t h = 2 - r.lhs_; - boost::hash_combine(h, boost::hash_value(r.e_)); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); diff --git a/gi/pf/itg.cc b/gi/pf/itg.cc index 2c2a86f9..ac3c16a3 100644 --- a/gi/pf/itg.cc +++ b/gi/pf/itg.cc @@ -27,17 +27,6 @@ ostream& operator<<(ostream& os, const vector& p) { return os << ']'; } -size_t hash_value(const TRule& r) { - size_t h = boost::hash_value(r.e_); - boost::hash_combine(h, -r.lhs_); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - double log_poisson(unsigned x, const double& lambda) { assert(lambda > 0.0); return log(lambda) * x - lgamma(x + 1) - lambda; diff --git a/gi/pf/pfbrat.cc b/gi/pf/pfbrat.cc index 4c6ba3ef..7b60ef23 100644 --- a/gi/pf/pfbrat.cc +++ b/gi/pf/pfbrat.cc @@ -25,17 +25,6 @@ static unsigned kMAX_SRC_PHRASE; static unsigned kMAX_TRG_PHRASE; struct FSTState; -size_t hash_value(const TRule& r) { - size_t h = 2 - r.lhs_; - boost::hash_combine(h, boost::hash_value(r.e_)); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - double log_poisson(unsigned x, const double& lambda) { assert(lambda > 0.0); return log(lambda) * x - lgamma(x + 1) - lambda; diff --git a/gi/pf/pfdist.cc b/gi/pf/pfdist.cc index 18dfd03b..81abd61b 100644 --- a/gi/pf/pfdist.cc +++ b/gi/pf/pfdist.cc @@ -24,17 +24,6 @@ namespace po = boost::program_options; shared_ptr prng; -size_t hash_value(const TRule& r) { - size_t h = boost::hash_value(r.e_); - boost::hash_combine(h, -r.lhs_); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc index 43c604c3..c30e7c4f 100644 --- a/gi/pf/pfnaive.cc +++ b/gi/pf/pfnaive.cc @@ -24,17 +24,6 @@ namespace po = boost::program_options; shared_ptr prng; -size_t hash_value(const TRule& r) { - size_t h = boost::hash_value(r.e_); - boost::hash_combine(h, -r.lhs_); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc index 2867b36b..64a6a8bf 100644 --- a/mteval/mbr_kbest.cc +++ b/mteval/mbr_kbest.cc @@ -32,7 +32,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { } struct LossComparer { - bool operator()(const pair, double>& a, const pair, double>& b) const { + bool operator()(const pair, prob_t>& a, const pair, prob_t>& b) const { return a.second < b.second; } }; @@ -108,7 +108,7 @@ int main(int argc, char** argv) { ScoreP s = scorer->ScoreCandidate(list[j].first); double loss = 1.0 - s->ComputeScore(); if (type == TER || type == AER) loss = 1.0 - loss; - double weighted_loss = loss * (joints[j] / marginal); + double weighted_loss = loss * (joints[j] / marginal).as_float(); wl_acc += weighted_loss; if ((!output_list) && wl_acc > mbr_loss) break; } diff --git a/phrasinator/ccrp_nt.h b/phrasinator/ccrp_nt.h index 163b643a..811bce73 100644 --- a/phrasinator/ccrp_nt.h +++ b/phrasinator/ccrp_nt.h @@ -50,15 +50,26 @@ class CCRP_NoTable { return it->second; } - void increment(const Dish& dish) { - ++custs_[dish]; + int increment(const Dish& dish) { + int table_diff = 0; + if (++custs_[dish] == 1) + table_diff = 1; ++num_customers_; + return table_diff; } - void decrement(const Dish& dish) { - if ((--custs_[dish]) == 0) + int decrement(const Dish& dish) { + int table_diff = 0; + int nc = --custs_[dish]; + if (nc == 0) { custs_.erase(dish); + table_diff = -1; + } else if (nc < 0) { + std::cerr << "Dish counts dropped below zero for: " << dish << std::endl; + abort(); + } --num_customers_; + return table_diff; } double prob(const Dish& dish, const double& p0) const { @@ -66,6 +77,11 @@ class CCRP_NoTable { return (at_table + p0 * concentration_) / (num_customers_ + concentration_); } + double logprob(const Dish& dish, const double& logp0) const { + const unsigned at_table = num_customers(dish); + return log(at_table + exp(logp0 + log(concentration_))) - log(num_customers_ + concentration_); + } + double log_crp_prob() const { return log_crp_prob(concentration_); } diff --git a/training/mpi_batch_optimize.cc b/training/mpi_batch_optimize.cc index 0ba8c530..046e921c 100644 --- a/training/mpi_batch_optimize.cc +++ b/training/mpi_batch_optimize.cc @@ -92,7 +92,7 @@ struct TrainingObserver : public DecoderObserver { void SetLocalGradientAndObjective(vector* g, double* o) const { *o = acc_obj; for (SparseVector::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it) - (*g)[it->first] = it->second; + (*g)[it->first] = it->second.as_float(); } virtual void NotifyDecodingStart(const SentenceMetadata& smeta) { diff --git a/training/mpi_compute_cllh.cc b/training/mpi_compute_cllh.cc index b496d196..d5caa745 100644 --- a/training/mpi_compute_cllh.cc +++ b/training/mpi_compute_cllh.cc @@ -1,6 +1,4 @@ -#include #include -#include #include #include #include @@ -12,6 +10,7 @@ #include #include +#include "sentence_metadata.h" #include "verbose.h" #include "hg.h" #include "prob.h" @@ -52,7 +51,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { return true; } -void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c, vector* ids) { +void ReadInstances(const string& fname, int rank, int size, vector* c) { + assert(fname != "-"); ReadFile rf(fname); istream& in = *rf.stream(); string line; @@ -60,20 +60,16 @@ void ReadTrainingCorpus(const string& fname, int rank, int size, vector* while(in) { getline(in, line); if (!in) break; - if (lc % size == rank) { - c->push_back(line); - ids->push_back(lc); - } + if (lc % size == rank) c->push_back(line); ++lc; } } static const double kMINUS_EPSILON = -1e-6; -struct TrainingObserver : public DecoderObserver { - void Reset() { - acc_obj = 0; - } +struct ConditionalLikelihoodObserver : public DecoderObserver { + + ConditionalLikelihoodObserver() : trg_words(), acc_obj(), cur_obj() {} virtual void NotifyDecodingStart(const SentenceMetadata&) { cur_obj = 0; @@ -120,8 +116,10 @@ struct TrainingObserver : public DecoderObserver { } assert(!isnan(log_ref_z)); acc_obj += (cur_obj - log_ref_z); + trg_words += smeta.GetReference().size(); } + unsigned trg_words; double acc_obj; double cur_obj; int state; @@ -161,35 +159,32 @@ int main(int argc, char** argv) { if (conf.count("weights")) Weights::InitFromFile(conf["weights"].as(), &weights); - // freeze feature set - //const bool freeze_feature_set = conf.count("freeze_feature_set"); - //if (freeze_feature_set) FD::Freeze(); - - vector corpus; vector ids; - ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus, &ids); + vector corpus; + ReadInstances(conf["training_data"].as(), rank, size, &corpus); assert(corpus.size() > 0); - assert(corpus.size() == ids.size()); - - TrainingObserver observer; - double objective = 0; - observer.Reset(); if (rank == 0) - cerr << "Each processor is decoding " << corpus.size() << " training examples...\n"; + cerr << "Each processor is decoding ~" << corpus.size() << " training examples...\n"; - for (int i = 0; i < corpus.size(); ++i) { - decoder.SetId(ids[i]); + ConditionalLikelihoodObserver observer; + for (int i = 0; i < corpus.size(); ++i) decoder.Decode(corpus[i], &observer); - } + double objective = 0; + unsigned total_words = 0; #ifdef HAVE_MPI reduce(world, observer.acc_obj, objective, std::plus(), 0); + reduce(world, observer.trg_words, total_words, std::plus(), 0); #else objective = observer.acc_obj; #endif - if (rank == 0) - cout << "OBJECTIVE: " << objective << endl; + if (rank == 0) { + cout << "CONDITIONAL LOG_e LIKELIHOOD: " << objective << endl; + cout << "CONDITIONAL LOG_2 LIKELIHOOD: " << (objective/log(2)) << endl; + cout << " CONDITIONAL ENTROPY: " << (objective/log(2) / total_words) << endl; + cout << " PERPLEXITY: " << pow(2, (objective/log(2) / total_words)) << endl; + } return 0; } diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc index 2ef4a2e7..f87b7274 100644 --- a/training/mpi_online_optimize.cc +++ b/training/mpi_online_optimize.cc @@ -94,7 +94,7 @@ struct TrainingObserver : public DecoderObserver { void SetLocalGradientAndObjective(vector* g, double* o) const { *o = acc_obj; for (SparseVector::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it) - (*g)[it->first] = it->second; + (*g)[it->first] = it->second.as_float(); } virtual void NotifyDecodingStart(const SentenceMetadata& smeta) { @@ -158,7 +158,7 @@ struct TrainingObserver : public DecoderObserver { void GetGradient(SparseVector* g) const { g->clear(); for (SparseVector::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it) - g->set_value(it->first, it->second); + g->set_value(it->first, it->second.as_float()); } int total_complete; diff --git a/utils/logval.h b/utils/logval.h index 6fdc2c42..8a59d0b1 100644 --- a/utils/logval.h +++ b/utils/logval.h @@ -25,12 +25,13 @@ class LogVal { typedef LogVal Self; LogVal() : s_(), v_(LOGVAL_LOG0) {} - explicit LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {} + LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {} + const Self& operator=(double x) { s_ = std::signbit(x); v_ = s_ ? std::log(-x) : std::log(x); return *this; } LogVal(init_minus_1) : s_(true),v_(0) { } LogVal(init_1) : s_(),v_(0) { } LogVal(init_0) : s_(),v_(LOGVAL_LOG0) { } - LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {} - LogVal(unsigned x) : s_(0), v_(std::log(x)) { } + explicit LogVal(int x) : s_(x<0), v_(s_ ? std::log(-x) : std::log(x)) {} + explicit LogVal(unsigned x) : s_(0), v_(std::log(x)) { } LogVal(double lnx,bool sign) : s_(sign),v_(lnx) {} LogVal(double lnx,init_lnx) : s_(),v_(lnx) {} static Self exp(T lnx) { return Self(lnx,false); } @@ -141,9 +142,6 @@ class LogVal { return pow(1/root); } - operator T() const { - if (s_) return -std::exp(v_); else return std::exp(v_); - } T as_float() const { if (s_) return -std::exp(v_); else return std::exp(v_); } -- cgit v1.2.3