diff options
-rw-r--r-- | mteval/external_scorer.cc | 79 | ||||
-rw-r--r-- | mteval/external_scorer.h | 28 | ||||
-rw-r--r-- | mteval/scorer.cc | 10 | ||||
-rw-r--r-- | mteval/scorer.h | 4 | ||||
-rwxr-xr-x | vest/dist-vest.pl | 2 |
5 files changed, 85 insertions, 38 deletions
diff --git a/mteval/external_scorer.cc b/mteval/external_scorer.cc index 4327ce9b..3757064b 100644 --- a/mteval/external_scorer.cc +++ b/mteval/external_scorer.cc @@ -2,20 +2,42 @@ #include <cstdio> // popen #include <cstdlib> +#include <cstring> #include <sstream> #include <iostream> #include <cassert> +#include "stringlib.h" #include "tdict.h" using namespace std; +map<string, boost::shared_ptr<ScoreServer> > ScoreServerManager::servers_; + +class METEORServer : public ScoreServer { + public: + METEORServer() : ScoreServer("java -Xmx1024m -jar /Users/cdyer/software/meteor/meteor-1.3.jar - - -mira -lower -t tune -l en") {} +}; + +ScoreServer* ScoreServerManager::Instance(const string& score_type) { + boost::shared_ptr<ScoreServer>& s = servers_[score_type]; + if (!s) { + if (score_type == "meteor") { + s.reset(new METEORServer); + } else { + cerr << "Don't know how to create score server for type '" << score_type << "'\n"; + abort(); + } + } + return s.get(); +} + ScoreServer::ScoreServer(const string& cmd) : pipe_() { cerr << "Invoking " << cmd << " ..." << endl; pipe_ = popen(cmd.c_str(), "r+"); - assert(pipe_); + if (!pipe_) { perror("popen"); abort(); } string dummy; - RequestResponse("EVAL ||| Reference initialization string . ||| Testing initialization string .\n", &dummy); + RequestResponse("SCORE ||| Reference initialization string . ||| Testing initialization string .", &dummy); assert(dummy.size() > 0); cerr << "Connection established.\n"; } @@ -24,12 +46,11 @@ ScoreServer::~ScoreServer() { pclose(pipe_); } -double ScoreServer::ComputeScore(const vector<float>& fields) { +float ScoreServer::ComputeScore(const vector<float>& fields) { ostringstream os; - os << "EVAL"; + os << "EVAL |||"; for (unsigned i = 0; i < fields.size(); ++i) os << ' ' << fields[i]; - os << endl; string sres; RequestResponse(os.str(), &sres); return strtod(sres.c_str(), NULL); @@ -48,46 +69,42 @@ void ScoreServer::Evaluate(const vector<vector<WordID> >& refs, const vector<Wor for (unsigned i = 0; i < hyp.size(); ++i) { os << ' ' << TD::Convert(hyp[i]); } - os << endl; string sres; RequestResponse(os.str(), &sres); istringstream is(sres); - double val; + float val; fields->clear(); - while(is >> val) { + while(is >> val) fields->push_back(val); - } } #define MAX_BUF 16000 void ScoreServer::RequestResponse(const string& request, string* response) { - fprintf(pipe_, "%s", request.c_str()); + //cerr << "@SERVER: " << request << endl; + fputs(request.c_str(), pipe_); + fputc('\n', pipe_); fflush(pipe_); char buf[MAX_BUF]; - size_t cr = fread(buf, 1, MAX_BUF, pipe_); - if (cr == 0) { + if (NULL == fgets(buf, MAX_BUF, pipe_)) { cerr << "Read error. Request: " << request << endl; abort(); } - while (buf[cr-1] != '\n') { - size_t n = fread(&buf[cr], 1, MAX_BUF-cr, pipe_); - assert(n > 0); - cr += n; - assert(cr < MAX_BUF); + size_t len = strlen(buf); + if (len < 2) { + cerr << "Malformed response: " << buf << endl; } - buf[cr - 1] = 0; - *response = buf; + *response = Trim(buf, " \t\n"); + //cerr << "@RESPONSE: '" << *response << "'\n"; } struct ExternalScore : public ScoreBase<ExternalScore> { ExternalScore() : score_server() {} - explicit ExternalScore(const ScoreServer* s) : score_server(s), fields() {} - ExternalScore(const ScoreServer* s, const vector<float>& f) : score_server(s), fields(f) {} + explicit ExternalScore(ScoreServer* s) : score_server(s), fields() {} + ExternalScore(ScoreServer* s, const vector<float>& f) : score_server(s), fields(f) {} float ComputePartialScore() const { return 0.0;} float ComputeScore() const { - // TODO make EVAL call - assert(!"not implemented"); + return score_server->ComputeScore(fields); } void ScoreDetails(string* details) const { ostringstream os; @@ -127,14 +144,17 @@ struct ExternalScore : public ScoreBase<ExternalScore> { } void Encode(string* out) const { ostringstream os; + for (unsigned i = 0; i < fields.size(); ++i) + os << (i == 0 ? "" : " ") << fields[i]; + *out = os.str(); } bool IsAdditiveIdentity() const { - for (int i = 0; i < fields.size(); ++i) + for (unsigned i = 0; i < fields.size(); ++i) if (fields[i]) return false; return true; } - const ScoreServer* score_server; + mutable ScoreServer* score_server; vector<float> fields; }; @@ -148,3 +168,12 @@ ScoreP ExternalSentenceScorer::ScoreCCandidate(const Sentence& hyp) const { assert(!"not implemented"); } +ScoreP ExternalSentenceScorer::ScoreFromString(ScoreServer* s, const string& data) { + istringstream is(data); + vector<float> fields; + float val; + while(is >> val) + fields.push_back(val); + return ScoreP(new ExternalScore(s, fields)); +} + diff --git a/mteval/external_scorer.h b/mteval/external_scorer.h index a2c91960..59ece269 100644 --- a/mteval/external_scorer.h +++ b/mteval/external_scorer.h @@ -3,15 +3,20 @@ #include <vector> #include <cstdio> +#include <string> +#include <map> +#include <boost/shared_ptr.hpp> #include "scorer.h" class ScoreServer { - public: + friend class ScoreServerManager; + protected: explicit ScoreServer(const std::string& cmd); virtual ~ScoreServer(); - double ComputeScore(const std::vector<float>& fields); + public: + float ComputeScore(const std::vector<float>& fields); void Evaluate(const std::vector<std::vector<WordID> >& refs, const std::vector<WordID>& hyp, std::vector<float>* fields); private: @@ -19,17 +24,22 @@ class ScoreServer { FILE* pipe_; }; +struct ScoreServerManager { + static ScoreServer* Instance(const std::string& score_type); + private: + static std::map<std::string, boost::shared_ptr<ScoreServer> > servers_; +}; + class ExternalSentenceScorer : public SentenceScorer { public: - virtual ScoreP ScoreCandidate(const Sentence& hyp) const = 0; - virtual ScoreP ScoreCCandidate(const Sentence& hyp) const =0; + ExternalSentenceScorer(ScoreServer* server, const std::vector<std::vector<WordID> >& r) : + SentenceScorer("External", r), eval_server(server) {} + virtual ScoreP ScoreCandidate(const Sentence& hyp) const; + virtual ScoreP ScoreCCandidate(const Sentence& hyp) const; + static ScoreP ScoreFromString(ScoreServer* s, const std::string& data); + protected: ScoreServer* eval_server; }; -class METEORServer : public ScoreServer { - public: - METEORServer() : ScoreServer("java -Xmx1024m -jar meteor-1.3.jar - - -mira -lower") {} -}; - #endif diff --git a/mteval/scorer.cc b/mteval/scorer.cc index 64ce63af..2daa0daa 100644 --- a/mteval/scorer.cc +++ b/mteval/scorer.cc @@ -17,11 +17,12 @@ #include "comb_scorer.h" #include "tdict.h" #include "stringlib.h" +#include "external_scorer.h" using boost::shared_ptr; using namespace std; -void Score::TimesEquals(float scale) { +void Score::TimesEquals(float /*scale*/) { cerr<<"UNIMPLEMENTED except for BLEU (for MIRA): Score::TimesEquals"<<endl;abort(); } @@ -43,12 +44,14 @@ ScoreType ScoreTypeFromString(const string& st) { return Koehn_BLEU; if (sl == "combi") return BLEU_minus_TER_over_2; + if (sl == "meteor") + return METEOR; cerr << "Don't understand score type '" << st << "', defaulting to ibm_bleu.\n"; return IBM_BLEU; } static char const* score_names[]={ - "IBM_BLEU", "NIST_BLEU", "Koehn_BLEU", "TER", "BLEU_minus_TER_over_2", "SER", "AER", "IBM_BLEU_3" + "IBM_BLEU", "NIST_BLEU", "Koehn_BLEU", "TER", "BLEU_minus_TER_over_2", "SER", "AER", "IBM_BLEU_3", "METEOR" }; std::string StringFromScoreType(ScoreType st) { @@ -356,6 +359,7 @@ ScorerP SentenceScorer::CreateSentenceScorer(const ScoreType type, case TER: r = new TERScorer(refs);break; case SER: r = new SERScorer(refs);break; case BLEU_minus_TER_over_2: r = new BLEUTERCombinationScorer(refs);break; + case METEOR: r = new ExternalSentenceScorer(ScoreServerManager::Instance("meteor"), refs); break; default: assert(!"Not implemented!"); } @@ -398,6 +402,8 @@ ScoreP SentenceScorer::CreateScoreFromString(const ScoreType type, const string& return SERScorer::ScoreFromString(in); case BLEU_minus_TER_over_2: return BLEUTERCombinationScorer::ScoreFromString(in); + case METEOR: + return ExternalSentenceScorer::ScoreFromString(ScoreServerManager::Instance("meteor"), in); default: assert(!"Not implemented!"); } diff --git a/mteval/scorer.h b/mteval/scorer.h index f18c8c7f..8bd76982 100644 --- a/mteval/scorer.h +++ b/mteval/scorer.h @@ -17,7 +17,7 @@ class ErrorSurface; class Hypergraph; // needed for alignment //TODO: BLEU N (N separate arg, not part of enum)? -enum ScoreType { IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, BLEU_minus_TER_over_2, SER, AER, IBM_BLEU_3 }; +enum ScoreType { IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, BLEU_minus_TER_over_2, SER, AER, IBM_BLEU_3, METEOR }; ScoreType ScoreTypeFromString(const std::string& st); std::string StringFromScoreType(ScoreType st); @@ -66,7 +66,7 @@ class SentenceScorer { typedef std::vector<Sentence> Sentences; std::string desc; Sentences refs; - SentenceScorer(std::string desc="SentenceScorer_unknown", Sentences const& refs=Sentences()) : desc(desc),refs(refs) { } + explicit SentenceScorer(std::string desc="SentenceScorer_unknown", Sentences const& refs=Sentences()) : desc(desc),refs(refs) { } std::string verbose_desc() const; virtual float ComputeRefLength(const Sentence& hyp) const; // default: avg of refs.length virtual ~SentenceScorer(); diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 789b5b14..b7a862c4 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -118,6 +118,8 @@ if ($usefork) { $usefork = "--use-fork"; } else { $usefork = ''; } if ($metric =~ /^(combi|ter)$/i) { $lines_per_mapper = 40; +} elsif ($metric =~ /^meteor$/i) { + $lines_per_mapper = 2000; # start up time is really high } ($iniFile) = @ARGV; |