diff options
| author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-11-05 15:29:46 +0100 | 
|---|---|---|
| committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-11-05 15:29:46 +0100 | 
| commit | 1db70a45d59946560fbd5db6487b55a8674ef973 (patch) | |
| tree | 172585dafe4d1462f22d8200e733d52dddb55b1e /mteval | |
| parent | 4dd5216d3afa9ab72b150e250a3c30a5f223ce53 (diff) | |
| parent | 6bbf03ac46bd57400aa9e65a321a304a234af935 (diff) | |
merge upstream/master
Diffstat (limited to 'mteval')
| -rw-r--r-- | mteval/Jamfile | 8 | ||||
| -rw-r--r-- | mteval/mbr_kbest.cc | 38 | ||||
| -rw-r--r-- | mteval/ns.cc | 2 | ||||
| -rw-r--r-- | mteval/ns_docscorer.cc | 4 | ||||
| -rw-r--r-- | mteval/ns_docscorer.h | 2 | 
5 files changed, 32 insertions, 22 deletions
| diff --git a/mteval/Jamfile b/mteval/Jamfile deleted file mode 100644 index 3ed2c2cc..00000000 --- a/mteval/Jamfile +++ /dev/null @@ -1,8 +0,0 @@ -import testing ; - -lib mteval : ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc ns_cer.cc ..//utils : <include>. : : <include>. <library>..//z ; -exe fast_score : fast_score.cc mteval ..//utils ..//boost_program_options ; -exe mbr_kbest : mbr_kbest.cc mteval ..//utils ..//boost_program_options ; -alias programs : fast_score mbr_kbest ; - -unit-test scorer_test : scorer_test.cc mteval ..//utils ..//z ..//boost_unit_test_framework : <testing.arg>$(TOP)/mteval/test_data ; diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc index 2bd31566..2519bc01 100644 --- a/mteval/mbr_kbest.cc +++ b/mteval/mbr_kbest.cc @@ -1,7 +1,9 @@  #include <iostream>  #include <vector> +#include <tr1/unordered_map>  #include <boost/program_options.hpp> +#include <boost/functional/hash.hpp>  #include "prob.h"  #include "tdict.h" @@ -10,6 +12,7 @@  #include "stringlib.h"  using namespace std; +using namespace std::tr1;  namespace po = boost::program_options; @@ -31,27 +34,33 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {    }  } +struct ScoreComparer { +  bool operator()(const pair<vector<WordID>, prob_t>& a, const pair<vector<WordID>, prob_t>& b) const { +    return a.second > b.second; +  } +}; +  struct LossComparer {    bool operator()(const pair<vector<WordID>, prob_t>& a, const pair<vector<WordID>, prob_t>& b) const {      return a.second < b.second;    }  }; -bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, prob_t> >* list) { +bool ReadKBestList(const double mbr_scale, istream* in, string* sent_id, vector<pair<vector<WordID>, prob_t> >* list) {    static string cache_id;    static pair<vector<WordID>, prob_t> cache_pair;    list->clear();    string cur_id; +  unordered_map<vector<WordID>, unsigned, boost::hash<vector<WordID> > > sent2id;    if (cache_pair.first.size() > 0) {      list->push_back(cache_pair); +    sent2id[cache_pair.first] = 0;      cur_id = cache_id;      cache_pair.first.clear();    }    string line;    string tstr; -  while(*in) { -    getline(*in, line); -    if (line.empty()) continue; +  while(getline(*in, line)) {      size_t p1 = line.find(" ||| ");      if (p1 == string::npos) { cerr << "Bad format: " << line << endl; abort(); }      size_t p2 = line.find(" ||| ", p1 + 4); @@ -59,16 +68,25 @@ bool ReadKBestList(istream* in, string* sent_id, vector<pair<vector<WordID>, pro      size_t p3 = line.rfind(" ||| ");      cache_id = line.substr(0, p1);      tstr = line.substr(p1 + 5, p2 - p1 - 5); -    double val = strtod(line.substr(p3 + 5).c_str(), NULL); +    double val = strtod(line.substr(p3 + 5).c_str(), NULL) * mbr_scale;      TD::ConvertSentence(tstr, &cache_pair.first);      cache_pair.second.logeq(val);      if (cur_id.empty()) cur_id = cache_id;      if (cur_id == cache_id) { -      list->push_back(cache_pair); +      unordered_map<vector<WordID>, unsigned, boost::hash<vector<WordID> > >::iterator it = +        sent2id.find(cache_pair.first); +      if (it == sent2id.end()) { +        sent2id.insert(make_pair(cache_pair.first, unsigned(list->size()))); +        list->push_back(cache_pair); +      } else { +        (*list)[it->second].second += cache_pair.second; +        // cerr << "Cruch: " << line << "\n newp=" << (*list)[it->second].second << endl; +      }        *sent_id = cur_id;        cache_pair.first.clear();      } else { break; }    } +  sort(list->begin(), list->end(), ScoreComparer());    return !list->empty();  } @@ -87,14 +105,14 @@ int main(int argc, char** argv) {    vector<pair<vector<WordID>, prob_t> > list;    ReadFile rf(file);    string sent_id; -  while(ReadKBestList(rf.stream(), &sent_id, &list)) { +  while(ReadKBestList(mbr_scale, rf.stream(), &sent_id, &list)) {      vector<prob_t> joints(list.size()); -    const prob_t max_score = pow(list.front().second, mbr_scale); +    const prob_t max_score = list.front().second;      prob_t marginal = prob_t::Zero();      for (int i = 0 ; i < list.size(); ++i) { -      const prob_t joint = pow(list[i].second, mbr_scale) / max_score; +      const prob_t joint = list[i].second / max_score;        joints[i] = joint; -      // cerr << "list[" << i << "] joint=" << log(joint) << endl; +      //cerr << "list[" << i << "] joint=" << log(joint) << endl;        marginal += joint;      }      int mbr_idx = -1; diff --git a/mteval/ns.cc b/mteval/ns.cc index 33952da7..3af7cc63 100644 --- a/mteval/ns.cc +++ b/mteval/ns.cc @@ -252,7 +252,7 @@ EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) {      } else if (metric_id == "TER") {        m = new TERMetric;      } else if (metric_id == "METEOR") { -      m = new ExternalMetric("METEOR", "java -Xmx1536m -jar /Users/cdyer/software/meteor/meteor-1.3.jar - - -mira -lower -t tune -l en"); +      m = new ExternalMetric("METEOR", "java -Xmx1536m -jar /cab0/tools/meteor-1.3/meteor-1.3.jar - - -mira -lower -t tune -l en");      } else if (metric_id.find("COMB:") == 0) {        m = new CombinationMetric(metric_id);      } else if (metric_id == "CER") { diff --git a/mteval/ns_docscorer.cc b/mteval/ns_docscorer.cc index 28a2fd09..f72ad115 100644 --- a/mteval/ns_docscorer.cc +++ b/mteval/ns_docscorer.cc @@ -16,7 +16,7 @@ void DocumentScorer::Init(const EvaluationMetric* metric,              const string& src_file,              bool verbose) {    scorers_.clear(); -  cerr << "Loading references (" << ref_files.size() << " files)\n"; +  if (verbose) cerr << "Loading references (" << ref_files.size() << " files)\n";    assert(src_file.empty());    std::vector<ReadFile> ifs(ref_files.begin(),ref_files.end());    for (int i=0; i < ref_files.size(); ++i) ifs[i].Init(ref_files[i]); @@ -55,6 +55,6 @@ void DocumentScorer::Init(const EvaluationMetric* metric,        ++line;      }    } -  cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n"; +  if (verbose) cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n";  } diff --git a/mteval/ns_docscorer.h b/mteval/ns_docscorer.h index 170ac627..a5757258 100644 --- a/mteval/ns_docscorer.h +++ b/mteval/ns_docscorer.h @@ -5,7 +5,7 @@  #include <string>  #include <boost/shared_ptr.hpp> -struct EvaluationMetric; +class EvaluationMetric;  struct SegmentEvaluator;  class DocumentScorer {   public: | 
