summaryrefslogtreecommitdiff
path: root/mteval/scorer.h
blob: f18c8c7f93be1c2dbdfd8f019773f775e2df0261 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#ifndef SCORER_H_
#define SCORER_H_
#include <vector>
#include <string>
#include <boost/shared_ptr.hpp>
//TODO: use intrusive shared_ptr in Score (because there are many of them on ErrorSurfaces)
#include "wordid.h"
#include "intrusive_refcount.hpp"

class Score;
class SentenceScorer;
typedef boost::intrusive_ptr<Score> ScoreP;
typedef boost::shared_ptr<SentenceScorer> ScorerP;

class ViterbiEnvelope;
class ErrorSurface;
class Hypergraph;  // needed for alignment

//TODO: BLEU N (N separate arg, not part of enum)?
enum ScoreType { IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, BLEU_minus_TER_over_2, SER, AER, IBM_BLEU_3 };
ScoreType ScoreTypeFromString(const std::string& st);
std::string StringFromScoreType(ScoreType st);

class Score : public boost::intrusive_refcount<Score> {
 public:
  virtual ~Score();
  virtual float ComputeScore() const = 0;
  virtual float ComputePartialScore() const =0;
  virtual void ScoreDetails(std::string* details) const = 0;
  std::string ScoreDetails() {
    std::string d;
    ScoreDetails(&d);
    return d;
  }
  virtual void TimesEquals(float scale); // only for bleu; for mira oracle
  /// same as rhs.TimesEquals(scale);PlusEquals(rhs) except doesn't modify rhs.
  virtual void PlusEquals(const Score& rhs, const float scale) = 0;
  virtual void PlusEquals(const Score& rhs) = 0;
  virtual void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len) = 0;
  virtual void Subtract(const Score& rhs, Score *res) const = 0;
  virtual ScoreP GetZero() const = 0;
  virtual ScoreP GetOne() const = 0;
  virtual bool IsAdditiveIdentity() const = 0; // returns true if adding this delta
                                      // to another score results in no score change
				      // under any circumstances
  virtual void Encode(std::string* out) const = 0;
  static ScoreP GetZero(ScoreType type);
  static ScoreP GetOne(ScoreType type);
  virtual ScoreP Clone() const = 0;
protected:
  Score() {  } // we define these explicitly because refcount is noncopyable
  Score(Score const&) {  }
};

//TODO: make sure default copy ctors for score types do what we want.
template <class Derived>
struct ScoreBase : public Score {
  ScoreP Clone() const  {
    return ScoreP(new Derived(dynamic_cast<Derived const&>(*this)));
  }
};

class SentenceScorer {
 public:
  typedef std::vector<WordID> Sentence;
  typedef std::vector<Sentence> Sentences;
  std::string desc;
  Sentences refs;
  SentenceScorer(std::string desc="SentenceScorer_unknown", Sentences const& refs=Sentences()) : desc(desc),refs(refs) {  }
  std::string verbose_desc() const;
  virtual float ComputeRefLength(const Sentence& hyp) const; // default: avg of refs.length
  virtual ~SentenceScorer();
  virtual ScoreP GetOne() const;
  virtual ScoreP GetZero() const;
  virtual ScoreP ScoreCandidate(const Sentence& hyp) const = 0;
  virtual ScoreP ScoreCCandidate(const Sentence& hyp) const =0;
  virtual const std::string* GetSource() const;
  static ScoreP CreateScoreFromString(const ScoreType type, const std::string& in);
  static ScorerP CreateSentenceScorer(const ScoreType type,
    const std::vector<Sentence >& refs,
    const std::string& src = "");
};

//TODO: should be able to GetOne GetZero without supplying sentence (just type)
class DocScorer {
 public:
  ~DocScorer();
  DocScorer() {  }
  void Init(const ScoreType type,
            const std::vector<std::string>& ref_files,
            const std::string& src_file = "",
            bool verbose=false
    );
  DocScorer(const ScoreType type,
            const std::vector<std::string>& ref_files,
            const std::string& src_file = "",
            bool verbose=false
    )
  {
    Init(type,ref_files,src_file,verbose);
  }

  int size() const { return scorers_.size(); }
  ScorerP operator[](size_t i) const { return scorers_[i]; }
 private:
  std::vector<ScorerP> scorers_;
};


#endif