summaryrefslogtreecommitdiff
path: root/decoder/sentence_metadata.h
blob: eab9f15de81d87200e7752ab373f5143959172e9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#ifndef _SENTENCE_METADATA_H_
#define _SENTENCE_METADATA_H_

#include <string>
#include <map>
#include <cassert>
#include "lattice.h"
#include "scorer.h"

struct SentenceMetadata {
  friend class DecoderImpl;
  SentenceMetadata(int id, const Lattice& ref) :
    sent_id_(id),
    src_len_(-1),
    has_reference_(ref.size() > 0),
    trg_len_(ref.size()),
    ref_(has_reference_ ? &ref : NULL) {}

  int GetSentenceId() const { return sent_id_; }

  // this should be called by the Translator object after
  // it has parsed the source
  void SetSourceLength(int sl) { src_len_ = sl; }

  // this should be called if a separate model needs to
  // specify how long the target sentence should be
  void SetTargetLength(int tl) {
    assert(!has_reference_);
    trg_len_ = tl;
  }
  bool HasReference() const { return has_reference_; }
  const Lattice& GetReference() const { return *ref_; }
  int GetSourceLength() const { return src_len_; }
  int GetTargetLength() const { return trg_len_; }
  int GetSentenceID() const { return sent_id_; }
  // this will be empty if the translator accepts non FS input!
  const Lattice& GetSourceLattice() const { return src_lattice_; }

  // access to document level scores for MIRA vector computation
  void SetScore(Score *s){app_score=s;}
  void SetDocScorer (const DocScorer *d){ds = d;}
  void SetDocLen(double dl){doc_len = dl;}

  const Score& GetScore() const { return *app_score; }
  const DocScorer& GetDocScorer() const { return *ds; }
  double GetDocLen() const {return doc_len;}

  std::string GetSGMLValue(const std::string& key) const {
    std::map<std::string, std::string>::const_iterator it = sgml_.find(key);
    if (it == sgml_.end()) return "";
    return it->second;
  }

 private:
  std::map<std::string, std::string> sgml_;
  const int sent_id_;
  // the following should be set, if possible, by the Translator
  int src_len_;
  double doc_len;
  const DocScorer* ds;
  const Score* app_score;
 public:
  Lattice src_lattice_;  // this will only be set if inputs are finite state!
 private:
  // you need to be very careful when depending on these values
  // they will only be set during training / alignment contexts
  const bool has_reference_;
  int trg_len_;
  const Lattice* const ref_;
};

#endif