summaryrefslogtreecommitdiff
path: root/training/crf/cllh_observer.cc
blob: 4ec2fa6560e3b0d19fe81624d3b2f2bc055ec94f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#include "cllh_observer.h"

#include <cmath>
#include <cassert>

#include "inside_outside.h"
#include "hg.h"
#include "sentence_metadata.h"

using namespace std;

namespace {
// Slightly negative tolerance used by NotifyAlignmentForest: an objective
// difference is only treated as an error when it falls below this value,
// so small negative differences are let through.
const double kMINUS_EPSILON = -1e-6;
}  // namespace

// Destructor. Its body is empty: no explicit cleanup is performed here.
ConditionalLikelihoodObserver::~ConditionalLikelihoodObserver() {
}

// Called at the start of decoding. Resets the per-call objective value and
// moves the observer to stage 1, which NotifyTranslationForest asserts on.
// The sentence metadata argument is unused.
void ConditionalLikelihoodObserver::NotifyDecodingStart(const SentenceMetadata&) {
  state = 1;    // stage 1: decoding started
  cur_obj = 0;  // cleared here, overwritten by NotifyTranslationForest
}

void ConditionalLikelihoodObserver::NotifyTranslationForest(const SentenceMetadata&, Hypergraph* hg) {
  assert(state == 1);
  state = 2;
  SparseVector<prob_t> cur_model_exp;
  const prob_t z = InsideOutside<prob_t,
                                 EdgeProb,
                                 SparseVector<prob_t>,
                                 EdgeFeaturesAndProbWeightFunction>(*hg, &cur_model_exp);
  cur_obj = log(z);
}

void ConditionalLikelihoodObserver::NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) {
  assert(state == 2);
  state = 3;
  SparseVector<prob_t> ref_exp;
  const prob_t ref_z = InsideOutside<prob_t,
                                     EdgeProb,
                                     SparseVector<prob_t>,
                                     EdgeFeaturesAndProbWeightFunction>(*hg, &ref_exp);

  double log_ref_z = log(ref_z);

  // rounding errors means that <0 is too strict
  if ((cur_obj - log_ref_z) < kMINUS_EPSILON) {
    cerr << "DIFF. ERR! log_model_z < log_ref_z: " << cur_obj << " " << log_ref_z << endl;
    exit(1);
  }
  assert(!std::isnan(log_ref_z));
  acc_obj += (cur_obj - log_ref_z);
  trg_words += smeta.GetReference().size();
}