summaryrefslogtreecommitdiff
path: root/decoder/ffset.h
blob: 84f9fdb94c076891d8489f4c104b0c9eb57a516f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#ifndef FFSET_H_
#define FFSET_H_

#include <utility>
#include <vector>
#include "value_array.h"
#include "prob.h"

// Forward declarations only. The declarations in this header use these types
// through pointers and references, so the full definitions are not needed here.
namespace HG { struct Edge; struct Node; }
class Hypergraph;
class FeatureFunction;
class SentenceMetadata;

// FFState: the state of a feature function, held as a ValueArray of bytes.
// TODO: let states be dynamically sized.
typedef ValueArray<uint8_t> FFState; // a fixed array; noted by the original author as about 10% faster than string

// FFStates: a sequence of FFState values (this is the type of the node_states
// argument of ModelSet::AddFeaturesToEdge).
// FIXME: only context.data() is required to be contiguous, and it becomes
// invalid after the next string operation.  Use ValueArray instead? (Perhaps
// higher performance, and saves a word due to the fixed size.)
// NOTE(review): FFState above is already a ValueArray, so this FIXME looks
// stale -- confirm and remove.
typedef std::vector<FFState> FFStates;

// ModelSet groups a list of FeatureFunctions together with a weight vector so
// that the whole group can be used to score, rescore, etc. a forest.
class ModelSet {
 public:
  // Creates a set over `models`, to be used together with `weights`.
  // NOTE(review): weights_ is a reference member, so `weights` is presumably
  // bound rather than copied and would have to outlive this object -- confirm
  // against the constructor definition.
  ModelSet(const std::vector<double>& weights,
           const std::vector<const FeatureFunction*>& models);

  // Fills in edge->feature_values_ and edge->edge_prob_.
  //
  // `edge` does not have to be one of hg.edges_, but its TAIL nodes must
  // belong to `hg`.  The edge's features are meant to be overwritten rather
  // than accumulated (possibly because rule features are not part of the
  // ModelSet and therefore have to be left alone).
  //
  // `combination_cost_estimate` is optional and defaults to NULL.
  void AddFeaturesToEdge(const SentenceMetadata& smeta, const Hypergraph& hg,
                         const FFStates& node_states, HG::Edge* edge,
                         FFState* residual_context,
                         prob_t* combination_cost_estimate = NULL) const;

  // Used INSTEAD of AddFeaturesToEdge when the result of `edge` is the goal.
  // The edge must be a unary rule (i.e. exactly one variable); typically it is
  // also assumed to carry no target terminals (e.g. for an LM).
  void AddFinalFeatures(const FFState& residual_context, HG::Edge* edge,
                        SentenceMetadata const& smeta) const;

  // Invoked once, before any feature function is applied to a hypergraph.
  // This is the hook for setting up sentence-specific data structures.
  void PrepareForInput(const SentenceMetadata& smeta);

  // True when the set holds no models at all.
  bool empty() const {
    return models_.empty();
  }

  // True when the recorded state size is zero.
  bool stateless() const {
    return state_size_ == 0;
  }

  // Some bytes of a feature state may only hold side data that helps compute
  // feature values and is irrelevant for splitting hypernodes.  Those bytes
  // have to be erased before hypernode splitting.
  bool NeedsStateErasure() const;
  void EraseIgnoredBytes(FFState* state) const;

 private:
  // The feature functions in this set; stored as non-owning-looking raw
  // pointers (NOTE(review): ownership is not visible here -- confirm).
  std::vector<const FeatureFunction*> models_;
  // Held by reference, not by value: the referenced vector must stay alive
  // for as long as this ModelSet is used.
  const std::vector<double>& weights_;
  // Zero means stateless() reports true.
  int state_size_;
  // NOTE(review): presumably each model's position within an FFState --
  // confirm in the implementation file.
  std::vector<int> model_state_pos_;
  // NOTE(review): presumably the byte ranges cleared by EraseIgnoredBytes --
  // confirm in the implementation file.
  std::vector<std::pair<int, int> > ranges_to_erase_;
};

#endif