1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
|
#ifndef FFSET_H_
#define FFSET_H_
#include <stdint.h>

#include <cstddef>
#include <utility>
#include <vector>

#include "value_array.h"
#include "prob.h"
// Forward declarations. The declarations in this header only name these types
// through pointers and references, so their full definitions are not needed.
namespace HG { struct Edge; struct Node; }
class Hypergraph;
class FeatureFunction;
class SentenceMetadata;

// Feature-function state attached to a hypergraph node: an array of bytes.
// TODO let states be dynamically sized
typedef ValueArray<uint8_t> FFState; // this is a fixed array, but about 10% faster than string
// NOTE(review): the FIXME below talks about string operations, while FFState is
// already a ValueArray -- it looks stale; confirm and remove.
//FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size)

// One FFState per node.
typedef std::vector<FFState> FFStates;
// A ModelSet is a collection of FeatureFunctions, paired with a weight vector,
// that can be used to score, rescore, etc. a (translation?) forest.
class ModelSet {
 public:
  // weights: the weight vector. The class keeps it in a reference member
  //          (weights_), so presumably it must outlive this object -- verify
  //          against the constructor definition.
  // models:  the feature functions that make up this set.
  ModelSet(const std::vector<double>& weights,
           const std::vector<const FeatureFunction*>& models);

  // Sets edge->feature_values_ and edge->edge_prob_.
  //
  // NOTE: the edge itself does not have to be in hg.edges_, but its TAIL nodes
  // must be. Edge features are supposed to be overwritten, not added to
  // (possibly because rule features aren't in ModelSet and so need to be left
  // alone).
  void AddFeaturesToEdge(const SentenceMetadata& smeta,
                         const Hypergraph& hg,
                         const FFStates& node_states,
                         HG::Edge* edge,
                         FFState* residual_context,
                         prob_t* combination_cost_estimate = NULL) const;

  // Called INSTEAD of AddFeaturesToEdge when the result of the edge is the
  // goal. The rule must be unary (i.e. one variable); typically it is also
  // assumed that there are no target terminals (e.g. for LM).
  void AddFinalFeatures(const FFState& residual_context,
                        HG::Edge* edge,
                        const SentenceMetadata& smeta) const;

  // Called once before any feature functions apply to a hypergraph; it can be
  // used to initialize sentence-specific data structures.
  void PrepareForInput(const SentenceMetadata& smeta);

  // True when the set contains no models.
  bool empty() const { return models_.empty(); }

  // True when the recorded state size is zero.
  bool stateless() const { return state_size_ == 0; }

  // Part of a feature state may hold side data that is needed for calculating
  // feature values but is not necessary for splitting hypernodes. Such bytes
  // need to be erased for hypernode splitting.
  bool NeedsStateErasure() const;
  void EraseIgnoredBytes(FFState* state) const;

 private:
  // NOTE: the declaration order below is kept as-is; it determines member
  // initialization order.
  std::vector<const FeatureFunction*> models_;
  const std::vector<double>& weights_;  // reference member, not a copy
  int state_size_;
  std::vector<int> model_state_pos_;
  std::vector<std::pair<int, int> > ranges_to_erase_;
};
#endif
|