1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
#ifndef _FF_H_
#define _FF_H_
#include <string>
#include <vector>
#include "sparse_vector.h"
namespace HG { struct Edge; struct Node; }
class Hypergraph;
class SentenceMetadata;
// if you want to develop a new feature, inherit from this class and
// override TraversalFeaturesImpl(...). If it's a feature that returns /
// depends on context, you may also need to implement
// FinalTraversalFeatures(...)
class FeatureFunction {
friend class ExternalFeature;
public:
std::string name_; // set by FF factory using usage()
FeatureFunction() : state_size_(), ignored_state_size_() {}
explicit FeatureFunction(int state_size, int ignored_state_size = 0)
: state_size_(state_size), ignored_state_size_(ignored_state_size) {}
virtual ~FeatureFunction();
bool IsStateful() const { return state_size_ > 0; }
int StateSize() const { return state_size_; }
// Returns the number of bytes in the state that should be ignored during
// search. When non-zero, the last N bytes in the state should be ignored when
// splitting a hypernode by the state. This allows the feature function to
// store some side data and later retrieve it via the state bytes.
int IgnoredStateSize() const { return ignored_state_size_; }
// override this. not virtual because we want to expose this to factory template for help before creating a FF
static std::string usage(bool show_params,bool show_details) {
return usage_helper("FIXME_feature_needs_name","[no parameters]","[no documentation yet]",show_params,show_details);
}
static std::string usage_helper(std::string const& name,std::string const& params,std::string const& details,bool show_params,bool show_details);
// called once, per input, before any feature calls to TraversalFeatures, etc.
// used to initialize sentence-specific data structures
virtual void PrepareForInput(const SentenceMetadata& smeta);
// Compute the feature values and (if this applies) the estimates of the
// feature values when this edge is used incorporated into a larger context
inline void TraversalFeatures(const SentenceMetadata& smeta,
const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
void* out_state) const {
TraversalFeaturesImpl(smeta, edge, ant_contexts,
features, estimated_features, out_state);
// TODO it's easy for careless feature function developers to overwrite
// the end of their state and clobber someone else's memory. These bugs
// will be horrendously painful to track down. There should be some
// optional strict mode that's enforced here that adds some kind of
// barrier between the blocks reserved for the residual contexts
}
// if there's some state left when you transition to the goal state, score
// it here. For example, a language model might the cost of adding
// <s> and </s>.
virtual void FinalTraversalFeatures(const void* residual_state,
SparseVector<double>* final_features) const;
protected:
// context is a pointer to a buffer of size NumBytesContext() that the
// feature function can write its state to. It's up to the feature function
// to determine how much space it needs and to determine how to encode its
// residual contextual information since it is OPAQUE to all clients outside
// of the particular FeatureFunction class. There is one exception:
// equality of the contents (i.e., memcmp) is required to determine whether
// two states can be combined.
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
const HG::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
void* context) const;
// !!! ONLY call these from subclass *CONSTRUCTORS* !!!
void SetStateSize(size_t state_size) {
state_size_ = state_size;
}
void SetIgnoredStateSize(size_t ignored_state_size) {
ignored_state_size_ = ignored_state_size;
}
private:
int state_size_, ignored_state_size_;
};
#endif
|