1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
#ifndef _LM_FF_H_
#define _LM_FF_H_
#include <vector>
#include <string>
#include "hg.h"
#include "ff.h"
#include "config.h"
// Abstract interface to an n-gram language model back end.
// All scores are log10 probabilities: a worse (less likely) event has a more
// negative score. floor_ is the lower bound that clamp() enforces.
class LanguageModelInterface {
 public:
  // Smallest log10 probability that clamp()/WordProbFloored() will return.
  double floor_;

  LanguageModelInterface() : floor_(-100) { }
  virtual ~LanguageModelInterface() { }

  // log10 probability of `word` following `context`. The result is NOT
  // clamped to floor_.
  virtual double WordProb(WordID word, WordID const* context) = 0;

  // WordProb() with the result raised to floor_ when it falls below it.
  inline double WordProbFloored(WordID word, WordID const* context) {
    const double unclamped = WordProb(word, context);
    return clamp(unclamped);
  }

  // Returns a context length that may be shorter than the actual
  // null-terminated length of `context`. `context` must be null terminated;
  // `len` is passed only to save effort for subclasses that don't support
  // contextID.
  virtual int ContextSize(WordID const* context, int len) = 0;

  // Additional log probability to apply when the context is shortened to
  // `shortened_len` as reported by ContextSize(). It is not floored: a
  // backoff cost should never need flooring.
  virtual double ContextBOW(WordID const* context, int shortened_len) = 0;

  // Truncates `context` in place to the length reported by ContextSize():
  // positions [shortened length, len) are overwritten with TD::none, from the
  // highest index downwards. Returns the ContextBOW() value for the shortened
  // context. Nothing is overwritten when the reported length is >= len.
  inline double ShortenContext(WordID* context, int len) {
    const int kept = ContextSize(context, len);
    const double backoff = ContextBOW(context, kept);
    for (int i = len; i > kept; )
      context[--i] = TD::none;
    return backoff;
  }

  // Lower-bounds a log10 probability at floor_. Worse probabilities are more
  // negative, which is what SRI wordProb returns: log10(prob).
  inline double clamp(double logp) const {
    if (logp < floor_)
      return floor_;
    return logp;
  }
};
// Forward declaration only; the definition of LanguageModelImpl is not visible
// in this header.
struct LanguageModelImpl;
// Language-model feature function: a FeatureFunction whose scoring is
// delegated to an implementation object reached through pimpl_.
class LanguageModel : public FeatureFunction {
public:
// param = "filename.lm [-o n]"
LanguageModel(const std::string& param);
~LanguageModel();
// Overrides for the FeatureFunction interface; `context` / `state` are opaque
// state blobs whose layout is defined by the implementation (not shown here).
virtual void FinalTraversalFeatures(const void* context,
SparseVector<double>* features) const;
std::string DebugStateToString(const void* state) const;
// Static usage/help text; the meaning of the two flags is defined in the
// implementation file (presumably "describe param" / "verbose" -- confirm).
static std::string usage(bool param,bool verbose);
Features features() const;
protected:
// Per-edge scoring hook. Receives the antecedent states in `ant_contexts`,
// and has `features`, `estimated_features` and `out_context` as non-const
// output pointers.
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
const Hypergraph::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
void* out_context) const;
private:
// Feature id. Note: despite the wording of the original remark, this member
// is a plain non-const int (there is no `mutable` keyword here).
int fid_; // conceptually const; left non-const only to simplify the constructor
//LanguageModelImpl &imp() { return *(LanguageModelImpl*)pimpl_; }
// Views pimpl_ as the concrete implementation type. Being a const member that
// returns a non-const reference, it lets const methods call non-const members
// of the implementation.
// NOTE(review): LanguageModelImpl is only forward-declared in this header, so
// this C-style cast cannot apply any base-to-derived pointer adjustment; it
// presumably relies on LanguageModelImpl deriving from LanguageModelInterface
// with the base at offset 0 -- confirm against the implementation file.
LanguageModelImpl & imp() const { return *(LanguageModelImpl*)pimpl_; }
// Implementation object. NOTE(review): presumably allocated in the constructor
// and released in ~LanguageModel(); no copy constructor/assignment is declared
// in this class, so verify that instances are never copied.
/* mutable */ LanguageModelInterface* pimpl_;
};
#ifdef HAVE_RANDLM
// RandLM-backed language-model feature function. Only compiled when
// HAVE_RANDLM is defined. Declares the same FeatureFunction overrides as
// LanguageModel, but has no usage()/features() members.
class LanguageModelRandLM : public FeatureFunction {
public:
// param = "filename.lm [-o n]"
LanguageModelRandLM(const std::string& param);
~LanguageModelRandLM();
// `context` / `state` are opaque state blobs whose layout is defined by the
// implementation (not shown in this header).
virtual void FinalTraversalFeatures(const void* context,
SparseVector<double>* features) const;
std::string DebugStateToString(const void* state) const;
protected:
// Per-edge scoring hook. Receives the antecedent states in `ant_contexts`,
// and has `features`, `estimated_features` and `out_context` as non-const
// output pointers.
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
const Hypergraph::Edge& edge,
const std::vector<const void*>& ant_contexts,
SparseVector<double>* features,
SparseVector<double>* estimated_features,
void* out_context) const;
private:
// Feature id; const, so it must be set in the constructor's initializer list.
const int fid_;
// Implementation object, held directly as LanguageModelImpl*. `mutable` lets
// const methods reseat the pointer. NOTE(review): ownership is presumably
// handled in the constructor/destructor -- confirm in the implementation file.
mutable LanguageModelImpl* pimpl_;
};
#endif
#endif
|