1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
#ifndef AGRAMMAR_H_
#define AGRAMMAR_H_
#include "grammar.h"
#include "hg.h"
using namespace std;
class aTRule: public TRule{
public:
aTRule() : TRule(){ResetScore(0.00000001); }
aTRule(TRulePtr rule_);
void ResetScore(double initscore){//cerr<<"Reset Score "<<this->AsString()<<endl;
sum_scores_.set_value(FD::Convert("Prob"), initscore);}
void AddProb(double p ){
// cerr<<"in AddProb p="<<p<<endl;
// cerr<<"prob sumscores ="<<sum_scores_[FD::Convert("Prob")]<<endl;
sum_scores_.add_value(FD::Convert("Prob"), p);
// cerr<<"after AddProb\n";
}
void UpdateScore(double sumprob){
double minuslogp = 0 - log( sum_scores_.value(FD::Convert("Prob")) /sumprob);
if (sumprob< sum_scores_.value(FD::Convert("Prob"))){
cerr<<"UpdateScore sumprob="<<sumprob<< " sum_scores_.value(FD::Convert(\"Prob\"))="<< sum_scores_.value(FD::Convert("Prob"))<< this->AsString()<<endl;
exit(1);
}
this->scores_.set_value(FD::Convert("MinusLogP"), minuslogp);
}
private:
SparseVector<double> sum_scores_;
};
class aTGImpl;
struct NTRule{
NTRule(){};
NTRule(const TRulePtr & rule, WordID nt){
nt_ = nt;
rule_ = rule;
if (rule->lhs_ * -1 == nt)
ntPos_.push_back(-1);
for (int i=0; i< rule->f().size(); i++)
if (rule->f().at(i) * -1 == nt)
ntPos_.push_back(i);
}
TRulePtr rule_;
WordID nt_; //the labelID of the nt (nt_>0);
vector<int> ntPos_; //position of nt_ -1: lhs, from 0...f_.size() for nt of f_()
//i.e the rules is: NP-> DET NP; if nt_=5 is the labelID of NP then ntPos_ = (-1, 1): the indexes of nonterminal NP
};
struct aTextGrammar : public Grammar {
aTextGrammar();
aTextGrammar(const std::string& file);
void SetMaxSpan(int m) { max_span_ = m; }
virtual const GrammarIter* GetRoot() const;
void AddRule(const TRulePtr& rule);
void ReadFromFile(const std::string& filename);
virtual bool HasRuleForSpan(int i, int j, int distance) const;
const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const;
void AddSplitNonTerminal(WordID nt_old, vector<WordID> & nts);
void setMaxSplit(int max_split);
void splitNonterminal(WordID wordID);
void splitAllNonterminals();
void PrintAllRules(const string & filename) const;
void PrintNonterminalRules(WordID nt) const;
void SetGoalNT(const string & goal_str);
void ResetScore();
void UpdateScore();
void UpdateHgProsteriorProb(Hypergraph & hg);
void set_alpha(double alpha){alpha_ = alpha;}
private:
void RemoveRule(const TRulePtr & rule);
void RemoveNonterminal(WordID wordID);
int max_span_;
int max_split_;
boost::shared_ptr<aTGImpl> pimpl_;
map <WordID, vector<TRulePtr> > lhs_rules_;// WordID >0
map <WordID, vector<NTRule> > nt_rules_;
map <WordID, double> sum_probs_;
map <WordID, double> cnt_rules;
double alpha_;
// map<WordID, vector<WordID> > grSplitNonterminals;
WordID goalID;
};
#endif
|