1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
#include "grammar.h"
#include <algorithm>
#include <utility>
#include <map>
#include <tr1/unordered_map>
#include <tr1/unordered_set>
#include "rule_lexer.h"
#include "filelib.h"
#include "tdict.h"
using namespace std;
using namespace std::tr1;
const vector<TRulePtr> Grammar::NO_RULES;
// Out-of-line definition of the RuleBin destructor; the body is empty.
RuleBin::~RuleBin() {
}
// Out-of-line definition of the GrammarIter destructor; the body is empty.
GrammarIter::~GrammarIter() {
}
// Out-of-line definition of the Grammar destructor; the body is empty.
Grammar::~Grammar() {
}
// Default span filter: none of the three arguments is inspected and the
// answer is unconditionally true.
bool Grammar::HasRuleForSpan(int /* i */, int /* j */, int /* distance */) const {
  return true;  // always true by default
}
struct TextRuleBin : public RuleBin {
int GetNumRules() const {
return rules_.size();
}
TRulePtr GetIthRule(int i) const {
return rules_[i];
}
void AddRule(TRulePtr t) {
rules_.push_back(t);
}
int Arity() const {
return rules_.front()->Arity();
}
void Dump() const {
for (int i = 0; i < rules_.size(); ++i)
cerr << rules_[i]->AsString() << endl;
}
private:
vector<TRulePtr> rules_;
};
struct TextGrammarNode : public GrammarIter {
TextGrammarNode() : rb_(NULL) {}
~TextGrammarNode() {
delete rb_;
}
const GrammarIter* Extend(int symbol) const {
map<WordID, TextGrammarNode>::const_iterator i = tree_.find(symbol);
if (i == tree_.end()) return NULL;
return &i->second;
}
const RuleBin* GetRules() const {
if (rb_) {
//rb_->Dump();
}
return rb_;
}
map<WordID, TextGrammarNode> tree_;
TextRuleBin* rb_;
};
// Private implementation data of TextGrammar: holds the root node of the
// grammar trie.
struct TGImpl {
  TextGrammarNode root_;
};
// Creates a grammar with a fresh TGImpl (an empty trie) and max_span_ set
// to 10. No rules are loaded.
TextGrammar::TextGrammar()
    : max_span_(10),
      pimpl_(new TGImpl) {
}
// Creates a grammar with max_span_ set to 10 and a fresh TGImpl, then loads
// rules from the named file through ReadFromFile().
TextGrammar::TextGrammar(const string& file)
    : max_span_(10),
      pimpl_(new TGImpl) {
  ReadFromFile(file);
}
// Creates a grammar with max_span_ set to 10 and a fresh TGImpl, then loads
// rules from the given stream through ReadFromStream().
TextGrammar::TextGrammar(istream* in)
    : max_span_(10),
      pimpl_(new TGImpl) {
  ReadFromStream(in);
}
// Returns the root node of the trie held by the implementation object. The
// pointer refers to a member of *pimpl_, so the caller does not own it.
const GrammarIter* TextGrammar::GetRoot() const {
  const TextGrammarNode& root = pimpl_->root_;
  return &root;
}
void TextGrammar::AddRule(const TRulePtr& rule, const unsigned int ctf_level, const TRulePtr& coarse_rule) {
if (ctf_level > 0) {
// assume that coarse_rule is already in tree (would be safer to check)
if (coarse_rule->fine_rules_ == 0)
coarse_rule->fine_rules_.reset(new std::vector<TRulePtr>());
coarse_rule->fine_rules_->push_back(rule);
ctf_levels_ = std::max(ctf_levels_, ctf_level);
} else if (rule->IsUnary()) {
rhs2unaries_[rule->f().front()].push_back(rule);
unaries_.push_back(rule);
} else {
TextGrammarNode* cur = &pimpl_->root_;
for (int i = 0; i < rule->f_.size(); ++i)
cur = &cur->tree_[rule->f_[i]];
if (cur->rb_ == NULL)
cur->rb_ = new TextRuleBin;
cur->rb_->AddRule(rule);
}
}
static void AddRuleHelper(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra) {
static_cast<TextGrammar*>(extra)->AddRule(new_rule, ctf_level, coarse_rule);
}
void TextGrammar::ReadFromFile(const string& filename) {
ReadFile in(filename);
ReadFromStream(in.stream());
}
// Hands the stream to RuleLexer::ReadRules together with AddRuleHelper and
// this grammar as the callback's extra argument. Presumably the lexer invokes
// the callback once per parsed rule (confirm against rule_lexer.h).
void TextGrammar::ReadFromStream(istream* in) {
  void* const self = this;
  RuleLexer::ReadRules(in, &AddRuleHelper, self);
}
// Span filter for this grammar: true exactly when `distance` does not exceed
// max_span_. The first two arguments are not used.
bool TextGrammar::HasRuleForSpan(int /* i */, int /* j */, int distance) const {
  const bool within_limit = (distance <= max_span_);
  return within_limit;
}
|