summaryrefslogtreecommitdiff
path: root/decoder/grammar.cc
blob: ee43f537cfcdf3bd5792d4961243febaff1322e5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#include "grammar.h"

#include <algorithm>
#include <utility>
#include <map>
#include <tr1/unordered_map>
#include <tr1/unordered_set>

#include "rule_lexer.h"
#include "filelib.h"
#include "tdict.h"

using namespace std;
using namespace std::tr1;

// Out-of-line definition of the static member Grammar::NO_RULES. It is
// default-constructed, i.e. an empty vector of rule pointers.
const vector<TRulePtr> Grammar::NO_RULES;

// Out-of-line destructor definitions for the grammar interface classes.
// All three bodies are empty.
RuleBin::~RuleBin() {}
GrammarIter::~GrammarIter() {}
Grammar::~Grammar() {}

// Default span filter: every span is accepted. None of the three arguments
// is consulted by this implementation, so their names are left commented out.
bool Grammar::HasRuleForSpan(int /* i */, int /* j */, int /* distance */) const {
  return true;
}

struct TextRuleBin : public RuleBin {
  int GetNumRules() const {
    return rules_.size();
  }
  TRulePtr GetIthRule(int i) const {
    return rules_[i];
  }
  void AddRule(TRulePtr t) {
    rules_.push_back(t);
  }
  int Arity() const {
    return rules_.front()->Arity();
  }
  void Dump() const {
    for (int i = 0; i < rules_.size(); ++i)
      cerr << rules_[i]->AsString() << endl;
  }
 private:
  vector<TRulePtr> rules_;
};

struct TextGrammarNode : public GrammarIter {
  TextGrammarNode() : rb_(NULL) {}
  ~TextGrammarNode() {
    delete rb_;
  }
  const GrammarIter* Extend(int symbol) const {
    map<WordID, TextGrammarNode>::const_iterator i = tree_.find(symbol);
    if (i == tree_.end()) return NULL;
    return &i->second;
  }

  const RuleBin* GetRules() const {
    if (rb_) {
      //rb_->Dump();
    }
    return rb_;
  }

  map<WordID, TextGrammarNode> tree_;
  TextRuleBin* rb_;
};

// Private implementation object of TextGrammar (held through pimpl_).
// Its only member is the root node of the rule trie.
struct TGImpl {
  TextGrammarNode root_;
};

// Creates a grammar with no rules: max_span_ starts at 10 and a fresh TGImpl
// is allocated for pimpl_.
TextGrammar::TextGrammar()
    : max_span_(10),
      pimpl_(new TGImpl) {
}
// Creates a grammar (max_span_ = 10, fresh TGImpl) and immediately loads
// rules from the named file via ReadFromFile.
TextGrammar::TextGrammar(const string& file)
    : max_span_(10),
      pimpl_(new TGImpl) {
  this->ReadFromFile(file);
}

// Creates a grammar (max_span_ = 10, fresh TGImpl) and immediately loads
// rules from the given stream via ReadFromStream.
TextGrammar::TextGrammar(istream* in)
    : max_span_(10),
      pimpl_(new TGImpl) {
  this->ReadFromStream(in);
}

// Returns a pointer to the root node of the rule trie held by pimpl_.
const GrammarIter* TextGrammar::GetRoot() const {
  const GrammarIter* root = &pimpl_->root_;
  return root;
}

// Adds `rule` to the grammar. Where it is stored depends on the arguments:
//   * ctf_level > 0: the rule is appended to coarse_rule's fine_rules_ list
//     (created on first use) and ctf_levels_ is raised to at least ctf_level.
//   * otherwise, if rule->IsUnary(): the rule is recorded in rhs2unaries_
//     (keyed by the first symbol of rule->f()) and in unaries_.
//   * otherwise: the rule is inserted into the trie, following (and creating
//     as needed) one node per symbol of rule->f_, and added to the rule bin
//     of the final node.
// Precondition: when ctf_level > 0, coarse_rule must be non-null; it is
// dereferenced without a check.
void TextGrammar::AddRule(const TRulePtr& rule, const unsigned int ctf_level, const TRulePtr& coarse_rule) {
  if (ctf_level > 0) {
    // assume that coarse_rule is already in tree (would be safer to check)
    if (!coarse_rule->fine_rules_)
      coarse_rule->fine_rules_.reset(new std::vector<TRulePtr>());
    coarse_rule->fine_rules_->push_back(rule);
    ctf_levels_ = std::max(ctf_levels_, ctf_level);
  } else if (rule->IsUnary()) {
    rhs2unaries_[rule->f().front()].push_back(rule);
    unaries_.push_back(rule);
  } else {
    TextGrammarNode* cur = &pimpl_->root_;
    // Unsigned index and hoisted length: avoids a signed/unsigned comparison
    // and re-evaluating size() on every iteration.
    const size_t rhs_len = rule->f_.size();
    for (size_t i = 0; i < rhs_len; ++i)
      cur = &cur->tree_[rule->f_[i]];  // operator[] creates missing children
    if (cur->rb_ == NULL)
      cur->rb_ = new TextRuleBin;
    cur->rb_->AddRule(rule);
  }
}

static void AddRuleHelper(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra) {
  static_cast<TextGrammar*>(extra)->AddRule(new_rule, ctf_level, coarse_rule);
}

void TextGrammar::ReadFromFile(const string& filename) {
  ReadFile in(filename);
  ReadFromStream(in.stream());
}

// Hands the stream to RuleLexer::ReadRules, registering AddRuleHelper as the
// callback and this grammar as its opaque `extra` argument.
void TextGrammar::ReadFromStream(istream* in) {
  void* const self = this;
  RuleLexer::ReadRules(in, &AddRuleHelper, self);
}

// A span is accepted when `distance` does not exceed max_span_. The span
// endpoints themselves are not consulted.
bool TextGrammar::HasRuleForSpan(int /* i */, int /* j */, int distance) const {
  const bool within_limit = (distance <= max_span_);
  return within_limit;
}