summaryrefslogtreecommitdiff
path: root/decoder/grammar.h
blob: f5d00817cee6c711164492b6ef744685b7c2180e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#ifndef GRAMMAR_H_
#define GRAMMAR_H_

#include <iostream>
#include <algorithm>
#include <vector>
#include <map>
#include <set>
#include <string>

#include <boost/shared_ptr.hpp>

#include "lattice.h"
#include "trule.h"

struct RuleBin {
  virtual ~RuleBin();
  virtual int GetNumRules() const = 0;
  virtual TRulePtr GetIthRule(int i) const = 0;
  virtual int Arity() const = 0;
};

struct GrammarIter {
  virtual ~GrammarIter();
  virtual const RuleBin* GetRules() const = 0;
  virtual const GrammarIter* Extend(int symbol) const = 0;
};

struct Grammar {
  //TODO: HASH_MAP?
  typedef std::map<WordID, std::vector<TRulePtr> > Cat2Rules;
  static const std::vector<TRulePtr> NO_RULES;

  Grammar(): ctf_levels_(0) {}
  virtual ~Grammar();
  virtual const GrammarIter* GetRoot() const = 0;
  virtual bool HasRuleForSpan(int i, int j, int distance) const;
  const std::string GetGrammarName(){return grammar_name_;}
  unsigned int GetCTFLevels(){ return ctf_levels_; }
  void SetGrammarName(std::string n) {grammar_name_ = n; }
  // cat is the category to be rewritten
  inline const std::vector<TRulePtr>& GetAllUnaryRules() const {
    return unaries_;
  }

  // get all the unary rules that rewrite category cat
  inline const std::vector<TRulePtr>& GetUnaryRulesForRHS(const WordID& cat) const {
    Cat2Rules::const_iterator found = rhs2unaries_.find(cat);
    if (found == rhs2unaries_.end())
      return NO_RULES;
    else
      return found->second;
  }

 protected:
  Cat2Rules rhs2unaries_;     // these must be filled in by subclasses!
  std::vector<TRulePtr> unaries_;
  std::string grammar_name_;
  unsigned int ctf_levels_;
};

typedef boost::shared_ptr<Grammar> GrammarPtr;

class TGImpl;
struct TextGrammar : public Grammar {
  TextGrammar();
  explicit TextGrammar(const std::string& file);
  explicit TextGrammar(std::istream* in);
  void SetMaxSpan(int m) { max_span_ = m; }

  virtual const GrammarIter* GetRoot() const;
  void AddRule(const TRulePtr& rule, const unsigned int ctf_level=0, const TRulePtr& coarse_parent=TRulePtr());
  void ReadFromFile(const std::string& filename);
  void ReadFromStream(std::istream* in);
  virtual bool HasRuleForSpan(int i, int j, int distance) const;
  const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const;

 private:
  int max_span_;
  boost::shared_ptr<TGImpl> pimpl_;

};

struct GlueGrammar : public TextGrammar {
  // read glue grammar from file
  explicit GlueGrammar(const std::string& file);
  GlueGrammar(const std::string& goal_nt, const std::string& default_nt, const unsigned int ctf_level=0);  // "S", "X"
  virtual bool HasRuleForSpan(int i, int j, int distance) const;
};

struct PassThroughGrammar : public TextGrammar {
  PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0);
  virtual bool HasRuleForSpan(int i, int j, int distance) const;
 private:
  std::vector<std::set<int> > has_rule_;  // index by [i][j]
};

void RefineRule(TRulePtr pt, const unsigned int ctf_level);

#endif