From 851e389dffdd6996ea32d70defb8906de80b9edc Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 14 Dec 2009 20:35:11 -0500 Subject: few small fixes of alignment tools, add new orthographic similarity feature for word aligner, final naming of directories, libraries in cdec --- decoder/grammar.h | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 decoder/grammar.h (limited to 'decoder/grammar.h') diff --git a/decoder/grammar.h b/decoder/grammar.h new file mode 100644 index 00000000..3471e3f1 --- /dev/null +++ b/decoder/grammar.h @@ -0,0 +1,83 @@ +#ifndef GRAMMAR_H_ +#define GRAMMAR_H_ + +#include +#include +#include +#include + +#include "lattice.h" +#include "trule.h" + +struct RuleBin { + virtual ~RuleBin(); + virtual int GetNumRules() const = 0; + virtual TRulePtr GetIthRule(int i) const = 0; + virtual int Arity() const = 0; +}; + +struct GrammarIter { + virtual ~GrammarIter(); + virtual const RuleBin* GetRules() const = 0; + virtual const GrammarIter* Extend(int symbol) const = 0; +}; + +struct Grammar { + typedef std::map > Cat2Rules; + static const std::vector NO_RULES; + + virtual ~Grammar(); + virtual const GrammarIter* GetRoot() const = 0; + virtual bool HasRuleForSpan(int i, int j, int distance) const; + + // cat is the category to be rewritten + inline const std::vector& GetAllUnaryRules() const { + return unaries_; + } + + // get all the unary rules that rewrite category cat + inline const std::vector& GetUnaryRulesForRHS(const WordID& cat) const { + Cat2Rules::const_iterator found = rhs2unaries_.find(cat); + if (found == rhs2unaries_.end()) + return NO_RULES; + else + return found->second; + } + + protected: + Cat2Rules rhs2unaries_; // these must be filled in by subclasses! + std::vector unaries_; +}; + +typedef boost::shared_ptr GrammarPtr; + +class TGImpl; +struct TextGrammar : public Grammar { + TextGrammar(); + TextGrammar(const std::string& file); + void SetMaxSpan(int m) { max_span_ = m; } + virtual const GrammarIter* GetRoot() const; + void AddRule(const TRulePtr& rule); + void ReadFromFile(const std::string& filename); + virtual bool HasRuleForSpan(int i, int j, int distance) const; + const std::vector& GetUnaryRules(const WordID& cat) const; + private: + int max_span_; + boost::shared_ptr pimpl_; +}; + +struct GlueGrammar : public TextGrammar { + // read glue grammar from file + explicit GlueGrammar(const std::string& file); + GlueGrammar(const std::string& goal_nt, const std::string& default_nt); // "S", "X" + virtual bool HasRuleForSpan(int i, int j, int distance) const; +}; + +struct PassThroughGrammar : public TextGrammar { + PassThroughGrammar(const Lattice& input, const std::string& cat); + virtual bool HasRuleForSpan(int i, int j, int distance) const; + private: + std::vector > has_rule_; // index by [i][j] +}; + +#endif -- cgit v1.2.3