summaryrefslogtreecommitdiff
path: root/decoder/grammar.h
diff options
context:
space:
mode:
authoradam.d.lopez <adam.d.lopez@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-13 03:27:59 +0000
committeradam.d.lopez <adam.d.lopez@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-13 03:27:59 +0000
commit47c75319638866609f669346e15663c5ba43af7f (patch)
tree50bc1867bdb1ce27e679fb8981b1b805c4897cd6 /decoder/grammar.h
parenta2360c873ba8b72744e16752a067276a46d63645 (diff)
cdec now supports coarse-to-fine decoding (for SCFG only).
CTF has several options: -coarse_to_fine_beam_prune=<double> (required to activate CTF): assigns an alpha parameter for pruning the coarse forest; -ctf_beam_widen=<double> (optional, defaults to 2.0): ratio by which to widen the coarse pruning beam if the fine parse fails; -ctf_num_widenings=<int> (optional, defaults to 2): number of times to widen the coarse beam before defaulting to exhaustive source parsing; -ctf_no_exhaustive (optional): do not attempt an exhaustive parse if CTF fails to find a parse. Additionally, the script extools/coarsen_grammar.pl will create a coarse-to-fine grammar (for X?? categories *only*). cdec will read CTF grammars in a format identical to the original, in which refinements of a rule immediately follow the coarse projection, preceded by an additional whitespace character. Not fully tested, but should be backwards compatible. Also not yet integrated into pipelines, but should work on the command line. git-svn-id: https://ws10smt.googlecode.com/svn/trunk@231 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/grammar.h')
-rw-r--r--decoder/grammar.h13
1 files changed, 9 insertions, 4 deletions
diff --git a/decoder/grammar.h b/decoder/grammar.h
index 46886d3a..b26eb912 100644
--- a/decoder/grammar.h
+++ b/decoder/grammar.h
@@ -1,6 +1,7 @@
#ifndef GRAMMAR_H_
#define GRAMMAR_H_
+#include <algorithm>
#include <vector>
#include <map>
#include <set>
@@ -26,11 +27,13 @@ struct GrammarIter {
struct Grammar {
typedef std::map<WordID, std::vector<TRulePtr> > Cat2Rules;
static const std::vector<TRulePtr> NO_RULES;
-
+
+ Grammar(): ctf_levels_(0) {}
virtual ~Grammar();
virtual const GrammarIter* GetRoot() const = 0;
virtual bool HasRuleForSpan(int i, int j, int distance) const;
const std::string GetGrammarName(){return grammar_name_;}
+ unsigned int GetCTFLevels(){ return ctf_levels_; }
void SetGrammarName(std::string n) {grammar_name_ = n; }
// cat is the category to be rewritten
inline const std::vector<TRulePtr>& GetAllUnaryRules() const {
@@ -50,6 +53,7 @@ struct Grammar {
Cat2Rules rhs2unaries_; // these must be filled in by subclasses!
std::vector<TRulePtr> unaries_;
std::string grammar_name_;
+ unsigned int ctf_levels_;
};
typedef boost::shared_ptr<Grammar> GrammarPtr;
@@ -61,7 +65,7 @@ struct TextGrammar : public Grammar {
void SetMaxSpan(int m) { max_span_ = m; }
virtual const GrammarIter* GetRoot() const;
- void AddRule(const TRulePtr& rule);
+ void AddRule(const TRulePtr& rule, const unsigned int ctf_level=0, const TRulePtr& coarse_parent=TRulePtr());
void ReadFromFile(const std::string& filename);
virtual bool HasRuleForSpan(int i, int j, int distance) const;
const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const;
@@ -75,15 +79,16 @@ struct TextGrammar : public Grammar {
struct GlueGrammar : public TextGrammar {
// read glue grammar from file
explicit GlueGrammar(const std::string& file);
- GlueGrammar(const std::string& goal_nt, const std::string& default_nt); // "S", "X"
+ GlueGrammar(const std::string& goal_nt, const std::string& default_nt, const unsigned int ctf_level=0); // "S", "X"
virtual bool HasRuleForSpan(int i, int j, int distance) const;
};
struct PassThroughGrammar : public TextGrammar {
- PassThroughGrammar(const Lattice& input, const std::string& cat);
+ PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0);
virtual bool HasRuleForSpan(int i, int j, int distance) const;
private:
std::vector<std::set<int> > has_rule_; // index by [i][j]
};
+void RefineRule(TRulePtr pt, const unsigned int ctf_level);
#endif