From 851e389dffdd6996ea32d70defb8906de80b9edc Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 14 Dec 2009 20:35:11 -0500 Subject: few small fixes of alignment tools, add new orthographic similarity feature for word aligner, final naming of directories, libraries in cdec --- src/grammar.cc | 164 --------------------------------------------------------- 1 file changed, 164 deletions(-) delete mode 100644 src/grammar.cc (limited to 'src/grammar.cc') diff --git a/src/grammar.cc b/src/grammar.cc deleted file mode 100644 index e19bd344..00000000 --- a/src/grammar.cc +++ /dev/null @@ -1,164 +0,0 @@ -#include "grammar.h" - -#include -#include -#include - -#include "filelib.h" -#include "tdict.h" - -using namespace std; - -const vector Grammar::NO_RULES; - -RuleBin::~RuleBin() {} -GrammarIter::~GrammarIter() {} -Grammar::~Grammar() {} - -bool Grammar::HasRuleForSpan(int i, int j, int distance) const { - (void) i; - (void) j; - (void) distance; - return true; // always true by default -} - -struct TextRuleBin : public RuleBin { - int GetNumRules() const { - return rules_.size(); - } - TRulePtr GetIthRule(int i) const { - return rules_[i]; - } - void AddRule(TRulePtr t) { - rules_.push_back(t); - } - int Arity() const { - return rules_.front()->Arity(); - } - void Dump() const { - for (int i = 0; i < rules_.size(); ++i) - cerr << rules_[i]->AsString() << endl; - } - private: - vector rules_; -}; - -struct TextGrammarNode : public GrammarIter { - TextGrammarNode() : rb_(NULL) {} - ~TextGrammarNode() { - delete rb_; - } - const GrammarIter* Extend(int symbol) const { - map::const_iterator i = tree_.find(symbol); - if (i == tree_.end()) return NULL; - return &i->second; - } - - const RuleBin* GetRules() const { - if (rb_) { - //rb_->Dump(); - } - return rb_; - } - - map tree_; - TextRuleBin* rb_; -}; - -struct TGImpl { - TextGrammarNode root_; -}; - -TextGrammar::TextGrammar() : max_span_(10), pimpl_(new TGImpl) {} -TextGrammar::TextGrammar(const string& file) : - max_span_(10), - pimpl_(new TGImpl) { - ReadFromFile(file); -} - -const GrammarIter* TextGrammar::GetRoot() const { - return &pimpl_->root_; -} - -void TextGrammar::AddRule(const TRulePtr& rule) { - if (rule->IsUnary()) { - rhs2unaries_[rule->f().front()].push_back(rule); - unaries_.push_back(rule); - } else { - TextGrammarNode* cur = &pimpl_->root_; - for (int i = 0; i < rule->f_.size(); ++i) - cur = &cur->tree_[rule->f_[i]]; - if (cur->rb_ == NULL) - cur->rb_ = new TextRuleBin; - cur->rb_->AddRule(rule); - } -} - -void TextGrammar::ReadFromFile(const string& filename) { - ReadFile in(filename); - istream& in_file = *in.stream(); - assert(in_file); - long long int rule_count = 0; - bool fl = false; - while(in_file) { - string line; - getline(in_file, line); - if (line.empty()) continue; - ++rule_count; - if (rule_count % 50000 == 0) { cerr << '.' << flush; fl = true; } - if (rule_count % 2000000 == 0) { cerr << " [" << rule_count << "]\n"; fl = false; } - TRulePtr rule(TRule::CreateRuleSynchronous(line)); - if (rule) { - AddRule(rule); - } else { - if (fl) { cerr << endl; } - cerr << "Skipping badly formatted rule in line " << rule_count << " of " << filename << endl; - fl = false; - } - } - if (fl) cerr << endl; - cerr << " " << rule_count << " rules read.\n"; -} - -bool TextGrammar::HasRuleForSpan(int i, int j, int distance) const { - return (max_span_ >= distance); -} - -GlueGrammar::GlueGrammar(const string& file) : TextGrammar(file) {} - -GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt) { - TRulePtr stop_glue(new TRule("[" + goal_nt + "] ||| [" + default_nt + ",1] ||| [" + default_nt + ",1]")); - TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] [" - + default_nt + ",2] ||| [" + goal_nt + ",1] [" + default_nt + ",2] ||| Glue=1")); - - AddRule(stop_glue); - AddRule(glue); - //cerr << "GLUE: " << stop_glue->AsString() << endl; - //cerr << "GLUE: " << glue->AsString() << endl; -} - -bool GlueGrammar::HasRuleForSpan(int i, int j, int distance) const { - (void) j; - return (i == 0); -} - -PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat) : - has_rule_(input.size() + 1) { - for (int i = 0; i < input.size(); ++i) { - const vector& alts = input[i]; - for (int k = 0; k < alts.size(); ++k) { - const int j = alts[k].dist2next + i; - has_rule_[i].insert(j); - const string& src = TD::Convert(alts[k].label); - TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1")); - AddRule(pt); -// cerr << "PT: " << pt->AsString() << endl; - } - } -} - -bool PassThroughGrammar::HasRuleForSpan(int i, int j, int distance) const { - const set& hr = has_rule_[i]; - if (i == j) { return !hr.empty(); } - return (hr.find(j) != hr.end()); -} -- cgit v1.2.3