From f305e7b0e23b952fb4b7299b2607176ab7409ef9 Mon Sep 17 00:00:00 2001 From: "linh.kitty" Date: Tue, 13 Jul 2010 20:20:55 +0000 Subject: linh added git-svn-id: https://ws10smt.googlecode.com/svn/trunk@241 ec762483-ff6d-05da-a07a-a48fb63a330f --- gi/scfg/abc/agrammar.cc | 378 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 gi/scfg/abc/agrammar.cc (limited to 'gi/scfg/abc/agrammar.cc') diff --git a/gi/scfg/abc/agrammar.cc b/gi/scfg/abc/agrammar.cc new file mode 100644 index 00000000..585255e3 --- /dev/null +++ b/gi/scfg/abc/agrammar.cc @@ -0,0 +1,378 @@ +#include +#include +#include + +#include "rule_lexer.h" +#include "filelib.h" +#include "tdict.h" +#include "agrammar.h" +#include "../utils/Util.h" + +bool equal(TRulePtr const & rule1, TRulePtr const & rule2){ + if (rule1->lhs_ != rule2->lhs_) return false; + if (rule1->f_.size() != rule2->f_.size()) return false; + if (rule1->e_.size() != rule2->e_.size()) return false; + + for (int i=0; if_.size(); i++) + if (rule1->f_.at(i) != rule2->f_.at(i)) return false; + for (int i=0; ie_.size(); i++) + if (rule1->e_.at(i) != rule2->e_.at(i)) return false; + return true; +} + +//const vector Grammar::NO_RULES; + +void aRemoveRule(vector & v, const TRulePtr & rule){ // remove rule from v if found + for (int i=0; i< v.size(); i++) + if (equal(v[i], rule )){ + cout<<"erase rule from vector:"<AsString()<Arity(); + } + void Dump() const { + for (int i = 0; i < rules_.size(); ++i) + cerr << rules_[i]->AsString() << endl; + } + private: + vector rules_; +}; + +struct aTextGrammarNode : public GrammarIter { + aTextGrammarNode() : rb_(NULL) {} + ~aTextGrammarNode() { + delete rb_; + } + const GrammarIter* Extend(int symbol) const { + map::const_iterator i = tree_.find(symbol); + if (i == tree_.end()) return NULL; + return &i->second; + } + + const RuleBin* GetRules() const { + if (rb_) { + //rb_->Dump(); + } + return rb_; + } + + map tree_; + aTextRuleBin* rb_; +}; + +struct aTGImpl { + aTextGrammarNode root_; +}; + +aTextGrammar::aTextGrammar() : max_span_(10), pimpl_(new aTGImpl) {} +aTextGrammar::aTextGrammar(const string& file) : + max_span_(10), + pimpl_(new aTGImpl) { + ReadFromFile(file); +} + +const GrammarIter* aTextGrammar::GetRoot() const { + return &pimpl_->root_; +} + +void aTextGrammar::SetGoalNT(const string & goal_str){ + goalID = TD::Convert(goal_str); + +} +void getNTRule( const TRulePtr & rule, map & ntrule_map){ + + NTRule lhs_ntrule(rule, rule->lhs_ * -1); + ntrule_map[rule->lhs_ * -1] = lhs_ntrule; + + for (int i=0; i< (rule->f_).size(); i++) + if (ntrule_map.find((rule->f_).at(i) * -1) == ntrule_map.end() && (rule->f_).at(i) <0 ){ + NTRule rhs_ntrule(rule, rule->f_.at(i) * -1); + ntrule_map[(rule->f_).at(i) *-1] = rhs_ntrule; + } + + +} +void aTextGrammar::AddRule(const TRulePtr& rule) { + if (rule->IsUnary()) { + rhs2unaries_[rule->f().front()].push_back(rule); + unaries_.push_back(rule); + } else { + aTextGrammarNode* cur = &pimpl_->root_; + for (int i = 0; i < rule->f_.size(); ++i) + cur = &cur->tree_[rule->f_[i]]; + if (cur->rb_ == NULL) + cur->rb_ = new aTextRuleBin; + cur->rb_->AddRule(rule); + } + + //add the rule to lhs_rules_ + lhs_rules_[rule->lhs_* -1].push_back(rule); + + //add the rule to nt_rules_ + map ntrule_map; + getNTRule (rule, ntrule_map); + for (map::const_iterator it= ntrule_map.begin(); it != ntrule_map.end(); it++){ + nt_rules_[it->first].push_back(it->second); + } +} + +void aTextGrammar::RemoveRule(const TRulePtr & rule){ + cout<<"Remove rule: "<AsString()<IsUnary()) { + aRemoveRule(rhs2unaries_[rule->f().front()], rule); + aRemoveRule(unaries_, rule); + } else { + aTextGrammarNode* cur = &pimpl_->root_; + for (int i = 0; i < rule->f_.size(); ++i) + cur = &cur->tree_[rule->f_[i]]; +// if (cur->rb_ == NULL) +// cur->rb_ = new aTextRuleBin; + cur->rb_->RemoveRule(rule); + } + + //remove rules from lhs_rules_ + + aRemoveRule(lhs_rules_[rule->lhs_ * -1] , rule); + +} + +void aTextGrammar::RemoveNonterminal(WordID wordID){ + vector rules = nt_rules_[wordID]; +// // remove the nonterminal from ntrules_ + nt_rules_.erase(wordID); + for (int i =0; i & nts){ + + vector rules = nt_rules_[nt_old]; + + // cout<<"\n\n\n start add splitting rules"< ntPos = old_rule.ntPos_; //in rule old_rule, ntPos is the positions of nonterminal nt_old + //we have to substitute each nt in these positions by the list of new nonterminals in the input vector 'nts' + //there are cnt =size_of(nts)^ size_of(ntPos) possibilities for the substitutions, + //hence the rules' new probabilities have to divide to cnt also + // cout<<"splitting NT in rule "<AsString()< e_ = (old_rule.rule_)->e_; + newrule -> f_ = old_rule.rule_->f_; + newrule->lhs_ = old_rule.rule_->lhs_; + newrule -> arity_ = old_rule.rule_->arity_; + newrule -> scores_ = old_rule.rule_->scores_; + + // cout<<"end up update score\n"; + if (ntPos[0] == -1){ //update the lhs + newrule->lhs_ = nts[j_vector[0]] * -1; + + //score has to randomly add/minus a small epsilon to break the balance + if (nts.size() >1 && ntPos.size() >1){ + // cout<<"start to add/minus epsilon"<lhs_] >= cnt_newrules / (2*ntPos.size()) ) //there are enough rules added epsilon, the new rules has to minus epsilon + newrule-> scores_ -= epsilon; + else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules / (2*ntPos.size()) ) + newrule-> scores_ += epsilon; + else{ + double random = rand()/RAND_MAX; + if (random > .5){ + newrule-> scores_ += epsilon; + cnt_addepsilon[newrule->lhs_]++; + } + else{ + newrule-> scores_ -= epsilon; + cnt_minusepsilon[newrule->lhs_]++; + } + } + } + + + for (int k=1; klhs_] >= cnt_newrules / 2 ) //there are enough rules added epsilon, the new rules has to minus epsilon + newrule-> scores_ -= epsilon; + else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules /2 ) + newrule-> scores_ += epsilon; + else{ + double random = rand()/RAND_MAX; + if (random > .5){ + newrule-> scores_ += epsilon; + cnt_addepsilon[newrule->lhs_]++; + } + else{ + newrule-> scores_ -= epsilon; + cnt_minusepsilon[newrule->lhs_]++; + } + } + } + + + for (int k=0; k X1; X->X2,... if X is the goal NT + for (int i =0; ilhs_ = goalID * -1; + rule ->f_.push_back(v_splits[i] * -1); + rule->e_.push_back(0); + + rule->scores_.set_value(FD::Convert("MinusLogP"), log(v_splits.size()) ); + AddRule(rule); + } + + } + + +} + + + +void aTextGrammar::PrintAllRules() const{ + map >::const_iterator it; + for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){ + + vector v = it-> second; + for (int i =0; i< v.size(); i++){ + cout<AsString()<<"\t"< v; + map >::const_iterator mit= nt_rules_.find(nt); + if (mit == nt_rules_.end()) + return; + + v = mit->second; + + for (vector::const_iterator it = v.begin(); it != v.end(); it++) + cout<rule_->AsString()<(extra)->AddRule(new_rule); +} + +void aTextGrammar::ReadFromFile(const string& filename) { + ReadFile in(filename); + RuleLexer::ReadRules(in.stream(), &AddRuleHelper, this); +} + +bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const { + return (max_span_ >= distance); +} + -- cgit v1.2.3