#include #include #include #include "rule_lexer.h" #include "filelib.h" #include "tdict.h" #include "agrammar.h" #include "../utils/Util.h" bool equal(TRulePtr const & rule1, TRulePtr const & rule2){ if (rule1->lhs_ != rule2->lhs_) return false; if (rule1->f_.size() != rule2->f_.size()) return false; if (rule1->e_.size() != rule2->e_.size()) return false; for (int i=0; if_.size(); i++) if (rule1->f_.at(i) != rule2->f_.at(i)) return false; for (int i=0; ie_.size(); i++) if (rule1->e_.at(i) != rule2->e_.at(i)) return false; return true; } //const vector Grammar::NO_RULES; void aRemoveRule(vector & v, const TRulePtr & rule){ // remove rule from v if found for (int i=0; i< v.size(); i++) if (equal(v[i], rule )){ cout<<"erase rule from vector:"<AsString()<Arity(); } void Dump() const { for (int i = 0; i < rules_.size(); ++i) cerr << rules_[i]->AsString() << endl; } private: vector rules_; }; struct aTextGrammarNode : public GrammarIter { aTextGrammarNode() : rb_(NULL) {} ~aTextGrammarNode() { delete rb_; } const GrammarIter* Extend(int symbol) const { map::const_iterator i = tree_.find(symbol); if (i == tree_.end()) return NULL; return &i->second; } const RuleBin* GetRules() const { if (rb_) { //rb_->Dump(); } return rb_; } map tree_; aTextRuleBin* rb_; }; struct aTGImpl { aTextGrammarNode root_; }; aTextGrammar::aTextGrammar() : max_span_(10), pimpl_(new aTGImpl) {} aTextGrammar::aTextGrammar(const string& file) : max_span_(10), pimpl_(new aTGImpl) { ReadFromFile(file); } const GrammarIter* aTextGrammar::GetRoot() const { return &pimpl_->root_; } void aTextGrammar::SetGoalNT(const string & goal_str){ goalID = TD::Convert(goal_str); } void getNTRule( const TRulePtr & rule, map & ntrule_map){ NTRule lhs_ntrule(rule, rule->lhs_ * -1); ntrule_map[rule->lhs_ * -1] = lhs_ntrule; for (int i=0; i< (rule->f_).size(); i++) if (ntrule_map.find((rule->f_).at(i) * -1) == ntrule_map.end() && (rule->f_).at(i) <0 ){ NTRule rhs_ntrule(rule, rule->f_.at(i) * -1); ntrule_map[(rule->f_).at(i) *-1] = rhs_ntrule; } } void aTextGrammar::AddRule(const TRulePtr& rule) { if (rule->IsUnary()) { rhs2unaries_[rule->f().front()].push_back(rule); unaries_.push_back(rule); } else { aTextGrammarNode* cur = &pimpl_->root_; for (int i = 0; i < rule->f_.size(); ++i) cur = &cur->tree_[rule->f_[i]]; if (cur->rb_ == NULL) cur->rb_ = new aTextRuleBin; cur->rb_->AddRule(rule); } //add the rule to lhs_rules_ lhs_rules_[rule->lhs_* -1].push_back(rule); //add the rule to nt_rules_ map ntrule_map; getNTRule (rule, ntrule_map); for (map::const_iterator it= ntrule_map.begin(); it != ntrule_map.end(); it++){ nt_rules_[it->first].push_back(it->second); } } void aTextGrammar::RemoveRule(const TRulePtr & rule){ cout<<"Remove rule: "<AsString()<IsUnary()) { aRemoveRule(rhs2unaries_[rule->f().front()], rule); aRemoveRule(unaries_, rule); } else { aTextGrammarNode* cur = &pimpl_->root_; for (int i = 0; i < rule->f_.size(); ++i) cur = &cur->tree_[rule->f_[i]]; // if (cur->rb_ == NULL) // cur->rb_ = new aTextRuleBin; cur->rb_->RemoveRule(rule); } //remove rules from lhs_rules_ aRemoveRule(lhs_rules_[rule->lhs_ * -1] , rule); } void aTextGrammar::RemoveNonterminal(WordID wordID){ vector rules = nt_rules_[wordID]; // // remove the nonterminal from ntrules_ nt_rules_.erase(wordID); for (int i =0; i & nts){ vector rules = nt_rules_[nt_old]; // cout<<"\n\n\n start add splitting rules"< ntPos = old_rule.ntPos_; //in rule old_rule, ntPos is the positions of nonterminal nt_old //we have to substitute each nt in these positions by the list of new nonterminals in the input vector 'nts' //there are cnt =size_of(nts)^ size_of(ntPos) possibilities for the substitutions, //hence the rules' new probabilities have to divide to cnt also // cout<<"splitting NT in rule "<AsString()< e_ = (old_rule.rule_)->e_; newrule -> f_ = old_rule.rule_->f_; newrule->lhs_ = old_rule.rule_->lhs_; newrule -> arity_ = old_rule.rule_->arity_; newrule -> scores_ = old_rule.rule_->scores_; // cout<<"end up update score\n"; if (ntPos[0] == -1){ //update the lhs newrule->lhs_ = nts[j_vector[0]] * -1; //score has to randomly add/minus a small epsilon to break the balance if (nts.size() >1 && ntPos.size() >1){ // cout<<"start to add/minus epsilon"<lhs_] >= cnt_newrules / (2*ntPos.size()) ) //there are enough rules added epsilon, the new rules has to minus epsilon newrule-> scores_ -= epsilon; else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules / (2*ntPos.size()) ) newrule-> scores_ += epsilon; else{ double random = rand()/RAND_MAX; if (random > .5){ newrule-> scores_ += epsilon; cnt_addepsilon[newrule->lhs_]++; } else{ newrule-> scores_ -= epsilon; cnt_minusepsilon[newrule->lhs_]++; } } } for (int k=1; klhs_] >= cnt_newrules / 2 ) //there are enough rules added epsilon, the new rules has to minus epsilon newrule-> scores_ -= epsilon; else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules /2 ) newrule-> scores_ += epsilon; else{ double random = rand()/RAND_MAX; if (random > .5){ newrule-> scores_ += epsilon; cnt_addepsilon[newrule->lhs_]++; } else{ newrule-> scores_ -= epsilon; cnt_minusepsilon[newrule->lhs_]++; } } } for (int k=0; k X1; X->X2,... if X is the goal NT for (int i =0; ilhs_ = goalID * -1; rule ->f_.push_back(v_splits[i] * -1); rule->e_.push_back(0); rule->scores_.set_value(FD::Convert("MinusLogP"), log(v_splits.size()) ); AddRule(rule); } } } void aTextGrammar::PrintAllRules() const{ map >::const_iterator it; for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){ vector v = it-> second; for (int i =0; i< v.size(); i++){ cout<AsString()<<"\t"< v; map >::const_iterator mit= nt_rules_.find(nt); if (mit == nt_rules_.end()) return; v = mit->second; for (vector::const_iterator it = v.begin(); it != v.end(); it++) cout<rule_->AsString()<(extra)->AddRule(new_rule); } void aTextGrammar::ReadFromFile(const string& filename) { ReadFile in(filename); RuleLexer::ReadRules(in.stream(), &AddRuleHelper, this); } bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const { return (max_span_ >= distance); }