diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-05-31 13:57:24 +0200 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-05-31 13:57:24 +0200 |
commit | 6f6601111710aa67eee5169e5b7d89102cc33bb8 (patch) | |
tree | 0872544abd6bc76162f3f80eb3920999afbf2c34 /gi/scfg/abc/old_agrammar.cc | |
parent | 8cee8b565a9c56a7732365e9563f52ff3c4ff7fd (diff) | |
parent | 090a64e73f94a6a35e5364a9d416dcf75c0a2938 (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'gi/scfg/abc/old_agrammar.cc')
-rw-r--r-- | gi/scfg/abc/old_agrammar.cc | 383 |
1 files changed, 0 insertions, 383 deletions
diff --git a/gi/scfg/abc/old_agrammar.cc b/gi/scfg/abc/old_agrammar.cc deleted file mode 100644 index 33d70dfc..00000000 --- a/gi/scfg/abc/old_agrammar.cc +++ /dev/null @@ -1,383 +0,0 @@ -#include "agrammar.h" -#include "Util.h" - -#include <algorithm> -#include <utility> -#include <map> - -#include "rule_lexer.h" -#include "filelib.h" -#include "tdict.h" -#include <iostream> -#include <fstream> - -map<WordID, vector<WordID> > grSplitNonterminals; -//const vector<TRulePtr> Grammar::NO_RULES; - - -// vector<TRulePtr> substituteF(TRulePtr & rule, WordID wordID, vector<WordID> & v){ -// vector<TRulePtr> vRules; //outputs - -// vector<WordID> f = rule->f(); -// vector<vector<WordID> > newfvector; -// for (int i =0; i< f.size(); i++){ -// if (f[i] == wordID){ -// newfvector.push_back(v); -// } -// else -// newfvector.push_back(vector<WordID> (1, f[i])); -// } - -// //now creates new rules; - - -// return vRules; -// } - - -struct aTextRuleBin : public RuleBin { - int GetNumRules() const { - return rules_.size(); - } - TRulePtr GetIthRule(int i) const { - return rules_[i]; - } - void AddRule(TRulePtr t) { - rules_.push_back(t); - } - int Arity() const { - return rules_.front()->Arity(); - } - void Dump() const { - for (int i = 0; i < rules_.size(); ++i) - cerr << rules_[i]->AsString() << endl; - } - - - vector<TRulePtr> getRules(){ return rules_;} - - - void substituteF(vector<WordID> & f_path, map<WordID, vector<WordID> > & grSplitNonterminals){ - //this substituteF method is different with substituteF procedure found in cdec code; - // - //aTextRuleBin has a collection of rules with the same f() on the rhs, - //substituteF() replaces the f_ of all the rules with f_path vector, - //the grSplitNonterminals input to split the lhs_ nonterminals of the rules incase the lhs_ nonterminal found in grSplitNonterminals - - vector <TRulePtr> newrules; - for (vector<TRulePtr>::iterator it = rules_.begin() ; it != rules_.end(); it++){ - assert(f_path.size() == (*it)->f_.size()); - - if (grSplitNonterminals.find( (*it)->lhs_) == grSplitNonterminals.end()){ - (*it)->f_ = f_path; - } - else{ // split the lhs NT, - vector<WordID> new_lhs = grSplitNonterminals[ (*it)->lhs_ ]; - for (vector<WordID>::iterator vit = new_lhs.begin(); vit != new_lhs.end(); vit++){ - TRulePtr newrule; - newrule -> e_ = (*it)->e_; - newrule -> f_ = (*it)->f_; - newrule->lhs_ = *vit; - newrule -> scores_ = (*it)->scores_; - newrule -> arity_ = (*it)->arity_; - newrules.push_back (newrule); - } - rules_.erase(it); - } - } - - //now add back newrules(output of splitting lhs_) to rules_ - rules_.insert(newrules.begin(),newrules.begin(), newrules.end()); - } - -private: - vector<TRulePtr> rules_; -}; - - - -struct aTextGrammarNode : public GrammarIter { - aTextGrammarNode() : rb_(NULL) {} - - aTextGrammarNode(const aTextGrammarNode & a){ - nonterminals_ = a.nonterminals_; - tree_ = a.tree_; - rb_ = new aTextRuleBin(); //cp constructor: don't cp the set of rules over - } - - ~aTextGrammarNode() { - delete rb_; - } - const GrammarIter* Extend(int symbol) const { - map<WordID, aTextGrammarNode>::const_iterator i = tree_.find(symbol); - if (i == tree_.end()) return NULL; - return &i->second; - } - - const RuleBin* GetRules() const { - if (rb_) { - //rb_->Dump(); - } - return rb_; - } - - void DFS(); - - void visit (); //todo: make this as a function pointer - - vector <WordID > path_; //vector of f_ nonterminals/terminals from the top to the current node; - set<WordID> nonterminals_; //Linh added: the set of nonterminals extend the current TextGrammarNode, WordID is the label in the dict; i.e WordID>0 - map<WordID, aTextGrammarNode> tree_; - aTextRuleBin* rb_; - - void print_path(){ //for debug only - cout<<"path="<<endl; - for (int i =0; i< path_.size(); i++) - cout<<path_[i]<<" "; - cout<<endl; - } -}; - -void aTextGrammarNode::DFS(){ //because the grammar is a tree without circle, DFS does not require to color the nodes - - visit(); - - for (map<WordID, aTextGrammarNode>::iterator it = tree_.begin(); it != tree_.end(); it++){ - (it->second).DFS(); - } -} - - -void aTextGrammarNode::visit( ){ - - cout<<"start visit()"<<endl; - - cout<<"got grSplitNonterminals"<<endl; -// if (grSplitNonterminals.find(*it) != grSplitNonterminals.end()){ //split this *it nonterminal -// vector<WordID> vsplits = grSplitNonterminals[*it]; //split *it into vsplits - - //iterate through next terminals/nonterminals in tree_ - vector<WordID> tobe_removedNTs; //the list of nonterminal children in tree_ were splited hence will be removed from tree_ - - for (map<WordID, aTextGrammarNode>::iterator it = tree_.begin() ; it != tree_.end(); it++){ - cout<<"in visit(): inside for loop: wordID=="<<it->first<<endl; - - map<WordID, vector<WordID> >::const_iterator git = grSplitNonterminals.find(it->first * -1 ); - - if (git == grSplitNonterminals.end() || it->first >0){ //the next symbols is not to be split - cout<<"not split\n"; - tree_[it->first ].path_ = path_; - tree_[it->first ].path_.push_back(it->first); - cout<<"in visit() tree_[it->first ].path_= "; - tree_[it->first ].print_path(); - continue; - } - - - cout<<"tmp2"; - vector<WordID> vsplits = grSplitNonterminals[it->first * -1]; - // vector<WordID> vsplits = git->second; - cout<<"tmp3"; - // vector<WordID> vsplits = agrammar_ ->splitNonterminals_[it->first * -1]; - cout <<"got vsplits"<<endl; - for (int i =0 ; i<vsplits.size(); i++){ - // nonterminals_.insert(vsplits[i]); //add vsplits[i] into nonterminals_ of the current TextGrammarNode - tree_[vsplits[i] * -1] = aTextGrammarNode(tree_[it->first]); //cp the subtree to new nonterminal - tree_[vsplits[i] * -1].path_ = path_; //update the path if the subtrees - tree_[vsplits[i] * -1].path_.push_back(vsplits[i] * -1); - tree_[vsplits[i] * -1].print_path(); - } - - //remove the old node: - tobe_removedNTs.push_back(it->first); - - } - - for (int i =0; i<tobe_removedNTs.size(); i++) - tree_.erase(tobe_removedNTs[i]); - - if (tree_.size() ==0){ //the last (terminal/nonterminal - cout<<"inside visit(): the last terminal/nonterminal"<<endl; - rb_->substituteF(path_, grSplitNonterminals); - - } - cout<<"visit() end"<<endl; -} - -struct aTGImpl { - aTextGrammarNode root_; -}; - -aTextGrammar::aTextGrammar() : max_span_(10), pimpl_(new aTGImpl) {} -aTextGrammar::aTextGrammar(const std::string& file) : - max_span_(10), - pimpl_(new aTGImpl) { - ReadFromFile(file); -} - - -const GrammarIter* aTextGrammar::GetRoot() const { - return &pimpl_->root_; -} - - -void aTextGrammar::addNonterminal(WordID wordID){ - //addNonterminal add the nonterminal wordID (wordID<0) to the list of nonterminals (map<WordID, int>) nonterminals_ of grammar - //if the input parameter wordID<0 then do nothing - - if (wordID <0){ //it is a nonterminal - - map<WordID, int>::iterator it = nonterminals_.find(wordID * -1); - if (it == nonterminals_.end()) //if not found in the list of nonterminals(a new nonterminals) - nonterminals_[wordID * -1] = 1; - } -} - - - -void aTextGrammar::AddRule(const TRulePtr& rule) { - //add the LHS nonterminal to nonterminals_ map - - this->addNonterminal(rule->lhs_); - - if (rule->IsUnary()) { - rhs2unaries_[rule->f().front()].push_back(rule); - unaries_.push_back(rule); - if (rule->f().front() <0) - //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a nonterminal then multiply by -1) - this->addNonterminal(rule->f().front()); - - - } else { - aTextGrammarNode* cur = &pimpl_->root_; - for (int i = 0; i < rule->f_.size(); ++i){ - if (rule->f_[i] <0){ - cur->nonterminals_.insert(rule->f_[i] * -1); //add the next(extend) nonterminals to the current node's nonterminals_ set - this->addNonterminal(rule->f_[i]); //add the rhs nonterminal to the grammar's list of nonterminals - } - cur = &cur->tree_[rule->f_[i]]; - - } - if (cur->rb_ == NULL) - cur->rb_ = new aTextRuleBin; - cur->rb_->AddRule(rule); - - } -} - -static void aAddRuleHelper(const TRulePtr& new_rule, void* extra) { - static_cast<aTextGrammar*>(extra)->AddRule(new_rule); -} - - -void aTextGrammar::ReadFromFile(const string& filename) { - ReadFile in(filename); - RuleLexer::ReadRules(in.stream(), &aAddRuleHelper, this); -} - -bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const { - return (max_span_ >= distance); -} - - -////Linh added - -void aTextGrammar::setMaxSplit(int max_split){max_split_ = max_split;} - - -void aTextGrammar::printAllNonterminals() const{ - for (map<WordID, int>::const_iterator it =nonterminals_.begin(); - it != nonterminals_.end(); it++){ - if (it->second >0){ - cout <<it->first<<"\t"<<TD::Convert(it->first)<<endl; - } - } - -} - - -void aTextGrammar::splitNonterminal(WordID wordID){ - - //first added the splits nonterminal into the TD dictionary - - string old_str = TD::Convert(wordID); //get the nonterminal label of wordID, the new nonterminals will be old_str+t where t=1..max_split - - vector<WordID> v_splits;//split nonterminal wordID into the list of nonterminals in v_splits - for (int i =0; i< this->max_split_; i++){ - string split_str = old_str + "+" + itos(i); - WordID splitID = TD::Convert(split_str); - v_splits.push_back(splitID); - nonterminals_[splitID] = 1; - } - - grSplitNonterminals[wordID] = v_splits; - //set wordID to be an inactive nonterminal - nonterminals_[wordID] = 0; - - //print split nonterminas of wordID - v_splits = grSplitNonterminals[wordID]; - cout<<"print split nonterminals\n"; - for (int i =0; i<v_splits.size(); i++) - cout<<v_splits[i]<<"\t"<<TD::Convert(v_splits[i])<<endl; - - - //now update in grammar rules and gramar tree: - vector<TRulePtr> newrules; - //first unary rules: - //iterate through unary rules - for (int i =0; i < unaries_.size(); i++){ - TRulePtr rule = unaries_[i]; - WordID lhs = rule.lhs_; - if (grSplitNonterminals.find(rule->f().front() ) != grSplitNonterminals.end()//if the rhs is in the list of splitting nonterminal - && grSplitNonterminals.find(lhs ) != grSplitNonterminals.end() //and the lhs is in the list of splitting nonterminal too - ){ - vector<WordID> rhs_nonterminals = grSplitNonterminals[rule->f().front()]; //split the rhs nonterminal into the list of nonterminals in 'rhs_nonterminals' - vector<WordID> lhs_nonterminals = grSplitNonterminals[lhs]; //split the rhs nonterminal into the list of nonterminals in 'lhs_nonterminals' - for (int k =0; k <rhs_nonterminals.size(); k++) - for (int j =0; j <lhs_nonterminals.size(); j++){ - TRulePtr newrule; - newrule -> e_ = rule->e_; - newrule -> f_ = rhs_nonterminals[k]->f_; - newrule->lhs_ = lhs_nonterminals[j]->lhs_; - newrule -> scores_ = rule->scores_; - newrule -> arity_ = (*it)->arity_; - newrules.push_back (newrule); - - //update - } - } - else{//the rhs terminal/nonterminal is not in the list of splitting nonterminal - - - } - } - - // for (Cat2Rule::const_iterator it = rhs2unaries_.begin(); it != rhs2unaries_.end(); it++){ - - // } - // if (rule->IsUnary()) { - // rhs2unaries_[rule->f().front()].push_back(rule); - // unaries_.push_back(rule); - // if (rule->f().front() <0) - // //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a nonterminal then multiply by -1) - // this->addNonterminal(rule->f().front()); - - - pimpl_->root_.DFS(); - -} - - -// void aTextGrammar::splitNonterminal0(WordID wordID){ - -// TextGrammarNode* cur = &pimpl_->root_; -// for (int i = 0; i < rule->f_.size(); ++i) -// cur = &cur->tree_[rule->f_[i]]; - -// } - -void aTextGrammar::splitAllNonterminals(){ - - -} - |