summaryrefslogtreecommitdiff
path: root/gi/scfg/abc/old_agrammar.cc
diff options
context:
space:
mode:
authorPatrick Simianer <simianer@cl.uni-heidelberg.de>2012-05-31 13:57:24 +0200
committerPatrick Simianer <simianer@cl.uni-heidelberg.de>2012-05-31 13:57:24 +0200
commit6f6601111710aa67eee5169e5b7d89102cc33bb8 (patch)
tree0872544abd6bc76162f3f80eb3920999afbf2c34 /gi/scfg/abc/old_agrammar.cc
parent8cee8b565a9c56a7732365e9563f52ff3c4ff7fd (diff)
parent090a64e73f94a6a35e5364a9d416dcf75c0a2938 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'gi/scfg/abc/old_agrammar.cc')
-rw-r--r--gi/scfg/abc/old_agrammar.cc383
1 files changed, 0 insertions, 383 deletions
diff --git a/gi/scfg/abc/old_agrammar.cc b/gi/scfg/abc/old_agrammar.cc
deleted file mode 100644
index 33d70dfc..00000000
--- a/gi/scfg/abc/old_agrammar.cc
+++ /dev/null
@@ -1,383 +0,0 @@
-#include "agrammar.h"
-#include "Util.h"
-
-#include <algorithm>
-#include <utility>
-#include <map>
-
-#include "rule_lexer.h"
-#include "filelib.h"
-#include "tdict.h"
-#include <iostream>
-#include <fstream>
-
-map<WordID, vector<WordID> > grSplitNonterminals;
-//const vector<TRulePtr> Grammar::NO_RULES;
-
-
-// vector<TRulePtr> substituteF(TRulePtr & rule, WordID wordID, vector<WordID> & v){
-// vector<TRulePtr> vRules; //outputs
-
-// vector<WordID> f = rule->f();
-// vector<vector<WordID> > newfvector;
-// for (int i =0; i< f.size(); i++){
-// if (f[i] == wordID){
-// newfvector.push_back(v);
-// }
-// else
-// newfvector.push_back(vector<WordID> (1, f[i]));
-// }
-
-// //now creates new rules;
-
-
-// return vRules;
-// }
-
-
-struct aTextRuleBin : public RuleBin {
- int GetNumRules() const {
- return rules_.size();
- }
- TRulePtr GetIthRule(int i) const {
- return rules_[i];
- }
- void AddRule(TRulePtr t) {
- rules_.push_back(t);
- }
- int Arity() const {
- return rules_.front()->Arity();
- }
- void Dump() const {
- for (int i = 0; i < rules_.size(); ++i)
- cerr << rules_[i]->AsString() << endl;
- }
-
-
- vector<TRulePtr> getRules(){ return rules_;}
-
-
- void substituteF(vector<WordID> & f_path, map<WordID, vector<WordID> > & grSplitNonterminals){
- //this substituteF method is different with substituteF procedure found in cdec code;
- //
- //aTextRuleBin has a collection of rules with the same f() on the rhs,
- //substituteF() replaces the f_ of all the rules with f_path vector,
- //the grSplitNonterminals input to split the lhs_ nonterminals of the rules incase the lhs_ nonterminal found in grSplitNonterminals
-
- vector <TRulePtr> newrules;
- for (vector<TRulePtr>::iterator it = rules_.begin() ; it != rules_.end(); it++){
- assert(f_path.size() == (*it)->f_.size());
-
- if (grSplitNonterminals.find( (*it)->lhs_) == grSplitNonterminals.end()){
- (*it)->f_ = f_path;
- }
- else{ // split the lhs NT,
- vector<WordID> new_lhs = grSplitNonterminals[ (*it)->lhs_ ];
- for (vector<WordID>::iterator vit = new_lhs.begin(); vit != new_lhs.end(); vit++){
- TRulePtr newrule;
- newrule -> e_ = (*it)->e_;
- newrule -> f_ = (*it)->f_;
- newrule->lhs_ = *vit;
- newrule -> scores_ = (*it)->scores_;
- newrule -> arity_ = (*it)->arity_;
- newrules.push_back (newrule);
- }
- rules_.erase(it);
- }
- }
-
- //now add back newrules(output of splitting lhs_) to rules_
- rules_.insert(newrules.begin(),newrules.begin(), newrules.end());
- }
-
-private:
- vector<TRulePtr> rules_;
-};
-
-
-
-struct aTextGrammarNode : public GrammarIter {
- aTextGrammarNode() : rb_(NULL) {}
-
- aTextGrammarNode(const aTextGrammarNode & a){
- nonterminals_ = a.nonterminals_;
- tree_ = a.tree_;
- rb_ = new aTextRuleBin(); //cp constructor: don't cp the set of rules over
- }
-
- ~aTextGrammarNode() {
- delete rb_;
- }
- const GrammarIter* Extend(int symbol) const {
- map<WordID, aTextGrammarNode>::const_iterator i = tree_.find(symbol);
- if (i == tree_.end()) return NULL;
- return &i->second;
- }
-
- const RuleBin* GetRules() const {
- if (rb_) {
- //rb_->Dump();
- }
- return rb_;
- }
-
- void DFS();
-
- void visit (); //todo: make this as a function pointer
-
- vector <WordID > path_; //vector of f_ nonterminals/terminals from the top to the current node;
- set<WordID> nonterminals_; //Linh added: the set of nonterminals extend the current TextGrammarNode, WordID is the label in the dict; i.e WordID>0
- map<WordID, aTextGrammarNode> tree_;
- aTextRuleBin* rb_;
-
- void print_path(){ //for debug only
- cout<<"path="<<endl;
- for (int i =0; i< path_.size(); i++)
- cout<<path_[i]<<" ";
- cout<<endl;
- }
-};
-
-void aTextGrammarNode::DFS(){ //because the grammar is a tree without circle, DFS does not require to color the nodes
-
- visit();
-
- for (map<WordID, aTextGrammarNode>::iterator it = tree_.begin(); it != tree_.end(); it++){
- (it->second).DFS();
- }
-}
-
-
-void aTextGrammarNode::visit( ){
-
- cout<<"start visit()"<<endl;
-
- cout<<"got grSplitNonterminals"<<endl;
-// if (grSplitNonterminals.find(*it) != grSplitNonterminals.end()){ //split this *it nonterminal
-// vector<WordID> vsplits = grSplitNonterminals[*it]; //split *it into vsplits
-
- //iterate through next terminals/nonterminals in tree_
- vector<WordID> tobe_removedNTs; //the list of nonterminal children in tree_ were splited hence will be removed from tree_
-
- for (map<WordID, aTextGrammarNode>::iterator it = tree_.begin() ; it != tree_.end(); it++){
- cout<<"in visit(): inside for loop: wordID=="<<it->first<<endl;
-
- map<WordID, vector<WordID> >::const_iterator git = grSplitNonterminals.find(it->first * -1 );
-
- if (git == grSplitNonterminals.end() || it->first >0){ //the next symbols is not to be split
- cout<<"not split\n";
- tree_[it->first ].path_ = path_;
- tree_[it->first ].path_.push_back(it->first);
- cout<<"in visit() tree_[it->first ].path_= ";
- tree_[it->first ].print_path();
- continue;
- }
-
-
- cout<<"tmp2";
- vector<WordID> vsplits = grSplitNonterminals[it->first * -1];
- // vector<WordID> vsplits = git->second;
- cout<<"tmp3";
- // vector<WordID> vsplits = agrammar_ ->splitNonterminals_[it->first * -1];
- cout <<"got vsplits"<<endl;
- for (int i =0 ; i<vsplits.size(); i++){
- // nonterminals_.insert(vsplits[i]); //add vsplits[i] into nonterminals_ of the current TextGrammarNode
- tree_[vsplits[i] * -1] = aTextGrammarNode(tree_[it->first]); //cp the subtree to new nonterminal
- tree_[vsplits[i] * -1].path_ = path_; //update the path if the subtrees
- tree_[vsplits[i] * -1].path_.push_back(vsplits[i] * -1);
- tree_[vsplits[i] * -1].print_path();
- }
-
- //remove the old node:
- tobe_removedNTs.push_back(it->first);
-
- }
-
- for (int i =0; i<tobe_removedNTs.size(); i++)
- tree_.erase(tobe_removedNTs[i]);
-
- if (tree_.size() ==0){ //the last (terminal/nonterminal
- cout<<"inside visit(): the last terminal/nonterminal"<<endl;
- rb_->substituteF(path_, grSplitNonterminals);
-
- }
- cout<<"visit() end"<<endl;
-}
-
-struct aTGImpl {
- aTextGrammarNode root_;
-};
-
-aTextGrammar::aTextGrammar() : max_span_(10), pimpl_(new aTGImpl) {}
-aTextGrammar::aTextGrammar(const std::string& file) :
- max_span_(10),
- pimpl_(new aTGImpl) {
- ReadFromFile(file);
-}
-
-
-const GrammarIter* aTextGrammar::GetRoot() const {
- return &pimpl_->root_;
-}
-
-
-void aTextGrammar::addNonterminal(WordID wordID){
- //addNonterminal add the nonterminal wordID (wordID<0) to the list of nonterminals (map<WordID, int>) nonterminals_ of grammar
- //if the input parameter wordID<0 then do nothing
-
- if (wordID <0){ //it is a nonterminal
-
- map<WordID, int>::iterator it = nonterminals_.find(wordID * -1);
- if (it == nonterminals_.end()) //if not found in the list of nonterminals(a new nonterminals)
- nonterminals_[wordID * -1] = 1;
- }
-}
-
-
-
-void aTextGrammar::AddRule(const TRulePtr& rule) {
- //add the LHS nonterminal to nonterminals_ map
-
- this->addNonterminal(rule->lhs_);
-
- if (rule->IsUnary()) {
- rhs2unaries_[rule->f().front()].push_back(rule);
- unaries_.push_back(rule);
- if (rule->f().front() <0)
- //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a nonterminal then multiply by -1)
- this->addNonterminal(rule->f().front());
-
-
- } else {
- aTextGrammarNode* cur = &pimpl_->root_;
- for (int i = 0; i < rule->f_.size(); ++i){
- if (rule->f_[i] <0){
- cur->nonterminals_.insert(rule->f_[i] * -1); //add the next(extend) nonterminals to the current node's nonterminals_ set
- this->addNonterminal(rule->f_[i]); //add the rhs nonterminal to the grammar's list of nonterminals
- }
- cur = &cur->tree_[rule->f_[i]];
-
- }
- if (cur->rb_ == NULL)
- cur->rb_ = new aTextRuleBin;
- cur->rb_->AddRule(rule);
-
- }
-}
-
-static void aAddRuleHelper(const TRulePtr& new_rule, void* extra) {
- static_cast<aTextGrammar*>(extra)->AddRule(new_rule);
-}
-
-
-void aTextGrammar::ReadFromFile(const string& filename) {
- ReadFile in(filename);
- RuleLexer::ReadRules(in.stream(), &aAddRuleHelper, this);
-}
-
-bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const {
- return (max_span_ >= distance);
-}
-
-
-////Linh added
-
-void aTextGrammar::setMaxSplit(int max_split){max_split_ = max_split;}
-
-
-void aTextGrammar::printAllNonterminals() const{
- for (map<WordID, int>::const_iterator it =nonterminals_.begin();
- it != nonterminals_.end(); it++){
- if (it->second >0){
- cout <<it->first<<"\t"<<TD::Convert(it->first)<<endl;
- }
- }
-
-}
-
-
-void aTextGrammar::splitNonterminal(WordID wordID){
-
- //first added the splits nonterminal into the TD dictionary
-
- string old_str = TD::Convert(wordID); //get the nonterminal label of wordID, the new nonterminals will be old_str+t where t=1..max_split
-
- vector<WordID> v_splits;//split nonterminal wordID into the list of nonterminals in v_splits
- for (int i =0; i< this->max_split_; i++){
- string split_str = old_str + "+" + itos(i);
- WordID splitID = TD::Convert(split_str);
- v_splits.push_back(splitID);
- nonterminals_[splitID] = 1;
- }
-
- grSplitNonterminals[wordID] = v_splits;
- //set wordID to be an inactive nonterminal
- nonterminals_[wordID] = 0;
-
- //print split nonterminas of wordID
- v_splits = grSplitNonterminals[wordID];
- cout<<"print split nonterminals\n";
- for (int i =0; i<v_splits.size(); i++)
- cout<<v_splits[i]<<"\t"<<TD::Convert(v_splits[i])<<endl;
-
-
- //now update in grammar rules and gramar tree:
- vector<TRulePtr> newrules;
- //first unary rules:
- //iterate through unary rules
- for (int i =0; i < unaries_.size(); i++){
- TRulePtr rule = unaries_[i];
- WordID lhs = rule.lhs_;
- if (grSplitNonterminals.find(rule->f().front() ) != grSplitNonterminals.end()//if the rhs is in the list of splitting nonterminal
- && grSplitNonterminals.find(lhs ) != grSplitNonterminals.end() //and the lhs is in the list of splitting nonterminal too
- ){
- vector<WordID> rhs_nonterminals = grSplitNonterminals[rule->f().front()]; //split the rhs nonterminal into the list of nonterminals in 'rhs_nonterminals'
- vector<WordID> lhs_nonterminals = grSplitNonterminals[lhs]; //split the rhs nonterminal into the list of nonterminals in 'lhs_nonterminals'
- for (int k =0; k <rhs_nonterminals.size(); k++)
- for (int j =0; j <lhs_nonterminals.size(); j++){
- TRulePtr newrule;
- newrule -> e_ = rule->e_;
- newrule -> f_ = rhs_nonterminals[k]->f_;
- newrule->lhs_ = lhs_nonterminals[j]->lhs_;
- newrule -> scores_ = rule->scores_;
- newrule -> arity_ = (*it)->arity_;
- newrules.push_back (newrule);
-
- //update
- }
- }
- else{//the rhs terminal/nonterminal is not in the list of splitting nonterminal
-
-
- }
- }
-
- // for (Cat2Rule::const_iterator it = rhs2unaries_.begin(); it != rhs2unaries_.end(); it++){
-
- // }
- // if (rule->IsUnary()) {
- // rhs2unaries_[rule->f().front()].push_back(rule);
- // unaries_.push_back(rule);
- // if (rule->f().front() <0)
- // //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a nonterminal then multiply by -1)
- // this->addNonterminal(rule->f().front());
-
-
- pimpl_->root_.DFS();
-
-}
-
-
-// void aTextGrammar::splitNonterminal0(WordID wordID){
-
-// TextGrammarNode* cur = &pimpl_->root_;
-// for (int i = 0; i < rule->f_.size(); ++i)
-// cur = &cur->tree_[rule->f_[i]];
-
-// }
-
-void aTextGrammar::splitAllNonterminals(){
-
-
-}
-