From 9966c9c3bed1707fa5dbe48e8cb73c90db60cef2 Mon Sep 17 00:00:00 2001 From: graehl Date: Mon, 5 Jul 2010 16:01:51 +0000 Subject: TODO (mostly low priority) notes git-svn-id: https://ws10smt.googlecode.com/svn/trunk@128 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/ff.cc | 2 ++ decoder/ff_lm.cc | 2 ++ decoder/hg.cc | 2 ++ decoder/scfg_translator.cc | 20 ++++++++++++-------- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/decoder/ff.cc b/decoder/ff.cc index 61f4f0b6..95bd9665 100644 --- a/decoder/ff.cc +++ b/decoder/ff.cc @@ -1,3 +1,5 @@ +//TODO: 0 size state != rule-local feature, i.e. still may depend on source span loc/context. identify truly rule-local features so if we want they can be added to grammar rules (minor speedup) + #include "ff.h" #include "tdict.h" diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc index 66b00448..21c05cf2 100644 --- a/decoder/ff_lm.cc +++ b/decoder/ff_lm.cc @@ -1,3 +1,5 @@ +//TODO: allow features to reorder by heuristic*weight the rules' terminal phrases (or of hyperedges'). if first pass has pruning, then compute over whole ruleset as part of heuristic + #include "ff_lm.h" #include diff --git a/decoder/hg.cc b/decoder/hg.cc index 4da0beb3..e57f7807 100644 --- a/decoder/hg.cc +++ b/decoder/hg.cc @@ -1,3 +1,5 @@ +//TODO: lazily generate feature vectors for hyperarcs (because some of them will be pruned). this means 1) storing ref to rule for those features 2) providing ff interface for regenerating its feature vector from hyperedge+states and probably 3) still caching feat. vect on hyperedge once it's been generated. ff would normally just contribute its weighted score and result state, not component features. + #include "hg.h" #include diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index c215eea6..866c2721 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -1,3 +1,7 @@ +//TODO: bottom-up pruning, with actual final models' (appropriately weighted) heuristics and local scores. + +//TODO: grammar heuristic (min cost of reachable rule set) for binarizations (active edges) if we wish to prune those also + #include "translator.h" #include @@ -26,11 +30,11 @@ struct SCFGTranslatorImpl { g->SetMaxSpan(max_span_limit); g->SetGrammarName(gfiles[i]); grammars.push_back(GrammarPtr(g)); - + } } if (!conf.count("scfg_no_hiero_glue_grammar")) - { + { GlueGrammar* g = new GlueGrammar(goal, default_nt); g->SetGrammarName("GlueGrammar"); grammars.push_back(GrammarPtr(g)); @@ -39,7 +43,7 @@ struct SCFGTranslatorImpl { if (conf.count("scfg_extra_glue_grammar")) { GlueGrammar* g = new GlueGrammar(conf["scfg_extra_glue_grammar"].as()); - g->SetGrammarName("ExtraGlueGrammar"); + g->SetGrammarName("ExtraGlueGrammar"); grammars.push_back(GrammarPtr(g)); cerr << "Adding extra glue grammar" << endl; } @@ -69,7 +73,7 @@ struct SCFGTranslatorImpl { if(printGrammarsUsed){ //Iterate trough grammars we have for this sentence and list them - for (int gi = 0; gi < glist.size(); ++gi) + for (int gi = 0; gi < glist.size(); ++gi) { cerr << "Using grammar::" << glist[gi]->GetGrammarName() << endl; } @@ -96,7 +100,7 @@ bool SCFGTranslator::TranslateImpl(const string& input, SentenceMetadata* smeta, const vector& weights, Hypergraph* minus_lm_forest) { - + return pimpl_->Translate(input, smeta, weights, minus_lm_forest); } @@ -105,10 +109,10 @@ Check for grammar pointer in the sentence markup, for use with sentence specific */ void SCFGTranslator::ProcessMarkupHintsImpl(const map& kv) { map::const_iterator it = kv.find("grammar"); - - + + if (it == kv.end()) { - usingSentenceGrammar= false; + usingSentenceGrammar= false; return; } //Create sentence specific grammar from specified file name and load grammar into list of grammars -- cgit v1.2.3