// Source: decoder/scfg_translator.cc (blob 866c272145f7d3c2045d0f5c89e92c43eccfe6e9)
//TODO: bottom-up pruning, with actual final models' (appropriately weighted) heuristics and local scores.

//TODO: grammar heuristic (min cost of reachable rule set) for binarizations (active edges) if we wish to prune those also

#include "translator.h"

#include <vector>

#include "hg.h"
#include "grammar.h"
#include "bottom_up_parser.h"
#include "sentence_metadata.h"

using namespace std;
// True while a sentence-specific grammar (requested through the "grammar" key
// of the sentence markup) sits at the back of the translator's grammar list.
// Written by ProcessMarkupHintsImpl and read by SentenceCompleteImpl. Being a
// file-scope static, it is shared by every SCFGTranslator in this file.
static bool usingSentenceGrammar = false;
// Debug switch: when true, Translate prints the name of every grammar used for
// the current sentence to stderr. No visible code in this file changes it, so
// it has to be edited here to enable the listing.
static bool printGrammarsUsed = false;

/*
Backend of SCFGTranslator: owns the grammars configured at start-up and parses
each input sentence/lattice into a translation forest.
*/
struct SCFGTranslatorImpl {
  /*
  Reads the SCFG options from conf and builds the persistent grammar list, in
  this order:
    - one TextGrammar per file listed under "grammar" (if the option is given);
    - a GlueGrammar built from goal/default_nt, unless
      "scfg_no_hiero_glue_grammar" is present;
    - a GlueGrammar loaded from "scfg_extra_glue_grammar" (if given).
  "scfg_max_span_limit", "goal" and "scfg_default_nt" are read unconditionally.
  */
  SCFGTranslatorImpl(const boost::program_options::variables_map& conf) :
      max_span_limit(conf["scfg_max_span_limit"].as<int>()),
      add_pass_through_rules(conf.count("add_pass_through_rules") != 0),
      goal(conf["goal"].as<string>()),
      default_nt(conf["scfg_default_nt"].as<string>()) {
    if (conf.count("grammar")) {
      // Bind by const reference: conf outlives this constructor, so no copy of
      // the file list is needed.
      const vector<string>& gfiles = conf["grammar"].as<vector<string> >();
      // Unsigned index avoids a signed/unsigned comparison against size().
      for (unsigned i = 0; i < gfiles.size(); ++i) {
        cerr << "Reading SCFG grammar from " << gfiles[i] << endl;
        TextGrammar* g = new TextGrammar(gfiles[i]);
        g->SetMaxSpan(max_span_limit);
        g->SetGrammarName(gfiles[i]);
        grammars.push_back(GrammarPtr(g));
      }
    }
    if (!conf.count("scfg_no_hiero_glue_grammar")) {
      GlueGrammar* g = new GlueGrammar(goal, default_nt);
      g->SetGrammarName("GlueGrammar");
      grammars.push_back(GrammarPtr(g));
      cerr << "Adding glue grammar" << endl;
    }
    if (conf.count("scfg_extra_glue_grammar")) {
      GlueGrammar* g = new GlueGrammar(conf["scfg_extra_glue_grammar"].as<string>());
      g->SetGrammarName("ExtraGlueGrammar");
      grammars.push_back(GrammarPtr(g));
      cerr << "Adding extra glue grammar" << endl;
    }
  }

  const int max_span_limit;           // value of "scfg_max_span_limit", applied to every TextGrammar
  const bool add_pass_through_rules;  // true when "add_pass_through_rules" was given
  const string goal;                  // value of "goal"; passed to the glue grammar and the parser
  const string default_nt;            // value of "scfg_default_nt"; used by glue and pass-through grammars
  vector<GrammarPtr> grammars;        // grammars that persist across sentences

  /*
  Parses one input into forest.
    input   - source text or PLF lattice, converted by LatticeTools::ConvertTextOrPLF
    smeta   - receives the source lattice and its length
    weights - passed to forest->Reweight after a successful parse
    forest  - output hypergraph
  Returns false when the parser reports failure (forest is then not
  reweighted), true otherwise.
  */
  bool Translate(const string& input,
                 SentenceMetadata* smeta,
                 const vector<double>& weights,
                 Hypergraph* forest) {
    // Work on a per-sentence copy so the pass-through grammar added below
    // never ends up in the persistent list.
    vector<GrammarPtr> glist = grammars;
    Lattice& lattice = smeta->src_lattice_;
    LatticeTools::ConvertTextOrPLF(input, &lattice);
    smeta->SetSourceLength(lattice.size());
    if (add_pass_through_rules) {
      PassThroughGrammar* g = new PassThroughGrammar(lattice, default_nt);
      g->SetGrammarName("PassThrough");
      glist.push_back(GrammarPtr(g));
      cerr << "Adding pass through grammar" << endl;
    }

    // Optional debug listing of the grammars in effect for this sentence.
    if (printGrammarsUsed) {
      for (unsigned gi = 0; gi < glist.size(); ++gi) {
        cerr << "Using grammar::" << glist[gi]->GetGrammarName() << endl;
      }
    }

    ExhaustiveBottomUpParser parser(goal, glist);
    if (!parser.Parse(lattice, forest))
      return false;
    forest->Reweight(weights);
    return true;
  }
};

/*
Builds the SCFG translation backend from the decoder configuration
(invoked once from cdec.cc during start-up). All set-up work is done
by the SCFGTranslatorImpl constructor.
*/
SCFGTranslator::SCFGTranslator(const boost::program_options::variables_map& conf)
    : pimpl_(new SCFGTranslatorImpl(conf)) {
}

/*
Called for each sentence to perform translation using the SCFG backend
*/
bool SCFGTranslator::TranslateImpl(const string& input,
                               SentenceMetadata* smeta,
                               const vector<double>& weights,
                               Hypergraph* minus_lm_forest) {

  return pimpl_->Translate(input, smeta, weights, minus_lm_forest);
}

/*
Check for grammar pointer in the sentence markup, for use with sentence specific grammars
 */
void SCFGTranslator::ProcessMarkupHintsImpl(const map<string, string>& kv) {
  map<string,string>::const_iterator it = kv.find("grammar");


  if (it == kv.end()) {
    usingSentenceGrammar= false;
    return;
  }
  //Create sentence specific grammar from specified file name and load grammar into list of grammars
  cerr << "Loading sentence grammar from:" << it->second <<  endl;
  usingSentenceGrammar = true;
  TextGrammar* sentGrammar = new TextGrammar(it->second);
  sentGrammar->SetMaxSpan(pimpl_->max_span_limit);
  sentGrammar->SetGrammarName(it->second);
  pimpl_->grammars.push_back(GrammarPtr(sentGrammar));

}

/*
Removes the sentence-specific grammar, if one was installed for the sentence
that just finished, from the back of the backend's grammar list.
*/
void SCFGTranslator::SentenceCompleteImpl() {
  if (!usingSentenceGrammar)
    return;

  cerr << "Clearing grammar" << endl;
  // pop_back on an empty vector is undefined behaviour, so guard it.
  if (!pimpl_->grammars.empty())
    pimpl_->grammars.pop_back();
  // Clear the flag so a repeated completion call cannot pop a permanent grammar.
  usingSentenceGrammar = false;
}