summary | refs | log | tree | commit | diff
path: root/decoder
diff options
context:
space:
mode:
author: Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-04-07 16:58:55 +0200
committer: Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-04-07 16:58:55 +0200
commit: 715245dc7042ac0dca4fea94031d7c6de8058033 (patch)
tree: 3a7ff0b88f2e113a08aef663d2487edec0b5f67f /decoder
parent: 89211ab30937672d84a54fac8fa435805499e38d (diff)
parent: 6001b81eba37985d2e7dea6e6ebb488b787789a6 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'decoder')
-rw-r--r-- decoder/aligner.cc 4
-rw-r--r-- decoder/decoder.cc 31
-rw-r--r-- decoder/earley_composer.cc 4
-rw-r--r-- decoder/ff_wordalign.cc 1
-rw-r--r-- decoder/grammar.cc 24
-rw-r--r-- decoder/grammar.h 2
-rw-r--r-- decoder/hg_io.cc 20
-rw-r--r-- decoder/hg_io.h 1
-rw-r--r-- decoder/phrasetable_fst.cc 3
9 files changed, 53 insertions, 37 deletions
diff --git a/decoder/aligner.cc b/decoder/aligner.cc
index 53e059fb..232e022a 100644
--- a/decoder/aligner.cc
+++ b/decoder/aligner.cc
@@ -11,7 +11,7 @@
#include "sentence_metadata.h"
#include "inside_outside.h"
#include "viterbi.h"
-#include "alignment_pharaoh.h"
+#include "alignment_io.h"
using namespace std;
@@ -300,7 +300,7 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice,
cerr << grid << endl;
}
(*out) << TD::GetString(src_sent) << " ||| " << TD::GetString(trg_sent) << " ||| ";
- AlignmentPharaoh::SerializePharaohFormat(grid, out);
+ AlignmentIO::SerializePharaohFormat(grid, out);
}
};
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 53c47d21..ec6f75f7 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -57,7 +57,6 @@ static const double kMINUS_EPSILON = -1e-6; // don't be too strict
using namespace std;
using namespace std::tr1;
-using boost::shared_ptr;
namespace po = boost::program_options;
static bool verbose_feature_functions=true;
@@ -101,7 +100,7 @@ inline string str(char const* name,po::variables_map const& conf) {
// print just the --long_opt names suitable for bash compgen
inline void print_options(std::ostream &out,po::options_description const& opts) {
- typedef std::vector< shared_ptr<po::option_description> > Ds;
+ typedef std::vector< boost::shared_ptr<po::option_description> > Ds;
Ds const& ds=opts.options();
out << '"';
for (unsigned i=0;i<ds.size();++i) {
@@ -120,13 +119,13 @@ inline bool store_conf(po::variables_map const& conf,std::string const& name,V *
return false;
}
-inline shared_ptr<FeatureFunction> make_ff(string const& ffp,bool verbose_feature_functions,char const* pre="") {
+inline boost::shared_ptr<FeatureFunction> make_ff(string const& ffp,bool verbose_feature_functions,char const* pre="") {
string ff, param;
SplitCommandAndParam(ffp, &ff, &param);
cerr << pre << "feature: " << ff;
if (param.size() > 0) cerr << " (with config parameters '" << param << "')\n";
else cerr << " (no config parameters)\n";
- shared_ptr<FeatureFunction> pf = ff_registry.Create(ff, param);
+ boost::shared_ptr<FeatureFunction> pf = ff_registry.Create(ff, param);
if (!pf) exit(1);
int nbyte=pf->NumBytesContext();
if (verbose_feature_functions)
@@ -135,13 +134,13 @@ inline shared_ptr<FeatureFunction> make_ff(string const& ffp,bool verbose_featur
}
#ifdef FSA_RESCORING
-inline shared_ptr<FsaFeatureFunction> make_fsa_ff(string const& ffp,bool verbose_feature_functions,char const* pre="") {
+inline boost::shared_ptr<FsaFeatureFunction> make_fsa_ff(string const& ffp,bool verbose_feature_functions,char const* pre="") {
string ff, param;
SplitCommandAndParam(ffp, &ff, &param);
cerr << "FSA Feature: " << ff;
if (param.size() > 0) cerr << " (with config parameters '" << param << "')\n";
else cerr << " (no config parameters)\n";
- shared_ptr<FsaFeatureFunction> pf = fsa_ff_registry.Create(ff, param);
+ boost::shared_ptr<FsaFeatureFunction> pf = fsa_ff_registry.Create(ff, param);
if (!pf) exit(1);
if (verbose_feature_functions)
cerr<<"State is "<<pf->state_bytes()<<" bytes for "<<pre<<"feature "<<ffp<<endl;
@@ -156,10 +155,10 @@ inline shared_ptr<FsaFeatureFunction> make_fsa_ff(string const& ffp,bool verbose
// passes are carried over into subsequent passes (where they may have different weights).
struct RescoringPass {
RescoringPass() : fid_summary(), density_prune(), beam_prune() {}
- shared_ptr<ModelSet> models;
- shared_ptr<IntersectionConfiguration> inter_conf;
+ boost::shared_ptr<ModelSet> models;
+ boost::shared_ptr<IntersectionConfiguration> inter_conf;
vector<const FeatureFunction*> ffs;
- shared_ptr<vector<weight_t> > weight_vector;
+ boost::shared_ptr<vector<weight_t> > weight_vector;
int fid_summary; // 0 == no summary feature
double density_prune; // 0 == don't density prune
double beam_prune; // 0 == don't beam prune
@@ -293,15 +292,15 @@ struct DecoderImpl {
po::variables_map& conf;
OracleBleu oracle;
string formalism;
- shared_ptr<Translator> translator;
- shared_ptr<vector<weight_t> > init_weights; // weights used with initial parse
- vector<shared_ptr<FeatureFunction> > pffs;
+ boost::shared_ptr<Translator> translator;
+ boost::shared_ptr<vector<weight_t> > init_weights; // weights used with initial parse
+ vector<boost::shared_ptr<FeatureFunction> > pffs;
#ifdef FSA_RESCORING
CFGOptions cfg_options;
- vector<shared_ptr<FsaFeatureFunction> > fsa_ffs;
+ vector<boost::shared_ptr<FsaFeatureFunction> > fsa_ffs;
vector<string> fsa_names;
#endif
- shared_ptr<RandomNumberGenerator<boost::mt19937> > rng;
+ boost::shared_ptr<RandomNumberGenerator<boost::mt19937> > rng;
int sample_max_trans;
bool aligner_mode;
bool graphviz;
@@ -310,7 +309,7 @@ struct DecoderImpl {
bool kbest;
bool unique_kbest;
bool get_oracle_forest;
- shared_ptr<WriteFile> extract_file;
+ boost::shared_ptr<WriteFile> extract_file;
int combine_size;
int sent_id;
SparseVector<prob_t> acc_vec; // accumulate gradient
@@ -622,7 +621,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
}
// set up weight vectors since later phases may reuse weights from earlier phases
- shared_ptr<vector<weight_t> > prev_weights = init_weights;
+ boost::shared_ptr<vector<weight_t> > prev_weights = init_weights;
for (int pass = 0; pass < rescoring_passes.size(); ++pass) {
RescoringPass& rp = rescoring_passes[pass];
if (!rp.weight_vector) {
diff --git a/decoder/earley_composer.cc b/decoder/earley_composer.cc
index b7af801a..385baf8b 100644
--- a/decoder/earley_composer.cc
+++ b/decoder/earley_composer.cc
@@ -16,8 +16,6 @@
#include "tdict.h"
#include "hg.h"
-using boost::shared_ptr;
-namespace po = boost::program_options;
using namespace std;
using namespace std::tr1;
@@ -111,7 +109,7 @@ struct Edge {
const Edge* const active_parent; // back pointer, NULL for PREDICT items
const Edge* const passive_parent; // back pointer, NULL for SCAN and PREDICT items
const TargetPhraseSet* const tps; // translations
- shared_ptr<SparseVector<double> > features; // features from CFG rule
+ boost::shared_ptr<SparseVector<double> > features; // features from CFG rule
bool IsPassive() const {
// when a rule is completed, this value will be set
diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc
index 9e7c618e..decdf9bc 100644
--- a/decoder/ff_wordalign.cc
+++ b/decoder/ff_wordalign.cc
@@ -15,7 +15,6 @@
#include "factored_lexicon_helper.h"
#include "verbose.h"
-#include "alignment_pharaoh.h"
#include "stringlib.h"
#include "sentence_metadata.h"
#include "hg.h"
diff --git a/decoder/grammar.cc b/decoder/grammar.cc
index 9e4065a6..714390f0 100644
--- a/decoder/grammar.cc
+++ b/decoder/grammar.cc
@@ -3,12 +3,14 @@
#include <algorithm>
#include <utility>
#include <map>
+#include <tr1/unordered_map>
#include "rule_lexer.h"
#include "filelib.h"
#include "tdict.h"
using namespace std;
+using namespace std::tr1;
const vector<TRulePtr> Grammar::NO_RULES;
@@ -148,24 +150,24 @@ bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const {
return (i == 0);
}
-PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) :
- has_rule_(input.size() + 1) {
+PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) {
+ unordered_set<WordID> ss;
for (int i = 0; i < input.size(); ++i) {
const vector<LatticeArc>& alts = input[i];
for (int k = 0; k < alts.size(); ++k) {
const int j = alts[k].dist2next + i;
- has_rule_[i].insert(j);
const string& src = TD::Convert(alts[k].label);
- TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1"));
- pt->a_.push_back(AlignmentPoint(0,0));
- AddRule(pt);
- RefineRule(pt, ctf_level);
+ if (ss.count(alts[k].label) == 0) {
+ TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1"));
+ pt->a_.push_back(AlignmentPoint(0,0));
+ AddRule(pt);
+ RefineRule(pt, ctf_level);
+ ss.insert(alts[k].label);
+ }
}
}
}
-bool PassThroughGrammar::HasRuleForSpan(int i, int j, int /* distance */) const {
- const set<int>& hr = has_rule_[i];
- if (i == j) { return !hr.empty(); }
- return (hr.find(j) != hr.end());
+bool PassThroughGrammar::HasRuleForSpan(int, int, int distance) const {
+ return (distance < 2);
}
diff --git a/decoder/grammar.h b/decoder/grammar.h
index f5d00817..e6a15a69 100644
--- a/decoder/grammar.h
+++ b/decoder/grammar.h
@@ -91,8 +91,6 @@ struct GlueGrammar : public TextGrammar {
struct PassThroughGrammar : public TextGrammar {
PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0);
virtual bool HasRuleForSpan(int i, int j, int distance) const;
- private:
- std::vector<std::set<int> > has_rule_; // index by [i][j]
};
void RefineRule(TRulePtr pt, const unsigned int ctf_level);
diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc
index 9f0f50fa..d416dbf6 100644
--- a/decoder/hg_io.cc
+++ b/decoder/hg_io.cc
@@ -401,6 +401,26 @@ string HypergraphIO::AsPLF(const Hypergraph& hg, bool include_global_parentheses
return os.str();
}
+string HypergraphIO::AsPLF(const Lattice& lat, bool include_global_parentheses) {
+ static bool first = true;
+ if (first) { InitEscapes(); first = false; }
+ if (lat.empty()) return "()";
+ ostringstream os;
+ if (include_global_parentheses) os << '(';
+ static const string EPS="*EPS*";
+ for (int i = 0; i < lat.size(); ++i) {
+ const vector<LatticeArc> arcs = lat[i];
+ os << '(';
+ for (int j = 0; j < arcs.size(); ++j) {
+ os << "('" << Escape(TD::Convert(arcs[j].label)) << "',"
+ << arcs[j].cost << ',' << arcs[j].dist2next << "),";
+ }
+ os << "),";
+ }
+ if (include_global_parentheses) os << ')';
+ return os.str();
+}
+
namespace PLF {
const string chars = "'\\";
diff --git a/decoder/hg_io.h b/decoder/hg_io.h
index 44817157..4e502a0c 100644
--- a/decoder/hg_io.h
+++ b/decoder/hg_io.h
@@ -30,6 +30,7 @@ struct HypergraphIO {
static void ReadFromPLF(const std::string& in, Hypergraph* out, int line = 0);
// return PLF string representation (undefined behavior on non-lattices)
static std::string AsPLF(const Hypergraph& hg, bool include_global_parentheses = true);
+ static std::string AsPLF(const Lattice& lat, bool include_global_parentheses = true);
static void PLFtoLattice(const std::string& plf, Lattice* pl);
static std::string Escape(const std::string& s); // PLF helper
};
diff --git a/decoder/phrasetable_fst.cc b/decoder/phrasetable_fst.cc
index f421e941..b3bec86b 100644
--- a/decoder/phrasetable_fst.cc
+++ b/decoder/phrasetable_fst.cc
@@ -9,7 +9,6 @@
#include "filelib.h"
#include "tdict.h"
-using boost::shared_ptr;
using namespace std;
TargetPhraseSet::~TargetPhraseSet() {}
@@ -46,7 +45,7 @@ class TextFSTNode : public FSTNode {
void ClearPassThroughTranslations();
private:
vector<WordID> passthroughs;
- shared_ptr<TargetPhraseSet> data;
+ boost::shared_ptr<TargetPhraseSet> data;
map<WordID, TextFSTNode> ptr;
};