author     Jonathan Clark <jon.h.clark@gmail.com>  2011-03-24 09:51:40 -0400
committer  Jonathan Clark <jon.h.clark@gmail.com>  2011-03-24 09:51:40 -0400
commit     eb33700d1c868662b5d0abedaaf3fa47948a89d0 (patch)
tree       ed70be84820d243524bab0b59a84b8da033a9c41 /decoder
parent     ba4f147f84aa0d4623da640a2d0de7e6242a53af (diff)
parent     a580faa8177331cf51138a2208e276b703470934 (diff)
Undo some silly local changes so we can pull
Diffstat (limited to 'decoder')
-rw-r--r--  decoder/Makefile.am   | 26
-rw-r--r--  decoder/decoder.cc    | 54
-rw-r--r--  decoder/ff_klm.cc     |  2
-rw-r--r--  decoder/ff_wordset.h  |  1
-rw-r--r--  decoder/hg.cc         |  5
-rw-r--r--  decoder/hg.h          |  2
-rw-r--r--  decoder/trule.cc      |  7
7 files changed, 77 insertions, 20 deletions
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 9cf4c3c4..244da2de 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -6,16 +6,13 @@ noinst_PROGRAMS = \
hg_test \
ff_test \
parser_test \
- grammar_test \
- cfg_test
-TESTS = trule_test ff_test parser_test grammar_test hg_test cfg_test
-endif
-
-cdec_SOURCES = cdec.cc
-cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
-
-cfg_test_SOURCES = cfg_test.cc
-cfg_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz
+ grammar_test
+
+ # cfg_test
+TESTS = trule_test ff_test parser_test grammar_test hg_test
+# cfg_test
+#cfg_test_SOURCES = cfg_test.cc
+#cfg_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz
parser_test_SOURCES = parser_test.cc
parser_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz
ff_test_SOURCES = ff_test.cc
@@ -26,7 +23,12 @@ hg_test_SOURCES = hg_test.cc
hg_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz
trule_test_SOURCES = trule_test.cc
trule_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm
+endif
+
+cdec_SOURCES = cdec.cc
+cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
+
+AM_CPPFLAGS = -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm
rule_lexer.cc: rule_lexer.l
$(LEX) -s -CF -8 -o$@ $<
@@ -82,5 +84,5 @@ libcdec_a_SOURCES = \
if GLC
# Until we build GLC as a library...
- libcdec_a_SOURCES += ff_glc.cc
+ libcdec_a_SOURCES += ff_glc.cc string_util.cc feature-factory.cc
endif
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 95ff6270..b7774acc 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -141,12 +141,13 @@ inline shared_ptr<FsaFeatureFunction> make_fsa_ff(string const& ffp,bool verbose
// and then prune the resulting (rescored) hypergraph. All feature values from previous
// passes are carried over into subsequent passes (where they may have different weights).
struct RescoringPass {
- RescoringPass() : density_prune(), beam_prune() {}
+ RescoringPass() : fid_summary(), density_prune(), beam_prune() {}
shared_ptr<ModelSet> models;
shared_ptr<IntersectionConfiguration> inter_conf;
vector<const FeatureFunction*> ffs;
shared_ptr<Weights> w; // null == use previous weights
vector<double> weight_vector;
+ int fid_summary; // 0 == no summary feature
double density_prune; // 0 == don't density prune
double beam_prune; // 0 == don't beam prune
};
@@ -155,6 +156,7 @@ ostream& operator<<(ostream& os, const RescoringPass& rp) {
os << "[num_fn=" << rp.ffs.size();
if (rp.inter_conf) { os << " int_alg=" << *rp.inter_conf; }
if (rp.w) os << " new_weights";
+ if (rp.fid_summary) os << " summary_feature=" << FD::Convert(rp.fid_summary);
if (rp.density_prune) os << " density_prune=" << rp.density_prune;
if (rp.beam_prune) os << " beam_prune=" << rp.beam_prune;
os << ']';
@@ -361,18 +363,21 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
("weights,w",po::value<string>(),"Feature weights file (initial forest / pass 1)")
("feature_function,F",po::value<vector<string> >()->composing(), "Pass 1 additional feature function(s) (-L for list)")
("intersection_strategy,I",po::value<string>()->default_value("cube_pruning"), "Pass 1 intersection strategy for incorporating finite-state features; values include Cube_pruning, Full")
+ ("summary_feature", po::value<string>(), "Compute a 'summary feature' at the end of the pass (before any pruning) with name=arg and value=inside-outside/Z")
("density_prune", po::value<double>(), "Pass 1 pruning: keep no more than this many times the number of edges used in the best derivation tree (>=1.0)")
("beam_prune", po::value<double>(), "Pass 1 pruning: Prune paths from scored forest, keep paths within exp(alpha>=0)")
("weights2",po::value<string>(),"Optional pass 2")
("feature_function2",po::value<vector<string> >()->composing(), "Optional pass 2")
("intersection_strategy2",po::value<string>()->default_value("cube_pruning"), "Optional pass 2")
+ ("summary_feature2", po::value<string>(), "Optional pass 2")
("density_prune2", po::value<double>(), "Optional pass 2")
("beam_prune2", po::value<double>(), "Optional pass 2")
("weights3",po::value<string>(),"Optional pass 3")
("feature_function3",po::value<vector<string> >()->composing(), "Optional pass 3")
("intersection_strategy3",po::value<string>()->default_value("cube_pruning"), "Optional pass 3")
+ ("summary_feature3", po::value<string>(), "Optional pass 3")
("density_prune3", po::value<double>(), "Optional pass 3")
("beam_prune3", po::value<double>(), "Optional pass 3")
@@ -559,6 +564,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
for (int pass = 0; pass < MAX_PASSES; ++pass) {
string ws = "weights" + StringSuffixForRescoringPass(pass);
string ff = "feature_function" + StringSuffixForRescoringPass(pass);
+ string sf = "summary_feature" + StringSuffixForRescoringPass(pass);
string bp = "beam_prune" + StringSuffixForRescoringPass(pass);
string dp = "density_prune" + StringSuffixForRescoringPass(pass);
bool first_pass_condition = ((pass == 0) && (conf.count(ff) || conf.count(bp) || conf.count(dp)));
@@ -583,6 +589,11 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
if (p->IsStateful()) { has_stateful = true; }
}
}
+ if (conf.count(sf)) {
+ rp.fid_summary = FD::Convert(conf[sf].as<string>());
+ assert(rp.fid_summary > 0);
+ // TODO assert that weights for this pass have coef(fid_summary) == 0.0?
+ }
if (conf.count(bp)) { rp.beam_prune = conf[bp].as<double>(); }
if (conf.count(dp)) { rp.density_prune = conf[dp].as<double>(); }
int palg = (has_stateful ? 1 : 0); // if there are no stateful features, default to FULL
@@ -794,6 +805,47 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
cerr << " " << passtr << " partition log(Z): " << log(z) << endl;
}
+ if (rp.fid_summary) {
+#if 0
+ const prob_t z = forest.PushWeightsToGoal(1.0);
+ if (!SILENT) { cerr << " " << passtr << " adding summary feature " << FD::Convert(rp.fid_summary) << " log(Z)=" << log(z) << endl; }
+ if (!isfinite(log(z)) || isnan(log(z))) {
+ cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n";
+ } else {
+ for (int i = 0; i < forest.edges_.size(); ++i) {
+ const double log_prob_transition = log(forest.edges_[i].edge_prob_); // locally normalized by the edge
+ // head node by forest.PushWeightsToGoal
+ if (!isfinite(log_prob_transition) || isnan(log_prob_transition)) {
+ cerr << "Edge: i=" << i << " got bad inside prob: " << *forest.edges_[i].rule_ << endl;
+ abort();
+ }
+
+ forest.edges_[i].feature_values_.set_value(rp.fid_summary, log_prob_transition);
+ }
+ forest.Reweight(cur_weights); // reset weights
+ }
+#endif
+ Hypergraph::EdgeProbs posts;
+ const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts);
+ if (!isfinite(log(z)) || isnan(log(z))) {
+ cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n";
+ } else {
+ for (int i = 0; i < forest.nodes_.size(); ++i) {
+ const Hypergraph::EdgesVector& in_edges = forest.nodes_[i].in_edges_;
+ prob_t node_post = prob_t(0);
+ for (int j = 0; j < in_edges.size(); ++j)
+ node_post += (posts[in_edges[j]] / z);
+ const double log_np = log(node_post);
+ if (!isfinite(log_np) || isnan(log_np)) {
+ cerr << "got bad posterior prob for node " << i << endl;
+ abort();
+ }
+ for (int j = 0; j < in_edges.size(); ++j)
+ forest.edges_[in_edges[j]].feature_values_.set_value(rp.fid_summary, exp(log_np));
+ }
+ }
+ }
+
string fullbp = "beam_prune" + StringSuffixForRescoringPass(pass);
string fulldp = "density_prune" + StringSuffixForRescoringPass(pass);
maybe_prune(forest,conf,fullbp.c_str(),fulldp.c_str(),passtr,srclen);
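
The summary_feature block added above computes, for every node, the posterior probability mass flowing through it (the sum of its incoming edges' inside-outside scores divided by the partition Z) and writes that value onto each incoming edge under fid_summary. A minimal standalone sketch of that computation, using hypothetical simplified Node/Edge stand-ins rather than cdec's real Hypergraph, prob_t, or SparseVector types:

    #include <cmath>
    #include <cstdio>
    #include <cstdlib>
    #include <vector>

    struct Edge {
      double posterior;        // plays the role of ComputeEdgePosteriors' output for this edge
      double summary_feature;  // stands in for feature_values_.set_value(rp.fid_summary, ...)
    };
    struct Node { std::vector<int> in_edges; };  // indices into the edge vector

    void AddSummaryFeature(std::vector<Node>& nodes, std::vector<Edge>& edges, double z) {
      for (std::size_t i = 0; i < nodes.size(); ++i) {
        double node_post = 0.0;
        for (int e : nodes[i].in_edges) node_post += edges[e].posterior / z;
        const double log_np = std::log(node_post);
        if (!std::isfinite(log_np)) {  // !isfinite(x) is already false for NaN
          std::fprintf(stderr, "got bad posterior prob for node %zu\n", i);
          std::abort();
        }
        for (int e : nodes[i].in_edges)
          edges[e].summary_feature = std::exp(log_np);  // exp(log(x)) == x; mirrors the hunk
      }
    }

    int main() {
      // Toy forest: one node with two incoming edges whose posteriors sum to z.
      std::vector<Edge> edges = {{0.3, 0.0}, {0.7, 0.0}};
      std::vector<Node> nodes = {{{0, 1}}};
      AddSummaryFeature(nodes, edges, /*z=*/1.0);
      std::printf("edge 0 summary = %.2f\n", edges[0].summary_feature);  // prints 1.00
    }

Note that every in-edge of a node receives the same value, the node's posterior itself, which is what makes it a per-node "summary" rather than a per-edge score.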
diff --git a/decoder/ff_klm.cc b/decoder/ff_klm.cc
index adc2c8bf..62908cdc 100644
--- a/decoder/ff_klm.cc
+++ b/decoder/ff_klm.cc
@@ -21,7 +21,7 @@ static const unsigned char MASK = 7;
// -n NAME : feature id is NAME
bool ParseLMArgs(string const& in, string* filename, string* mapfile, bool* explicit_markers, string* featname) {
vector<string> const& argv=SplitOnWhitespace(in);
- *explicit_markers = true;
+ *explicit_markers = false;
*featname="LanguageModel";
*mapfile = "";
#define LMSPEC_NEXTARG if (i==argv.end()) { \
diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h
index 00e1145b..643097ef 100644
--- a/decoder/ff_wordset.h
+++ b/decoder/ff_wordset.h
@@ -32,6 +32,7 @@ class WordSet : public FeatureFunction {
~WordSet() {
}
+ Features features() const { return single_feature(fid_); }
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
diff --git a/decoder/hg.cc b/decoder/hg.cc
index 39ac5132..a4028b0e 100644
--- a/decoder/hg.cc
+++ b/decoder/hg.cc
@@ -226,9 +226,9 @@ prob_t Hypergraph::PushViterbiWeightsToGoal(int fid) {
}
-void Hypergraph::PushWeightsToGoal(double scale) {
+prob_t Hypergraph::PushWeightsToGoal(double scale) {
vector<prob_t> posts;
- ComputeEdgePosteriors(scale, &posts);
+ const prob_t inside_z = ComputeEdgePosteriors(scale, &posts);
for (int i = 0; i < nodes_.size(); ++i) {
const Hypergraph::Node& node = nodes_[i];
prob_t z = prob_t::Zero();
@@ -238,6 +238,7 @@ void Hypergraph::PushWeightsToGoal(double scale) {
edges_[node.in_edges_[j]].edge_prob_ = posts[node.in_edges_[j]] / z;
}
}
+ return inside_z;
}
struct EdgeExistsWeightFunction {
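
For reference, the effect of the hg.cc change above: PushWeightsToGoal still locally renormalizes each edge's probability by the posterior mass of its head node, but now forwards the inside score at the goal returned by ComputeEdgePosteriors instead of discarding it. A minimal sketch with the same hypothetical simplified stand-ins as before (posts[e] playing the role of the computed edge posteriors):

    #include <vector>

    struct Edge { double edge_prob; };           // stands in for edge_prob_
    struct Node { std::vector<int> in_edges; };

    double PushWeightsToGoal(std::vector<Node>& nodes, std::vector<Edge>& edges,
                             const std::vector<double>& posts, double inside_z) {
      for (const Node& node : nodes) {
        double z = 0.0;                          // posterior mass at this head node
        for (int e : node.in_edges) z += posts[e];
        for (int e : node.in_edges) edges[e].edge_prob = posts[e] / z;
      }
      return inside_z;                           // the old void version computed and discarded this
    }

Returning the partition lets callers (such as the #if 0 variant of the summary-feature code in decoder.cc) log or validate log(Z) without running inside-outside a second time.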
diff --git a/decoder/hg.h b/decoder/hg.h
index aa1202b1..e5ef05f8 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -449,7 +449,7 @@ public:
void PushWeightsToSource(double scale = 1.0);
// same, except weights are pushed to the goal, works for HGs,
// not just lattices
- void PushWeightsToGoal(double scale = 1.0);
+ prob_t PushWeightsToGoal(double scale = 1.0);
// contrary to PushWeightsToGoal, use viterbi semiring; store log(p) to fid. note that p_viterbi becomes 1; k*p_viterbi becomes k. also modifies edge_prob_ (note that the fid stored log(p) will stick around even if you reweight)
// afterwards, product of edge_prob_ for a derivation will equal 1 for the viterbi (p_v before, 1 after), and in general (k*p_v before, k after). returns inside(goal)
diff --git a/decoder/trule.cc b/decoder/trule.cc
index 9820e6d5..40235542 100644
--- a/decoder/trule.cc
+++ b/decoder/trule.cc
@@ -5,7 +5,6 @@
#include "stringlib.h"
#include "tdict.h"
#include "rule_lexer.h"
-#include "threadlocal.h"
using namespace std;
@@ -99,7 +98,7 @@ TRule* TRule::CreateRuleMonolingual(const string& rule) {
namespace {
// callback for lexer
-THREADLOCAL int n_assigned=0;
+int n_assigned=0;
void assign_trule(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra) {
TRule *assignto=(TRule *)extra;
*assignto=*new_rule;
@@ -145,7 +144,9 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) {
getline(is, ss);
//cerr << "L: " << ss << endl;
int start = 0;
- const int len = ss.size();
+ int len = ss.size();
+ const size_t ppos = ss.find(" |||");
+ if (ppos != string::npos) { len = ppos; }
while (start < len) {
while(start < len && (ss[start] == ' ' || ss[start] == ';'))
++start;
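
The trule.cc change caps the feature-field scan at the next " |||" delimiter, so any trailing field after the feature list (word alignments, for instance) is no longer consumed by the name=value scanner. A standalone illustration with a hypothetical rule tail:

    #include <iostream>
    #include <string>

    int main() {
      const std::string ss = "Phrase=1 Lex=0.5 ||| 0-0 1-1";  // hypothetical features + alignment field
      std::string::size_type len = ss.size();
      const std::string::size_type ppos = ss.find(" |||");
      if (ppos != std::string::npos) len = ppos;              // stop scanning before the next field
      std::cout << ss.substr(0, len) << '\n';                 // prints: Phrase=1 Lex=0.5
    }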