summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--decoder/ff.cc2
-rw-r--r--decoder/ff_lm.cc16
-rw-r--r--decoder/hg.cc2
-rw-r--r--decoder/inside_outside.h4
4 files changed, 18 insertions, 6 deletions
diff --git a/decoder/ff.cc b/decoder/ff.cc
index 7186d776..261e9a17 100644
--- a/decoder/ff.cc
+++ b/decoder/ff.cc
@@ -93,7 +93,7 @@ ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>
}
void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta,
- const Hypergraph& hg,
+ const Hypergraph& /* hg */,
const vector<string>& node_states,
Hypergraph::Edge* edge,
string* context,
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index a12a2667..8333bf7b 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -4,6 +4,13 @@
//NOTE: if ngram order is bigger than lm state's, then the longest possible ngram scores are still used. if you really want a lower order, a truncated copy of the LM should be small enough. otherwise, an option to null out words outside of the order's window would need to be implemented.
+//#define UNIGRAM_DEBUG
+#ifdef UNIGRAM_DEBUG
+# define UNIDBG(x) do { cerr << x; } while(0)
+#else
+# define UNIDBG(x)
+#endif
+
#include "ff_lm.h"
#include <sstream>
@@ -168,7 +175,7 @@ class LanguageModelImpl {
kNONE(-1),
kSTAR(TD::Convert("<{STAR}>"))
, unigram(order<=1) {}
-
+//TODO: show that unigram special case (0 state) computes what it should.
LanguageModelImpl(int order, const string& f) :
ngram_(*TD::dict_, order), buffer_(), order_(order), state_size_(OrderToStateSize(order) - 1),
floor_(-100.0),
@@ -300,14 +307,19 @@ class LanguageModelImpl {
/// just how SRILM likes it: [rbegin,rend) is a phrase in reverse word order and null terminated so *rend=kNONE. returns the sum of per-word scores;
/// cost returned is some kind of log prob (who cares, we're just adding)
double stateless_cost(WordID *rbegin,WordID *rend) {
+ UNIDBG("p(");
double sum=0;
- for (;rend>rbegin;--rend)
+ for (;rend>rbegin;--rend) {
sum+=clamp(WordProb(rend[-1],rend));
+ UNIDBG(","<<TD::Convert(rend[-1]));
+ }
+ UNIDBG(")="<<sum<<endl);
return sum;
}
//TODO: this would be a fine rule heuristic (for reordering hyperedges prior to rescoring). for now you can just use a same-lm-file -o 1 prelm-rescore :(
double stateless_cost(TRule const& rule) {
+ //TODO: make sure this is correct.
int len = rule.ELength(); // use a gap for each variable
buffer_.resize(len + 1);
buffer_[len] = kNONE;
diff --git a/decoder/hg.cc b/decoder/hg.cc
index b6b9d8bd..2cff17af 100644
--- a/decoder/hg.cc
+++ b/decoder/hg.cc
@@ -148,7 +148,7 @@ void Hypergraph::PruneEdges(const std::vector<bool>& prune_edge, bool run_inside
// I dislike. If you know of a better way that doesn't involve specialization,
// fix this!
vector<Boolean> reachable;
- bool goal_derivable = Inside/* <Boolean, EdgeExistsWeightFunction> */(*this, &reachable, wf);
+ bool goal_derivable = Inside/* <Boolean, EdgeExistsWeightFunction> */(*this, &reachable, wf).get();
if (!goal_derivable) {
edges_.clear();
nodes_.clear();
diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h
index 9f7ce526..62daca1f 100644
--- a/decoder/inside_outside.h
+++ b/decoder/inside_outside.h
@@ -10,8 +10,8 @@ struct Boolean {
bool x;
Boolean() : x() { }
Boolean(bool i) : x(i) { }
- operator bool() const { return x; }
- // normally you'd use the logical (short circuit) || && operators, but bool really is guaranteed to be 0 or 1 numerically.
+ operator bool() const { return x; } // careful - this might cause a disaster with (bool)a + Boolean(b).
+ // normally you'd use the logical (short circuit) || && operators, but bool really is guaranteed to be 0 or 1 numerically. also note that | and & have equal precedence (!)
void operator+=(Boolean o) { x|=o.x; }
friend inline Boolean operator +(Boolean a,Boolean b) {
return Boolean(a.x|b.x);