summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-08-18 21:26:55 +0000
committergraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-08-18 21:26:55 +0000
commit8ec14f00d1078f0fa7ab3ba2a01954b1f6ca5260 (patch)
tree2c065af3a449a50eccb1c30d140bf21a324c850d
parent0017c952afb4076025098165414d8ca745107f2b (diff)
ValueArray instead of string for state - same LM decode scores
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@593 ec762483-ff6d-05da-a07a-a48fb63a330f
-rwxr-xr-xdecoder/apply_fsa_models.cc76
-rw-r--r--decoder/apply_models.cc20
-rw-r--r--decoder/cdec.cc23
-rwxr-xr-xdecoder/decode.sh10
-rw-r--r--decoder/ff.cc8
-rw-r--r--decoder/ff.h9
-rwxr-xr-xdecoder/fsa-decode.sh3
-rwxr-xr-xdecoder/perro.sh2
-rw-r--r--decoder/weights-fsa1
-rwxr-xr-xgraehl/NOTES.earley17
-rw-r--r--utils/filelib.h14
-rwxr-xr-xutils/value_array.h (renamed from decoder/value_array.h)0
12 files changed, 155 insertions, 28 deletions
diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc
index 8771863c..b43c02a4 100755
--- a/decoder/apply_fsa_models.cc
+++ b/decoder/apply_fsa_models.cc
@@ -11,6 +11,10 @@
#include "hg_cfg.h"
#include "utoa.h"
#include "hash.h"
+#include "value_array.h"
+
+#define DFSA(x) x
+#define DPFSA(x) x
using namespace std;
@@ -26,31 +30,91 @@ typedef CFG::RuleHandle RuleHandle;
namespace {
+/*
+
+1) A -> x . * (trie)
+
+this is somewhat nice. cost pushed for best first, of course. similar benefit as left-branching binarization without the explicit predict/complete steps?
+
+vs. just
+
+2) * -> x . y
+
+here you have to potentially list out all A -> . x y as items * -> . x y immediately, and shared rhs seqs won't be shared except at the usual single-NT predict/complete. of course, the prediction of items -> . x y can occur lazy best-first.
+
+vs.
+
+3) * -> x . *
+
+with 3, we predict all sorts of useless items - that won't give us our goal A and may not partcipate in any parse. this is not a good option at all.
+*/
+
+#define TRIE_START_LHS 1
+// option 1) above. 0 would be option 3), which is dumb
+
// if we don't greedy-binarize, we want to encode recognized prefixes p (X -> p . rest) efficiently. if we're doing this, we may as well also push costs so we can best-first select rules in a lazy fashion. this is effectively left-branching binarization, of course.
template <class K,class V,class Hash>
-struct prefix_map_type {
+struct fsa_map_type {
typedef std::map<K,V> type;
};
//template typedef
-#define PREFIX_MAP(k,v) prefix_map_type<k,v,boost::hash<k> >::type
-typedef NTHandle LHS;
+#define FSA_MAP(k,v) fsa_map_type<k,v,boost::hash<k> >::type
+typedef WordID LHS; // so this will be -NTHandle.
+
+
struct PrefixTrieNode {
prob_t backward; // (viterbi) backward prob (for cost pushing)
- typedef PREFIX_MAP(LHS,RuleHandle) Completed; // can only have one rule w/ a given signature (duplicates should be collapsed when making CFG). but there may be multiple rules, with different LHS
+#
+#if TRIE_START_LHS
+ typedef FSA_MAP(LHS,RuleHandle) Completed; // can only have one rule w/ a given signature (duplicates should be collapsed when making CFG). but there may be multiple rules, with different LHS
Completed completed;
- typedef PREFIX_MAP(WordID,PrefixTrieNode *) Adj;
+#else
+ bool complete; // may also have successors, of course
+#endif
+ // instead of completed map, we have trie start w/ lhs.?
+
+ // outgoing edges will be ordered highest p to worst p
+ struct Edge {
+ DPFSA(prob_t p;) // we can probably just store deltas, but for debugging remember the full p
+ prob_t delta; // p[i]=delta*p[i-1], with p(-1)=1
+ PrefixTrieNode *dest;
+ WordID w;
+ };
+ typedef FSA_MAP(WordID,Edge) BuildAdj;
+ BuildAdj build_adj; //TODO: move builder elsewhere?
+ typedef ValueArray<Edge> Adj;
+// typedef vector<Edge> Adj;
Adj adj;
//TODO:
};
+#if TRIE_START_LHS
+//Trie starts with lhs, then continues w/ rhs
+#else
+// just rhs. i think item names should exclude lhs if possible (most sharing). get prefix cost w/ forward = viterbi (global best-first admissable h only) and it should be ok?
+#endif
+
+// costs are pushed.
struct PrefixTrie {
CFG const* cfgp;
CFG const& cfg() const { return *cfgp; }
PrefixTrie(CFG const& cfg) : cfgp(&cfg) {
+
//TODO:
}
};
+// these should go in a global best-first queue
+struct Item {
+ prob_t forward;
+ /* The forward probability alpha_i(X[k]->x.y) is the sum of the probabilities of all
+ constrained paths of length that end in state X[k]->x.y*/
+ prob_t inner;
+ /* The inner probability beta_i(X[k]->x.y) is the sum of the probabilities of all
+ paths of length i-k that start in state X[k,k]->.xy and end in X[k,i]->x.y, and generate the input symbols x[k,...,i-1] */
+
+};
+
}//anon ns
@@ -102,6 +166,8 @@ void ApplyFsa::ApplyBottomUp()
void ApplyFsa::ApplyEarley()
{
hgcfg.GiveCFG(cfg);
+ cfg.SortLocalBestFirst();
+ // don't need to uniq - option to do that already exists in cfg_options
//TODO:
}
diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc
index 635f1a9c..60b6f498 100644
--- a/decoder/apply_models.cc
+++ b/decoder/apply_models.cc
@@ -43,7 +43,7 @@ struct Candidate {
// into the +LM forest
const Hypergraph::Edge* in_edge_; // in -LM forest
Hypergraph::Edge out_edge_;
- string state_;
+ FFState state_;
const JVector j_;
prob_t vit_prob_; // these are fixed until the cand
// is popped, then they may be updated
@@ -53,7 +53,7 @@ struct Candidate {
const JVector& j,
const Hypergraph& out_hg,
const vector<CandidateList>& D,
- const vector<string>& node_states,
+ const FFStates& node_states,
const SentenceMetadata& smeta,
const ModelSet& models,
bool is_goal) :
@@ -74,7 +74,7 @@ struct Candidate {
void InitializeCandidate(const Hypergraph& out_hg,
const SentenceMetadata& smeta,
const vector<vector<Candidate*> >& D,
- const vector<string>& node_states,
+ const FFStates& node_states,
const ModelSet& models,
const bool is_goal) {
const Hypergraph::Edge& in_edge = *in_edge_;
@@ -97,7 +97,7 @@ struct Candidate {
prob_t edge_estimate = prob_t::One();
if (is_goal) {
assert(tail.size() == 1);
- const string& ant_state = node_states[tail.front()];
+ const FFState& ant_state = node_states[tail.front()];
models.AddFinalFeatures(ant_state, &out_edge_, smeta);
} else {
models.AddFeaturesToEdge(smeta, out_hg, node_states, &out_edge_, &state_, &edge_estimate);
@@ -154,7 +154,7 @@ struct CandidateUniquenessEquals {
};
typedef unordered_set<const Candidate*, CandidateUniquenessHash, CandidateUniquenessEquals> UniqueCandidateSet;
-typedef unordered_map<string, Candidate*, boost::hash<string> > State2Node;
+typedef unordered_map<FFState, Candidate*, boost::hash<FFState> > State2Node;
class CubePruningRescorer {
@@ -304,7 +304,7 @@ public:
vector<CandidateList> D; // maps nodes in in-HG to the
// equivalent nodes (many due to state
// splits) in the out-HG.
- vector<string> node_states_; // for each node in the out-HG what is
+ FFStates node_states_; // for each node in the out-HG what is
// its q function value?
const int pop_limit_;
};
@@ -320,7 +320,7 @@ struct NoPruningRescorer {
node_states_.reserve(kRESERVE_NUM_NODES);
}
- typedef unordered_map<string, int, boost::hash<string> > State2NodeIndex;
+ typedef unordered_map<FFState, int, boost::hash<FFState> > State2NodeIndex;
void ExpandEdge(const Hypergraph::Edge& in_edge, bool is_goal, State2NodeIndex* state2node) {
const int arity = in_edge.Arity();
@@ -335,10 +335,10 @@ struct NoPruningRescorer {
for (int i = 0; i < arity; ++i)
tail[i] = nodemap[in_edge.tail_nodes_[i]][tail_iter[i]];
Hypergraph::Edge* new_edge = out.AddEdge(in_edge, tail);
- string head_state;
+ FFState head_state;
if (is_goal) {
assert(tail.size() == 1);
- const string& ant_state = node_states_[tail.front()];
+ const FFState& ant_state = node_states_[tail.front()];
models.AddFinalFeatures(ant_state, new_edge,smeta);
} else {
prob_t edge_estimate; // this is a full intersection, so we disregard this
@@ -394,7 +394,7 @@ struct NoPruningRescorer {
Hypergraph& out;
vector<vector<int> > nodemap;
- vector<string> node_states_; // for each node in the out-HG what is
+ FFStates node_states_; // for each node in the out-HG what is
// its q function value?
};
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index 6b5543f8..a4c3613b 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -46,6 +46,7 @@ using namespace std::tr1;
using boost::shared_ptr;
namespace po = boost::program_options;
+vector<string> cfg_files;
bool show_config=false;
bool show_weights=false;
bool verbose_feature_functions=true;
@@ -147,7 +148,7 @@ void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* c
("scfg_no_hiero_glue_grammar,n", "No Hiero glue grammar (nb. by default the SCFG decoder adds Hiero glue rules)")
("scfg_default_nt,d",po::value<string>()->default_value("X"),"Default non-terminal symbol in SCFG")
("scfg_max_span_limit,S",po::value<int>()->default_value(10),"Maximum non-terminal span limit (except \"glue\" grammar)")
- ("show_config", po::bool_switch(&show_config), "show contents of loaded -c config files. note: this will have to appear before the -c argument to take effect")
+ ("show_config", po::bool_switch(&show_config), "show contents of loaded -c config files.")
("show_weights", po::bool_switch(&show_weights), "show effective feature weights")
("show_joshua_visualization,J", "Produce output compatible with the Joshua visualization tools")
("show_tree_structure", "Show the Viterbi derivation structure")
@@ -187,7 +188,7 @@ void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* c
cfg_options.AddOptions(&cfgo);
po::options_description clo("Command line options");
clo.add_options()
- ("config,c", po::value<vector<string> >(), "Configuration file(s) - latest has priority")
+ ("config,c", po::value<vector<string> >(&cfg_files), "Configuration file(s) - latest has priority")
("help,h", "Print this help message and exit")
("usage,u", po::value<string>(), "Describe a feature function type")
("compgen", "Print just option names suitable for bash command line completion builtin 'compgen'")
@@ -205,19 +206,33 @@ void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* c
exit(0);
}
ShowBanner();
+ if (conf.count("show_config")) // special handling needed because we only want to notify() once.
+ show_config=true;
if (conf.count("config")) {
typedef vector<string> Cs;
Cs cs=conf["config"].as<Cs>();
for (int i=0;i<cs.size();++i) {
string cfg=cs[i];
cerr << "Configuration file: " << cfg << endl;
- if (show_config)
- CopyFile(cfg,cerr);
ReadFile conff(cfg);
po::store(po::parse_config_file(*conff, dconfig_options), conf);
}
}
po::notify(conf);
+ if (show_config && !cfg_files.empty()) {
+ cerr<< "\nConfig files:\n\n";
+ for (int i=0;i<cfg_files.size();++i) {
+ string cfg=cfg_files[i];
+ cerr << "Configuration file: " << cfg << endl;
+ CopyFile(cfg,cerr);
+ cerr << "(end config "<<cfg<<"\n\n";
+ }
+ cerr <<"Command line:";
+ for (int i=0;i<argc;++i)
+ cerr<<" "<<argv[i];
+ cerr << "\n\n";
+ }
+
if (conf.count("list_feature_functions")) {
cerr << "Available feature functions (specify with -F; describe with -u FeatureName):\n";
diff --git a/decoder/decode.sh b/decoder/decode.sh
new file mode 100755
index 00000000..13cc6620
--- /dev/null
+++ b/decoder/decode.sh
@@ -0,0 +1,10 @@
+d=$(dirname `readlink -f $0`)/
+decode() {
+if [ "$lm" ] ; then
+ lmargs0=-F
+ lmargs1="LanguageModel lm.gz -n LM"
+fi
+set -x
+$gdb $d/cdec -c $d/${cfg:=cdec-fsa}.ini -i $d/${in:=1dev.ur} $lmargs0 "$lmargs1" --show_features --show_config --show_weights "$@"
+set +x
+}
diff --git a/decoder/ff.cc b/decoder/ff.cc
index 68249b42..7bdd21e3 100644
--- a/decoder/ff.cc
+++ b/decoder/ff.cc
@@ -165,13 +165,13 @@ ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>
void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta,
const Hypergraph& /* hg */,
- const vector<string>& node_states,
+ const FFStates& node_states,
Hypergraph::Edge* edge,
- string* context,
+ FFState* context,
prob_t* combination_cost_estimate) const {
edge->reset_info();
context->resize(state_size_);
- memset(&(*context)[0], 0, state_size_); //FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use SmallVector<char>? ValueArray? (higher performance perhaps, fixed size)
+ memset(&(*context)[0], 0, state_size_);
SparseVector<double> est_vals; // only computed if combination_cost_estimate is non-NULL
if (combination_cost_estimate) *combination_cost_estimate = prob_t::One();
for (int i = 0; i < models_.size(); ++i) {
@@ -193,7 +193,7 @@ void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta,
edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));
}
-void ModelSet::AddFinalFeatures(const std::string& state, Hypergraph::Edge* edge,SentenceMetadata const& smeta) const {
+void ModelSet::AddFinalFeatures(const FFState& state, Hypergraph::Edge* edge,SentenceMetadata const& smeta) const {
assert(1 == edge->rule_->Arity());
edge->reset_info();
for (int i = 0; i < models_.size(); ++i) {
diff --git a/decoder/ff.h b/decoder/ff.h
index fe4411cd..726845c4 100644
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ -14,6 +14,7 @@
#include "fdict.h"
#include "hg.h"
#include "feature_vector.h"
+#include "value_array.h"
class SentenceMetadata;
class FeatureFunction; // see definition below
@@ -242,6 +243,8 @@ void show_all_features(std::vector<FFp> const& models_,DenseWeightVector &weight
return show_features(all_features(models_,weights_,&warn,warn_fid_0),weights_,out,warn,warn_zero_wt);
}
+typedef std::string FFState; //FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size)
+typedef std::vector<FFState> FFStates;
// this class is a set of FeatureFunctions that can be used to score, rescore,
// etc. a (translation?) forest
@@ -257,13 +260,13 @@ class ModelSet {
// must be. edge features are supposed to be overwritten, not added to (possibly because rule features aren't in ModelSet so need to be left alone
void AddFeaturesToEdge(const SentenceMetadata& smeta,
const Hypergraph& hg,
- const std::vector<std::string>& node_states,
+ const FFStates& node_states,
Hypergraph::Edge* edge,
- std::string* residual_context,
+ FFState* residual_context,
prob_t* combination_cost_estimate = NULL) const;
//this is called INSTEAD of above when result of edge is goal (must be a unary rule - i.e. one variable, but typically it's assumed that there are no target terminals either (e.g. for LM))
- void AddFinalFeatures(const std::string& residual_context,
+ void AddFinalFeatures(const FFState& residual_context,
Hypergraph::Edge* edge,
SentenceMetadata const& smeta) const;
diff --git a/decoder/fsa-decode.sh b/decoder/fsa-decode.sh
index c53867e3..66879523 100755
--- a/decoder/fsa-decode.sh
+++ b/decoder/fsa-decode.sh
@@ -1,2 +1,3 @@
d=$(dirname `readlink -f $0`)/
-$gdb $d/cdec -c $d/cdec-fsa.ini -i $d/1dev.ur "$@"
+. $d/decode.sh
+in=1dev.ur cfg=cdec-fsa decode
diff --git a/decoder/perro.sh b/decoder/perro.sh
index 836ad07f..3e54ac71 100755
--- a/decoder/perro.sh
+++ b/decoder/perro.sh
@@ -1 +1 @@
-$gdb $cdec -k 30 --show_features -c fsa-hiero.ini -i perro.ur "$@"
+$gdb $cdec "$@" -k 30 --show_features -c fsa-hiero.ini -i perro.ur
diff --git a/decoder/weights-fsa b/decoder/weights-fsa
index fe01d13a..3cc96c2f 100644
--- a/decoder/weights-fsa
+++ b/decoder/weights-fsa
@@ -3,6 +3,7 @@ Arity_1 1.12426238048012
Arity_2 1.14986187839554
Glue -0.04589037041388
LanguageModel 1.09051
+LM 1.09051
PassThrough -3.66226367902928
PhraseModel_0 -1.94633451863252
PhraseModel_1 -0.1475347695476
diff --git a/graehl/NOTES.earley b/graehl/NOTES.earley
index 6f94f898..4156063a 100755
--- a/graehl/NOTES.earley
+++ b/graehl/NOTES.earley
@@ -88,3 +88,20 @@ then X[k,i]->rY.s (a',b') with a' += a*b'', b' += b*b''
==========
is forward cost viterbi fine? i.e. can i have items whose names ignore the lhs NT (look up predictions that i finish lazily / graph structured?)
+======
+
+1) A -> x . * (trie)
+
+this is somewhat nice. cost pushed for best first, of course. similar benefit as left-branching binarization without the explicit predict/complete steps?
+
+vs. just
+
+2) * -> x . y
+
+here you have to potentially list out all A -> . x y as items * -> . x y immediately, and shared rhs seqs won't be shared except at the usual single-NT predict/complete. of course, the prediction of items -> . x y can occur lazy best-first.
+
+vs.
+
+3) * -> x . *
+
+with 3, we predict all sorts of useless items - that won't give us our goal A and may not partcipate in any parse. this is not a good option at all.
diff --git a/utils/filelib.h b/utils/filelib.h
index b9fef9a7..dda98671 100644
--- a/utils/filelib.h
+++ b/utils/filelib.h
@@ -103,4 +103,18 @@ class WriteFile : public BaseFile<std::ostream> {
}
};
+inline void CopyFile(std::istream &in,std::ostream &out) {
+ out << in.rdbuf();
+}
+
+inline void CopyFile(std::string const& inf,std::ostream &out) {
+ ReadFile r(inf);
+ CopyFile(*r,out);
+}
+
+inline void CopyFile(std::string const& inf,std::string const& outf) {
+ WriteFile w(outf);
+ CopyFile(inf,*w);
+}
+
#endif
diff --git a/decoder/value_array.h b/utils/value_array.h
index cdf1d697..cdf1d697 100755
--- a/decoder/value_array.h
+++ b/utils/value_array.h