From 769aadfc431f2eec68c889b65b8939a4d35f56e9 Mon Sep 17 00:00:00 2001 From: graehl Date: Wed, 18 Aug 2010 21:26:55 +0000 Subject: ValueArray instead of string for state - same LM decode scores git-svn-id: https://ws10smt.googlecode.com/svn/trunk@593 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/apply_fsa_models.cc | 76 +++++++++++++- decoder/apply_models.cc | 20 ++-- decoder/cdec.cc | 23 ++++- decoder/decode.sh | 10 ++ decoder/ff.cc | 8 +- decoder/ff.h | 9 +- decoder/fsa-decode.sh | 3 +- decoder/perro.sh | 2 +- decoder/value_array.h | 241 -------------------------------------------- decoder/weights-fsa | 1 + 10 files changed, 124 insertions(+), 269 deletions(-) create mode 100755 decoder/decode.sh delete mode 100755 decoder/value_array.h (limited to 'decoder') diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc index 8771863c..b43c02a4 100755 --- a/decoder/apply_fsa_models.cc +++ b/decoder/apply_fsa_models.cc @@ -11,6 +11,10 @@ #include "hg_cfg.h" #include "utoa.h" #include "hash.h" +#include "value_array.h" + +#define DFSA(x) x +#define DPFSA(x) x using namespace std; @@ -26,31 +30,91 @@ typedef CFG::RuleHandle RuleHandle; namespace { +/* + +1) A -> x . * (trie) + +this is somewhat nice. cost pushed for best first, of course. similar benefit as left-branching binarization without the explicit predict/complete steps? + +vs. just + +2) * -> x . y + +here you have to potentially list out all A -> . x y as items * -> . x y immediately, and shared rhs seqs won't be shared except at the usual single-NT predict/complete. of course, the prediction of items -> . x y can occur lazy best-first. + +vs. + +3) * -> x . * + +with 3, we predict all sorts of useless items - that won't give us our goal A and may not partcipate in any parse. this is not a good option at all. +*/ + +#define TRIE_START_LHS 1 +// option 1) above. 0 would be option 3), which is dumb + // if we don't greedy-binarize, we want to encode recognized prefixes p (X -> p . rest) efficiently. if we're doing this, we may as well also push costs so we can best-first select rules in a lazy fashion. this is effectively left-branching binarization, of course. template -struct prefix_map_type { +struct fsa_map_type { typedef std::map type; }; //template typedef -#define PREFIX_MAP(k,v) prefix_map_type >::type -typedef NTHandle LHS; +#define FSA_MAP(k,v) fsa_map_type >::type +typedef WordID LHS; // so this will be -NTHandle. + + struct PrefixTrieNode { prob_t backward; // (viterbi) backward prob (for cost pushing) - typedef PREFIX_MAP(LHS,RuleHandle) Completed; // can only have one rule w/ a given signature (duplicates should be collapsed when making CFG). but there may be multiple rules, with different LHS +# +#if TRIE_START_LHS + typedef FSA_MAP(LHS,RuleHandle) Completed; // can only have one rule w/ a given signature (duplicates should be collapsed when making CFG). but there may be multiple rules, with different LHS Completed completed; - typedef PREFIX_MAP(WordID,PrefixTrieNode *) Adj; +#else + bool complete; // may also have successors, of course +#endif + // instead of completed map, we have trie start w/ lhs.? + + // outgoing edges will be ordered highest p to worst p + struct Edge { + DPFSA(prob_t p;) // we can probably just store deltas, but for debugging remember the full p + prob_t delta; // p[i]=delta*p[i-1], with p(-1)=1 + PrefixTrieNode *dest; + WordID w; + }; + typedef FSA_MAP(WordID,Edge) BuildAdj; + BuildAdj build_adj; //TODO: move builder elsewhere? + typedef ValueArray Adj; +// typedef vector Adj; Adj adj; //TODO: }; +#if TRIE_START_LHS +//Trie starts with lhs, then continues w/ rhs +#else +// just rhs. i think item names should exclude lhs if possible (most sharing). get prefix cost w/ forward = viterbi (global best-first admissable h only) and it should be ok? +#endif + +// costs are pushed. struct PrefixTrie { CFG const* cfgp; CFG const& cfg() const { return *cfgp; } PrefixTrie(CFG const& cfg) : cfgp(&cfg) { + //TODO: } }; +// these should go in a global best-first queue +struct Item { + prob_t forward; + /* The forward probability alpha_i(X[k]->x.y) is the sum of the probabilities of all + constrained paths of length that end in state X[k]->x.y*/ + prob_t inner; + /* The inner probability beta_i(X[k]->x.y) is the sum of the probabilities of all + paths of length i-k that start in state X[k,k]->.xy and end in X[k,i]->x.y, and generate the input symbols x[k,...,i-1] */ + +}; + }//anon ns @@ -102,6 +166,8 @@ void ApplyFsa::ApplyBottomUp() void ApplyFsa::ApplyEarley() { hgcfg.GiveCFG(cfg); + cfg.SortLocalBestFirst(); + // don't need to uniq - option to do that already exists in cfg_options //TODO: } diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc index 635f1a9c..60b6f498 100644 --- a/decoder/apply_models.cc +++ b/decoder/apply_models.cc @@ -43,7 +43,7 @@ struct Candidate { // into the +LM forest const Hypergraph::Edge* in_edge_; // in -LM forest Hypergraph::Edge out_edge_; - string state_; + FFState state_; const JVector j_; prob_t vit_prob_; // these are fixed until the cand // is popped, then they may be updated @@ -53,7 +53,7 @@ struct Candidate { const JVector& j, const Hypergraph& out_hg, const vector& D, - const vector& node_states, + const FFStates& node_states, const SentenceMetadata& smeta, const ModelSet& models, bool is_goal) : @@ -74,7 +74,7 @@ struct Candidate { void InitializeCandidate(const Hypergraph& out_hg, const SentenceMetadata& smeta, const vector >& D, - const vector& node_states, + const FFStates& node_states, const ModelSet& models, const bool is_goal) { const Hypergraph::Edge& in_edge = *in_edge_; @@ -97,7 +97,7 @@ struct Candidate { prob_t edge_estimate = prob_t::One(); if (is_goal) { assert(tail.size() == 1); - const string& ant_state = node_states[tail.front()]; + const FFState& ant_state = node_states[tail.front()]; models.AddFinalFeatures(ant_state, &out_edge_, smeta); } else { models.AddFeaturesToEdge(smeta, out_hg, node_states, &out_edge_, &state_, &edge_estimate); @@ -154,7 +154,7 @@ struct CandidateUniquenessEquals { }; typedef unordered_set UniqueCandidateSet; -typedef unordered_map > State2Node; +typedef unordered_map > State2Node; class CubePruningRescorer { @@ -304,7 +304,7 @@ public: vector D; // maps nodes in in-HG to the // equivalent nodes (many due to state // splits) in the out-HG. - vector node_states_; // for each node in the out-HG what is + FFStates node_states_; // for each node in the out-HG what is // its q function value? const int pop_limit_; }; @@ -320,7 +320,7 @@ struct NoPruningRescorer { node_states_.reserve(kRESERVE_NUM_NODES); } - typedef unordered_map > State2NodeIndex; + typedef unordered_map > State2NodeIndex; void ExpandEdge(const Hypergraph::Edge& in_edge, bool is_goal, State2NodeIndex* state2node) { const int arity = in_edge.Arity(); @@ -335,10 +335,10 @@ struct NoPruningRescorer { for (int i = 0; i < arity; ++i) tail[i] = nodemap[in_edge.tail_nodes_[i]][tail_iter[i]]; Hypergraph::Edge* new_edge = out.AddEdge(in_edge, tail); - string head_state; + FFState head_state; if (is_goal) { assert(tail.size() == 1); - const string& ant_state = node_states_[tail.front()]; + const FFState& ant_state = node_states_[tail.front()]; models.AddFinalFeatures(ant_state, new_edge,smeta); } else { prob_t edge_estimate; // this is a full intersection, so we disregard this @@ -394,7 +394,7 @@ struct NoPruningRescorer { Hypergraph& out; vector > nodemap; - vector node_states_; // for each node in the out-HG what is + FFStates node_states_; // for each node in the out-HG what is // its q function value? }; diff --git a/decoder/cdec.cc b/decoder/cdec.cc index 6b5543f8..a4c3613b 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -46,6 +46,7 @@ using namespace std::tr1; using boost::shared_ptr; namespace po = boost::program_options; +vector cfg_files; bool show_config=false; bool show_weights=false; bool verbose_feature_functions=true; @@ -147,7 +148,7 @@ void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* c ("scfg_no_hiero_glue_grammar,n", "No Hiero glue grammar (nb. by default the SCFG decoder adds Hiero glue rules)") ("scfg_default_nt,d",po::value()->default_value("X"),"Default non-terminal symbol in SCFG") ("scfg_max_span_limit,S",po::value()->default_value(10),"Maximum non-terminal span limit (except \"glue\" grammar)") - ("show_config", po::bool_switch(&show_config), "show contents of loaded -c config files. note: this will have to appear before the -c argument to take effect") + ("show_config", po::bool_switch(&show_config), "show contents of loaded -c config files.") ("show_weights", po::bool_switch(&show_weights), "show effective feature weights") ("show_joshua_visualization,J", "Produce output compatible with the Joshua visualization tools") ("show_tree_structure", "Show the Viterbi derivation structure") @@ -187,7 +188,7 @@ void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* c cfg_options.AddOptions(&cfgo); po::options_description clo("Command line options"); clo.add_options() - ("config,c", po::value >(), "Configuration file(s) - latest has priority") + ("config,c", po::value >(&cfg_files), "Configuration file(s) - latest has priority") ("help,h", "Print this help message and exit") ("usage,u", po::value(), "Describe a feature function type") ("compgen", "Print just option names suitable for bash command line completion builtin 'compgen'") @@ -205,19 +206,33 @@ void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* c exit(0); } ShowBanner(); + if (conf.count("show_config")) // special handling needed because we only want to notify() once. + show_config=true; if (conf.count("config")) { typedef vector Cs; Cs cs=conf["config"].as(); for (int i=0;i& w, const vector void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta, const Hypergraph& /* hg */, - const vector& node_states, + const FFStates& node_states, Hypergraph::Edge* edge, - string* context, + FFState* context, prob_t* combination_cost_estimate) const { edge->reset_info(); context->resize(state_size_); - memset(&(*context)[0], 0, state_size_); //FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use SmallVector? ValueArray? (higher performance perhaps, fixed size) + memset(&(*context)[0], 0, state_size_); SparseVector est_vals; // only computed if combination_cost_estimate is non-NULL if (combination_cost_estimate) *combination_cost_estimate = prob_t::One(); for (int i = 0; i < models_.size(); ++i) { @@ -193,7 +193,7 @@ void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta, edge->edge_prob_.logeq(edge->feature_values_.dot(weights_)); } -void ModelSet::AddFinalFeatures(const std::string& state, Hypergraph::Edge* edge,SentenceMetadata const& smeta) const { +void ModelSet::AddFinalFeatures(const FFState& state, Hypergraph::Edge* edge,SentenceMetadata const& smeta) const { assert(1 == edge->rule_->Arity()); edge->reset_info(); for (int i = 0; i < models_.size(); ++i) { diff --git a/decoder/ff.h b/decoder/ff.h index fe4411cd..726845c4 100644 --- a/decoder/ff.h +++ b/decoder/ff.h @@ -14,6 +14,7 @@ #include "fdict.h" #include "hg.h" #include "feature_vector.h" +#include "value_array.h" class SentenceMetadata; class FeatureFunction; // see definition below @@ -242,6 +243,8 @@ void show_all_features(std::vector const& models_,DenseWeightVector &weight return show_features(all_features(models_,weights_,&warn,warn_fid_0),weights_,out,warn,warn_zero_wt); } +typedef std::string FFState; //FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size) +typedef std::vector FFStates; // this class is a set of FeatureFunctions that can be used to score, rescore, // etc. a (translation?) forest @@ -257,13 +260,13 @@ class ModelSet { // must be. edge features are supposed to be overwritten, not added to (possibly because rule features aren't in ModelSet so need to be left alone void AddFeaturesToEdge(const SentenceMetadata& smeta, const Hypergraph& hg, - const std::vector& node_states, + const FFStates& node_states, Hypergraph::Edge* edge, - std::string* residual_context, + FFState* residual_context, prob_t* combination_cost_estimate = NULL) const; //this is called INSTEAD of above when result of edge is goal (must be a unary rule - i.e. one variable, but typically it's assumed that there are no target terminals either (e.g. for LM)) - void AddFinalFeatures(const std::string& residual_context, + void AddFinalFeatures(const FFState& residual_context, Hypergraph::Edge* edge, SentenceMetadata const& smeta) const; diff --git a/decoder/fsa-decode.sh b/decoder/fsa-decode.sh index c53867e3..66879523 100755 --- a/decoder/fsa-decode.sh +++ b/decoder/fsa-decode.sh @@ -1,2 +1,3 @@ d=$(dirname `readlink -f $0`)/ -$gdb $d/cdec -c $d/cdec-fsa.ini -i $d/1dev.ur "$@" +. $d/decode.sh +in=1dev.ur cfg=cdec-fsa decode diff --git a/decoder/perro.sh b/decoder/perro.sh index 836ad07f..3e54ac71 100755 --- a/decoder/perro.sh +++ b/decoder/perro.sh @@ -1 +1 @@ -$gdb $cdec -k 30 --show_features -c fsa-hiero.ini -i perro.ur "$@" +$gdb $cdec "$@" -k 30 --show_features -c fsa-hiero.ini -i perro.ur diff --git a/decoder/value_array.h b/decoder/value_array.h deleted file mode 100755 index cdf1d697..00000000 --- a/decoder/value_array.h +++ /dev/null @@ -1,241 +0,0 @@ -#ifndef VALUE_ARRAY_H -#define VALUE_ARRAY_H - -//TODO: option for non-constructed version (type_traits pod?), option for small array optimization (if sz < N, store inline in union, see small_vector.h) - -#include -#include -#include -#include -#include -#include -#include -#ifdef USE_BOOST_SERIALIZE -# include -# include -#endif - -//TODO: use awesome type traits (and/or policy typelist argument) to provide these only when possible? -#define VALUE_ARRAY_ADD 1 -#define VALUE_ARRAY_MUL 1 -#define VALUE_ARRAY_BITWISE 0 -#define VALUE_ARRAY_OSTREAM 1 - -#if VALUE_ARRAY_OSTREAM -# include -#endif - -// valarray like in that size is fixed (so saves space compared to vector), but same interface as vector (less resize/push_back/insert, of course) -template > -class ValueArray : A // private inheritance so stateless allocator adds no size. -{ - typedef ValueArray Self; -public: -#if VALUE_ARRAY_OSTREAM - friend inline std::ostream & operator << (std::ostream &o,Self const& s) { - o<<'['; - for (unsigned i=0,e=s.size();i1?sizeof(T)/sizeof(T*):1; - //space optimization: SV_MAX T will fit inside what would otherwise be a pointer to heap data. todo in the far future if bored. - typedef T value_type; - typedef T& reference; - typedef T const& const_reference; - typedef T* iterator; - typedef T const* const_iterator; - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - typedef T* pointer; - - size_type size() const { return sz; } - bool empty() const { return size() == 0; } - - iterator begin() { return array; } - iterator end() { return array + size(); } - const_iterator begin() const { return array; } - const_iterator end() const { return array + size(); } - - reference operator[](size_type pos) { return array[pos]; } - const_reference operator[](size_type pos) const { return array[pos]; } - - reference at(size_type pos) { return array[pos]; } - const_reference at(size_type pos) const { return array[pos]; } - - reference front() { return array[0]; } - reference back() { return array[sz-1]; } - - const_reference front() const { return array[0]; } - const_reference back() const { return array[sz-1]; } - - ValueArray() : sz(0), array(NULL) {} - - explicit ValueArray(size_type s, const_reference t = T()) - { - init(s,t); - } - -protected: - inline void init(size_type s, const_reference t = T()) { - sz=s; - array=s ? A::allocate(s) : 0; - for (size_type i = 0; i != sz; ++i) { A::construct(array + i,t); } - } -public: - void resize(size_type s, const_reference t = T()) { - clear(); - init(s,t); - } - - template - ValueArray(I itr, I end) - : sz(std::distance(itr,end)) - , array(A::allocate(sz)) - { - copy_construct(itr,end,array); - } - - ~ValueArray() { - clear(); - } - -#undef VALUE_ARRAY_OPEQ -#define VALUE_ARRAY_OPEQ(op) template Self & operator op (ValueArray const& o) { assert(sz==o.sz); for (int i=0,e=sz;i<=e;++i) array[i] op o.array[i]; return *this; } -#if VALUE_ARRAY_ADD - VALUE_ARRAY_OPEQ(+=) - VALUE_ARRAY_OPEQ(-=) -#endif -#if VALUE_ARRAY_MUL - VALUE_ARRAY_OPEQ(*=) - VALUE_ARRAY_OPEQ(/=) -#endif -#if VALUE_ARRAY_BITWISE - VALUE_ARRAY_OPEQ(|=) - VALUE_ARRAY_OPEQ(*=) -#endif -#undef VALUE_ARRAY_OPEQ -#undef VALUE_ARRAY_BINOP -#define VALUE_ARRAY_BINOP(op,opeq) template friend inline Self operator op (Self x,ValueArray const& y) { x opeq y; return x; } -#if VALUE_ARRAY_ADD - VALUE_ARRAY_BINOP(+,+=) - VALUE_ARRAY_BINOP(-,-=) -#endif -#if VALUE_ARRAY_MUL - VALUE_ARRAY_BINOP(*,*=) - VALUE_ARRAY_BINOP(/,/=) -#endif -#if VALUE_ARRAY_BITWISE - VALUE_ARRAY_BINOP(|,|=) - VALUE_ARRAY_BINOP(*,*=) -#endif - -#undef VALUE_ARRAY_BINOP - void clear() - { - for (size_type i = sz; i != 0; --i) { - A::destroy(array + (i - 1)); - } - if (array != NULL) A::deallocate(array,sz); - } - - void swap(ValueArray& other) - { - std::swap(sz,other.sz); - std::swap(array,other.array); - } - - ValueArray(ValueArray const& other) - : A(other) - , sz(other.sz) - , array(A::allocate(sz)) - - { - copy_construct(other.begin(),other.end(),array); - } - - ValueArray& operator=(ValueArray const& other) - { - ValueArray(other).swap(*this); - return *this; - } - - template - ValueArray( Range const& v - , typename boost::disable_if< boost::is_integral >::type* = 0) - : sz(boost::size(v)) - , array(A::allocate(sz)) - { - copy_construct(boost::begin(v),boost::end(v),array); - } - - template typename - boost::disable_if< - boost::is_integral - , ValueArray>::type& operator=(Range const& other) - { - ValueArray(other).swap(*this); - return *this; - } - -private: -//friend class boost::serialization::access; - -template -void copy_construct(I1 itr, I1 end, I2 into) -{ - for (; itr != end; ++itr, ++into) A::construct(into,*itr); -} - -template -void save(Archive& ar, unsigned int version) const -{ - ar << sz; - for (size_type i = 0; i != sz; ++i) ar << at(i); -} - -template -void load(Archive& ar, unsigned int version) -{ - size_type s; - ar >> s; - ValueArray v(s); - for (size_type i = 0; i != s; ++i) ar >> v[i]; - this->swap(v); -} -#ifdef USE_BOOST_SERIALIZE -BOOST_SERIALIZATION_SPLIT_MEMBER() -#endif -size_type sz; -pointer array; -}; - - -template -bool operator==(ValueArray const& v1, ValueArray const& v2) -{ - return (v1.size() == v2.size()) and - std::equal(v1.begin(),v1.end(),v2.begin()); -} - - -template -bool operator< (ValueArray const& v1, ValueArray const& v2) -{ - return std::lexicographical_compare( v1.begin() - , v1.end() - , v2.begin() - , v2.end() ); -} - -template -void memcpy(void *out,ValueArray const& v) { - std::memcpy(out,v.begin(),v.size()*sizeof(T)); -} - - -#endif diff --git a/decoder/weights-fsa b/decoder/weights-fsa index fe01d13a..3cc96c2f 100644 --- a/decoder/weights-fsa +++ b/decoder/weights-fsa @@ -3,6 +3,7 @@ Arity_1 1.12426238048012 Arity_2 1.14986187839554 Glue -0.04589037041388 LanguageModel 1.09051 +LM 1.09051 PassThrough -3.66226367902928 PhraseModel_0 -1.94633451863252 PhraseModel_1 -0.1475347695476 -- cgit v1.2.3