From 769aadfc431f2eec68c889b65b8939a4d35f56e9 Mon Sep 17 00:00:00 2001
From: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>
Date: Wed, 18 Aug 2010 21:26:55 +0000
Subject: ValueArray instead of string for state - same LM decode scores

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@593 ec762483-ff6d-05da-a07a-a48fb63a330f
---
 decoder/apply_fsa_models.cc |  76 +++++++++++++-
 decoder/apply_models.cc     |  20 ++--
 decoder/cdec.cc             |  23 ++++-
 decoder/decode.sh           |  10 ++
 decoder/ff.cc               |   8 +-
 decoder/ff.h                |   9 +-
 decoder/fsa-decode.sh       |   3 +-
 decoder/perro.sh            |   2 +-
 decoder/value_array.h       | 241 --------------------------------------------
 decoder/weights-fsa         |   1 +
 graehl/NOTES.earley         |  17 ++++
 utils/filelib.h             |  14 +++
 utils/value_array.h         | 241 ++++++++++++++++++++++++++++++++++++++++++++
 13 files changed, 396 insertions(+), 269 deletions(-)
 create mode 100755 decoder/decode.sh
 delete mode 100755 decoder/value_array.h
 create mode 100755 utils/value_array.h
diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc
index 8771863c..b43c02a4 100755
--- a/decoder/apply_fsa_models.cc
+++ b/decoder/apply_fsa_models.cc
@@ -11,6 +11,10 @@
 #include "hg_cfg.h"
 #include "utoa.h"
 #include "hash.h"
+#include "value_array.h"
+
+#define DFSA(x) x
+#define DPFSA(x) x
 
 using namespace std;
 
@@ -26,31 +30,91 @@ typedef CFG::RuleHandle RuleHandle;
 
 namespace {
 
+/*
+
+1) A -> x . * (trie)
+
+this is somewhat nice.  cost pushed for best first, of course.  similar benefit as left-branching binarization without the explicit predict/complete steps?
+
+vs. just
+
+2) * -> x . y
+
+here you have to potentially list out all A -> . x y as items * -> . x y immediately, and shared rhs seqs won't be shared except at the usual single-NT predict/complete.  of course, the prediction of items -> . x y can occur lazy best-first.
+
+vs.
+
+3) * -> x . *
+
+with 3, we predict all sorts of useless items - that won't give us our goal A and may not participate in any parse.  this is not a good option at all.
+*/
+
+#define TRIE_START_LHS 1
+// option 1) above.  0 would be option 3), which is dumb
+
 // if we don't greedy-binarize, we want to encode recognized prefixes p (X -> p . rest) efficiently.  if we're doing this, we may as well also push costs so we can best-first select rules in a lazy fashion.  this is effectively left-branching binarization, of course.
 template <class K,class V,class Hash>
-struct prefix_map_type {
+struct fsa_map_type {
   typedef std::map<K,V> type;
 };
 //template typedef
-#define PREFIX_MAP(k,v) prefix_map_type<k,v,boost::hash<k> >::type
-typedef NTHandle LHS;
+#define FSA_MAP(k,v) fsa_map_type<k,v,boost::hash<k> >::type
+typedef WordID LHS; // so this will be -NTHandle.
+
+
 struct PrefixTrieNode {
   prob_t backward; // (viterbi) backward prob (for cost pushing)
-  typedef PREFIX_MAP(LHS,RuleHandle) Completed; // can only have one rule w/ a given signature (duplicates should be collapsed when making CFG).  but there may be multiple rules, with different LHS
+#
+#if TRIE_START_LHS
+  typedef FSA_MAP(LHS,RuleHandle) Completed; // can only have one rule w/ a given signature (duplicates should be collapsed when making CFG).  but there may be multiple rules, with different LHS
   Completed completed;
-  typedef PREFIX_MAP(WordID,PrefixTrieNode *) Adj;
+#else
+  bool complete; // may also have successors, of course
+#endif
+  // instead of completed map, we have trie start w/ lhs.?
+
+  // outgoing edges will be ordered highest p to worst p
+  struct Edge {
+    DPFSA(prob_t p;) // we can probably just store deltas, but for debugging remember the full p
+    prob_t delta; // p[i]=delta*p[i-1], with p(-1)=1
+    PrefixTrieNode *dest;
+    WordID w;
+  };
+  typedef FSA_MAP(WordID,Edge) BuildAdj;
+  BuildAdj build_adj; //TODO: move builder elsewhere?
+  typedef ValueArray<Edge>  Adj;
+//  typedef vector<Edge> Adj;
   Adj adj;
   //TODO:
 };
 
+#if TRIE_START_LHS
+//Trie starts with lhs, then continues w/ rhs
+#else
+// just rhs.  i think item names should exclude lhs if possible (most sharing).  get prefix cost w/ forward = viterbi (global best-first admissible h only) and it should be ok?
+#endif
+
+// costs are pushed.
 struct PrefixTrie {
   CFG const* cfgp;
   CFG const& cfg() const { return *cfgp; }
   PrefixTrie(CFG const& cfg) : cfgp(&cfg) {
+
 //TODO:
   }
 };
 
+// these should go in a global best-first queue
+struct Item {
+  prob_t forward;
+  /* The forward probability alpha_i(X[k]->x.y) is the sum of the probabilities of all
+     constrained paths of length i that end in state X[k]->x.y*/
+  prob_t inner;
+  /* The inner probability beta_i(X[k]->x.y) is the sum of the probabilities of all
+     paths of length i-k that start in state X[k,k]->.xy and end in X[k,i]->x.y, and generate the input symbols x[k,...,i-1] */
+
+};
+
 }//anon ns
 
 
@@ -102,6 +166,8 @@ void ApplyFsa::ApplyBottomUp()
 void ApplyFsa::ApplyEarley()
 {
   hgcfg.GiveCFG(cfg);
+  cfg.SortLocalBestFirst();
+  // don't need to uniq - option to do that already exists in cfg_options
   //TODO:
 }
 
diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc
index 635f1a9c..60b6f498 100644
--- a/decoder/apply_models.cc
+++ b/decoder/apply_models.cc
@@ -43,7 +43,7 @@ struct Candidate {
                                        // into the +LM forest
   const Hypergraph::Edge* in_edge_;    // in -LM forest
   Hypergraph::Edge out_edge_;
-  string state_;
+  FFState state_;
   const JVector j_;
   prob_t vit_prob_;            // these are fixed until the cand
                                // is popped, then they may be updated
@@ -53,7 +53,7 @@ struct Candidate {
             const JVector& j,
             const Hypergraph& out_hg,
             const vector<CandidateList>& D,
-            const vector<string>& node_states,
+            const FFStates& node_states,
             const SentenceMetadata& smeta,
             const ModelSet& models,
             bool is_goal) :
@@ -74,7 +74,7 @@ struct Candidate {
   void InitializeCandidate(const Hypergraph& out_hg,
                            const SentenceMetadata& smeta,
                            const vector<vector<Candidate*> >& D,
-                           const vector<string>& node_states,
+                           const FFStates& node_states,
                            const ModelSet& models,
                            const bool is_goal) {
     const Hypergraph::Edge& in_edge = *in_edge_;
@@ -97,7 +97,7 @@ struct Candidate {
     prob_t edge_estimate = prob_t::One();
     if (is_goal) {
       assert(tail.size() == 1);
-      const string& ant_state = node_states[tail.front()];
+      const FFState& ant_state = node_states[tail.front()];
       models.AddFinalFeatures(ant_state, &out_edge_, smeta);
     } else {
       models.AddFeaturesToEdge(smeta, out_hg, node_states, &out_edge_, &state_, &edge_estimate);
@@ -154,7 +154,7 @@ struct CandidateUniquenessEquals {
 };
 
 typedef unordered_set<const Candidate*, CandidateUniquenessHash, CandidateUniquenessEquals> UniqueCandidateSet;
-typedef unordered_map<string, Candidate*, boost::hash<string> > State2Node;
+typedef unordered_map<FFState, Candidate*, boost::hash<FFState> > State2Node;
 
 class CubePruningRescorer {
 
@@ -304,7 +304,7 @@ public:
   vector<CandidateList> D;   // maps nodes in in-HG to the
                              // equivalent nodes (many due to state
                              // splits) in the out-HG.
-  vector<string> node_states_;  // for each node in the out-HG what is
+  FFStates node_states_;  // for each node in the out-HG what is
                              // its q function value?
   const int pop_limit_;
 };
@@ -320,7 +320,7 @@ struct NoPruningRescorer {
     node_states_.reserve(kRESERVE_NUM_NODES);
   }
 
-  typedef unordered_map<string, int, boost::hash<string> > State2NodeIndex;
+  typedef unordered_map<FFState, int, boost::hash<FFState> > State2NodeIndex;
 
   void ExpandEdge(const Hypergraph::Edge& in_edge, bool is_goal, State2NodeIndex* state2node) {
     const int arity = in_edge.Arity();
@@ -335,10 +335,10 @@ struct NoPruningRescorer {
       for (int i = 0; i < arity; ++i)
         tail[i] = nodemap[in_edge.tail_nodes_[i]][tail_iter[i]];
       Hypergraph::Edge* new_edge = out.AddEdge(in_edge, tail);
-      string head_state;
+      FFState head_state;
       if (is_goal) {
         assert(tail.size() == 1);
-        const string& ant_state = node_states_[tail.front()];
+        const FFState& ant_state = node_states_[tail.front()];
         models.AddFinalFeatures(ant_state, new_edge,smeta);
       } else {
         prob_t edge_estimate; // this is a full intersection, so we disregard this
@@ -394,7 +394,7 @@ struct NoPruningRescorer {
   Hypergraph& out;
 
   vector<vector<int> > nodemap;
-  vector<string> node_states_;  // for each node in the out-HG what is
+  FFStates node_states_;  // for each node in the out-HG what is
                              // its q function value?
 };
 
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index 6b5543f8..a4c3613b 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -46,6 +46,7 @@ using namespace std::tr1;
 using boost::shared_ptr;
 namespace po = boost::program_options;
 
+vector<string> cfg_files;
 bool show_config=false;
 bool show_weights=false;
 bool verbose_feature_functions=true;
@@ -147,7 +148,7 @@ void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* c
         ("scfg_no_hiero_glue_grammar,n", "No Hiero glue grammar (nb. by default the SCFG decoder adds Hiero glue rules)")
         ("scfg_default_nt,d",po::value<string>()->default_value("X"),"Default non-terminal symbol in SCFG")
         ("scfg_max_span_limit,S",po::value<int>()->default_value(10),"Maximum non-terminal span limit (except \"glue\" grammar)")
-    ("show_config", po::bool_switch(&show_config), "show contents of loaded -c config files.  note: this will have to appear before the -c argument to take effect")
+    ("show_config", po::bool_switch(&show_config), "show contents of loaded -c config files.")
     ("show_weights", po::bool_switch(&show_weights), "show effective feature weights")
         ("show_joshua_visualization,J", "Produce output compatible with the Joshua visualization tools")
         ("show_tree_structure", "Show the Viterbi derivation structure")
@@ -187,7 +188,7 @@ void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* c
   cfg_options.AddOptions(&cfgo);
   po::options_description clo("Command line options");
   clo.add_options()
-    ("config,c", po::value<vector<string> >(), "Configuration file(s) - latest has priority")
+    ("config,c", po::value<vector<string> >(&cfg_files), "Configuration file(s) - latest has priority")
         ("help,h", "Print this help message and exit")
     ("usage,u", po::value<string>(), "Describe a feature function type")
     ("compgen", "Print just option names suitable for bash command line completion builtin 'compgen'")
@@ -205,19 +206,33 @@ void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* c
     exit(0);
   }
   ShowBanner();
+  if (conf.count("show_config")) // special handling needed because we only want to notify() once.
+    show_config=true;
   if (conf.count("config")) {
     typedef vector<string> Cs;
     Cs cs=conf["config"].as<Cs>();
     for (int i=0;i<cs.size();++i) {
       string cfg=cs[i];
       cerr << "Configuration file: " << cfg << endl;
-      if (show_config)
-        CopyFile(cfg,cerr);
       ReadFile conff(cfg);
       po::store(po::parse_config_file(*conff, dconfig_options), conf);
     }
   }
   po::notify(conf);
+  if (show_config && !cfg_files.empty()) {
+    cerr<< "\nConfig files:\n\n";
+    for (int i=0;i<cfg_files.size();++i) {
+      string cfg=cfg_files[i];
+      cerr << "Configuration file: " << cfg << endl;
+      CopyFile(cfg,cerr);
+      cerr << "(end config "<<cfg<<"\n\n";
+    }
+    cerr <<"Command line:";
+    for (int i=0;i<argc;++i)
+      cerr<<" "<<argv[i];
+    cerr << "\n\n";
+  }
+
 
   if (conf.count("list_feature_functions")) {
     cerr << "Available feature functions (specify with -F; describe with -u FeatureName):\n";
diff --git a/decoder/decode.sh b/decoder/decode.sh
new file mode 100755
index 00000000..13cc6620
--- /dev/null
+++ b/decoder/decode.sh
@@ -0,0 +1,10 @@
+d=$(dirname `readlink -f $0`)/
+decode() {
+if [ "$lm" ] ; then
+    lmargs0=-F
+    lmargs1="LanguageModel lm.gz -n LM"
+fi
+set -x
+$gdb $d/cdec -c $d/${cfg:=cdec-fsa}.ini -i $d/${in:=1dev.ur} $lmargs0 "$lmargs1" --show_features --show_config --show_weights "$@"
+set +x
+}
diff --git a/decoder/ff.cc b/decoder/ff.cc
index 68249b42..7bdd21e3 100644
--- a/decoder/ff.cc
+++ b/decoder/ff.cc
@@ -165,13 +165,13 @@ ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>
 
 void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta,
                                  const Hypergraph& /* hg */,
-                                 const vector<string>& node_states,
+                                 const FFStates& node_states,
                                  Hypergraph::Edge* edge,
-                                 string* context,
+                                 FFState* context,
                                  prob_t* combination_cost_estimate) const {
   edge->reset_info();
   context->resize(state_size_);
-  memset(&(*context)[0], 0, state_size_); //FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation.  use SmallVector<char>?  ValueArray? (higher performance perhaps, fixed size)
+  memset(&(*context)[0], 0, state_size_);
   SparseVector<double> est_vals;  // only computed if combination_cost_estimate is non-NULL
   if (combination_cost_estimate) *combination_cost_estimate = prob_t::One();
   for (int i = 0; i < models_.size(); ++i) {
@@ -193,7 +193,7 @@ void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta,
   edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));
 }
 
-void ModelSet::AddFinalFeatures(const std::string& state, Hypergraph::Edge* edge,SentenceMetadata const& smeta) const {
+void ModelSet::AddFinalFeatures(const FFState& state, Hypergraph::Edge* edge,SentenceMetadata const& smeta) const {
   assert(1 == edge->rule_->Arity());
   edge->reset_info();
   for (int i = 0; i < models_.size(); ++i) {
diff --git a/decoder/ff.h b/decoder/ff.h
index fe4411cd..726845c4 100644
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ -14,6 +14,7 @@
 #include "fdict.h"
 #include "hg.h"
 #include "feature_vector.h"
+#include "value_array.h"
 
 class SentenceMetadata;
 class FeatureFunction;  // see definition below
@@ -242,6 +243,8 @@ void show_all_features(std::vector<FFp> const& models_,DenseWeightVector &weight
   return show_features(all_features(models_,weights_,&warn,warn_fid_0),weights_,out,warn,warn_zero_wt);
 }
 
+typedef std::string FFState; //FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation.  use ValueArray instead? (higher performance perhaps, save a word due to fixed size)
+typedef std::vector<FFState> FFStates;
 
 // this class is a set of FeatureFunctions that can be used to score, rescore,
 // etc. a (translation?) forest
@@ -257,13 +260,13 @@ class ModelSet {
   // must be.  edge features are supposed to be overwritten, not added to (possibly because rule features aren't in ModelSet so need to be left alone
   void AddFeaturesToEdge(const SentenceMetadata& smeta,
                          const Hypergraph& hg,
-                         const std::vector<std::string>& node_states,
+                         const FFStates& node_states,
                          Hypergraph::Edge* edge,
-                         std::string* residual_context,
+                         FFState* residual_context,
                          prob_t* combination_cost_estimate = NULL) const;
 
   //this is called INSTEAD of above when result of edge is goal (must be a unary rule - i.e. one variable, but typically it's assumed that there are no target terminals either (e.g. for LM))
-  void AddFinalFeatures(const std::string& residual_context,
+  void AddFinalFeatures(const FFState& residual_context,
                         Hypergraph::Edge* edge,
                         SentenceMetadata const& smeta) const;
 
diff --git a/decoder/fsa-decode.sh b/decoder/fsa-decode.sh
index c53867e3..66879523 100755
--- a/decoder/fsa-decode.sh
+++ b/decoder/fsa-decode.sh
@@ -1,2 +1,3 @@
 d=$(dirname `readlink -f $0`)/
-$gdb $d/cdec -c $d/cdec-fsa.ini -i $d/1dev.ur "$@"
+. $d/decode.sh
+in=1dev.ur cfg=cdec-fsa decode "$@" # forward this script's arguments to cdec (as the old one-liner did); decode() only sees its own "$@"
diff --git a/decoder/perro.sh b/decoder/perro.sh
index 836ad07f..3e54ac71 100755
--- a/decoder/perro.sh
+++ b/decoder/perro.sh
@@ -1 +1 @@
-$gdb $cdec -k 30 --show_features -c fsa-hiero.ini -i perro.ur "$@"
+$gdb $cdec "$@" -k 30 --show_features -c fsa-hiero.ini -i perro.ur 
diff --git a/decoder/value_array.h b/decoder/value_array.h
deleted file mode 100755
index cdf1d697..00000000
--- a/decoder/value_array.h
+++ /dev/null
@@ -1,241 +0,0 @@
-#ifndef VALUE_ARRAY_H
-#define VALUE_ARRAY_H
-
-//TODO: option for non-constructed version (type_traits pod?), option for small array optimization (if sz < N, store inline in union, see small_vector.h)
-
-#include <cstdlib>
-#include <algorithm>
-#include <new>
-#include <boost/range.hpp>
-#include <boost/utility/enable_if.hpp>
-#include <boost/type_traits.hpp>
-#include <cstring>
-#ifdef USE_BOOST_SERIALIZE
-# include <boost/serialization/split_member.hpp>
-# include <boost/serialization/access.hpp>
-#endif
-
-//TODO: use awesome type traits (and/or policy typelist argument) to provide these only when possible?
-#define VALUE_ARRAY_ADD 1
-#define VALUE_ARRAY_MUL 1
-#define VALUE_ARRAY_BITWISE 0
-#define VALUE_ARRAY_OSTREAM 1
-
-#if VALUE_ARRAY_OSTREAM
-# include <iostream>
-#endif
-
-// valarray like in that size is fixed (so saves space compared to vector), but same interface as vector (less resize/push_back/insert, of course)
-template <class T, class A = std::allocator<T> >
-class ValueArray : A // private inheritance so stateless allocator adds no size.
-{
-  typedef ValueArray<T,A> Self;
-public:
-#if VALUE_ARRAY_OSTREAM
-  friend inline std::ostream & operator << (std::ostream &o,Self const& s) {
-    o<<'[';
-    for (unsigned i=0,e=s.size();i<e;++i) {
-        if (i) o<<' ';
-        o<<s[i];
-    }
-    o<<']';
-    return o;
-  }
-#endif
-  static const int SV_MAX=sizeof(T)/sizeof(T*)>1?sizeof(T)/sizeof(T*):1;
-  //space optimization: SV_MAX T will fit inside what would otherwise be a pointer to heap data.  todo in the far future if bored.
-  typedef T value_type;
-  typedef T& reference;
-  typedef T const& const_reference;
-  typedef T* iterator;
-  typedef T const* const_iterator;
-  typedef std::size_t size_type;
-  typedef std::ptrdiff_t difference_type;
-  typedef T* pointer;
-
-  size_type size() const { return sz; }
-  bool empty() const { return size() == 0; }
-
-  iterator begin() { return array; }
-  iterator end() { return array + size(); }
-  const_iterator begin() const { return array; }
-  const_iterator end() const { return array + size(); }
-
-  reference operator[](size_type pos) { return array[pos]; }
-  const_reference operator[](size_type pos) const { return array[pos]; }
-
-  reference at(size_type pos) { return array[pos]; }
-  const_reference at(size_type pos) const { return array[pos]; }
-
-  reference front() { return array[0]; }
-  reference back() { return array[sz-1]; }
-
-  const_reference front() const { return array[0]; }
-  const_reference back() const { return array[sz-1]; }
-
-  ValueArray() : sz(0), array(NULL) {}
-
-  explicit ValueArray(size_type s, const_reference t = T())
-  {
-    init(s,t);
-  }
-
-protected:
-  inline void init(size_type s, const_reference t = T()) {
-    sz=s;
-    array=s ? A::allocate(s) : 0;
-    for (size_type i = 0; i != sz; ++i) { A::construct(array + i,t); }
-  }
-public:
-  void resize(size_type s, const_reference t = T()) {
-    clear();
-    init(s,t);
-  }
-
-  template <class I>
-  ValueArray(I itr, I end)
-    : sz(std::distance(itr,end))
-    , array(A::allocate(sz))
-  {
-    copy_construct(itr,end,array);
-  }
-
-  ~ValueArray() {
-    clear();
-  }
-
-#undef VALUE_ARRAY_OPEQ
-#define VALUE_ARRAY_OPEQ(op) template <class T2,class A2> Self & operator op (ValueArray<T2,A2> const& o) { assert(sz==o.sz); for (int i=0,e=sz;i<=e;++i) array[i] op o.array[i]; return *this; }
-#if VALUE_ARRAY_ADD
-  VALUE_ARRAY_OPEQ(+=)
-  VALUE_ARRAY_OPEQ(-=)
-#endif
-#if VALUE_ARRAY_MUL
-  VALUE_ARRAY_OPEQ(*=)
-  VALUE_ARRAY_OPEQ(/=)
-#endif
-#if VALUE_ARRAY_BITWISE
-  VALUE_ARRAY_OPEQ(|=)
-  VALUE_ARRAY_OPEQ(*=)
-#endif
-#undef VALUE_ARRAY_OPEQ
-#undef VALUE_ARRAY_BINOP
-#define VALUE_ARRAY_BINOP(op,opeq) template <class T2,class A2> friend inline Self operator op (Self x,ValueArray<T2,A2> const& y) { x opeq y; return x; }
-#if VALUE_ARRAY_ADD
-  VALUE_ARRAY_BINOP(+,+=)
-  VALUE_ARRAY_BINOP(-,-=)
-#endif
-#if VALUE_ARRAY_MUL
-  VALUE_ARRAY_BINOP(*,*=)
-  VALUE_ARRAY_BINOP(/,/=)
-#endif
-#if VALUE_ARRAY_BITWISE
-  VALUE_ARRAY_BINOP(|,|=)
-  VALUE_ARRAY_BINOP(*,*=)
-#endif
-
-#undef VALUE_ARRAY_BINOP
-  void clear()
-  {
-    for (size_type i = sz; i != 0; --i) {
-      A::destroy(array + (i - 1));
-    }
-    if (array != NULL) A::deallocate(array,sz);
-  }
-
-  void swap(ValueArray& other)
-  {
-    std::swap(sz,other.sz);
-    std::swap(array,other.array);
-  }
-
-  ValueArray(ValueArray const& other)
-    : A(other)
-    , sz(other.sz)
-    , array(A::allocate(sz))
-
-  {
-    copy_construct(other.begin(),other.end(),array);
-  }
-
-  ValueArray& operator=(ValueArray const& other)
-  {
-    ValueArray(other).swap(*this);
-    return *this;
-  }
-
-  template <class Range>
-  ValueArray( Range const& v
-              , typename boost::disable_if< boost::is_integral<Range> >::type* = 0)
-    : sz(boost::size(v))
-    , array(A::allocate(sz))
-  {
-    copy_construct(boost::begin(v),boost::end(v),array);
-  }
-
-  template <class Range> typename
-  boost::disable_if<
-    boost::is_integral<Range>
-   , ValueArray>::type& operator=(Range const& other)
-  {
-    ValueArray(other).swap(*this);
-    return *this;
-  }
-
-private:
-//friend class boost::serialization::access;
-
-template <class I1, class I2>
-void copy_construct(I1 itr, I1 end, I2 into)
-{
-  for (; itr != end; ++itr, ++into) A::construct(into,*itr);
-}
-
-template <class Archive>
-void save(Archive& ar, unsigned int version) const
-{
-  ar << sz;
-  for (size_type i = 0; i != sz; ++i) ar << at(i);
-}
-
-template <class Archive>
-void load(Archive& ar, unsigned int version)
-{
-  size_type s;
-  ar >> s;
-  ValueArray v(s);
-  for (size_type i = 0; i != s; ++i) ar >> v[i];
-  this->swap(v);
-}
-#ifdef USE_BOOST_SERIALIZE
-BOOST_SERIALIZATION_SPLIT_MEMBER()
-#endif
-size_type sz;
-pointer array;
-};
-
-
-template <class T, class A>
-bool operator==(ValueArray<T,A> const& v1, ValueArray<T,A> const& v2)
-{
-  return (v1.size() == v2.size()) and
-    std::equal(v1.begin(),v1.end(),v2.begin());
-}
-
-
-template <class T,class A>
-bool operator< (ValueArray<T,A> const& v1, ValueArray<T,A> const& v2)
-{
-  return std::lexicographical_compare( v1.begin()
-                                       , v1.end()
-                                       , v2.begin()
-                                       , v2.end() );
-}
-
-template <class T,class A>
-void memcpy(void *out,ValueArray<T,A> const& v) {
-  std::memcpy(out,v.begin(),v.size()*sizeof(T));
-}
-
-
-#endif
diff --git a/decoder/weights-fsa b/decoder/weights-fsa
index fe01d13a..3cc96c2f 100644
--- a/decoder/weights-fsa
+++ b/decoder/weights-fsa
@@ -3,6 +3,7 @@ Arity_1 1.12426238048012
 Arity_2 1.14986187839554
 Glue -0.04589037041388
 LanguageModel 1.09051
+LM 1.09051
 PassThrough -3.66226367902928
 PhraseModel_0 -1.94633451863252
 PhraseModel_1 -0.1475347695476
diff --git a/graehl/NOTES.earley b/graehl/NOTES.earley
index 6f94f898..4156063a 100755
--- a/graehl/NOTES.earley
+++ b/graehl/NOTES.earley
@@ -88,3 +88,20 @@ then X[k,i]->rY.s (a',b') with a' += a*b'', b' += b*b''
 ==========
 
 is forward cost viterbi fine?  i.e. can i have items whose names ignore the lhs NT (look up predictions that i finish lazily / graph structured?)
+======
+
+1) A -> x . * (trie)
+
+this is somewhat nice.  cost pushed for best first, of course.  similar benefit as left-branching binarization without the explicit predict/complete steps?
+
+vs. just
+
+2) * -> x . y
+
+here you have to potentially list out all A -> . x y as items * -> . x y immediately, and shared rhs seqs won't be shared except at the usual single-NT predict/complete.  of course, the prediction of items -> . x y can occur lazy best-first.
+
+vs.
+
+3) * -> x . *
+
+with 3, we predict all sorts of useless items - that won't give us our goal A and may not participate in any parse.  this is not a good option at all.
diff --git a/utils/filelib.h b/utils/filelib.h
index b9fef9a7..dda98671 100644
--- a/utils/filelib.h
+++ b/utils/filelib.h
@@ -103,4 +103,18 @@ class WriteFile : public BaseFile<std::ostream> {
   }
 };
 
+inline void CopyFile(std::istream &in,std::ostream &out) { // copy everything remaining in `in` to `out`
+  if (!in.rdbuf() || in.rdbuf()->sgetc() != std::istream::traits_type::eof()) out << in.rdbuf(); // operator<<(streambuf*) sets failbit on `out` when it inserts nothing, so skip empty input (sgetc only peeks)
+}
+
+inline void CopyFile(std::string const& inf,std::ostream &out) {
+  ReadFile r(inf);
+  CopyFile(*r,out);
+}
+
+inline void CopyFile(std::string const& inf,std::string const& outf) {
+  WriteFile w(outf);
+  CopyFile(inf,*w);
+}
+
 #endif
diff --git a/utils/value_array.h b/utils/value_array.h
new file mode 100755
index 00000000..cdf1d697
--- /dev/null
+++ b/utils/value_array.h
@@ -0,0 +1,241 @@
+#ifndef VALUE_ARRAY_H
+#define VALUE_ARRAY_H
+
+//TODO: option for non-constructed version (type_traits pod?), option for small array optimization (if sz < N, store inline in union, see small_vector.h)
+
+#include <cstdlib>
+#include <algorithm>
+#include <new>
+#include <boost/range.hpp>
+#include <boost/utility/enable_if.hpp>
+#include <boost/type_traits.hpp>
+#include <cstring>
+#ifdef USE_BOOST_SERIALIZE
+# include <boost/serialization/split_member.hpp>
+# include <boost/serialization/access.hpp>
+#endif
+
+//TODO: use awesome type traits (and/or policy typelist argument) to provide these only when possible?
+#define VALUE_ARRAY_ADD 1
+#define VALUE_ARRAY_MUL 1
+#define VALUE_ARRAY_BITWISE 0
+#define VALUE_ARRAY_OSTREAM 1
+
+#if VALUE_ARRAY_OSTREAM
+# include <iostream>
+#endif
+
+// valarray like in that size is fixed (so saves space compared to vector), but same interface as vector (less resize/push_back/insert, of course)
+template <class T, class A = std::allocator<T> >
+class ValueArray : A // private inheritance so stateless allocator adds no size.
+{
+  typedef ValueArray<T,A> Self;
+public:
+#if VALUE_ARRAY_OSTREAM
+  friend inline std::ostream & operator << (std::ostream &o,Self const& s) {
+    o<<'[';
+    for (unsigned i=0,e=s.size();i<e;++i) {
+        if (i) o<<' ';
+        o<<s[i];
+    }
+    o<<']';
+    return o;
+  }
+#endif
+  static const int SV_MAX=sizeof(T)/sizeof(T*)>1?sizeof(T)/sizeof(T*):1;
+  //space optimization: SV_MAX T will fit inside what would otherwise be a pointer to heap data.  todo in the far future if bored.
+  typedef T value_type;
+  typedef T& reference;
+  typedef T const& const_reference;
+  typedef T* iterator;
+  typedef T const* const_iterator;
+  typedef std::size_t size_type;
+  typedef std::ptrdiff_t difference_type;
+  typedef T* pointer;
+
+  size_type size() const { return sz; }
+  bool empty() const { return size() == 0; }
+
+  iterator begin() { return array; }
+  iterator end() { return array + size(); }
+  const_iterator begin() const { return array; }
+  const_iterator end() const { return array + size(); }
+
+  reference operator[](size_type pos) { return array[pos]; }
+  const_reference operator[](size_type pos) const { return array[pos]; }
+
+  reference at(size_type pos) { return array[pos]; }
+  const_reference at(size_type pos) const { return array[pos]; }
+
+  reference front() { return array[0]; }
+  reference back() { return array[sz-1]; }
+
+  const_reference front() const { return array[0]; }
+  const_reference back() const { return array[sz-1]; }
+
+  ValueArray() : sz(0), array(NULL) {}
+
+  explicit ValueArray(size_type s, const_reference t = T())
+  {
+    init(s,t);
+  }
+
+protected:
+  inline void init(size_type s, const_reference t = T()) {
+    sz=s;
+    array=s ? A::allocate(s) : 0;
+    for (size_type i = 0; i != sz; ++i) { A::construct(array + i,t); }
+  }
+public:
+  void resize(size_type s, const_reference t = T()) {
+    clear();
+    init(s,t);
+  }
+
+  template <class I>
+  ValueArray(I itr, I end)
+    : sz(std::distance(itr,end))
+    , array(A::allocate(sz))
+  {
+    copy_construct(itr,end,array);
+  }
+
+  ~ValueArray() {
+    clear();
+  }
+
+#undef VALUE_ARRAY_OPEQ // elementwise compound assignment from an equal-sized array: visits exactly sz elements and reads o through its public interface (o may be a different instantiation, whose privates are inaccessible)
+#define VALUE_ARRAY_OPEQ(op) template <class T2,class A2> Self & operator op (ValueArray<T2,A2> const& o) { assert(sz==o.size()); for (size_type i=0;i!=sz;++i) array[i] op o[i]; return *this; }
+#if VALUE_ARRAY_ADD
+  VALUE_ARRAY_OPEQ(+=)
+  VALUE_ARRAY_OPEQ(-=)
+#endif
+#if VALUE_ARRAY_MUL
+  VALUE_ARRAY_OPEQ(*=)
+  VALUE_ARRAY_OPEQ(/=)
+#endif
+#if VALUE_ARRAY_BITWISE
+  VALUE_ARRAY_OPEQ(|=)
+  VALUE_ARRAY_OPEQ(&=)
+#endif
+#undef VALUE_ARRAY_OPEQ
+#undef VALUE_ARRAY_BINOP // binary op: take the lhs by value, apply the matching compound assignment, return the copy
+#define VALUE_ARRAY_BINOP(op,opeq) template <class T2,class A2> friend inline Self operator op (Self x,ValueArray<T2,A2> const& y) { x opeq y; return x; }
+#if VALUE_ARRAY_ADD
+  VALUE_ARRAY_BINOP(+,+=)
+  VALUE_ARRAY_BINOP(-,-=)
+#endif
+#if VALUE_ARRAY_MUL
+  VALUE_ARRAY_BINOP(*,*=)
+  VALUE_ARRAY_BINOP(/,/=)
+#endif
+#if VALUE_ARRAY_BITWISE
+  VALUE_ARRAY_BINOP(|,|=)
+  VALUE_ARRAY_BINOP(&,&=)
+#endif
+
+#undef VALUE_ARRAY_BINOP
+  void clear()
+  {
+    // destroy elements in reverse order, release the storage, then reset to the empty state
+    for (size_type i = sz; i != 0; --i) A::destroy(array + (i - 1));
+    if (array != NULL) A::deallocate(array,sz);
+    sz = 0; array = NULL; // otherwise a second clear() (e.g. from ~ValueArray) would destroy/free the same storage again
+  }
+
+  void swap(ValueArray& other)
+  {
+    std::swap(sz,other.sz);
+    std::swap(array,other.array);
+  }
+
+  ValueArray(ValueArray const& other)
+    : A(other)
+    , sz(other.sz)
+    , array(A::allocate(sz))
+
+  {
+    copy_construct(other.begin(),other.end(),array);
+  }
+
+  ValueArray& operator=(ValueArray const& other)
+  {
+    ValueArray(other).swap(*this);
+    return *this;
+  }
+
+  template <class Range>
+  ValueArray( Range const& v
+              , typename boost::disable_if< boost::is_integral<Range> >::type* = 0)
+    : sz(boost::size(v))
+    , array(A::allocate(sz))
+  {
+    copy_construct(boost::begin(v),boost::end(v),array);
+  }
+
+  template <class Range> typename
+  boost::disable_if<
+    boost::is_integral<Range>
+   , ValueArray>::type& operator=(Range const& other)
+  {
+    ValueArray(other).swap(*this);
+    return *this;
+  }
+
+private:
+//friend class boost::serialization::access;
+
+template <class I1, class I2>
+void copy_construct(I1 itr, I1 end, I2 into)
+{
+  for (; itr != end; ++itr, ++into) A::construct(into,*itr);
+}
+
+template <class Archive>
+void save(Archive& ar, unsigned int version) const
+{
+  ar << sz;
+  for (size_type i = 0; i != sz; ++i) ar << at(i);
+}
+
+template <class Archive>
+void load(Archive& ar, unsigned int version)
+{
+  size_type s;
+  ar >> s;
+  ValueArray v(s);
+  for (size_type i = 0; i != s; ++i) ar >> v[i];
+  this->swap(v);
+}
+#ifdef USE_BOOST_SERIALIZE
+BOOST_SERIALIZATION_SPLIT_MEMBER()
+#endif
+size_type sz;
+pointer array;
+};
+
+
+template <class T, class A>
+bool operator==(ValueArray<T,A> const& v1, ValueArray<T,A> const& v2)
+{
+  return (v1.size() == v2.size()) and
+    std::equal(v1.begin(),v1.end(),v2.begin());
+}
+
+
+template <class T,class A>
+bool operator< (ValueArray<T,A> const& v1, ValueArray<T,A> const& v2)
+{
+  return std::lexicographical_compare( v1.begin()
+                                       , v1.end()
+                                       , v2.begin()
+                                       , v2.end() );
+}
+
+template <class T,class A>
+void memcpy(void *out,ValueArray<T,A> const& v) {
+  std::memcpy(out,v.begin(),v.size()*sizeof(T));
+}
+
+
+#endif
-- 
cgit v1.2.3