merge with upstream

author: Patrick Simianer <p@simianer.de> 2012-03-13 09:24:47 +0100
committer: Patrick Simianer <p@simianer.de> 2012-03-13 09:24:47 +0100
commit: c3a9ea64251605532c7954959662643a6a927bb7 (patch)
tree: fed6048a5acdaf3834740107771c2bc48f26fd4d /decoder
parent: 867bca3e5fa0cdd63bf032e5859fb5092d9a4ca1 (diff)
parent: a45af4a3704531a8382cd231f6445b3a33b598a3 (diff)
47 files changed, 247 insertions, 303 deletions
diff --git a/decoder/1dev.ur b/decoder/1dev.ur
deleted file mode 100755
index adeaa101..00000000
--- a/decoder/1dev.ur
+++ /dev/null
@@ -1 +0,0 @@
-krAcy ( AstRAf rpwrtRr ) krAcy myN pyr kw mxtlf HAdvAt myN xAtwn smyt 4 AfrAd hlAk hw gyY jbkh smndr sY Ayk $xS ky lA$ mly .
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 30eaf04d..ec51d643 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -63,6 +63,7 @@ libcdec_a_SOURCES = \
   ff.cc \
   ff_rules.cc \
   ff_wordset.cc \
+  ff_context.cc \
   ff_charset.cc \
   ff_lm.cc \
   ff_klm.cc \
@@ -75,7 +76,6 @@ libcdec_a_SOURCES = \
   ff_source_syntax.cc \
   ff_bleu.cc \
   ff_factory.cc \
-  freqdict.cc \
   lexalign.cc \
   lextrans.cc \
   tagger.cc \
diff --git a/decoder/apply_fsa_models.README b/decoder/apply_fsa_models.README
deleted file mode 100755
index 7e116a62..00000000
--- a/decoder/apply_fsa_models.README
+++ /dev/null
@@ -1,21 +0,0 @@
-trie root and trie lhs2[lhs-nodeid] -> trie node
-
-trie node edges (adj) - list of w,dest,p.  dest==0 means it's a completed rule (note: p is redundant with node e.dest->p-p, except in case of dest=0).  we will also use null_wordid (max_int) for dest=0 edges, but that doesn't matter
-
-we intersect by iterating over adj and scoring w/ fsa.  TODO: index for sparse fsa; for now we assume smoothed ngram fsa where all items are scorable.
-
-predicted items: we don't make copies of the pending predictions as we scan toward completion; instead, item backpointers are followed until the prediction (where backpointer=0).  such backpointer=0 items have a queue of prediction-originating items.
-
-reusing completed items using a lookup on pair [NT,a] -> all [NT,a,b] lazy best-first.  b-next (right state) index in lazy index.
-
-perhaps predictors need to register the # of items it has already mated with. (b-next index)
-
-comb-like (cube) t-next (position in trie node edge list), b-next?  or just check chart and don't redup.  depends on whether we want just 1best or kbest deriv - diff. ways of reaching same result are good in kbest.
-
-types of chart items:
-
-A->t.*,a,b (trie node t) with mutable state t-next for generating successor lazily (vs. all at once)
-
-A->t.B,a,b (t-next of A->t.* points to (B,t')): mutable state b-next for choosing which B->b,? to use.  note: such an item can't be queued immediately on its own, but can be added to the pending list of B->b,? ; once any B->b,? is completed then we see if any more b-next are already known; if they're exhausted then we add back to pending list?
-
-A->a,? - list of all known (b,inside prob) such that A[a,b].  we may also choose to represent this as A->.*,a,a.
diff --git a/decoder/apply_fsa_models.h b/decoder/apply_fsa_models.h
index 6561c70c..6561c70c 100755..100644
--- a/decoder/apply_fsa_models.h
+++ b/decoder/apply_fsa_models.h
diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc
index 40fd27e4..9ba59d1b 100644
--- a/decoder/apply_models.cc
+++ b/decoder/apply_models.cc
@@ -270,7 +270,8 @@ public:
       const Hypergraph::Edge& edge = in.edges_[in_edges[i]];
       const JVector j(edge.tail_nodes_.size(), 0);
       cand.push_back(new Candidate(edge, j, out, D, node_states_, smeta, models, is_goal));
-      assert(unique_cands.insert(cand.back()).second);  // these should all be unique!
+      bool is_new = unique_cands.insert(cand.back()).second;
+      assert(is_new);  // these should all be unique!
     }
 //    cerr << "  making heap of " << cand.size() << " candidates\n";
     make_heap(cand.begin(), cand.end(), HeapCandCompare());
@@ -378,7 +379,8 @@ public:
 		  pop_heap(cand.begin(), cand.end(), HeapCandCompare());
 		  Candidate* item = cand.back();
 		  cand.pop_back();
-		  assert(unique_accepted.insert(item).second); // these should all be unique!
+                  bool is_new = unique_accepted.insert(item).second;
+		  assert(is_new); // these should all be unique!
 		  // cerr << "POPPED: " << *item << endl;
 
 		  PushSuccFast2(*item, is_goal, &cand, &unique_accepted);
@@ -419,7 +421,8 @@ public:
           Candidate* new_cand = new Candidate(*item.in_edge_, j, out, D, node_states_, smeta, models, is_goal);
           cand.push_back(new_cand);
           push_heap(cand.begin(), cand.end(), HeapCandCompare());
-          assert(cs->insert(new_cand).second);  // insert into uniqueness set, sanity check
+          bool is_new = cs->insert(new_cand).second;
+          assert(is_new);  // insert into uniqueness set, sanity check
         }
       }
     }
diff --git a/decoder/cdec-gz.ini b/decoder/cdec-gz.ini
deleted file mode 100755
index f9b15420..00000000
--- a/decoder/cdec-gz.ini
+++ /dev/null
@@ -1,7 +0,0 @@
-cubepruning_pop_limit=200
-feature_function=WordPenalty
-feature_function=ArityPenalty
-add_pass_through_rules=true
-formalism=scfg
-grammar=mt09.grammar.gz
-weights=weights.tune.nolm
diff --git a/decoder/cdec-nolm-tuned.ini b/decoder/cdec-nolm-tuned.ini
deleted file mode 100755
index 5ebab747..00000000
--- a/decoder/cdec-nolm-tuned.ini
+++ /dev/null
@@ -1,7 +0,0 @@
-cubepruning_pop_limit=200
-feature_function=WordPenalty
-feature_function=ArityPenalty
-add_pass_through_rules=true
-formalism=scfg
-grammar=mt09.grammar
-weights=weights.tune.nolm
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 4ce5749e..b516c386 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -1,6 +1,7 @@
 #include <boost/shared_ptr.hpp>
 
 #include "ff.h"
+#include "ff_context.h"
 #include "ff_spans.h"
 #include "ff_lm.h"
 #include "ff_klm.h"
@@ -42,6 +43,7 @@ void register_feature_functions() {
 #endif
   ff_registry.Register("SpanFeatures", new FFFactory<SpanFeatures>());
   ff_registry.Register("NgramFeatures", new FFFactory<NgramDetector>());
+  ff_registry.Register("RuleContextFeatures", new FFFactory<RuleContextFeatures>());
   ff_registry.Register("RuleIdentityFeatures", new FFFactory<RuleIdentityFeatures>());
   ff_registry.Register("SourceSyntaxFeatures", new FFFactory<SourceSyntaxFeatures>);
   ff_registry.Register("SourceSpanSizeFeatures", new FFFactory<SourceSpanSizeFeatures>);
diff --git a/decoder/cfg.cc b/decoder/cfg.cc
index cd7e66e9..cd7e66e9 100755..100644
--- a/decoder/cfg.cc
+++ b/decoder/cfg.cc
diff --git a/decoder/cfg.h b/decoder/cfg.h
index 8cb29bb9..8cb29bb9 100755..100644
--- a/decoder/cfg.h
+++ b/decoder/cfg.h
diff --git a/decoder/cfg_binarize.h b/decoder/cfg_binarize.h
index ae06f8bf..ae06f8bf 100755..100644
--- a/decoder/cfg_binarize.h
+++ b/decoder/cfg_binarize.h
diff --git a/decoder/cfg_format.h b/decoder/cfg_format.h
index 2f40d483..2f40d483 100755..100644
--- a/decoder/cfg_format.h
+++ b/decoder/cfg_format.h
diff --git a/decoder/cfg_options.h b/decoder/cfg_options.h
index 7b59c05c..7b59c05c 100755..100644
--- a/decoder/cfg_options.h
+++ b/decoder/cfg_options.h
diff --git a/decoder/cfg_test.cc b/decoder/cfg_test.cc
index c61f9f2c..c61f9f2c 100755..100644
--- a/decoder/cfg_test.cc
+++ b/decoder/cfg_test.cc
diff --git a/decoder/decode.sh b/decoder/decode.sh
deleted file mode 100755
index 677e64ad..00000000
--- a/decoder/decode.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-d=$(dirname `readlink -f $0`)/
-decode() {
-if [ "$lm" ] ; then
-    lmargs0=-F
-    lmargs1="LanguageModel lm.gz -n LM"
-fi
-set -x
-$gdb ${cdec:=$d/cdec} -c $d/${cfg:=cdec-fsa}.ini -i $d/${in:=1dev.ur} $lmargs0 "$lmargs1" --show_features --show_config --show_weights "$@"
-set +x
-}
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index b93925d1..53c47d21 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -408,6 +408,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
         ("show_partition,z", "Compute and show the partition (inside score)")
         ("show_conditional_prob", "Output the conditional log prob to STDOUT instead of a translation")
         ("show_cfg_search_space", "Show the search space as a CFG")
+        ("show_target_graph", "Output the target hypergraph")
         ("coarse_to_fine_beam_prune", po::value<double>(), "Prune paths from coarse parse forest before fine parse, keeping paths within exp(alpha>=0)")
         ("ctf_beam_widen", po::value<double>()->default_value(2.0), "Expand coarse pass beam by this factor if no fine parse is found")
         ("ctf_num_widenings", po::value<int>()->default_value(2), "Widen coarse beam this many times before backing off to full parse")
@@ -815,6 +816,9 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
     abort();
   }
 
+  if (conf.count("show_target_graph"))
+    HypergraphIO::WriteTarget(forest);
+
   for (int pass = 0; pass < rescoring_passes.size(); ++pass) {
     const RescoringPass& rp = rescoring_passes[pass];
     const vector<weight_t>& cur_weights = *rp.weight_vector;
diff --git a/decoder/do.tests.sh b/decoder/do.tests.sh
deleted file mode 100755
index b3ddeb18..00000000
--- a/decoder/do.tests.sh
+++ /dev/null
@@ -1 +0,0 @@
-for f in *_test; do ./$f; done
diff --git a/decoder/earley_composer.cc b/decoder/earley_composer.cc
index 48e94a31..b7af801a 100644
--- a/decoder/earley_composer.cc
+++ b/decoder/earley_composer.cc
@@ -329,7 +329,10 @@ class EarleyComposerImpl {
     forest->ReserveNodes(kMAX_NODES);
     assert(sit != g.end());
     Edge* init = new Edge(start_cat_, &sit->second, q_0_);
-    assert(IncorporateNewEdge(init));
+    if (!IncorporateNewEdge(init)) {
+      cerr << "Failed to create initial edge!\n";
+      abort();
+    }
     while (exp_agenda.HasWork() || agenda.HasWork()) {
       while(exp_agenda.HasWork()) {
         const Edge* edge = exp_agenda.Next();
diff --git a/decoder/ff_context.cc b/decoder/ff_context.cc
new file mode 100644
index 00000000..19f9a413
--- /dev/null
+++ b/decoder/ff_context.cc
@@ -0,0 +1,99 @@
+#include "ff_context.h"
+
+#include <sstream>
+#include <cassert>
+#include <cmath>
+
+#include "filelib.h"
+#include "stringlib.h"
+#include "sentence_metadata.h"
+#include "lattice.h"
+#include "fdict.h"
+#include "verbose.h"
+
+using namespace std;
+
+namespace {
+  string Escape(const string& x) {
+    string y = x;
+    for (int i = 0; i < y.size(); ++i) {
+      if (y[i] == '=') y[i]='_';
+      if (y[i] == ';') y[i]='_';
+    }
+    return y;
+  }
+}
+
+RuleContextFeatures::RuleContextFeatures(const std::string& param) {
+  kSOS = TD::Convert("<s>");
+  kEOS = TD::Convert("</s>");
+
+  // TODO param lets you pass in a string from the cdec.ini file
+}
+
+void RuleContextFeatures::PrepareForInput(const SentenceMetadata& smeta) {
+  const Lattice& sl = smeta.GetSourceLattice();
+  current_input.resize(sl.size());
+  for (unsigned i = 0; i < sl.size(); ++i) {
+    if (sl[i].size() != 1) {
+      cerr << "Context features not supported with lattice inputs!\nid=" << smeta.GetSentenceId() << endl;
+      abort();
+    }
+    current_input[i] = sl[i][0].label;
+  }
+}
+
+void RuleContextFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                                const Hypergraph::Edge& edge,
+                                                const vector<const void*>& ant_contexts,
+                                                SparseVector<double>* features,
+                                                SparseVector<double>* estimated_features,
+                                                void* context) const {
+  const TRule& rule = *edge.rule_;
+
+  if (rule.Arity() != 0 || // arity = 0, no nonterminals
+      rule.e_.size() != 1) return; // size = 1, predicted label is a single token
+
+
+  // you can see the current label "for free"
+  const WordID cur_label = rule.e_[0];
+  // (if you want to see more labels, you have to be very careful, and muck
+  //  about with contexts and ant_contexts)
+
+  // but... you can look at as much of the source as you want!
+  const int from_src_index = edge.i_;   // start of the span in the input being labeled
+  const int to_src_index = edge.j_;     // end of the span in the input
+  // (note: in the case of tagging the size of the spans being labeled will
+  //  always be 1, but in other formalisms, you can have bigger spans.)
+
+  // this is the current token being labeled:
+  const WordID cur_input = current_input[from_src_index];
+
+  // let's get the previous token in the input (may be to the left of the start
+  // of the sentence!)
+  WordID prev_input = kSOS;
+  if (from_src_index > 0) { prev_input = current_input[from_src_index - 1]; }
+  // let's get the next token (may be to the right of the end of the sentence!)
+  WordID next_input = kEOS;
+  if (to_src_index < current_input.size()) { next_input = current_input[to_src_index]; }
+
+  // now, build a feature string
+  ostringstream os;
+  // TD::Convert converts from the internal integer representation of a token
+  // to the actual token
+  os << "C1:" << TD::Convert(prev_input) << '_' 
+     << TD::Convert(cur_input) << '|' << TD::Convert(cur_label);
+  // C1 is just to prevent a name clash
+
+  // pick a value
+  double fval = 1.0; // can be any real value
+
+  // add it to the feature vector: FD::Convert converts the feature string to a
+  // feature int, and Escape makes sure the feature string doesn't have any bad
+  // symbols that could confuse a parser somewhere
+  features->add_value(FD::Convert(Escape(os.str())), fval);
+  // that's it!
+
+  // create more features if you like...
+}
+
diff --git a/decoder/ff_context.h b/decoder/ff_context.h
new file mode 100644
index 00000000..0d22b027
--- /dev/null
+++ b/decoder/ff_context.h
@@ -0,0 +1,23 @@
+#ifndef _FF_CONTEXT_H_
+#define _FF_CONTEXT_H_
+
+#include <vector>
+#include "ff.h"
+
+class RuleContextFeatures : public FeatureFunction {
+ public:
+  RuleContextFeatures(const std::string& param);
+ protected:
+  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                     const Hypergraph::Edge& edge,
+                                     const std::vector<const void*>& ant_contexts,
+                                     SparseVector<double>* features,
+                                     SparseVector<double>* estimated_features,
+                                     void* context) const;
+  virtual void PrepareForInput(const SentenceMetadata& smeta);
+ private:
+  std::vector<WordID> current_input;
+  WordID kSOS, kEOS;
+};
+
+#endif
diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc
index 3991d38f..c9ed996c 100644
--- a/decoder/ff_csplit.cc
+++ b/decoder/ff_csplit.cc
@@ -72,7 +72,7 @@ struct BasicCSplitFeaturesImpl {
   const int fl1_;
   const int fl2_;
   const int bad_;
-  FreqDict freq_dict_;
+  FreqDict<float> freq_dict_;
   set<WordID> bad_words_;
 };
 
diff --git a/decoder/ff_ngrams.cc b/decoder/ff_ngrams.cc
index 04dd1906..d6d79f5e 100644
--- a/decoder/ff_ngrams.cc
+++ b/decoder/ff_ngrams.cc
@@ -57,6 +57,39 @@ namespace {
   }
 }
 
+static bool ParseArgs(string const& in, bool* explicit_markers, unsigned* order) {
+  vector<string> const& argv=SplitOnWhitespace(in);
+  *explicit_markers = false;
+  *order = 3;
+#define LMSPEC_NEXTARG if (i==argv.end()) {            \
+    cerr << "Missing argument for "<<*last<<". "; goto usage; \
+    } else { ++i; }
+
+  for (vector<string>::const_iterator last,i=argv.begin(),e=argv.end();i!=e;++i) {
+    string const& s=*i;
+    if (s[0]=='-') {
+      if (s.size()>2) goto fail;
+      switch (s[1]) {
+      case 'x':
+        *explicit_markers = true;
+        break;
+      case 'o':
+        LMSPEC_NEXTARG; *order=atoi((*i).c_str());
+        break;
+#undef LMSPEC_NEXTARG
+      default:
+      fail:
+        cerr<<"Unknown option on NgramFeatures "<<s<<" ; ";
+        goto usage;
+      }
+    }
+  }
+  return true;
+usage:
+  cerr << "NgramFeatures is incorrect!\n";
+  return false;
+}
+
 class NgramDetectorImpl {
 
   // returns the number of unscored words at the left edge of a span
@@ -264,10 +297,10 @@ class NgramDetectorImpl {
   }
 
  public:
-  explicit NgramDetectorImpl(bool explicit_markers) :
+  explicit NgramDetectorImpl(bool explicit_markers, unsigned order) :
       kCDEC_UNK(TD::Convert("<unk>")) ,
       add_sos_eos_(!explicit_markers) {
-    order_ = 3;
+    order_ = order;
     state_size_ = (order_ - 1) * sizeof(WordID) + 2 + (order_ - 1) * sizeof(WordID);
     unscored_size_offset_ = (order_ - 1) * sizeof(WordID);
     is_complete_offset_ = unscored_size_offset_ + 1;
@@ -316,8 +349,10 @@ class NgramDetectorImpl {
 
 NgramDetector::NgramDetector(const string& param) {
   string filename, mapfile, featname;
-  bool explicit_markers = (param == "-x");
-  pimpl_ = new NgramDetectorImpl(explicit_markers);
+  bool explicit_markers = false;
+  unsigned order = 3;
+  ParseArgs(param, &explicit_markers, &order);
+  pimpl_ = new NgramDetectorImpl(explicit_markers, order);
   SetStateSize(pimpl_->ReserveStateSize());
 }
 
diff --git a/decoder/ff_register.h b/decoder/ff_register.h
index 80b1457e..80b1457e 100755..100644
--- a/decoder/ff_register.h
+++ b/decoder/ff_register.h
diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h
index 74d71b6a..74d71b6a 100755..100644
--- a/decoder/ff_sample_fsa.h
+++ b/decoder/ff_sample_fsa.h
diff --git a/decoder/freqdict.cc b/decoder/freqdict.cc
deleted file mode 100644
index 9e25d346..00000000
--- a/decoder/freqdict.cc
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <cassert>
-#include "freqdict.h"
-#include "tdict.h"
-#include "filelib.h"
-
-using namespace std;
-
-void FreqDict::Load(const std::string& fname) {
-  cerr << "Reading word frequencies: " << fname << endl;
-  ReadFile rf(fname);
-  istream& ifs = *rf.stream();
-  int cc=0;
-  while (ifs) {
-    std::string word;
-    ifs >> word;
-    if (word.size() == 0) continue;
-    if (word[0] == '#') continue;
-    double count = 0;
-    ifs >> count;
-    assert(count > 0.0);  // use -log(f)
-    counts_[TD::Convert(word)]=count;
-    ++cc;
-    if (cc % 10000 == 0) { std::cerr << "."; }
-  }
-  std::cerr << "\n";
-  std::cerr << "Loaded " << cc << " words\n";
-}
diff --git a/decoder/freqdict.h b/decoder/freqdict.h
index 9acf0c33..4e03fadd 100644
--- a/decoder/freqdict.h
+++ b/decoder/freqdict.h
@@ -1,20 +1,47 @@
 #ifndef _FREQDICT_H_
 #define _FREQDICT_H_
 
+#include <iostream>
 #include <map>
 #include <string>
 #include "wordid.h"
+#include "filelib.h"
+#include "tdict.h"
 
+template <typename T = float>
 class FreqDict {
  public:
-  void Load(const std::string& fname);
-  float LookUp(const WordID& word) const {
-    std::map<WordID,float>::const_iterator i = counts_.find(word);
-    if (i == counts_.end()) return 0;
+  FreqDict() : max_() {}
+  T Max() const { return max_; }
+  void Load(const std::string& fname) {
+    std::cerr << "Reading word statistics from: " << fname << std::endl;
+    ReadFile rf(fname);
+    std::istream& ifs = *rf.stream();
+    int cc=0;
+    std::string word;
+    while (ifs) {
+      ifs >> word;
+      if (word.size() == 0) continue;
+      if (word[0] == '#') continue;
+      T count = 0;
+      ifs >> count;
+      if (count > max_) max_ = count;
+      counts_[TD::Convert(word)]=count;
+      ++cc;
+      if (cc % 10000 == 0) { std::cerr << "."; }
+    }
+    std::cerr << "\n";
+    std::cerr << "Loaded " << cc << " words\n";
+  }
+
+  T LookUp(const WordID& word) const {
+    typename std::map<WordID,T>::const_iterator i = counts_.find(word);
+    if (i == counts_.end()) return T();
     return i->second;
   }
  private:
-  std::map<WordID, float> counts_;
+  T max_;
+  std::map<WordID, T> counts_;
 };
 
 #endif
diff --git a/decoder/fsa-decode.sh b/decoder/fsa-decode.sh
deleted file mode 100755
index 66879523..00000000
--- a/decoder/fsa-decode.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-d=$(dirname `readlink -f $0`)/
-. $d/decode.sh
-in=1dev.ur cfg=cdec-fsa decode
diff --git a/decoder/fsa-hiero.ini b/decoder/fsa-hiero.ini
deleted file mode 100755
index 7c7d0347..00000000
--- a/decoder/fsa-hiero.ini
+++ /dev/null
@@ -1,5 +0,0 @@
-formalism=scfg
-scfg_extra_glue_grammar=glue-lda.scfg
-grammar=grammar.hiero
-show_tree_structure=true
-weights=weights.hiero
diff --git a/decoder/fsa.ini b/decoder/fsa.ini
deleted file mode 100755
index 571a2e34..00000000
--- a/decoder/fsa.ini
+++ /dev/null
@@ -1,2 +0,0 @@
-feature_function=ShorterThanPrev
-feature_function=LongerThanPrev
diff --git a/decoder/fst_translator.cc b/decoder/fst_translator.cc
index 38dbd717..074de4c9 100644
--- a/decoder/fst_translator.cc
+++ b/decoder/fst_translator.cc
@@ -30,7 +30,10 @@ struct FSTTranslatorImpl {
     if (input.find("{\"rules\"") == 0) {
       istringstream is(input);
       Hypergraph src_cfg_hg;
-      assert(HypergraphIO::ReadFromJSON(&is, &src_cfg_hg));
+      if (!HypergraphIO::ReadFromJSON(&is, &src_cfg_hg)) {
+        cerr << "Failed to read HG from JSON.\n";
+        abort();
+      }
       if (add_pass_through_rules) {
         SparseVector<double> feats;
         feats.set_value(FD::Convert("PassThrough"), 1);
diff --git a/decoder/glue-lda.scfg b/decoder/glue-lda.scfg
deleted file mode 100755
index 27489817..00000000
--- a/decoder/glue-lda.scfg
+++ /dev/null
@@ -1,8 +0,0 @@
-[S] ||| [S,1] [X0,2] ||| [1] [2] ||| Glue=1
-[S] ||| [X0,1] ||| [1] ||| GlueTop=1
-[S] ||| [S,1] [X1,2] ||| [1] [2] ||| Glue=1
-[S] ||| [X1,1] ||| [1] ||| GlueTop=1
-[S] ||| [S,1] [X2,2] ||| [1] [2] ||| Glue=1
-[S] ||| [X2,1] ||| [1] ||| GlueTop=1
-[S] ||| [S,1] [X3,2] ||| [1] [2] ||| Glue=1
-[S] ||| [X3,1] ||| [1] ||| GlueTop=1
diff --git a/decoder/grammar.hiero b/decoder/grammar.hiero
deleted file mode 100755
index 79adf33a..00000000
--- a/decoder/grammar.hiero
+++ /dev/null
@@ -1,151 +0,0 @@
-[X] ||| . ||| . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] . ||| [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] anciano ||| [1] old man ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629
-[X] ||| [X,1] anciano . ||| [1] old man . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629
-[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629
-[X] ||| [X,1] feo ||| ugly [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] feo . ||| ugly [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] gato ||| [1] cat ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] gato . ||| [1] cat . ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] gato [X,2] ||| [1] [2] cat ||| EgivenF=0 FgivenE=1.09861 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] gato [X,2] ||| [1] cat [2] ||| EgivenF=0 FgivenE=0.405465 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] gato [X,2] . ||| [1] [2] cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] gato negro ||| [1] black cat ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] gato negro . ||| [1] black cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] gato negro [X,2] ||| [1] black cat [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] grande ||| big [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] grande . ||| big [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] grande [X,2] ||| big [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] negro ||| black [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] negro . ||| black [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] negro [X,2] ||| black [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] oruga ||| [1] caterpiller ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] oruga . ||| [1] caterpiller . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] oruga [X,2] ||| [1] caterpiller [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] patito [X,2] ||| [1] [2] duckling ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] patito [X,2] . ||| [1] [2] duckling . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] patito feo ||| [1] ugly duckling ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] patito feo . ||| [1] ugly duckling . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] patito feo [X,2] ||| [1] ugly duckling [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] peces ||| [1] fish ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] peces . ||| [1] fish . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] peces [X,2] ||| [1] fish [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] perro ||| [1] dog ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] perro . ||| [1] dog . ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] perro [X,2] ||| [1] dog [2] ||| EgivenF=0 FgivenE=0.405465 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] perro [X,2] ||| [1] [2] dog ||| EgivenF=0 FgivenE=1.09861 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] perro [X,2] . ||| [1] [2] dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] perro grande ||| [1] big dog ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] perro grande . ||| [1] big dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] perro grande [X,2] ||| [1] big dog [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] pájaro [X,2] ||| [1] [2] bird ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] pájaro [X,2] . ||| [1] [2] bird . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] pájaro negro ||| [1] black bird ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] pájaro negro . ||| [1] black bird . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] pájaro negro [X,2] ||| [1] black bird [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| anciano ||| old man ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629
-[X] ||| anciano . ||| old man . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629
-[X] ||| anciano [X,1] ||| old man [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629
-[X] ||| el ||| the ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el [X,1] ||| the [1] ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el [X,1] . ||| the [1] . ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el [X,1] feo ||| the ugly [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el [X,1] feo . ||| the ugly [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el [X,1] feo [X,2] ||| the ugly [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el [X,1] grande ||| the big [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el [X,1] grande . ||| the big [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el [X,1] grande [X,2] ||| the big [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el [X,1] negro ||| the black [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el [X,1] negro . ||| the black [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el [X,1] negro [X,2] ||| the black [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el gato ||| the cat ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el gato . ||| the cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el gato [X,1] ||| the [1] cat ||| EgivenF=0 FgivenE=0.693147 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el gato [X,1] ||| the cat [1] ||| EgivenF=0 FgivenE=0.693147 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el gato [X,1] . ||| the [1] cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el gato negro ||| the black cat ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el gato negro . ||| the black cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el gato negro [X,1] ||| the black cat [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el patito [X,1] ||| the [1] duckling ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el patito [X,1] . ||| the [1] duckling . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el patito feo ||| the ugly duckling ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el patito feo . ||| the ugly duckling . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el patito feo [X,1] ||| the ugly duckling [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el perro ||| the dog ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el perro . ||| the dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el perro [X,1] ||| the [1] dog ||| EgivenF=0 FgivenE=0.693147 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el perro [X,1] ||| the dog [1] ||| EgivenF=0 FgivenE=0.693147 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el perro [X,1] . ||| the [1] dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el perro grande ||| the big dog ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el perro grande . ||| the big dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el perro grande [X,1] ||| the big dog [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el pájaro [X,1] ||| the [1] bird ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el pájaro [X,1] . ||| the [1] bird . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el pájaro negro ||| the black bird ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el pájaro negro . ||| the black bird . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el pájaro negro [X,1] ||| the black bird [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| eso ||| that ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| eso [X,1] ||| that [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| eso [X,1] . ||| that [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| eso perro ||| that dog ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| eso perro . ||| that dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| eso perro [X,1] ||| that dog [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| este ||| this ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| este [X,1] ||| this [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| este [X,1] . ||| this [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| este anciano ||| this old man ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629
-[X] ||| este anciano . ||| this old man . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629
-[X] ||| este anciano [X,1] ||| this old man [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629
-[X] ||| este gato ||| this cat ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| este gato . ||| this cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| este gato [X,1] ||| this cat [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| feo ||| ugly ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| gato ||| cat ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| gato . ||| cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| gato [X,1] ||| [1] cat ||| EgivenF=1.09861 FgivenE=1.09861 LexEgivenF=0 LexFgivenE=0
-[X] ||| gato [X,1] ||| cat [1] ||| EgivenF=0 FgivenE=0.405465 LexEgivenF=0 LexFgivenE=0
-[X] ||| gato [X,1] . ||| [1] cat . ||| EgivenF=1.09861 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| gato negro ||| black cat ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| gato negro . ||| black cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| gato negro [X,1] ||| black cat [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| grande ||| big ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| la ||| the ||| EgivenF=2.07944 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0
-[X] ||| la [X,1] ||| the [1] ||| EgivenF=2.07944 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0
-[X] ||| la [X,1] . ||| the [1] . ||| EgivenF=2.07944 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0
-[X] ||| la oruga ||| the caterpiller ||| EgivenF=0 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0
-[X] ||| la oruga . ||| the caterpiller . ||| EgivenF=0 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0
-[X] ||| la oruga [X,1] ||| the caterpiller [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0
-[X] ||| los ||| the ||| EgivenF=2.07944 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0
-[X] ||| los [X,1] ||| the [1] ||| EgivenF=2.07944 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0
-[X] ||| los [X,1] . ||| the [1] . ||| EgivenF=2.07944 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0
-[X] ||| los peces ||| the fish ||| EgivenF=0 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0
-[X] ||| los peces . ||| the fish . ||| EgivenF=0 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0
-[X] ||| los peces [X,1] ||| the fish [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0
-[X] ||| negro ||| black ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| oruga ||| caterpiller ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| oruga . ||| caterpiller . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| oruga [X,1] ||| caterpiller [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| patito ||| duckling ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| patito [X,1] ||| [1] duckling ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| patito [X,1] . ||| [1] duckling . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| patito feo ||| ugly duckling ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| patito feo . ||| ugly duckling . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| patito feo [X,1] ||| ugly duckling [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| peces ||| fish ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| peces . ||| fish . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| peces [X,1] ||| fish [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| perro ||| dog ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| perro . ||| dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| perro [X,1] ||| [1] dog ||| EgivenF=1.09861 FgivenE=1.09861 LexEgivenF=0 LexFgivenE=0
-[X] ||| perro [X,1] ||| dog [1] ||| EgivenF=0 FgivenE=0.405465 LexEgivenF=0 LexFgivenE=0
-[X] ||| perro [X,1] . ||| [1] dog . ||| EgivenF=1.09861 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| perro grande ||| big dog ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| perro grande . ||| big dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| perro grande [X,1] ||| big dog [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| pájaro ||| bird ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| pájaro [X,1] ||| [1] bird ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| pájaro [X,1] . ||| [1] bird . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| pájaro negro ||| black bird ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| pájaro negro . ||| black bird . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| pájaro negro [X,1] ||| black bird [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
diff --git a/decoder/hg_cfg.h b/decoder/hg_cfg.h
index b90aca47..b90aca47 100755..100644
--- a/decoder/hg_cfg.h
+++ b/decoder/hg_cfg.h
diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc
index c1c93933..9f0f50fa 100644
--- a/decoder/hg_io.cc
+++ b/decoder/hg_io.cc
@@ -624,3 +624,30 @@ void HypergraphIO::WriteAsCFG(const Hypergraph& hg) {
   }
 }
 
+/* Writes the target side of each edge of hg to stdout. Output format:
+ * #vertices #edges
+ * for each vertex, in index order (presumably bottom-up topological; confirm):
+ *   #downward_edges (the vertex's in_edges_)
+ *   for each downward edge:
+ *     RHS with [vertex_index] for NTs ||| scores
+ */
+void HypergraphIO::WriteTarget(const Hypergraph& hg) {
+  cout << hg.nodes_.size() << ' ' << hg.edges_.size() << '\n';
+  for (unsigned int i = 0; i < hg.nodes_.size(); ++i) {
+    const Hypergraph::EdgesVector &edges = hg.nodes_[i].in_edges_;
+    cout << edges.size() << '\n';
+    for (unsigned int j = 0; j < edges.size(); ++j) {
+      const Hypergraph::Edge &edge = hg.edges_[edges[j]];
+      const std::vector<WordID> &e = edge.rule_->e();
+      for (std::vector<WordID>::const_iterator word = e.begin(); word != e.end(); ++word) {
+        if (*word <= 0) {  // non-positive id: NT slot; -id indexes tail_nodes_
+          cout << '[' << edge.tail_nodes_[-*word] << "] ";
+        } else {  // positive id: terminal, printed via TD::Convert
+          cout << TD::Convert(*word) << ' ';
+        }
+      }
+      cout << "||| " << edge.rule_->scores_ << '\n';  // the rule's scores_, one edge per line
+    }
+  }
+}
+
diff --git a/decoder/hg_io.h b/decoder/hg_io.h
index 082489d8..44817157 100644
--- a/decoder/hg_io.h
+++ b/decoder/hg_io.h
@@ -23,6 +23,9 @@ struct HypergraphIO {
 
   static void WriteAsCFG(const Hypergraph& hg);
 
+  // Write only the target side information in bottom-up order.
+  static void WriteTarget(const Hypergraph& hg);
+
   // serialization utils
   static void ReadFromPLF(const std::string& in, Hypergraph* out, int line = 0);
   // return PLF string representation (undefined behavior on non-lattices)
diff --git a/decoder/hg_test.h b/decoder/hg_test.h
index 3da6533c..3da6533c 100755..100644
--- a/decoder/hg_test.h
+++ b/decoder/hg_test.h
diff --git a/decoder/lattice.cc b/decoder/lattice.cc
index e3631e59..89da3cd0 100644
--- a/decoder/lattice.cc
+++ b/decoder/lattice.cc
@@ -46,6 +46,7 @@ void LatticeTools::ConvertTextToLattice(const string& text, Lattice* pl) {
   Lattice& l = *pl;
   vector<WordID> ids;
   TD::ConvertSentence(text, &ids);
+  l.clear();
   l.resize(ids.size());
   for (int i = 0; i < l.size(); ++i)
     l[i].push_back(LatticeArc(ids[i], 0.0, 1));
diff --git a/decoder/nt_span.h b/decoder/nt_span.h
index a918f301..a918f301 100755..100644
--- a/decoder/nt_span.h
+++ b/decoder/nt_span.h
diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h
index b603e27a..b603e27a 100755..100644
--- a/decoder/oracle_bleu.h
+++ b/decoder/oracle_bleu.h
diff --git a/decoder/perro.sh b/decoder/perro.sh
deleted file mode 100755
index 3e54ac71..00000000
--- a/decoder/perro.sh
+++ /dev/null
@@ -1 +0,0 @@
-$gdb $cdec "$@" -k 30 --show_features -c fsa-hiero.ini -i perro.ur 
diff --git a/decoder/perro.ur b/decoder/perro.ur
deleted file mode 100755
index 6c5da6d7..00000000
--- a/decoder/perro.ur
+++ /dev/null
@@ -1 +0,0 @@
-eso perro feo
diff --git a/decoder/program_options.h b/decoder/program_options.h
index 87afb320..87afb320 100755..100644
--- a/decoder/program_options.h
+++ b/decoder/program_options.h
diff --git a/decoder/sentences.h b/decoder/sentences.h
index 54b5ffb3..54b5ffb3 100755..100644
--- a/decoder/sentences.h
+++ b/decoder/sentences.h
diff --git a/decoder/short.ur b/decoder/short.ur
deleted file mode 100755
index 48612801..00000000
--- a/decoder/short.ur
+++ /dev/null
@@ -1 +0,0 @@
-krAcy myN pyr kw mxtlf HAdvAt
diff --git a/decoder/trule.cc b/decoder/trule.cc
index 40235542..141b8faa 100644
--- a/decoder/trule.cc
+++ b/decoder/trule.cc
@@ -232,16 +232,6 @@ void TRule::ComputeArity() {
   arity_ = 1 - min;
 }
 
-static string AnonymousStrVar(int i) {
-  string res("[v]");
-  if(!(i <= 0 && i >= -8)) {
-    cerr << "Can't handle more than 9 non-terminals: index=" << (-i) << endl;
-    abort();
-  }
-  res[1] = '1' - i;
-  return res;
-}
-
 string TRule::AsString(bool verbose) const {
   ostringstream os;
   int idx = 0;
@@ -259,15 +249,11 @@ string TRule::AsString(bool verbose) const {
     }
   }
   os << " ||| ";
-  if (idx > 9) {
-    cerr << "Too many non-terminals!\n partial: " << os.str() << endl;
-    exit(1);
-  }
   for (int i =0; i<e_.size(); ++i) {
     if (i) os << ' ';
     const WordID& w = e_[i];
     if (w < 1)
-      os << AnonymousStrVar(w);
+      os << '[' << (1-w) << ']';
     else
       os << TD::Convert(w);
   }
diff --git a/decoder/weights-fsa b/decoder/weights-fsa
deleted file mode 100644
index 3cc96c2f..00000000
--- a/decoder/weights-fsa
+++ /dev/null
@@ -1,14 +0,0 @@
-Arity_0 1.70741473606976
-Arity_1 1.12426238048012
-Arity_2 1.14986187839554
-Glue -0.04589037041388
-LanguageModel 1.09051
-LM 1.09051
-PassThrough -3.66226367902928
-PhraseModel_0 -1.94633451863252
-PhraseModel_1 -0.1475347695476
-PhraseModel_2 -1.614818994946
-WordPenalty -3.0
-WordPenaltyFsa -0.56028442964748
-ShorterThanPrev -10
-LongerThanPrev -10
diff --git a/decoder/weights.hiero b/decoder/weights.hiero
deleted file mode 100755
index 6747f059..00000000
--- a/decoder/weights.hiero
+++ /dev/null
@@ -1,10 +0,0 @@
-SameFirstLetter 1
-LongerThanPrev 1
-ShorterThanPrev 1
-GlueTop 0.0
-Glue -1.0
-EgivenF -0.5
-FgivenE -0.5
-LexEgivenF -0.5
-LexFgivenE -0.5
-LM 1
author	Patrick Simianer <p@simianer.de>	2012-03-13 09:24:47 +0100
committer	Patrick Simianer <p@simianer.de>	2012-03-13 09:24:47 +0100
commit	c3a9ea64251605532c7954959662643a6a927bb7 (patch)
tree	fed6048a5acdaf3834740107771c2bc48f26fd4d /decoder
parent	867bca3e5fa0cdd63bf032e5859fb5092d9a4ca1 (diff)
parent	a45af4a3704531a8382cd231f6445b3a33b598a3 (diff)