28 files changed, 0 insertions, 3545 deletions
diff --git a/extools/Makefile.am b/extools/Makefile.am
deleted file mode 100644
index ee363264..00000000
--- a/extools/Makefile.am
+++ /dev/null
@@ -1,30 +0,0 @@
-bin_PROGRAMS = \
-  extractor \
-  mr_stripe_rule_reduce \
-  filter_grammar \
-  featurize_grammar \
-  extractor_monolingual
-
-noinst_PROGRAMS =
-
-sg_lexer.cc: sg_lexer.l
-	$(LEX) -s -CF -8 -o$@ $<
-
-filter_grammar_SOURCES = filter_grammar.cc extract.cc sentence_pair.cc striped_grammar.cc sg_lexer.cc
-filter_grammar_LDADD = $(top_srcdir)/utils/libutils.a -lz
-#filter_grammar_LDFLAGS = -all-static
-
-featurize_grammar_SOURCES = featurize_grammar.cc extract.cc sentence_pair.cc sg_lexer.cc striped_grammar.cc
-featurize_grammar_LDADD = $(top_srcdir)/utils/libutils.a -lz
-
-mr_stripe_rule_reduce_SOURCES = mr_stripe_rule_reduce.cc extract.cc sentence_pair.cc striped_grammar.cc sg_lexer.cc
-mr_stripe_rule_reduce_LDADD = $(top_srcdir)/utils/libutils.a -lz
-
-extractor_SOURCES = sentence_pair.cc extract.cc extractor.cc striped_grammar.cc
-extractor_LDADD = $(top_srcdir)/utils/libutils.a -lz
-
-extractor_monolingual_SOURCES = extractor_monolingual.cc
-extractor_monolingual_LDADD = $(top_srcdir)/utils/libutils.a -lz
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils
-
diff --git a/extools/README b/extools/README
deleted file mode 100644
index af91ce79..00000000
--- a/extools/README
+++ /dev/null
@@ -1,32 +0,0 @@
-
-Categories have the format i-j:CAT where i and j are the indices of the spaces
-between words in the TARGET language.  For example, slash categories can be written:
-
-  the blue house
-  0-1:DT 1-2:JJ 2-3:NN 1-3:NBAR 0-2:NP/NN 0-3:NP
-
-
-You may multiply label each span, e.g.
-
-  NP
-   |
- NBAR
-   |
-  NN
-   |
- John
-  0-1:NP 0-1:NBAR 0-1:NP
-
-However, this may result in a very large number of rules being extracted.
-
-
-****
-* Filtering and Scoring of Unscored and Unfiltered Grammars
-****
-
-Take the unfiltered grammar, and a test set, and run:
-./filter_grammar <test set> < unfiltered.grammar > filter.grammar
-
-Then, to score the new filtered grammar, run:
-./score_grammar <alignment> < filtered.grammar > scored.grammar
-
diff --git a/extools/coarsen_grammar.pl b/extools/coarsen_grammar.pl
deleted file mode 100755
index f2dd6689..00000000
--- a/extools/coarsen_grammar.pl
+++ /dev/null
@@ -1,133 +0,0 @@
-#!/usr/bin/perl
-
-# dumb grammar coarsener that maps every nonterminal to X (except S).
-
-use strict;
-
-unless (@ARGV > 1){ 
-  die "Usage: $0 <weight file> <grammar file> [<grammar file> ... <grammar file>] \n";
-}
-my $weight_file = shift @ARGV;
-
-$ENV{"LC_ALL"} = "C";
-local(*GRAMMAR, *OUT_GRAMMAR, *WEIGHTS);
-
-my %weights;
-unless (open(WEIGHTS, $weight_file)) {die "Could not open weight file $weight_file\n" }
-while (<WEIGHTS>){
-  if (/(.+) (.+)$/){
-    $weights{$1} = $2;
-  } 
-}
-close(WEIGHTS);
-unless (keys(%weights)){
-  die "Could not find any PhraseModel features in weight file (perhaps you specified the wrong file?)\n\n".
-    "Usage: $0 <weight file> <grammar file> [<grammar file> ... <grammar file>] \n";
-}
-
-sub cleanup_and_die;
-$SIG{INT} = "cleanup_and_die";
-$SIG{TERM} = "cleanup_and_die"; 
-$SIG{HUP} = "cleanup_and_die";
-
-open(OUT_GRAMMAR, ">grammar.tmp");
-while (my $grammar_file = shift @ARGV){
-  unless (open(GRAMMAR, $grammar_file)) {die "Could not open grammar file $grammar_file\n"}
-  while (<GRAMMAR>){
-    if (/^((.*\|{3}){3})(.*)$/){
-      my $rule = $1;
-      my $rest = $3;
-      my $coarse_rule = $rule;
-      $coarse_rule =~ s/\[X[^\],]*/[X/g;
-      print OUT_GRAMMAR "$coarse_rule $rule $rest\n";
-    } else {
-      die "Unrecognized rule format: $_\n";
-    }
-  }
-  close(GRAMMAR);
-}
-close(OUT_GRAMMAR);
-
-`sort grammar.tmp > grammar.tmp.sorted`;
-sub dump_rules;
-sub compute_score;
-unless (open(GRAMMAR, "grammar.tmp.sorted")){ die "Something went wrong; could not open intermediate file grammar.tmp.sorted\n"};
-my $prev_coarse_rule = "";
-my $best_features = "";
-my $best_score = 0;
-my @rules = ();
-while (<GRAMMAR>){
-  if (/^\s*((\S.*\|{3}\s*){3})((\S.*\|{3}\s*){3})(.*)$/){
-    my $coarse_rule = $1;
-    my $fine_rule = $3;
-    my $features = $5;  # This code does not correctly handle rules with other info (e.g. alignments)
-    if ($coarse_rule eq $prev_coarse_rule){
-      my $score = compute_score($features, %weights);
-      if ($score > $best_score){
-        $best_score = $score;
-        $best_features = $features;
-      }
-    } else {
-      dump_rules($prev_coarse_rule, $best_features, @rules);
-      $prev_coarse_rule = $coarse_rule;
-      $best_features = $features;
-      $best_score = compute_score($features, %weights);
-      @rules = ();
-    }
-    push(@rules, "$fine_rule$features\n");
-  } else {
-    die "Something went wrong during grammar projection: $_\n";
-  }
-}
-dump_rules($prev_coarse_rule, $best_features, @rules);
-close(GRAMMAR);
-cleanup();
-
-sub compute_score {
-  my($features, %weights) = @_;
-  my $score = 0;
-  if ($features =~ s/^\s*(\S.*\S)\s*$/$1/) { 
-    my @features = split(/\s+/, $features);
-    my $pm=0;
-    for my $feature (@features) {
-      my $feature_name; 
-      my $feature_val;
-      if ($feature =~ /(.*)=(.*)/){
-        $feature_name = $1;
-        $feature_val= $2;
-      } else {
-        $feature_name = "PhraseModel_" . $pm;
-        $feature_val= $feature;
-      }
-      $pm++;
-      if ($weights{$feature_name}){
-        $score += $weights{$feature_name} * $feature_val;
-      } 
-    }  
-  } else {
-    die "Unexpected feature value format: $features\n";
-  }
-  return $score;
-}
-
-sub dump_rules {
-  my($coarse_rule, $coarse_rule_scores, @fine_rules) = @_;
-  unless($coarse_rule){ return; }
-  print "$coarse_rule $coarse_rule_scores\n";
-  for my $rule (@fine_rules){
-    print "\t$rule";
-  }
-}
-
-sub cleanup_and_die {
-  cleanup();
-  die "\n";
-}
-
-sub cleanup {
- `rm -rf grammar.tmp grammar.tmp.sorted`;
-}
-
-
-
-
diff --git a/extools/extract.cc b/extools/extract.cc
deleted file mode 100644
index 49542fed..00000000
--- a/extools/extract.cc
+++ /dev/null
@@ -1,336 +0,0 @@
-#include "extract.h"
-
-#include <queue>
-#include <vector>
-#include <utility>
-#include <tr1/unordered_map>
-#include <set>
-#include <boost/tuple/tuple_comparison.hpp>
-
-#include <boost/functional/hash.hpp>
-#include <boost/tuple/tuple.hpp>
-
-#include "sentence_pair.h"
-#include "tdict.h"
-#include "wordid.h"
-#include "array2d.h"
-
-using namespace std;
-using namespace boost;
-using std::tr1::unordered_map;
-using boost::tuple;
-
-namespace {
-  inline bool IsWhitespace(char c) { return c == ' ' || c == '\t'; }
-
-  inline void SkipWhitespace(const char* buf, int* ptr) {
-    while (buf[*ptr] && IsWhitespace(buf[*ptr])) { ++(*ptr); }
-  }
-}
-
-Extract::RuleObserver::~RuleObserver() {
-  cerr << "Rules extracted: " << count << endl;
-}
-
-void Extract::ExtractBasePhrases(const int max_base_phrase_size,
-                        const AnnotatedParallelSentence& sentence,
-                        vector<ParallelSpan>* phrases) {
-  phrases->clear();
-
-  vector<pair<int,int> > f_spans(sentence.f_len, pair<int,int>(sentence.e_len, 0));
-  vector<pair<int,int> > e_spans(sentence.e_len, pair<int,int>(sentence.f_len, 0));
-  // for each alignment point in e, precompute the minimal consistent phrases in f
-  // for each alignment point in f, precompute the minimal consistent phrases in e
-  for (int i = 0; i < sentence.f_len; ++i) {
-    for (int j = 0; j < sentence.e_len; ++j) {
-      if (sentence.aligned(i,j)) {
-        if (j < f_spans[i].first) f_spans[i].first = j;
-        f_spans[i].second = j+1;
-        if (i < e_spans[j].first) e_spans[j].first = i;
-        e_spans[j].second = i+1;
-      }
-    }
-  }
-
-  for (int i1 = 0; i1 < sentence.f_len; ++i1) {
-    if (sentence.f_aligned[i1] == 0) continue;
-    int j1 = sentence.e_len;
-    int j2 = 0;
-    const int i_limit = min(sentence.f_len, i1 + max_base_phrase_size);
-    for (int i2 = i1 + 1; i2 <= i_limit; ++i2) {
-      if (sentence.f_aligned[i2-1] == 0) continue;
-      // cerr << "F has aligned span " << i1 << " to " << i2 << endl;
-      j1 = min(j1, f_spans[i2-1].first);
-      j2 = max(j2, f_spans[i2-1].second);
-      if (j1 >= j2) continue;
-      if (j2 - j1 > max_base_phrase_size) continue;
-      int condition = 0;
-      for (int j = j1; j < j2; ++j) {
-        if (e_spans[j].first < i1) { condition = 1; break; }
-        if (e_spans[j].second > i2) { condition = 2; break; }
-      }
-      if (condition == 1) break;
-      if (condition == 2) continue;
-      // category types added later!
-      phrases->push_back(ParallelSpan(i1, i2, j1, j2));
-      // cerr << i1 << " " << i2 << " : " << j1 << " " << j2 << endl;
-    }
-  }
-}
-
-void Extract::LoosenPhraseBounds(const AnnotatedParallelSentence& sentence,
-                                 const int max_base_phrase_size,
-                                 vector<ParallelSpan>* phrases) {
-  const int num_phrases = phrases->size();
-  map<int, map<int, map<int, map<int, bool> > > > marker;
-  for (int i = 0; i < num_phrases; ++i) {
-    const ParallelSpan& cur = (*phrases)[i];
-    marker[cur.i1][cur.i2][cur.j1][cur.j2] = true;
-  }
-  for (int i = 0; i < num_phrases; ++i) {
-    const ParallelSpan& cur = (*phrases)[i];
-    const int i1_max = cur.i1;
-    const int i2_min = cur.i2;
-    const int j1_max = cur.j1;
-    const int j2_min = cur.j2;
-    int i1_min = i1_max;
-    while (i1_min > 0 && sentence.f_aligned[i1_min-1] == 0) { --i1_min; }
-    int j1_min = j1_max;
-    while (j1_min > 0 && sentence.e_aligned[j1_min-1] == 0) { --j1_min; }
-    int i2_max = i2_min;
-    while (i2_max < sentence.f_len && sentence.f_aligned[i2_max] == 0) { ++i2_max; }
-    int j2_max = j2_min;
-    while (j2_max < sentence.e_len && sentence.e_aligned[j2_max] == 0) { ++j2_max; }
-    for (int i1 = i1_min; i1 <= i1_max; ++i1) {
-      const int ilim = min(i2_max, i1 + max_base_phrase_size);
-      for (int i2 = max(i1+1,i2_min); i2 <= ilim; ++i2) {
-        for (int j1 = j1_min; j1 <= j1_max; ++j1) {
-          const int jlim = std::min(j2_max, j1 + max_base_phrase_size);
-          for (int j2 = std::max(j1+1, j2_min); j2 <= jlim; ++j2) {
-            bool& seen = marker[i1][i2][j1][j2];
-            if (!seen)
-              phrases->push_back(ParallelSpan(i1,i2,j1,j2));
-            seen = true;
-          }
-        }
-      }
-    }
-  }
-}
-
-template <typename K, typename V>
-void
-lookup_and_append(const map<K, V> &dict, const K &key, V &output)
-{
-    typename map<K, V>::const_iterator found = dict.find(key);
-    if (found != dict.end())
-        copy(found->second.begin(), found->second.end(), back_inserter(output));
-}
-
-// this uses the TARGET span (i,j) to annotate phrases, will copy
-// phrases if there is more than one annotation.
-// TODO: support source annotation
-void Extract::AnnotatePhrasesWithCategoryTypes(const WordID default_cat,
-                                      const map< boost::tuple<short,short,short,short>, vector<WordID> > &types,
-                                      vector<ParallelSpan>* phrases) {
-  const int num_unannotated_phrases = phrases->size();
-  // have to use num_unannotated_phrases since we may grow the vector
-  for (int i = 0; i < num_unannotated_phrases; ++i) {
-    ParallelSpan& phrase = (*phrases)[i];
-    vector<WordID> cats;
-    lookup_and_append(types, boost::make_tuple(phrase.i1, phrase.i2, phrase.j1, phrase.j2), cats);
-    lookup_and_append(types, boost::make_tuple((short)-1, (short)-1, phrase.j1, phrase.j2), cats);
-    lookup_and_append(types, boost::make_tuple(phrase.i1, phrase.i2, (short)-1, (short)-1), cats);
-    if (cats.empty() && default_cat != 0) {
-      cats = vector<WordID>(1, default_cat);
-    }
-    if (cats.empty()) {
-      cerr << "ERROR span " << phrase.i1 << "," << phrase.i2 << "-"
-           << phrase.j1 << "," << phrase.j2 << " has no type. "
-              "Did you forget --default_category?\n";
-    }
-    phrase.cat = cats[0];
-    for (int ci = 1; ci < cats.size(); ++ci) {
-      ParallelSpan new_phrase = phrase;
-      new_phrase.cat = cats[ci];
-      phrases->push_back(new_phrase);
-    }
-  }
-}
-
-// a partially complete (f-side) of a rule
-struct RuleItem {
-  vector<ParallelSpan> f;
-  int i,j,syms,vars;
-  explicit RuleItem(int pi) : i(pi), j(pi), syms(), vars() {}
-  void Extend(const WordID& fword) {
-    f.push_back(ParallelSpan(fword));
-    ++j;
-    ++syms;
-  }
-  void Extend(const ParallelSpan& subphrase) {
-    f.push_back(subphrase);
-    j += subphrase.i2 - subphrase.i1;
-    ++vars;
-    ++syms;
-  }
-  bool RuleFEndsInVariable() const {
-    if (f.size() > 0) {
-      return f.back().IsVariable();
-    } else { return false; }
-  }
-};
-
-void Extract::ExtractConsistentRules(const AnnotatedParallelSentence& sentence,
-                          const vector<ParallelSpan>& phrases,
-                          const int max_vars,
-                          const int max_syms,
-                          const bool permit_adjacent_nonterminals,
-                          const bool require_aligned_terminal,
-                          RuleObserver* observer,
-                          vector<WordID>* all_cats) {
-  const char bkoff_mrkr = '_';
-  queue<RuleItem> q;  // agenda for BFS
-  int max_len = -1;
-  unordered_map<pair<short, short>, vector<ParallelSpan>, boost::hash<pair<short, short> > > fspans;
-  vector<vector<ParallelSpan> > spans_by_start(sentence.f_len);
-  set<int> starts;
-  WordID bkoff;
-  for (int i = 0; i < phrases.size(); ++i) {
-    fspans[make_pair(phrases[i].i1,phrases[i].i2)].push_back(phrases[i]);
-    max_len = max(max_len, phrases[i].i2 - phrases[i].i1);
-    // have we already added a rule item starting at phrases[i].i1?
-    if (starts.insert(phrases[i].i1).second)
-      q.push(RuleItem(phrases[i].i1));
-    spans_by_start[phrases[i].i1].push_back(phrases[i]);
-  }
-  starts.clear();
-  vector<pair<int,int> > next_e(sentence.e_len);
-  vector<WordID> cur_rhs_f, cur_rhs_e;
-  vector<pair<short, short> > cur_terminal_align;
-  vector<int> cur_es, cur_fs;
-  while(!q.empty()) {
-    const RuleItem& rule = q.front();
-
-    // extend the partial rule
-    if (rule.j < sentence.f_len && (rule.j - rule.i) < max_len && rule.syms < max_syms) {
-      RuleItem ew = rule;
-
-      // extend with a word
-      ew.Extend(sentence.f[ew.j]);
-      q.push(ew);
-
-      // with variables
-      if (rule.vars < max_vars &&
-          !spans_by_start[rule.j].empty() &&
-          ((!rule.RuleFEndsInVariable()) || permit_adjacent_nonterminals)) {
-        const vector<ParallelSpan>& sub_phrases = spans_by_start[rule.j];
-        for (int it = 0; it < sub_phrases.size(); ++it) {
-          if (sub_phrases[it].i2 - sub_phrases[it].i1 + rule.j - rule.i <= max_len) {
-            RuleItem ev = rule;
-            ev.Extend(sub_phrases[it]);
-            q.push(ev);
-            assert(ev.j <= sentence.f_len);
-          }
-        }
-      }
-    }
-    // determine if rule is consistent
-    if (rule.syms > 0 &&
-        fspans.count(make_pair(rule.i,rule.j)) &&
-        (!rule.RuleFEndsInVariable() || rule.syms > 1)) {
-      const vector<ParallelSpan>& orig_spans = fspans[make_pair(rule.i,rule.j)];
-      for (int s = 0; s < orig_spans.size(); ++s) {
-        const ParallelSpan& orig_span = orig_spans[s];
-        const WordID lhs = orig_span.cat;
-        for (int j = orig_span.j1; j < orig_span.j2; ++j) next_e[j].first = -1;
-        int nt_index_e = 0;
-        for (int i = 0; i < rule.f.size(); ++i) {
-          const ParallelSpan& cur = rule.f[i];
-          if (cur.IsVariable())
-            next_e[cur.j1] = pair<int,int>(cur.j2, ++nt_index_e);
-        }
-        cur_rhs_f.clear();
-        cur_rhs_e.clear();
-        cur_terminal_align.clear();
-        cur_fs.clear();
-        cur_es.clear();
-
-        const int elen = orig_span.j2 - orig_span.j1;
-        vector<int> isvar(elen, 0);
-        int fbias = rule.i;
-        bool bad_rule = false;
-        bool has_aligned_terminal = false;
-        for (int i = 0; i < rule.f.size(); ++i) {
-          const ParallelSpan& cur = rule.f[i];
-          cur_rhs_f.push_back(cur.cat);
-          if (cur.cat > 0) {   // terminal
-            if (sentence.f_aligned[fbias + i]) has_aligned_terminal = true;
-            cur_fs.push_back(fbias + i);
-          } else {             // non-terminal
-            int subj1 = cur.j1 - orig_span.j1;
-            int subj2 = cur.j2 - orig_span.j1;
-            if (subj1 < 0 || subj2 > elen) { bad_rule = true; break; }
-            for (int j = subj1; j < subj2 && !bad_rule; ++j) {
-              int& isvarj = isvar[j];
-              isvarj = true;
-            }
-            if (bad_rule) break;
-            cur_fs.push_back(-1);
-            fbias += cur.i2 - cur.i1 - 1;
-          }
-        }
-        if (require_aligned_terminal && !has_aligned_terminal) bad_rule = true;
-        if (!bad_rule) {
-          for (int j = orig_span.j1; j < orig_span.j2; ++j) {
-            if (next_e[j].first < 0) {
-              cur_rhs_e.push_back(sentence.e[j]);
-              cur_es.push_back(j);
-            } else {
-              cur_rhs_e.push_back(1 - next_e[j].second);  // next_e[j].second is NT gap index
-              cur_es.push_back(-1);
-              j = next_e[j].first - 1;
-            }
-          }
-          for (short i = 0; i < cur_fs.size(); ++i)
-            if (cur_fs[i] >= 0)
-              for (short j = 0; j < cur_es.size(); ++j)
-                if (cur_es[j] >= 0 && sentence.aligned(cur_fs[i],cur_es[j]))
-                  cur_terminal_align.push_back(make_pair(i,j));
-          //observer->CountRule(lhs, cur_rhs_f, cur_rhs_e, cur_terminal_align);
-          
-          if(!all_cats->empty()) {
-            //produce the backoff grammar if the category wordIDs are available
-          for (int i = 0; i < cur_rhs_f.size(); ++i) {
-            if(cur_rhs_f[i] < 0) {
-              //cerr << cur_rhs_f[i] << ": (cats,f) |" << TD::Convert(-cur_rhs_f[i]) << endl;
-              string nonterm = TD::Convert(-cur_rhs_f[i]);
-              nonterm+=bkoff_mrkr;
-              bkoff = -TD::Convert(nonterm);
-              cur_rhs_f[i]=bkoff;
-              /*vector<WordID> rhs_f_bkoff;
-              vector<WordID> rhs_e_bkoff;
-              vector<pair<short,short> > bkoff_align;
-              bkoff_align.clear();
-              bkoff_align.push_back(make_pair(0,0));
-              
-              for (int cat = 0; cat < all_cats->size(); ++cat) {
-                rhs_f_bkoff.clear();
-                rhs_e_bkoff.clear();
-                rhs_f_bkoff.push_back(-(*all_cats)[cat]);
-                rhs_e_bkoff.push_back(0);
-                observer->CountRule(bkoff,rhs_f_bkoff,rhs_e_bkoff,bkoff_align);
-                
-              }*/
-            }
-          }
-           
-          } 
-          observer->CountRule(lhs, cur_rhs_f, cur_rhs_e, cur_terminal_align);
-        }
-      }
-    }
-    q.pop();
-  }
-}
-
diff --git a/extools/extract.h b/extools/extract.h
deleted file mode 100644
index e9ea5e65..00000000
--- a/extools/extract.h
+++ /dev/null
@@ -1,94 +0,0 @@
-#ifndef _EXTRACT_H_
-#define _EXTRACT_H_
-
-#include <iostream>
-#include <utility>
-#include <vector>
-#include <boost/tuple/tuple.hpp>
-#include "array2d.h"
-#include "wordid.h"
-#include "sparse_vector.h"
-
-struct AnnotatedParallelSentence;
-
-// usually represents a consistent phrase, which may
-// be annotated with a type (cat)
-// inside the rule extractor, this class is also used to represent a word
-// in a partial rule.
-struct ParallelSpan {
-  // i1 = i of f side
-  // i2 = j of f side
-  // j1 = i of e side
-  // j2 = j of e side
-  short i1,i2,j1,j2;
-  // cat is set by AnnotatePhrasesWithCategoryTypes, otherwise it's 0
-  WordID cat;  // category type of span (also overloaded by RuleItem class
-               //                        to be a word ID)
-  ParallelSpan() : i1(-1), i2(-1), j1(-1), j2(-1), cat() {}
-  // used by Rule class to represent a terminal symbol:
-  explicit ParallelSpan(WordID w) : i1(-1), i2(-1), j1(-1), j2(-1), cat(w) {}
-  ParallelSpan(int pi1, int pi2, int pj1, int pj2) : i1(pi1), i2(pi2), j1(pj1), j2(pj2), cat() {}
-  ParallelSpan(int pi1, int pi2, int pj1, int pj2, WordID c) : i1(pi1), i2(pi2), j1(pj1), j2(pj2), cat(c) {}
-
-  // ParallelSpan is used in the Rule class where it is
-  // overloaded to also represent terminal symbols
-  inline bool IsVariable() const { return i1 != -1; }
-};
-
-// rule extraction logic lives here. this has no data, it's just got
-// static member functions.
-struct Extract {
-  // RuleObserver's CountRule is called for each rule extracted
-  // implement CountRuleImpl to do things like count the rules,
-  // write them to a file, etc.
-  struct RuleObserver {
-    RuleObserver() : count() {}
-    virtual void CountRule(WordID lhs,
-                           const std::vector<WordID>& rhs_f,
-                           const std::vector<WordID>& rhs_e,
-                           const std::vector<std::pair<short, short> >& fe_terminal_alignments) {
-      ++count;
-      CountRuleImpl(lhs, rhs_f, rhs_e, fe_terminal_alignments);
-    }
-    virtual ~RuleObserver();
-
-   protected:
-    virtual void CountRuleImpl(WordID lhs,
-                           const std::vector<WordID>& rhs_f,
-                           const std::vector<WordID>& rhs_e,
-                           const std::vector<std::pair<short, short> >& fe_terminal_alignments) = 0;
-   private:
-    int count;
-  };
-
-  // given a set of "tight" phrases and the aligned sentence they were
-  // extracted from, "loosen" them
-  static void LoosenPhraseBounds(const AnnotatedParallelSentence& sentence,
-                                 const int max_base_phrase_size,
-                                 std::vector<ParallelSpan>* phrases);
-
-  // extract all consistent phrase pairs, up to size max_base_phrase_size
-  // (on the source side). these phrases will be "tight".
-  static void ExtractBasePhrases(const int max_base_phrase_size,
-                        const AnnotatedParallelSentence& sentence,
-                        std::vector<ParallelSpan>* phrases);
-
-  // this uses the TARGET span (i,j) to annotate phrases, will copy
-  // phrases if there is more than one annotation.
-  static void AnnotatePhrasesWithCategoryTypes(const WordID default_cat,
-                                      const std::map< boost::tuple<short,short,short,short>, std::vector<WordID> > &types,
-                                      std::vector<ParallelSpan>* phrases);
-
-  // use the Chiang (2007) extraction logic to extract consistent subphrases
-  // observer->CountRule is called once for each rule extracted
-  static void ExtractConsistentRules(const AnnotatedParallelSentence& sentence,
-                          const std::vector<ParallelSpan>& phrases,
-                          const int max_vars,
-                          const int max_syms,
-                          const bool permit_adjacent_nonterminals,
-                          const bool require_aligned_terminal,
-                          RuleObserver* observer,
-                          std::vector<WordID>* all_cats);
-};
-
-#endif
diff --git a/extools/extractor.cc b/extools/extractor.cc
deleted file mode 100644
index 1e4154ef..00000000
--- a/extools/extractor.cc
+++ /dev/null
@@ -1,439 +0,0 @@
-#include <iostream>
-#include <vector>
-#include <utility>
-#include <tr1/unordered_map>
-
-#include <boost/functional/hash.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-#include <boost/lexical_cast.hpp>
-
-#include "sparse_vector.h"
-#include "sentence_pair.h"
-#include "extract.h"
-#include "tdict.h"
-#include "fdict.h"
-#include "wordid.h"
-#include "array2d.h"
-#include "filelib.h"
-#include "striped_grammar.h"
-
-using namespace std;
-using namespace std::tr1;
-namespace po = boost::program_options;
-
-static const size_t MAX_LINE_LENGTH = 100000;
-WordID kBOS, kEOS, kDIVIDER, kGAP, kSPLIT;
-int kCOUNT;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("input,i", po::value<string>()->default_value("-"), "Input file")
-        ("default_category,d", po::value<string>(), "Default span type (use X for 'Hiero')")
-        ("x_cdyer_pos,x", "Extract monolingual POS contexts (cdyer experimental)")
-        ("loose", "Use loose phrase extraction heuristic for base phrases")
-        ("base_phrase,B", "Write base phrases")
-        ("base_phrase_spans", "Write base sentences and phrase spans")
-        ("phrase_language", po::value<string>()->default_value("target"), "Extract phrase strings in source, target or both languages")
-        ("context_language", po::value<string>()->default_value("target"), "Extract context strings in source, target or both languages")
-        ("bidir,b", "Extract bidirectional rules (for computing p(f|e) in addition to p(e|f))")
-        ("combiner_size,c", po::value<size_t>()->default_value(800000), "Number of unique items to store in cache before writing rule counts. Set to 1 to disable cache. Set to 0 for no limit.")
-        ("silent", "Write nothing to stderr except errors")
-        ("phrase_context,C", "Write base phrase contexts")
-        ("phrase_context_size,S", po::value<int>()->default_value(2), "Use this many words of context on left and write when writing base phrase contexts")
-        ("max_base_phrase_size,L", po::value<int>()->default_value(10), "Maximum starting phrase size")
-        ("max_syms,l", po::value<int>()->default_value(5), "Maximum number of symbols in final phrase size")
-        ("max_vars,v", po::value<int>()->default_value(2), "Maximum number of nonterminal variables in final phrase size")
-        ("permit_adjacent_nonterminals,A", "Permit adjacent nonterminals in source side of rules")
-        ("no_required_aligned_terminal,n", "Do not require an aligned terminal")
-        ("topics,t", po::value<int>()->default_value(50), "Number of categories assigned during clustering")
-        ("backoff,g","Produce a backoff grammar")
-        ("help,h", "Print this help message and exit");
-  po::options_description clo("Command line options");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  po::notify(*conf);
-
-  if (conf->count("help") || conf->count("input") == 0) {
-    cerr << "\nUsage: extractor [-options]\n";
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-// TODO how to handle alignment information?
-void WriteBasePhrases(const AnnotatedParallelSentence& sentence,
-                      const vector<ParallelSpan>& phrases) {
-  vector<WordID> e,f;
-  for (int it = 0; it < phrases.size(); ++it) {
-    const ParallelSpan& phrase = phrases[it];
-    e.clear();
-    f.clear();
-    for (int i = phrase.i1; i < phrase.i2; ++i)
-      f.push_back(sentence.f[i]);
-    for (int j = phrase.j1; j < phrase.j2; ++j)
-      e.push_back(sentence.e[j]);
-    cout << TD::GetString(f) << " ||| " << TD::GetString(e) << endl;
-  }
-}
-
-void WriteBasePhraseSpans(const AnnotatedParallelSentence& sentence,
-                          const vector<ParallelSpan>& phrases) {
-  cout << TD::GetString(sentence.f) << " ||| " << TD::GetString(sentence.e) << " |||";
-  for (int it = 0; it < phrases.size(); ++it) {
-    const ParallelSpan& phrase = phrases[it];
-    cout << " " << phrase.i1 << "-" << phrase.i2
-      << "-" << phrase.j1 << "-" << phrase.j2;
-  }
-  cout << endl;
-}
-
-struct CountCombiner {
-  CountCombiner(const size_t& csize) : combiner_size(csize) {
-    if (csize == 0) { cerr << "Using unlimited combiner cache.\n"; }
-  }
-  ~CountCombiner() {
-    if (!cache.empty()) WriteAndClearCache();
-  }
-
-  void Count(const vector<WordID>& key,
-             const vector<WordID>& val,
-             const int count_type,
-             const vector<pair<short,short> >& aligns) {
-    if (combiner_size != 1) {
-      RuleStatistics& v = cache[key][val];
-      float newcount = v.counts.add_value(count_type, 1.0f);
-      // hack for adding alignments
-      if (newcount < 7.0f && aligns.size() > v.aligns.size())
-        v.aligns = aligns;
-      if (combiner_size > 1 && cache.size() > combiner_size)
-        WriteAndClearCache();
-    } else {
-      cout << TD::GetString(key) << '\t' << TD::GetString(val) << " ||| ";
-      cout << RuleStatistics(count_type, 1.0f, aligns) << endl;
-    }
-  }
-
- private:
-  void WriteAndClearCache() {
-    for (unordered_map<vector<WordID>, Vec2PhraseCount, boost::hash<vector<WordID> > >::iterator it = cache.begin();
-         it != cache.end(); ++it) {
-      cout << TD::GetString(it->first) << '\t';
-      const Vec2PhraseCount& vals = it->second;
-      bool needdiv = false;
-      for (Vec2PhraseCount::const_iterator vi = vals.begin(); vi != vals.end(); ++vi) {
-        if (needdiv) cout << " ||| "; else needdiv = true;
-        cout << TD::GetString(vi->first) << " ||| " << vi->second;
-      }
-      cout << endl;
-    }
-    cache.clear();
-  }
-
-  const size_t combiner_size;
-  typedef unordered_map<vector<WordID>, RuleStatistics, boost::hash<vector<WordID> > > Vec2PhraseCount;
-  unordered_map<vector<WordID>, Vec2PhraseCount, boost::hash<vector<WordID> > > cache;
-};
-
-// TODO optional source context
-// output <k, v> : k = phrase "document" v = context "term"
-void WritePhraseContexts(const AnnotatedParallelSentence& sentence,
-                         const vector<ParallelSpan>& phrases,
-                         const int ctx_size,
-                         bool phrase_s, bool phrase_t,
-                         bool context_s, bool context_t,
-                         CountCombiner* o) {
-  vector<WordID> context, context_f;
-  if (context_t)
-  {
-      context.resize(ctx_size * 2 + 1);
-      context[ctx_size] = kGAP;
-  }
-  if (context_s)
-  {
-      context_f.resize(ctx_size * 2 + 1);
-      context_f[ctx_size] = kGAP;
-  }
-  vector<WordID> key, key_f;
-  if (phrase_t) key.reserve(100);
-  if (phrase_s) key_f.reserve(100);
-
-  for (int it = 0; it < phrases.size(); ++it) {
-    const ParallelSpan& phrase = phrases[it];
-
-    key.clear();
-    for (int j = phrase.j1; j < phrase.j2 && phrase_t; ++j)
-      key.push_back(sentence.e[j]);
-
-    if (context_t)
-    {
-        context.resize(ctx_size * 2 + 1);
-        for (int i = 0; i < ctx_size && context_t; ++i) {
-          int epos = phrase.j1 - 1 - i;
-          const WordID left_ctx = (epos < 0) ? kBOS : sentence.e[epos];
-          context[ctx_size - i - 1] = left_ctx;
-          epos = phrase.j2 + i;
-          const WordID right_ctx = (epos >= sentence.e_len) ? kEOS : sentence.e[epos];
-          context[ctx_size + i + 1] = right_ctx;
-        }
-    }
-    else
-        context.clear();
-
-    if (phrase_s)
-    {
-        key_f.clear();
-        for (int i = phrase.i1; i < phrase.i2; ++i)
-          key_f.push_back(sentence.f[i]);
-        if (phrase_t) key.push_back(kSPLIT);
-        copy(key_f.begin(), key_f.end(), back_inserter(key));
-    }
-
-    if (context_s)
-    {
-        for (int i = 0; i < ctx_size; ++i) {
-          int fpos = phrase.i1 - 1 - i;
-          const WordID left_ctx = (fpos < 0) ? kBOS : sentence.f[fpos];
-          context_f[ctx_size - i - 1] = left_ctx;
-          fpos = phrase.i2 + i;
-          const WordID right_ctx = (fpos >= sentence.f_len) ? kEOS : sentence.f[fpos];
-          context_f[ctx_size + i + 1] = right_ctx;
-        }
-        if (context_t) context.push_back(kSPLIT);
-        copy(context_f.begin(), context_f.end(), back_inserter(context));
-    }
-
-    o->Count(key, context, kCOUNT, vector<pair<short,short> >());
-  }
-}
-
-struct SimpleRuleWriter : public Extract::RuleObserver {
- protected:
-  virtual void CountRuleImpl(WordID lhs,
-                             const vector<WordID>& rhs_f,
-                             const vector<WordID>& rhs_e,
-                             const vector<pair<short,short> >& fe_terminal_alignments) {
-    cout << "[" << TD::Convert(-lhs) << "] |||";
-    for (int i = 0; i < rhs_f.size(); ++i) {
-      if (rhs_f[i] < 0) cout << " [" << TD::Convert(-rhs_f[i]) << ']';
-      else cout << ' ' << TD::Convert(rhs_f[i]);
-    }
-    cout << " |||";
-    for (int i = 0; i < rhs_e.size(); ++i) {
-      if (rhs_e[i] <= 0) cout << " [" << (1-rhs_e[i]) << ']';
-      else cout << ' ' << TD::Convert(rhs_e[i]);
-    }
-    cout << " |||";
-    for (int i = 0; i < fe_terminal_alignments.size(); ++i) {
-      cout << ' ' << fe_terminal_alignments[i].first << '-' << fe_terminal_alignments[i].second;
-    }
-    cout << endl;
-  }
-};
-
-struct HadoopStreamingRuleObserver : public Extract::RuleObserver {
-  HadoopStreamingRuleObserver(CountCombiner* cc, bool bidir_flag) :
-     bidir(bidir_flag),
-     kF(TD::Convert("F")),
-     kE(TD::Convert("E")),
-     kDIVIDER(TD::Convert("|||")),
-     kLB("["), kRB("]"),
-     combiner(*cc),
-     kEMPTY(),
-     kCFE(FD::Convert("CFE")) {
-   for (int i=1; i < 50; ++i)
-     index2sym[1-i] = TD::Convert(kLB + boost::lexical_cast<string>(i) + kRB);
-   fmajor_key.resize(10, kF);
-   emajor_key.resize(10, kE);
-   if (bidir)
-     fmajor_key[2] = emajor_key[2] = kDIVIDER;
-   else
-     fmajor_key[1] = kDIVIDER;
- }
-
- protected:
-  virtual void CountRuleImpl(WordID lhs,
-                             const vector<WordID>& rhs_f,
-                             const vector<WordID>& rhs_e,
-                             const vector<pair<short,short> >& fe_terminal_alignments) {
-    if (bidir) { // extract rules in "both directions" E->F and F->E
-      fmajor_key.resize(3 + rhs_f.size());
-      emajor_key.resize(3 + rhs_e.size());
-      fmajor_val.resize(rhs_e.size());
-      emajor_val.resize(rhs_f.size());
-      emajor_key[1] = fmajor_key[1] = MapSym(lhs);
-      int nt = 1;
-      for (int i = 0; i < rhs_f.size(); ++i) {
-        const WordID id = rhs_f[i];
-        if (id < 0) {
-          fmajor_key[3 + i] = MapSym(id, nt);
-          emajor_val[i] = MapSym(id, nt);
-          ++nt;
-        } else {
-          fmajor_key[3 + i] = id;
-          emajor_val[i] = id;
-        }
-      }
-      for (int i = 0; i < rhs_e.size(); ++i) {
-        WordID id = rhs_e[i];
-        if (id <= 0) {
-          fmajor_val[i] = index2sym[id];
-          emajor_key[3 + i] = index2sym[id];
-        } else {
-          fmajor_val[i] = id;
-          emajor_key[3 + i] = id;
-        }
-      }
-      combiner.Count(fmajor_key, fmajor_val, kCFE, fe_terminal_alignments);
-      combiner.Count(emajor_key, emajor_val, kCFE, kEMPTY);
-    } else { // extract rules only in F->E
-      fmajor_key.resize(2 + rhs_f.size());
-      fmajor_val.resize(rhs_e.size());
-      fmajor_key[0] = MapSym(lhs);
-      int nt = 1;
-      for (int i = 0; i < rhs_f.size(); ++i) {
-        const WordID id = rhs_f[i];
-        if (id < 0)
-          fmajor_key[2 + i] = MapSym(id, nt++);
-        else
-          fmajor_key[2 + i] = id;
-      }
-      for (int i = 0; i < rhs_e.size(); ++i) {
-        const WordID id = rhs_e[i];
-        if (id <= 0)
-          fmajor_val[i] = index2sym[id];
-        else
-          fmajor_val[i] = id;
-      }
-      combiner.Count(fmajor_key, fmajor_val, kCFE, fe_terminal_alignments);
-    }
-  }
-
- private:
-  WordID MapSym(WordID sym, int ind = 0) {
-    WordID& r = cat2ind2sym[sym][ind];
-    if (!r) {
-      if (ind == 0)
-        r = TD::Convert(kLB + TD::Convert(-sym) + kRB);
-      else
-        r = TD::Convert(kLB + TD::Convert(-sym) + "," + boost::lexical_cast<string>(ind) + kRB);
-    }
-    return r;
-  }
-
-  const bool bidir;
-  const WordID kF, kE, kDIVIDER;
-  const string kLB, kRB;
-  CountCombiner& combiner;
-  const vector<pair<short,short> > kEMPTY;
-  const int kCFE;
-  map<WordID, map<int, WordID> > cat2ind2sym;
-  map<int, WordID> index2sym;
-  vector<WordID> emajor_key, emajor_val, fmajor_key, fmajor_val;
-};
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  kBOS = TD::Convert("<s>");
-  kEOS = TD::Convert("</s>");
-  kDIVIDER = TD::Convert("|||");
-  kGAP = TD::Convert("<PHRASE>");
-  kCOUNT = FD::Convert("C");
-  kSPLIT = TD::Convert("<SPLIT>");
-
-  WordID default_cat = 0;  // 0 means no default- extraction will
-                           // fail if a phrase is extracted without a
-                           // category
-  const bool backoff = (conf.count("backoff") ? true : false);
-  if (conf.count("default_category")) {
-    string sdefault_cat = conf["default_category"].as<string>();
-    default_cat = -TD::Convert(sdefault_cat);
-    cerr << "Default category: " << sdefault_cat << endl;
-  }
-  ReadFile rf(conf["input"].as<string>());
-  istream& in = *rf.stream();
-
-  char buf[MAX_LINE_LENGTH];
-  AnnotatedParallelSentence sentence;
-  vector<ParallelSpan> phrases;
-  vector<WordID> all_cats;
-  int max_base_phrase_size = conf["max_base_phrase_size"].as<int>();
-  bool write_phrase_contexts = conf.count("phrase_context") > 0;
-  const bool write_base_phrases = conf.count("base_phrase") > 0;
-  const bool write_base_phrase_spans = conf.count("base_phrase_spans") > 0;
-  const bool loose_phrases = conf.count("loose") > 0;
-  const bool silent = conf.count("silent") > 0;
-  const int max_syms = conf["max_syms"].as<int>();
-  const int max_vars = conf["max_vars"].as<int>();
-  const int ctx_size = conf["phrase_context_size"].as<int>();
-  const int num_categories = conf["topics"].as<int>();
-  const bool permit_adjacent_nonterminals = conf.count("permit_adjacent_nonterminals") > 0;
-  const bool require_aligned_terminal = conf.count("no_required_aligned_terminal") == 0;
-  const string ps = conf["phrase_language"].as<string>();
-  const bool phrase_s = ps == "source" || ps == "both";
-  const bool phrase_t = ps == "target" || ps == "both";
-  const string cs = conf["context_language"].as<string>();
-  const bool context_s = cs == "source" || cs == "both";
-  const bool context_t = cs == "target" || cs == "both";
-  const bool x_cdyer_pos = conf.count("x_cdyer_pos");
-  int line = 0;
-  CountCombiner cc(conf["combiner_size"].as<size_t>());
-  HadoopStreamingRuleObserver o(&cc,
-                                conf.count("bidir") > 0);
-
-  assert(phrase_s || phrase_t);
-  assert(context_s || context_t);
-
-  if(backoff) {
-    for (int i=0;i < num_categories;++i)
-        all_cats.push_back(TD::Convert("X"+boost::lexical_cast<string>(i)));
-  }
-
-  //SimpleRuleWriter o;
-  while(in) {
-    ++line;
-    in.getline(buf, MAX_LINE_LENGTH);
-    if (buf[0] == 0) continue;
-    //cerr << "line #" << line << " = " << buf << endl;
-    if (!silent) {
-      if (line % 200 == 0) cerr << '.';
-      if (line % 8000 == 0) cerr << " [" << line << "]\n" << flush;
-    }
-    sentence.ParseInputLine(buf);
-    if (x_cdyer_pos) {
-      sentence.e = sentence.f;
-      sentence.AllocateForAlignment();
-      for (int i = 0; i < sentence.e.size(); ++i) sentence.Align(i,i);
-      max_base_phrase_size = 1;
-      write_phrase_contexts = true;
-    }
-    phrases.clear();
-    Extract::ExtractBasePhrases(max_base_phrase_size, sentence, &phrases);
-    if (loose_phrases)
-      Extract::LoosenPhraseBounds(sentence, max_base_phrase_size, &phrases);
-    if (phrases.empty()) {
-      cerr << "WARNING no phrases extracted line: " << line << endl;
-      continue;
-    }
-    if (write_phrase_contexts) {
-      WritePhraseContexts(sentence, phrases, ctx_size, phrase_s, phrase_t, context_s, context_t, &cc);
-      continue;
-    }
-    if (write_base_phrases) {
-      WriteBasePhrases(sentence, phrases);
-      continue;
-    }
-    if (write_base_phrase_spans) {
-      WriteBasePhraseSpans(sentence, phrases);
-      continue;
-    }
-    Extract::AnnotatePhrasesWithCategoryTypes(default_cat, sentence.span_types, &phrases);
-    Extract::ExtractConsistentRules(sentence, phrases, max_vars, max_syms, permit_adjacent_nonterminals, require_aligned_terminal, &o, &all_cats);
-  }
-  if (!silent) cerr << endl;
-  return 0;
-}
diff --git a/extools/extractor_monolingual.cc b/extools/extractor_monolingual.cc
deleted file mode 100644
index 049ebc85..00000000
--- a/extools/extractor_monolingual.cc
+++ /dev/null
@@ -1,256 +0,0 @@
-#include <iostream>
-#include <vector>
-#include <utility>
-#include <tr1/unordered_map>
-
-#include <boost/functional/hash.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-#include <boost/lexical_cast.hpp>
-
-#include "tdict.h"
-#include "fdict.h"
-#include "wordid.h"
-#include "filelib.h"
-
-using namespace std;
-using namespace std::tr1;
-namespace po = boost::program_options;
-
-static const size_t MAX_LINE_LENGTH = 100000;
-WordID kBOS, kEOS, kDIVIDER, kGAP;
-int kCOUNT;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("input,i", po::value<string>()->default_value("-"), "Input file")
-        ("phrases,p", po::value<string>(), "File contatining phrases of interest")
-        ("phrase_context_size,S", po::value<int>()->default_value(2), "Use this many words of context on left and write when writing base phrase contexts")
-        ("combiner_size,c", po::value<size_t>()->default_value(30000), "Number of unique items to store in cache before writing rule counts. Set to 1 to disable cache. Set to 0 for no limit.")
-        ("prune", po::value<size_t>()->default_value(0), "Prune items with count less than threshold; applies each time the cache is dumped.")
-        ("silent", "Write nothing to stderr except errors")
-        ("help,h", "Print this help message and exit");
-  po::options_description clo("Command line options");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  po::notify(*conf);
-
-  if (conf->count("help") || conf->count("input") != 1 || conf->count("phrases") != 1) {
-    cerr << "\nUsage: extractor_monolingual [-options]\n";
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-struct TrieNode
-{
-  TrieNode(int l) : finish(false), length(l) {};
-  ~TrieNode()
-  {
-    for (unordered_map<int, TrieNode*>::iterator
-         it = next.begin(); it != next.end(); ++it)
-      delete it->second;
-    next.clear();
-  }
-
-  TrieNode *follow(int token)
-  {
-    unordered_map<int, TrieNode*>::iterator
-      found = next.find(token);
-    if (found != next.end())
-      return found->second;
-    else
-      return 0;
-  }
-
-  void insert(const vector<int> &tokens)
-  {
-    insert(tokens.begin(), tokens.end());
-  }
-
-  void insert(vector<int>::const_iterator begin, vector<int>::const_iterator end)
-  {
-    if (begin == end)
-      finish = true;
-    else
-    {
-      int token = *begin;
-      unordered_map<int, TrieNode*>::iterator 
-        nit = next.find(token);
-      if (nit == next.end())
-        nit = next.insert(make_pair(token, new TrieNode(length+1))).first;
-      ++begin;
-      nit->second->insert(begin, end);
-    }
-  }
-
-  bool finish;
-  int length;
-  unordered_map<int, TrieNode*> next;
-};
-
-struct CountCombiner {
-  CountCombiner(const size_t& csize, const size_t& prune) : combiner_size(csize), threshold(prune) {
-    if (csize == 0) { cerr << "Using unlimited combiner cache.\n"; }
-  }
-  ~CountCombiner() {
-    if (!cache.empty()) WriteAndClearCache();
-  }
-
-  void Count(const vector<WordID>& key,
-             const vector<WordID>& val,
-             const int count_type)
-  {
-    if (combiner_size != 1) {
-      cache[key][val] += count_type;
-      if (combiner_size > 1 && cache.size() > combiner_size)
-        WriteAndClearCache();
-    } else {
-      cout << TD::GetString(key) << '\t' << TD::GetString(val) << " ||| C=" << count_type << "\n";
-    }
-  }
-
- private:
-  void WriteAndClearCache() {
-    for (unordered_map<vector<WordID>, Vec2PhraseCount, boost::hash<vector<WordID> > >::iterator it = cache.begin();
-         it != cache.end(); ++it) {
-      const Vec2PhraseCount& vals = it->second;
-      bool first = true;
-      for (Vec2PhraseCount::const_iterator vi = vals.begin(); vi != vals.end(); ++vi) 
-      {
-        if (threshold > 1 && combiner_size != 1 && vi->second < threshold)
-            continue;
-
-        if (!first) cout << " ||| "; 
-        else 
-        {
-            cout << TD::GetString(it->first) << '\t';
-            first = false;
-        }
-        cout << TD::GetString(vi->first) << " ||| C=" << vi->second;
-       }
-      if (!first)
-          cout << '\n';
-    }
-    cout << flush;
-    cache.clear();
-  }
-
-  const size_t combiner_size, threshold;
-  typedef unordered_map<vector<WordID>, int, boost::hash<vector<WordID> > > Vec2PhraseCount;
-  unordered_map<vector<WordID>, Vec2PhraseCount, boost::hash<vector<WordID> > > cache;
-};
-
-void WriteContext(const vector<int>& sentence, int start, int end, int ctx_size, CountCombiner &combiner) 
-{
-  vector<WordID> phrase, context;
-  for (int i = start; i < end; ++i)
-      phrase.push_back(sentence[i]);
-
-  for (int i = ctx_size; i > 0; --i)
-    context.push_back(sentence[start-i]);
-  context.push_back(kGAP);
-  for (int i = 0; i < ctx_size; ++i)
-    context.push_back(sentence[end+i]);
-
-  combiner.Count(phrase, context, 1);
-}
-
-inline bool IsWhitespace(char c) { 
-    return c == ' ' || c == '\t'; 
-}
-
-inline void SkipWhitespace(const char* buf, int* ptr) {
-  while (buf[*ptr] && IsWhitespace(buf[*ptr])) { ++(*ptr); }
-}
-
-vector<int> ReadSentence(const char *buf, int padding)
-{
-  int ptr = 0;
-  SkipWhitespace(buf, &ptr);
-  int start = ptr;
-  vector<int> sentence;
-  for (int i = 0; i < padding; ++i)
-    sentence.push_back(kBOS);
-
-  while (char c = buf[ptr])
-  {
-    if (!IsWhitespace(c)) 
-      ++ptr; 
-    else {
-      sentence.push_back(TD::Convert(string(buf, start, ptr-start)));
-      SkipWhitespace(buf, &ptr);
-      start = ptr;
-    }
-  }
-  for (int i = 0; i < padding; ++i)
-    sentence.push_back(kEOS);
-
-  return sentence;
-}
-
-int main(int argc, char** argv) 
-{
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  kBOS = TD::Convert("<s>");
-  kEOS = TD::Convert("</s>");
-  kDIVIDER = TD::Convert("|||");
-  kGAP = TD::Convert("<PHRASE>");
-  kCOUNT = FD::Convert("C");
-
-  bool silent = conf.count("silent") > 0;
-  const int ctx_size = conf["phrase_context_size"].as<int>();
-  CountCombiner cc(conf["combiner_size"].as<size_t>(), conf["prune"].as<size_t>());
-
-  char buf[MAX_LINE_LENGTH];
-  TrieNode phrase_trie(0);
-  ReadFile rpf(conf["phrases"].as<string>());
-  istream& pin = *rpf.stream();
-  while (pin) {
-      pin.getline(buf, MAX_LINE_LENGTH);
-      phrase_trie.insert(ReadSentence(buf, 0));
-  }
-
-  ReadFile rif(conf["input"].as<string>());
-  istream &iin = *rif.stream();
-  int line = 0;
-  while (iin) {
-    ++line;
-    iin.getline(buf, MAX_LINE_LENGTH);
-    //cout << "line: " << line << " '" << buf << "'" << endl;
-    if (buf[0] == 0) continue;
-    if (!silent) {
-      if (line % 200 == 0) cerr << '.';
-      if (line % 8000 == 0) cerr << " [" << line << "]\n" << flush;
-    }
-
-    vector<int> sentence = ReadSentence(buf, ctx_size);
-    //cout << "sentence: " << TD::GetString(sentence) << endl;
-    vector<TrieNode*> tries;
-    for (int i = ctx_size; i < (int)sentence.size() - ctx_size; ++i)
-    {
-      //cout << "i: " << i << " token: " << TD::Convert(sentence[i]) << " tries: " << tries.size() << endl;
-      vector<TrieNode*> tries_prime;
-      tries.push_back(&phrase_trie);
-      for (vector<TrieNode*>::iterator tit = tries.begin(); tit != tries.end(); ++tit)
-      {
-        TrieNode* next = (*tit)->follow(sentence[i]);
-        if (next != 0)
-        {
-          //cout << "\tfollowed edge: " << next->finish << endl;
-          if (next->finish)
-            WriteContext(sentence, i + 1 - next->length, i + 1, ctx_size, cc);
-          tries_prime.push_back(next);
-        }
-      }
-      swap(tries, tries_prime);
-    }
-    //cout << "/sentence" << endl;
-  }
-  if (!silent) cerr << endl;
-  return 0;
-}
diff --git a/extools/featurize_grammar.cc b/extools/featurize_grammar.cc
deleted file mode 100644
index 78175202..00000000
--- a/extools/featurize_grammar.cc
+++ /dev/null
@@ -1,716 +0,0 @@
-/*
- * Featurize a grammar in striped format
- */
-#include <iostream>
-#include <sstream>
-#include <string>
-#include <map>
-#include <vector>
-#include <utility>
-#include <cstdlib>
-#include <tr1/unordered_map>
-
-#include "lex_trans_tbl.h"
-#include "sparse_vector.h"
-#include "sentence_pair.h"
-#include "extract.h"
-#include "fdict.h"
-#include "tdict.h"
-#include "filelib.h"
-#include "striped_grammar.h"
-
-#include <boost/tuple/tuple.hpp>
-#include <boost/shared_ptr.hpp>
-#include <boost/functional/hash.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-using namespace std;
-using namespace std::tr1;
-using boost::shared_ptr;
-namespace po = boost::program_options;
-
-static string aligned_corpus;
-static const size_t MAX_LINE_LENGTH = 64000000;
-
-// Data structures for indexing and counting rules
-//typedef boost::tuple< WordID, vector<WordID>, vector<WordID> > RuleTuple;
-struct RuleTuple {
-  RuleTuple(const WordID& lhs, const vector<WordID>& s, const vector<WordID>& t)
-  : m_lhs(lhs), m_source(s), m_target(t) {
-    hash_value();
-    m_dirty = false;
-  }
-
-  size_t hash_value() const {
-//    if (m_dirty) {
-      size_t hash = 0;
-      boost::hash_combine(hash, m_lhs);
-      boost::hash_combine(hash, m_source);
-      boost::hash_combine(hash, m_target);
-//    }
-//    m_dirty = false;
-    return hash;
-  }
-
-  bool operator==(RuleTuple const& b) const
-  { return m_lhs == b.m_lhs && m_source == b.m_source && m_target == b.m_target; }
-
-  WordID& lhs() { m_dirty=true; return m_lhs; }
-  vector<WordID>& source() { m_dirty=true; return m_source; }
-  vector<WordID>& target() { m_dirty=true; return m_target; }
-  const WordID& lhs() const { return m_lhs; }
-  const vector<WordID>& source() const { return m_source; }
-  const vector<WordID>& target() const { return m_target; }
-
-//  mutable size_t m_hash;
-private:
-  WordID m_lhs;
-  vector<WordID> m_source, m_target;
-  mutable bool m_dirty;
-};
-std::size_t hash_value(RuleTuple const& b) { return b.hash_value(); }
-bool operator<(RuleTuple const& l, RuleTuple const& r) {
-  if (l.lhs() < r.lhs()) return true;
-  else if (l.lhs() == r.lhs()) {
-    if (l.source() < r.source()) return true;
-    else if (l.source() == r.source()) {
-      if (l.target() < r.target()) return true;
-    }
-  }
-  return false;
-}
-
-ostream& operator<<(ostream& o, RuleTuple const& r) {
-  o << "(" << r.lhs() << "-->" << "<";
-  for (vector<WordID>::const_iterator it=r.source().begin(); it!=r.source().end(); ++it)
-    o << TD::Convert(*it) << " ";
-  o << "|||";
-  for (vector<WordID>::const_iterator it=r.target().begin(); it!=r.target().end(); ++it)
-    o << " " << TD::Convert(*it);
-  o << ">)";
-  return o;
-}
-
-template <typename Key>
-struct FreqCount {
-  //typedef unordered_map<Key, int, boost::hash<Key> > Counts;
-  typedef map<Key, int> Counts;
-  Counts counts;
-
-  int inc(const Key& r, int c=1) {
-    pair<typename Counts::iterator,bool> itb
-      = counts.insert(make_pair(r,c));
-    if (!itb.second)
-      itb.first->second += c;
-    return itb.first->second;
-  }
-
-  int inc_if_exists(const Key& r, int c=1) {
-    typename Counts::iterator it = counts.find(r);
-    if (it != counts.end())
-      it->second += c;
-    return it->second;
-  }
-
-  int count(const Key& r) const {
-    typename Counts::const_iterator it = counts.find(r);
-    if (it == counts.end()) return 0;
-    return it->second;
-  }
-
-  int operator()(const Key& r) const { return count(r); }
-};
-typedef FreqCount<RuleTuple> RuleFreqCount;
-
-class FeatureExtractor;
-class FERegistry;
-struct FEFactoryBase {
-  virtual ~FEFactoryBase() {}
-  virtual boost::shared_ptr<FeatureExtractor> Create() const = 0;
-};
-
-
-class FERegistry {
-  friend class FEFactoryBase;
- public:
-  FERegistry() {}
-  boost::shared_ptr<FeatureExtractor> Create(const std::string& ffname) const {
-    map<string, boost::shared_ptr<FEFactoryBase> >::const_iterator it = reg_.find(ffname);
-    boost::shared_ptr<FeatureExtractor> res;
-    if (it == reg_.end()) {
-      cerr << "I don't know how to create feature " << ffname << endl;
-    } else {
-      res = it->second->Create();
-    }
-    return res;
-  }
-  void DisplayList(ostream* out) const {
-    bool first = true;
-    for (map<string, boost::shared_ptr<FEFactoryBase> >::const_iterator it = reg_.begin();
-        it != reg_.end(); ++it) {
-      if (first) {first=false;} else {*out << ' ';}
-      *out << it->first;
-    }
-  }
-
-  void Register(const std::string& ffname, FEFactoryBase* factory) {
-    if (reg_.find(ffname) != reg_.end()) {
-      cerr << "Duplicate registration of FeatureExtractor with name " << ffname << "!\n";
-      exit(1);
-    }
-    reg_[ffname].reset(factory);
-  }
-
- private:
-  std::map<std::string, boost::shared_ptr<FEFactoryBase> > reg_;
-};
-
-template<class FE>
-class FEFactory : public FEFactoryBase {
-  boost::shared_ptr<FeatureExtractor> Create() const {
-    return boost::shared_ptr<FeatureExtractor>(new FE);
-  }
-};
-
-void InitCommandLine(const FERegistry& r, int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  ostringstream feats;
-  feats << "[multiple] Features to extract (";
-  r.DisplayList(&feats);
-  feats << ")";
-  opts.add_options()
-        ("filtered_grammar,g", po::value<string>(), "Grammar to add features to")
-        ("list_features,L", "List extractable features")
-        ("feature,f", po::value<vector<string> >()->composing(), feats.str().c_str())
-        ("aligned_corpus,c", po::value<string>(), "Aligned corpus (single line format)")
-        ("help,h", "Print this help message and exit");
-  po::options_description clo("Command line options");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  po::notify(*conf);
-
-  if (conf->count("help") || conf->count("aligned_corpus")==0 || conf->count("feature") == 0) {
-    cerr << "\nUsage: featurize_grammar -g FILTERED-GRAMMAR.gz -c ALIGNED_CORPUS.fr-en-al -f Feat1 -f Feat2 ... < UNFILTERED-GRAMMAR\n";
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-static const bool DEBUG = false;
-
-void LexTranslationTable::createTTable(const char* buf){
-  AnnotatedParallelSentence sent;
-  sent.ParseInputLine(buf);
-
-  //iterate over the alignment to compute aligned words
-
-  for(int i =0;i<sent.aligned.width();i++)
-    {
-      for (int j=0;j<sent.aligned.height();j++)
-        {
-          if (DEBUG) cerr << sent.aligned(i,j) << " ";
-          if( sent.aligned(i,j))
-            {
-              if (DEBUG) cerr << TD::Convert(sent.f[i])  << " aligned to " << TD::Convert(sent.e[j]);
-              ++word_translation[pair<WordID,WordID> (sent.f[i], sent.e[j])];
-              ++total_foreign[sent.f[i]];
-              ++total_english[sent.e[j]];
-            }
-        }
-      if (DEBUG)  cerr << endl;
-    }
-  if (DEBUG) cerr << endl;
-
-  const WordID NULL_ = TD::Convert("NULL");
-  //handle unaligned words - align them to null
-  for (int j =0; j < sent.e_len; j++) {
-    if (sent.e_aligned[j]) continue;
-    ++word_translation[pair<WordID,WordID> (NULL_, sent.e[j])];
-    ++total_foreign[NULL_];
-    ++total_english[sent.e[j]];
-  }
-
-  for (int i =0; i < sent.f_len; i++) {
-    if (sent.f_aligned[i]) continue;
-    ++word_translation[pair<WordID,WordID> (sent.f[i], NULL_)];
-    ++total_english[NULL_];
-    ++total_foreign[sent.f[i]];
-  }
-}
-
-inline float safenlog(float v) {
-  if (v == 1.0f) return 0.0f;
-  float res = -log(v);
-  if (res > 100.0f) res = 100.0f;
-  return res;
-}
-
-static bool IsZero(float f) { return (f > 0.999 && f < 1.001); }
-
-struct FeatureExtractor {
-  // create any keys necessary
-  virtual void ObserveFilteredRule(const WordID /* lhs */,
-                                   const vector<WordID>& /* src */,
-                                   const vector<WordID>& /* trg */) {}
-
-  // compute statistics over keys, the same lhs-src-trg tuple may be seen
-  // more than once
-  virtual void ObserveUnfilteredRule(const WordID /* lhs */,
-                                     const vector<WordID>& /* src */,
-                                     const vector<WordID>& /* trg */,
-                                     const RuleStatistics& /* info */) {}
-
-  // compute features, a unique lhs-src-trg tuple will be seen exactly once
-  virtual void ExtractFeatures(const WordID lhs,
-                               const vector<WordID>& src,
-                               const vector<WordID>& trg,
-                               const RuleStatistics& info,
-                               SparseVector<float>* result) const = 0;
-
-  virtual ~FeatureExtractor() {}
-};
-
-struct LogRuleCount : public FeatureExtractor {
-  LogRuleCount() :
-    fid_(FD::Convert("LogRuleCount")),
-    sfid_(FD::Convert("SingletonRule")),
-    kCFE(FD::Convert("CFE")) {}
-  virtual void ExtractFeatures(const WordID lhs,
-                               const vector<WordID>& src,
-                               const vector<WordID>& trg,
-                               const RuleStatistics& info,
-                               SparseVector<float>* result) const {
-    (void) lhs; (void) src; (void) trg;
-    //result->set_value(fid_, log(info.counts.get(kCFE)));
-    result->set_value(fid_, log(info.counts.get(kCFE)));
-    if (IsZero(info.counts.get(kCFE)))
-      result->set_value(sfid_, 1);
-  }
-  const int fid_;
-  const int sfid_;
-  const int kCFE;
-};
-
-struct RulePenalty : public FeatureExtractor {
-  RulePenalty() : fid_(FD::Convert("RulePenalty")) {}
-  virtual void ExtractFeatures(const WordID /*lhs*/,
-                               const vector<WordID>& /*src*/,
-                               const vector<WordID>& /*trg*/,
-                               const RuleStatistics& /*info*/,
-                               SparseVector<float>* result) const
-  { result->set_value(fid_, 1); }
-
-  const int fid_;
-};
-
-// The negative log of the condition rule probs
-// ignoring the identities of the  non-terminals.
-// i.e. the prob Hiero would assign.
-// Also extracts Labelled features.
-struct XFeatures: public FeatureExtractor {
-  XFeatures() :
-    fid_xfe(FD::Convert("XFE")),
-    fid_xef(FD::Convert("XEF")),
-    fid_labelledfe(FD::Convert("LabelledFE")),
-    fid_labelledef(FD::Convert("LabelledEF")),
-    fid_xesingleton(FD::Convert("XE_Singleton")),
-    fid_xfsingleton(FD::Convert("XF_Singleton")),
-    kCFE(FD::Convert("CFE")) {}
-  virtual void ObserveFilteredRule(const WordID /*lhs*/,
-                                   const vector<WordID>& src,
-                                   const vector<WordID>& trg) {
-    RuleTuple r(-1, src, trg);
-    map_rule(r);
-    rule_counts.inc(r, 0);
-    source_counts.inc(r.source(), 0);
-    target_counts.inc(r.target(), 0);
-  }
-
-  virtual void ObserveUnfilteredRule(const WordID /*lhs*/,
-                                     const vector<WordID>& src,
-                                     const vector<WordID>& trg,
-                                     const RuleStatistics& info) {
-    RuleTuple r(-1, src, trg);
-    map_rule(r);
-    const int count = info.counts.get(kCFE);
-    assert(count > 0);
-    rule_counts.inc_if_exists(r, count);
-    source_counts.inc_if_exists(r.source(), count);
-    target_counts.inc_if_exists(r.target(), count);
-  }
-
-  virtual void ExtractFeatures(const WordID /*lhs*/,
-                               const vector<WordID>& src,
-                               const vector<WordID>& trg,
-                               const RuleStatistics& info,
-                               SparseVector<float>* result) const {
-    RuleTuple r(-1, src, trg);
-    map_rule(r);
-    double l_r_freq = log(rule_counts(r));
-
-    const int t_c = target_counts(r.target());
-    assert(t_c > 0);
-    result->set_value(fid_xfe, log(t_c) - l_r_freq);
-    result->set_value(fid_labelledfe, log(t_c) - log(info.counts.get(kCFE)));
-//    if (t_c == 1)
-//      result->set_value(fid_xesingleton, 1.0);
-
-    const int s_c = source_counts(r.source());
-    assert(s_c > 0);
-    result->set_value(fid_xef, log(s_c) - l_r_freq);
-    result->set_value(fid_labelledef, log(s_c) - log(info.counts.get(kCFE)));
-//    if (s_c == 1)
-//      result->set_value(fid_xfsingleton, 1.0);
-  }
-
-  void map_rule(RuleTuple& r) const {
-    vector<WordID> indexes; int i=0;
-    for (vector<WordID>::iterator it = r.target().begin(); it != r.target().end(); ++it) {
-      if (*it <= 0)
-        indexes.push_back(*it);
-    }
-    for (vector<WordID>::iterator it = r.source().begin(); it != r.source().end(); ++it) {
-      if (*it <= 0)
-        *it = indexes.at(i++);
-    }
-  }
-
-  const int fid_xfe, fid_xef;
-  const int fid_labelledfe, fid_labelledef;
-  const int fid_xesingleton, fid_xfsingleton;
-  const int kCFE;
-  RuleFreqCount rule_counts;
-  FreqCount< vector<WordID> > source_counts, target_counts;
-};
-
-
-struct LabelledRuleConditionals: public FeatureExtractor {
-  LabelledRuleConditionals() :
-    fid_fe(FD::Convert("LabelledFE")),
-    fid_ef(FD::Convert("LabelledEF")),
-    kCFE(FD::Convert("CFE")) {}
-  virtual void ObserveFilteredRule(const WordID lhs,
-                                   const vector<WordID>& src,
-                                   const vector<WordID>& trg) {
-    RuleTuple r(lhs, src, trg);
-    rule_counts.inc(r, 0);
-    source_counts.inc(r.source(), 0);
-
-    target_counts.inc(r.target(), 0);
-  }
-
-  virtual void ObserveUnfilteredRule(const WordID lhs,
-                                     const vector<WordID>& src,
-                                     const vector<WordID>& trg,
-                                     const RuleStatistics& info) {
-    RuleTuple r(lhs, src, trg);
-    rule_counts.inc_if_exists(r, info.counts.get(kCFE));
-    source_counts.inc_if_exists(r.source(), info.counts.get(kCFE));
-
-    target_counts.inc_if_exists(r.target(), info.counts.get(kCFE));
-  }
-
-  virtual void ExtractFeatures(const WordID lhs,
-                               const vector<WordID>& src,
-                               const vector<WordID>& trg,
-                               const RuleStatistics& /*info*/,
-                               SparseVector<float>* result) const {
-    RuleTuple r(lhs, src, trg);
-    double l_r_freq = log(rule_counts(r));
-    result->set_value(fid_fe, log(target_counts(r.target())) - l_r_freq);
-    result->set_value(fid_ef, log(source_counts(r.source())) - l_r_freq);
-  }
-
-  const int fid_fe, fid_ef;
-  const int kCFE;
-  RuleFreqCount rule_counts;
-  FreqCount< vector<WordID> > source_counts, target_counts;
-};
-
-struct LHSProb: public FeatureExtractor {
-  LHSProb() : fid_(FD::Convert("LHSProb")), kCFE(FD::Convert("CFE")), total_count(0) {}
-
-  virtual void ObserveUnfilteredRule(const WordID lhs,
-                                     const vector<WordID>& /*src*/,
-                                     const vector<WordID>& /*trg*/,
-                                     const RuleStatistics& info) {
-    int count = info.counts.get(kCFE);
-    total_count += count;
-    lhs_counts.inc(lhs, count);
-  }
-
-  virtual void ExtractFeatures(const WordID lhs,
-                               const vector<WordID>& /*src*/,
-                               const vector<WordID>& /*trg*/,
-                               const RuleStatistics& /*info*/,
-                               SparseVector<float>* result) const {
-    double lhs_log_prob =  log(total_count) - log(lhs_counts(lhs));
-    result->set_value(fid_, lhs_log_prob);
-  }
-
-  const int fid_;
-  const int kCFE;
-  int total_count;
-  FreqCount<WordID> lhs_counts;
-};
-
-// Proper rule generative probability: p( s,t | lhs)
-struct GenerativeProb: public FeatureExtractor {
-  GenerativeProb() :
-    fid_(FD::Convert("GenerativeProb")),
-    kCFE(FD::Convert("CFE")) {}
-
-  virtual void ObserveUnfilteredRule(const WordID lhs,
-                                     const vector<WordID>& /*src*/,
-                                     const vector<WordID>& /*trg*/,
-                                     const RuleStatistics& info)
-  { lhs_counts.inc(lhs, info.counts.get(kCFE)); }
-
-  virtual void ExtractFeatures(const WordID lhs,
-                               const vector<WordID>& /*src*/,
-                               const vector<WordID>& /*trg*/,
-                               const RuleStatistics& info,
-                               SparseVector<float>* result) const {
-    double log_prob = log(lhs_counts(lhs)) - log(info.counts.get(kCFE));
-    result->set_value(fid_, log_prob);
-  }
-
-  const int fid_;
-  const int kCFE;
-  FreqCount<WordID> lhs_counts;
-};
-
-// remove terminals from the rules before estimating the conditional prob
-struct LabellingShape: public FeatureExtractor {
-  LabellingShape() : fid_(FD::Convert("LabellingShape")), kCFE(FD::Convert("CFE")) {}
-
-  virtual void ObserveFilteredRule(const WordID /*lhs*/,
-                                   const vector<WordID>& src,
-                                   const vector<WordID>& trg) {
-    RuleTuple r(-1, src, trg);
-    map_rule(r);
-    rule_counts.inc(r, 0);
-    source_counts.inc(r.source(), 0);
-  }
-
-  virtual void ObserveUnfilteredRule(const WordID /*lhs*/,
-                                     const vector<WordID>& src,
-                                     const vector<WordID>& trg,
-                                     const RuleStatistics& info) {
-    RuleTuple r(-1, src, trg);
-    map_rule(r);
-    rule_counts.inc_if_exists(r, info.counts.get(kCFE));
-    source_counts.inc_if_exists(r.source(), info.counts.get(kCFE));
-  }
-
-  virtual void ExtractFeatures(const WordID /*lhs*/,
-                               const vector<WordID>& src,
-                               const vector<WordID>& trg,
-                               const RuleStatistics& /*info*/,
-                               SparseVector<float>* result) const {
-    RuleTuple r(-1, src, trg);
-    map_rule(r);
-    double l_r_freq = log(rule_counts(r));
-    result->set_value(fid_, log(source_counts(r.source())) - l_r_freq);
-  }
-
-  // Replace all terminals with generic -1
-  void map_rule(RuleTuple& r) const {
-    for (vector<WordID>::iterator it = r.target().begin(); it != r.target().end(); ++it)
-      if (*it <= 0) *it = -1;
-    for (vector<WordID>::iterator it = r.source().begin(); it != r.source().end(); ++it)
-      if (*it <= 0) *it = -1;
-  }
-
-  const int fid_, kCFE;
-  RuleFreqCount rule_counts;
-  FreqCount< vector<WordID> > source_counts;
-};
-
-
-// this extracts the lexical translation prob features
-// in BOTH directions.
-struct LexProbExtractor : public FeatureExtractor {
-  LexProbExtractor() :
-      e2f_(FD::Convert("LexE2F")), f2e_(FD::Convert("LexF2E")) {
-    ReadFile rf(aligned_corpus);
-    //create lexical translation table
-    cerr << "Computing lexical translation probabilities from " << aligned_corpus << "..." << endl;
-    char* buf = new char[MAX_LINE_LENGTH];
-    istream& alignment = *rf.stream();
-    while(alignment) {
-      alignment.getline(buf, MAX_LINE_LENGTH);
-      if (buf[0] == 0) continue;
-      table.createTTable(buf);
-    }
-    delete[] buf;
-  }
-
-  virtual void ExtractFeatures(const WordID /*lhs*/,
-                               const vector<WordID>& src,
-                               const vector<WordID>& trg,
-                               const RuleStatistics& info,
-                               SparseVector<float>* result) const {
-    map <WordID, pair<int, float> > foreign_aligned;
-    map <WordID, pair<int, float> > english_aligned;
-
-    //Loop over all the alignment points to compute lexical translation probability
-    const vector< pair<short,short> >& al = info.aligns;
-    vector< pair<short,short> >::const_iterator ita;
-    for (ita = al.begin(); ita != al.end(); ++ita) {
-            if (DEBUG) {
-              cerr << "\nA:" << ita->first << "," << ita->second << "::";
-              cerr <<  TD::Convert(src[ita->first]) << "-" << TD::Convert(trg[ita->second]);
-            }
-
-            //Lookup this alignment probability in the table
-            int temp = table.word_translation[pair<WordID,WordID> (src[ita->first],trg[ita->second])];
-            float f2e=0, e2f=0;
-            if ( table.total_foreign[src[ita->first]] != 0)
-              f2e = (float) temp / table.total_foreign[src[ita->first]];
-            if ( table.total_english[trg[ita->second]] !=0 )
-              e2f = (float) temp / table.total_english[trg[ita->second]];
-            if (DEBUG) printf (" %d %E %E\n", temp, f2e, e2f);
-
-            //local counts to keep track of which things haven't been aligned, to later compute their null alignment
-            if (foreign_aligned.count(src[ita->first])) {
-              foreign_aligned[ src[ita->first] ].first++;
-              foreign_aligned[ src[ita->first] ].second += e2f;
-            } else {
-              foreign_aligned[ src[ita->first] ] = pair<int,float> (1,e2f);
-            }
-
-            if (english_aligned.count( trg[ ita->second] )) {
-               english_aligned[ trg[ ita->second] ].first++;
-               english_aligned[ trg[ ita->second] ].second += f2e;
-            } else {
-               english_aligned[ trg[ ita->second] ] = pair<int,float> (1,f2e);
-            }
-          }
-
-          float final_lex_f2e=1, final_lex_e2f=1;
-          static const WordID NULL_ = TD::Convert("NULL");
-
-          //compute lexical weight P(F|E) and include unaligned foreign words
-           for(int i=0;i<src.size(); i++) {
-               if (!table.total_foreign.count(src[i])) continue;      //if we dont have it in the translation table, we won't know its lexical weight
-
-               if (foreign_aligned.count(src[i]))
-                 {
-                   pair<int, float> temp_lex_prob = foreign_aligned[src[i]];
-                   final_lex_e2f *= temp_lex_prob.second / temp_lex_prob.first;
-                 }
-               else //dealing with null alignment
-                 {
-                   int temp_count = table.word_translation[pair<WordID,WordID> (src[i],NULL_)];
-                   float temp_e2f = (float) temp_count / table.total_english[NULL_];
-                   final_lex_e2f *= temp_e2f;
-                 }
-
-             }
-
-           //compute P(E|F) unaligned english words
-           for(int j=0; j< trg.size(); j++) {
-               if (!table.total_english.count(trg[j])) continue;
-
-               if (english_aligned.count(trg[j]))
-                 {
-                   pair<int, float> temp_lex_prob = english_aligned[trg[j]];
-                   final_lex_f2e *= temp_lex_prob.second / temp_lex_prob.first;
-                 }
-               else //dealing with null
-                 {
-                   int temp_count = table.word_translation[pair<WordID,WordID> (NULL_,trg[j])];
-                   float temp_f2e = (float) temp_count / table.total_foreign[NULL_];
-                   final_lex_f2e *= temp_f2e;
-                 }
-           }
-     result->set_value(e2f_, safenlog(final_lex_e2f));
-     result->set_value(f2e_, safenlog(final_lex_f2e));
-  }
-  const int e2f_, f2e_;
-  mutable LexTranslationTable table;
-};
-
-struct Featurizer {
-  Featurizer(const vector<boost::shared_ptr<FeatureExtractor> >& ex) : extractors(ex) {
-  }
-  void Callback1(WordID lhs, const vector<WordID>& src, const ID2RuleStatistics& trgs) {
-    for (ID2RuleStatistics::const_iterator it = trgs.begin(); it != trgs.end(); ++it) {
-      for (int i = 0; i < extractors.size(); ++i)
-        extractors[i]->ObserveFilteredRule(lhs, src, it->first);
-    }
-  }
-  void Callback2(WordID lhs, const vector<WordID>& src, const ID2RuleStatistics& trgs) {
-    for (ID2RuleStatistics::const_iterator it = trgs.begin(); it != trgs.end(); ++it) {
-      for (int i = 0; i < extractors.size(); ++i)
-        extractors[i]->ObserveUnfilteredRule(lhs, src, it->first, it->second);
-    }
-  }
-  void Callback3(WordID lhs, const vector<WordID>& src, const ID2RuleStatistics& trgs) {
-    for (ID2RuleStatistics::const_iterator it = trgs.begin(); it != trgs.end(); ++it) {
-      SparseVector<float> feats;
-      for (int i = 0; i < extractors.size(); ++i)
-        extractors[i]->ExtractFeatures(lhs, src, it->first, it->second, &feats);
-      cout << '[' << TD::Convert(-lhs) << "] ||| ";
-      WriteNamed(src, &cout);
-      cout << " ||| ";
-      WriteAnonymous(it->first, &cout);
-      cout << " ||| ";
-      print(cout,feats,"=");
-      cout << endl;
-    }
-  }
- private:
-  vector<boost::shared_ptr<FeatureExtractor> > extractors;
-};
-
-void cb1(WordID lhs, const vector<WordID>& src_rhs, const ID2RuleStatistics& rules, void* extra) {
-  static_cast<Featurizer*>(extra)->Callback1(lhs, src_rhs, rules);
-}
-
-void cb2(WordID lhs, const vector<WordID>& src_rhs, const ID2RuleStatistics& rules, void* extra) {
-  static_cast<Featurizer*>(extra)->Callback2(lhs, src_rhs, rules);
-}
-
-void cb3(WordID lhs, const vector<WordID>& src_rhs, const ID2RuleStatistics& rules, void* extra) {
-  static_cast<Featurizer*>(extra)->Callback3(lhs, src_rhs, rules);
-}
-
-int main(int argc, char** argv){
-  FERegistry reg;
-  reg.Register("LogRuleCount", new FEFactory<LogRuleCount>);
-  reg.Register("LexProb", new FEFactory<LexProbExtractor>);
-  reg.Register("XFeatures", new FEFactory<XFeatures>);
-  reg.Register("LabelledRuleConditionals", new FEFactory<LabelledRuleConditionals>);
-  reg.Register("RulePenalty", new FEFactory<RulePenalty>);
-  reg.Register("LHSProb", new FEFactory<LHSProb>);
-  reg.Register("LabellingShape", new FEFactory<LabellingShape>);
-  reg.Register("GenerativeProb", new FEFactory<GenerativeProb>);
-  po::variables_map conf;
-  InitCommandLine(reg, argc, argv, &conf);
-  aligned_corpus = conf["aligned_corpus"].as<string>();  // GLOBAL VAR
-  ReadFile fg1(conf["filtered_grammar"].as<string>());
-
-  vector<string> feats = conf["feature"].as<vector<string> >();
-  vector<boost::shared_ptr<FeatureExtractor> > extractors(feats.size());
-  for (int i = 0; i < feats.size(); ++i)
-    extractors[i] = reg.Create(feats[i]);
-  Featurizer fizer(extractors);
-
-  cerr << "Reading filtered grammar to detect keys..." << endl;
-  StripedGrammarLexer::ReadStripedGrammar(fg1.stream(), cb1, &fizer);
-
-  cerr << "Reading unfiltered grammar..." << endl;
-  StripedGrammarLexer::ReadStripedGrammar(&cin, cb2, &fizer);
-
-  ReadFile fg2(conf["filtered_grammar"].as<string>());
-  cerr << "Reading filtered grammar and adding features..." << endl;
-  StripedGrammarLexer::ReadStripedGrammar(fg2.stream(), cb3, &fizer);
-
-  return 0;
-}
-
diff --git a/extools/filter_grammar.cc b/extools/filter_grammar.cc
deleted file mode 100644
index cafcc923..00000000
--- a/extools/filter_grammar.cc
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Filter a grammar in striped format
- */
-#include <iostream>
-#include <string>
-#include <map>
-#include <vector>
-#include <utility>
-#include <tr1/unordered_map>
-
-#include "suffix_tree.h"
-#include "sparse_vector.h"
-#include "sentence_pair.h"
-#include "extract.h"
-#include "fdict.h"
-#include "tdict.h"
-#include "filelib.h"
-#include "striped_grammar.h"
-
-#include <boost/shared_ptr.hpp>
-#include <boost/functional/hash.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-using namespace std;
-using namespace std::tr1;
-namespace po = boost::program_options;
-
-static const size_t MAX_LINE_LENGTH = 64000000;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("test_set,t", po::value<string>(), "Filter for this test set")
-        ("top_e_given_f,n", po::value<size_t>()->default_value(30), "Keep top N rules, according to p(e|f). 0 for all")
-        ("help,h", "Print this help message and exit");
-  po::options_description clo("Command line options");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  po::notify(*conf);
-
-  if (conf->count("help") || conf->count("test_set")==0) {
-    cerr << "\nUsage: filter_grammar -t TEST-SET.fr [-options] < grammar\n";
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-struct SourceFilter {
-  // return true to keep the rule, otherwise false
-  virtual bool Matches(const vector<WordID>& key) const = 0;
-  virtual ~SourceFilter() {}
-};
-
-struct DumbSuffixTreeFilter : SourceFilter {
-  DumbSuffixTreeFilter(const string& corpus) {
-    cerr << "Build suffix tree from test set in " << corpus << endl;
-    assert(FileExists(corpus));
-    ReadFile rfts(corpus);
-    istream& testSet = *rfts.stream();
-    char* buf = new char[MAX_LINE_LENGTH];
-    AnnotatedParallelSentence sent;
-
-    /* process the data set to build suffix tree
-     */
-    while(!testSet.eof()) {
-      testSet.getline(buf, MAX_LINE_LENGTH);
-      if (buf[0] == 0) continue;
-
-      //hack to read in the test set using AnnotatedParallelSentence
-      strcat(buf," ||| fake ||| 0-0");
-      sent.ParseInputLine(buf);
-
-      //add each successive suffix to the tree
-      for(int i=0; i<sent.f_len; i++)
-        root.InsertPath(sent.f, i, sent.f_len - 1);
-    }
-    delete[] buf;
-  }
-  virtual bool Matches(const vector<WordID>& src_rhs) const {
-    const Node<int>* curnode = &root;
-    for(int i=0; i < src_rhs.size(); i++) {
-      if (src_rhs[i] <= 0) {
-        curnode = &root;
-      } else if (curnode) {
-        curnode = curnode->Extend(src_rhs[i]);
-        if (!curnode) return false;
-      }
-    }
-    return true;
-  }
-  Node<int> root;
-};
-
-boost::shared_ptr<SourceFilter> filter;
-multimap<float, ID2RuleStatistics::const_iterator> options;
-int kCOUNT;
-int max_options;
-
-void cb(WordID lhs, const vector<WordID>& src_rhs, const ID2RuleStatistics& rules, void*) {
-  options.clear();
-  if (!filter || filter->Matches(src_rhs)) {
-    for (ID2RuleStatistics::const_iterator it = rules.begin(); it != rules.end(); ++it) {
-      options.insert(make_pair(-it->second.counts.get(kCOUNT), it));
-    }
-    int ocount = 0;
-    cout << '[' << TD::Convert(-lhs) << ']' << " ||| ";
-    WriteNamed(src_rhs, &cout);
-    cout << '\t';
-    bool first = true;
-    for (multimap<float,ID2RuleStatistics::const_iterator>::iterator it = options.begin(); it != options.end(); ++it) {
-      if (first) { first = false; } else { cout << " ||| "; }
-      WriteAnonymous(it->second->first, &cout);
-      cout << " ||| " << it->second->second;
-      ++ocount;
-      if (ocount == max_options) break;
-    }
-    cout << endl;
-  }
-}
-
-int main(int argc, char** argv){
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  max_options = conf["top_e_given_f"].as<size_t>();;
-  kCOUNT = FD::Convert("CFE");
-  istream& unscored_grammar = cin;
-  cerr << "Loading test set " << conf["test_set"].as<string>() << "...\n";
-  filter.reset(new DumbSuffixTreeFilter(conf["test_set"].as<string>()));
-  cerr << "Filtering...\n";
-  StripedGrammarLexer::ReadStripedGrammar(&unscored_grammar, cb, NULL);
-}
-
diff --git a/extools/lex_trans_tbl.h b/extools/lex_trans_tbl.h
deleted file mode 100644
index 161b4a0d..00000000
--- a/extools/lex_trans_tbl.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * lex_trans_tbl.h
- *
- *  Created on: May 25, 2010
- *      Author: Vlad
- */
-
-#ifndef LEX_TRANS_TBL_H_
-#define LEX_TRANS_TBL_H_
-
-#include "wordid.h"
-#include <map>
-
-class LexTranslationTable
-{
- public:
-
-  std::map < std::pair<WordID,WordID>,int > word_translation;
-  std::map <WordID, int> total_foreign;
-  std::map <WordID, int> total_english;
-  void createTTable(const char* buf);
-  
-};
-
-#endif /* LEX_TRANS_TBL_H_ */
diff --git a/extools/merge_lines.pl b/extools/merge_lines.pl
deleted file mode 100755
index 8711e4ce..00000000
--- a/extools/merge_lines.pl
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-if (scalar @ARGV < 2) {
-  die "Usage: $0 file1.txt file2.txt ...\n\n  Concatenate the nth line of each input file. All files\n  must be the same length.\n\n";
-}
-
-my @fhs=();
-for my $file (@ARGV) {
-  my $fh;
-  open $fh, "<$file" or die "Can't read $file: $!\n";
-  push @fhs, $fh;
-}
-
-my $first = shift @fhs;
-
-while(my $x = <$first>) {
-  my $ind = 0;
-  chomp $x;
-  my @fields = ($x);
-  for my $fh (@fhs) {
-    $ind++;
-    $x = <$fh>;
-    die "ERROR: Mismatched number of lines: $ARGV[$ind]\n" unless $x;
-    chomp $x;
-    push @fields, $x;
-  }
-  print join ' ||| ', @fields;
-  print "\n";
-}
-my $ind = 0;
-for my $fh (@fhs) {
-  $ind++;
-  my $x=<$fh>;
-  die "ERROR: $ARGV[$ind] has extra lines!\n" if $x;
-}
-
-exit 0;
-
-for my $fh (@fhs) {
-  close $fh;
-}
-
diff --git a/extools/mr_stripe_rule_reduce.cc b/extools/mr_stripe_rule_reduce.cc
deleted file mode 100644
index c9b2eb2a..00000000
--- a/extools/mr_stripe_rule_reduce.cc
+++ /dev/null
@@ -1,172 +0,0 @@
-#include <iostream>
-#include <vector>
-#include <utility>
-#include <cstdlib>
-#include <tr1/unordered_map>
-
-#include <boost/functional/hash.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "striped_grammar.h"
-#include "tdict.h"
-#include "sentence_pair.h"
-#include "fdict.h"
-#include "extract.h"
-
-using namespace std;
-using namespace std::tr1;
-namespace po = boost::program_options;
-
-static const size_t MAX_LINE_LENGTH = 64000000;
-
-bool use_hadoop_counters = false;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("phrase_marginals,p", "Compute phrase marginals")
-	("use_hadoop_counters,C", "Enable this if running inside Hadoop")
-        ("bidir,b", "Rules are tagged as being F->E or E->F, invert E rules in output")
-        ("help,h", "Print this help message and exit");
-  po::options_description clo("Command line options");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  po::notify(*conf);
-
-  if (conf->count("help")) {
-    cerr << "\nUsage: mr_stripe_rule_reduce [-options]\n";
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-void PlusEquals(const ID2RuleStatistics& v, ID2RuleStatistics* self) {
-  for (ID2RuleStatistics::const_iterator it = v.begin(); it != v.end(); ++it) {
-    RuleStatistics& dest = (*self)[it->first];
-    dest += it->second;
-    // TODO - do something smarter about alignments?
-    if (dest.aligns.empty() && !it->second.aligns.empty())
-      dest.aligns = it->second.aligns;
-  }
-}
-
-void WriteKeyValue(const vector<WordID>& key, const ID2RuleStatistics& val) {
-  cout << TD::GetString(key) << '\t';
-  bool needdiv = false;
-  for (ID2RuleStatistics::const_iterator it = val.begin(); it != val.end(); ++it) {
-    if (needdiv) cout << " ||| "; else needdiv = true;
-    cout << TD::GetString(it->first) << " ||| " << it->second;
-  }
-  cout << endl;
-  if (use_hadoop_counters) cerr << "reporter:counter:UserCounters,RuleCount," << val.size() << endl;
-}
-
-void DoPhraseMarginals(const vector<WordID>& key, const bool bidir, ID2RuleStatistics* val) {
-  static const WordID kF = TD::Convert("F");
-  static const WordID kE = TD::Convert("E");
-  static const int kCF = FD::Convert("CF");
-  static const int kCE = FD::Convert("CE");
-  static const int kCFE = FD::Convert("CFE");
-  assert(key.size() > 0);
-  int cur_marginal_id = kCF;
-  if (bidir) {
-    if (key[0] != kF && key[0] != kE) {
-      cerr << "DoPhraseMarginals expects keys to have the from 'F|E [NT] word word word'\n";
-      cerr << "  but got: " << TD::GetString(key) << endl;
-      exit(1);
-    }
-    if (key[0] == kE) cur_marginal_id = kCE;
-  }
-  double tot = 0;
-  for (ID2RuleStatistics::iterator it = val->begin(); it != val->end(); ++it)
-    tot += it->second.counts.get(kCFE);
-  for (ID2RuleStatistics::iterator it = val->begin(); it != val->end(); ++it) {
-    it->second.counts.set_value(cur_marginal_id, tot);
-
-    // prevent double counting of the joint
-    if (cur_marginal_id == kCE) it->second.counts.erase(kCFE);
-  }
-}
-
-void WriteWithInversions(const vector<WordID>& key, const ID2RuleStatistics& val) {
-  static const WordID kE = TD::Convert("E");
-  static const WordID kDIV = TD::Convert("|||");
-  vector<WordID> new_key(key.size() - 1);
-  for (int i = 1; i < key.size(); ++i)
-    new_key[i - 1] = key[i];
-  const bool do_invert = (key[0] == kE);
-  if (!do_invert) {
-    WriteKeyValue(new_key, val);
-  } else {
-    ID2RuleStatistics inv;
-    assert(new_key.size() > 2);
-    vector<WordID> tk(new_key.size() - 2);
-    for (int i = 0; i < tk.size(); ++i)
-      tk[i] = new_key[2 + i];
-    RuleStatistics& inv_stats = inv[tk];
-    for (ID2RuleStatistics::const_iterator it = val.begin(); it != val.end(); ++it) {
-      inv_stats.counts = it->second.counts;
-      vector<WordID> ekey(2 + it->first.size());
-      ekey[0] = key[1];
-      ekey[1] = kDIV;
-      for (int i = 0; i < it->first.size(); ++i)
-        ekey[2+i] = it->first[i];
-      WriteKeyValue(ekey, inv);
-    }
-  }
-}
-
-struct Reducer {
-  Reducer(bool phrase_marginals, bool bidir) : pm_(phrase_marginals), bidir_(bidir) {}
-
-  void ProcessLine(const vector<WordID>& key, const ID2RuleStatistics& rules) {
-    if (cur_key_ != key) {
-      if (cur_key_.size() > 0) Emit();
-      acc_.clear();
-      cur_key_ = key;
-    }
-    PlusEquals(rules, &acc_);
-  }
-
-  ~Reducer() {
-    Emit();
-  }
-
-  void Emit() {
-    if (pm_)
-      DoPhraseMarginals(cur_key_, bidir_, &acc_);
-    if (bidir_)
-      WriteWithInversions(cur_key_, acc_);
-    else
-      WriteKeyValue(cur_key_, acc_);
-  }
-
-  const bool pm_;
-  const bool bidir_;
-  vector<WordID> cur_key_;
-  ID2RuleStatistics acc_;
-};
-
-void cb(const vector<WordID>& key, const ID2RuleStatistics& contexts, void* red) {
-  static_cast<Reducer*>(red)->ProcessLine(key, contexts);
-}
-
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-
-  char* buf = new char[MAX_LINE_LENGTH];
-  vector<WordID> key, cur_key;
-  int line = 0;
-  use_hadoop_counters = conf.count("use_hadoop_counters") > 0;
-  const bool phrase_marginals = conf.count("phrase_marginals") > 0;
-  const bool bidir = conf.count("bidir") > 0;
-  Reducer reducer(phrase_marginals, bidir);
-  StripedGrammarLexer::ReadContexts(&cin, cb, &reducer);
-  return 0;
-}
-
diff --git a/extools/score_grammar.cc b/extools/score_grammar.cc
deleted file mode 100644
index 0945e018..00000000
--- a/extools/score_grammar.cc
+++ /dev/null
@@ -1,352 +0,0 @@
-/*
- * Score a grammar in striped format
- * ./score_grammar <alignment> < filtered.grammar > scored.grammar
- */
-#include <iostream>
-#include <string>
-#include <map>
-#include <vector>
-#include <utility>
-#include <cstdlib>
-#include <fstream>
-#include <tr1/unordered_map>
-
-#include "sentence_pair.h"
-#include "extract.h"
-#include "fdict.h"
-#include "tdict.h"
-#include "lex_trans_tbl.h"
-#include "filelib.h"
-
-#include <boost/functional/hash.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-using namespace std;
-using namespace std::tr1;
-
-
-static const size_t MAX_LINE_LENGTH = 64000000;
-
-typedef unordered_map<vector<WordID>, RuleStatistics, boost::hash<vector<WordID> > > ID2RuleStatistics;
-
-
-namespace {
-  inline bool IsWhitespace(char c) { return c == ' ' || c == '\t'; }
-  inline bool IsBracket(char c){return c == '[' || c == ']';}
-  inline void SkipWhitespace(const char* buf, int* ptr) {
-    while (buf[*ptr] && IsWhitespace(buf[*ptr])) { ++(*ptr); }
-  }
-}
-
-int ReadPhraseUntilDividerOrEnd(const char* buf, const int sstart, const int end, vector<WordID>* p) {
-  static const WordID kDIV = TD::Convert("|||");
-  int ptr = sstart;
-  while(ptr < end) {
-    while(ptr < end && IsWhitespace(buf[ptr])) { ++ptr; }
-    int start = ptr;
-    while(ptr < end && !IsWhitespace(buf[ptr])) { ++ptr; }
-    if (ptr == start) {cerr << "Warning! empty token.\n"; return ptr; }
-    const WordID w = TD::Convert(string(buf, start, ptr - start));
-
-    if((IsBracket(buf[start]) and IsBracket(buf[ptr-1])) or( w == kDIV))
-      p->push_back(1 * w);
-    else {
-      if (w == kDIV) return ptr;
-      p->push_back(w);
-    }
-  }
-  return ptr;
-}
-
-
-void ParseLine(const char* buf, vector<WordID>* cur_key, ID2RuleStatistics* counts) {
-  static const WordID kDIV = TD::Convert("|||");
-  counts->clear();
-  int ptr = 0;
-  while(buf[ptr] != 0 && buf[ptr] != '\t') { ++ptr; }
-  if (buf[ptr] != '\t') {
-    cerr << "Missing tab separator between key and value!\n INPUT=" << buf << endl;
-    exit(1);
-  }
-  cur_key->clear();
-  // key is: "[X] ||| word word word"
-  int tmpp = ReadPhraseUntilDividerOrEnd(buf, 0, ptr, cur_key);
-  cur_key->push_back(kDIV);
-  ReadPhraseUntilDividerOrEnd(buf, tmpp, ptr, cur_key);
-  ++ptr;
-  int start = ptr;
-  int end = ptr;
-  int state = 0; // 0=reading label, 1=reading count
-  vector<WordID> name;
-  while(buf[ptr] != 0) {
-    while(buf[ptr] != 0 && buf[ptr] != '|') { ++ptr; }
-    if (buf[ptr] == '|') {
-      ++ptr;
-      if (buf[ptr] == '|') {
-        ++ptr;
-        if (buf[ptr] == '|') {
-          ++ptr;
-          end = ptr - 3;
-          while (end > start && IsWhitespace(buf[end-1])) { --end; }
-          if (start == end) {
-            cerr << "Got empty token!\n  LINE=" << buf << endl;
-            exit(1);
-          }
-          switch (state) {
-            case 0: ++state; name.clear(); ReadPhraseUntilDividerOrEnd(buf, start, end, &name); break;
-            case 1: --state; (*counts)[name].ParseRuleStatistics(buf, start, end); break;
-            default: cerr << "Can't happen\n"; abort();
-          }
-          SkipWhitespace(buf, &ptr);
-          start = ptr;
-        }
-      }
-    }
-  }
-  end=ptr;
-  while (end > start && IsWhitespace(buf[end-1])) { --end; }
-  if (end > start) {
-    switch (state) {
-      case 0: ++state; name.clear(); ReadPhraseUntilDividerOrEnd(buf, start, end, &name); break;
-      case 1: --state; (*counts)[name].ParseRuleStatistics(buf, start, end); break;
-      default: cerr << "Can't happen\n"; abort();
-    }
-  }
-}
-
-
-
-void LexTranslationTable::createTTable(const char* buf){
-
-  bool DEBUG = false;
-
-  AnnotatedParallelSentence sent;
-      
-  sent.ParseInputLine(buf);
-      
-  //iterate over the alignment to compute aligned words
-  
-  for(int i =0;i<sent.aligned.width();i++)
-    {
-      for (int j=0;j<sent.aligned.height();j++)
-	{
-	  if (DEBUG) cerr << sent.aligned(i,j) << " ";
-	  if( sent.aligned(i,j))
-	    {
-	      if (DEBUG) cerr << TD::Convert(sent.f[i])  << " aligned to " << TD::Convert(sent.e[j]);
-	      ++word_translation[pair<WordID,WordID> (sent.f[i], sent.e[j])];
-	      ++total_foreign[sent.f[i]];
-	      ++total_english[sent.e[j]];
-	    }
-	}
-      if (DEBUG)  cerr << endl;
-    }
-  if (DEBUG) cerr << endl;
-  
-  static const WordID NULL_ = TD::Convert("NULL");
-  //handle unaligned words - align them to null
-  for (int j =0; j < sent.e_len; j++)
-    {
-      if (sent.e_aligned[j]) continue;
-      ++word_translation[pair<WordID,WordID> (NULL_, sent.e[j])];
-      ++total_foreign[NULL_];
-      ++total_english[sent.e[j]];
-    }
-  
-  for (int i =0; i < sent.f_len; i++)
-    {
-      if (sent.f_aligned[i]) continue;
-      ++word_translation[pair<WordID,WordID> (sent.f[i], NULL_)];
-      ++total_english[NULL_];
-      ++total_foreign[sent.f[i]];
-    }
- 
-}
-
-
-inline float safenlog(float v) {
-  if (v == 1.0f) return 0.0f;
-  float res = -log(v);
-  if (res > 100.0f) res = 100.0f;
-  return res;
-}
-
-int main(int argc, char** argv){
-  bool DEBUG= false;
-  if (argc != 2) {
-    cerr << "Usage: " << argv[0] << " corpus.al < filtered.grammar\n";
-    return 1;
-  }
-  ifstream alignment (argv[1]);
-  istream& unscored_grammar = cin;
-  ostream& scored_grammar = cout;
-
-  //create lexical translation table
-  cerr << "Creating table..." << endl;
-  char* buf = new char[MAX_LINE_LENGTH];
-
-  LexTranslationTable table;
-
-  while(!alignment.eof())
-    {
-      alignment.getline(buf, MAX_LINE_LENGTH);
-      if (buf[0] == 0) continue;
-      
-      table.createTTable(buf);      	
-    }
-  
-  bool PRINT_TABLE=false;
-  if (PRINT_TABLE)
-    {
-      ofstream trans_table;
-      trans_table.open("lex_trans_table.out");
-      for(map < pair<WordID,WordID>,int >::iterator it = table.word_translation.begin(); it != table.word_translation.end(); ++it)
-      {
-	trans_table <<  TD::Convert(it->first.first) <<  "|||" << TD::Convert(it->first.second) << "==" << it->second << "//" << table.total_foreign[it->first.first] << "//" << table.total_english[it->first.second] << endl;
-      } 
-
-      trans_table.close();
-    }
-  
- 
-  //score unscored grammar
-  cerr <<"Scoring grammar..." << endl;
-
-  ID2RuleStatistics acc, cur_counts;
-  vector<WordID> key, cur_key,temp_key;
-  vector< pair<short,short> > al;
-  vector< pair<short,short> >::iterator ita;
-  int line = 0;
-
-  static const int kCF = FD::Convert("CF");
-  static const int kCE = FD::Convert("CE");
-  static const int kCFE = FD::Convert("CFE");	
-
-  while(!unscored_grammar.eof())
-    {
-      ++line;
-      unscored_grammar.getline(buf, MAX_LINE_LENGTH);
-      if (buf[0] == 0) continue;
-      ParseLine(buf, &cur_key, &cur_counts);
-      
-      //loop over all the Target side phrases that this source aligns to
-      for (ID2RuleStatistics::const_iterator it = cur_counts.begin(); it != cur_counts.end(); ++it)
-	{
-	  
-	 /*Compute phrase translation prob.
-	   Print out scores in this format:
-	   Phrase trnaslation prob P(F|E)
-	   Phrase translation prob P(E|F)
-	   Lexical weighting prob lex(F|E)
-	   Lexical weighting prob lex(E|F)
-	 */      
-	  
-	  float pEF_ = it->second.counts.value(kCFE) / it->second.counts.value(kCF);
-	  float pFE_ = it->second.counts.value(kCFE) / it->second.counts.value(kCE);
-
-	  map <WordID, pair<int, float> > foreign_aligned;
-	  map <WordID, pair<int, float> > english_aligned;
-
-	  //Loop over all the alignment points to compute lexical translation probability
-	  al = it->second.aligns;	  
-	  for(ita = al.begin(); ita != al.end(); ++ita)
-	    {
-	     
-	      if (DEBUG)
-		{
-		  cerr << "\nA:" << ita->first << "," << ita->second << "::";
-		  cerr <<  TD::Convert(cur_key[ita->first + 2]) << "-" << TD::Convert(it->first[ita->second]);
-		}
-
-
-	      //Lookup this alignment probability in the table
-	      int temp = table.word_translation[pair<WordID,WordID> (cur_key[ita->first+2],it->first[ita->second])];
-	      float f2e=0, e2f=0;
-	      if ( table.total_foreign[cur_key[ita->first+2]] != 0)
-		f2e = (float) temp / table.total_foreign[cur_key[ita->first+2]];
-	      if ( table.total_english[it->first[ita->second]] !=0 )
-		e2f = (float) temp / table.total_english[it->first[ita->second]];
-	      if (DEBUG) printf (" %d %E %E\n", temp, f2e, e2f);
-	      
-	      
-	      //local counts to keep track of which things haven't been aligned, to later compute their null alignment	      
-	      if (foreign_aligned.count(cur_key[ita->first+2]))
-		{
-		  foreign_aligned[ cur_key[ita->first+2] ].first++;
-		  foreign_aligned[ cur_key[ita->first+2] ].second += e2f;
-		}
-	      else
-		foreign_aligned [ cur_key[ita->first+2] ] = pair<int,float> (1,e2f);
-		
-	      
-
-	      if (english_aligned.count( it->first[ ita->second] ))
-		{
-		  english_aligned[ it->first[ ita->second ]].first++;
-		  english_aligned[  it->first[ ita->second] ].second += f2e;
-		}
-	      else
-		english_aligned [ it->first[ ita->second] ] = pair<int,float> (1,f2e);
-		
-	      
-	    
-	   	      
-	    }
-
-	  float final_lex_f2e=1, final_lex_e2f=1;
-	  static const WordID NULL_ = TD::Convert("NULL");
-
-	  //compute lexical weight P(F|E) and include unaligned foreign words
-	   for(int i=0;i<cur_key.size(); i++)
-	     {
-	       
-	       if (!table.total_foreign.count(cur_key[i])) continue;      //if we dont have it in the translation table, we won't know its lexical weight
-	       
-	       if (foreign_aligned.count(cur_key[i])) 
-		 {
-		   pair<int, float> temp_lex_prob = foreign_aligned[cur_key[i]];
-		   final_lex_e2f *= temp_lex_prob.second / temp_lex_prob.first;
-		 }
-	       else //dealing with null alignment
-		 {
-		   int temp_count = table.word_translation[pair<WordID,WordID> (cur_key[i],NULL_)];
-		   float temp_e2f = (float) temp_count / table.total_english[NULL_];
-		   final_lex_e2f *= temp_e2f;
-		 }	       	       
-
-	     }
-
-	   //compute P(E|F) unaligned english words
-	   for(int j=0; j< it->first.size(); j++)
-	     {
-	       if (!table.total_english.count(it->first[j])) continue;
-	       
-	       if (english_aligned.count(it->first[j]))
-		 {
-		   pair<int, float> temp_lex_prob = english_aligned[it->first[j]];
-		   final_lex_f2e *= temp_lex_prob.second / temp_lex_prob.first;
-		 }
-	       else //dealing with null
-		 {
-		   int temp_count = table.word_translation[pair<WordID,WordID> (NULL_,it->first[j])];
-		   float temp_f2e = (float) temp_count / table.total_foreign[NULL_];
-		   final_lex_f2e *= temp_f2e;
-		 }
-	     }
-	   
-	   
-       scored_grammar << TD::GetString(cur_key);
-       string lhs = TD::Convert(cur_key[0]);
-	   scored_grammar << " " << TD::GetString(it->first) << " |||";
-	   if(lhs.find('_')!=string::npos) {
-	       scored_grammar << " Bkoff=" << safenlog(3.0f);
-	   } else {
-	       scored_grammar << " FGivenE=" << safenlog(pFE_) << " EGivenF=" << safenlog(pEF_);
-	       scored_grammar << " LexE2F=" << safenlog(final_lex_e2f) << " LexF2E=" << safenlog(final_lex_f2e);
-	   }  
-	   scored_grammar << endl;
-	}  
-    }
-}
-
diff --git a/extools/sentence_pair.cc b/extools/sentence_pair.cc
deleted file mode 100644
index 7d60715a..00000000
--- a/extools/sentence_pair.cc
+++ /dev/null
@@ -1,198 +0,0 @@
-#include "sentence_pair.h"
-
-#include <queue>
-#include <iostream>
-#include <string>
-#include <vector>
-#include <utility>
-#include <set>
-#include <boost/tuple/tuple_comparison.hpp>
-
-#include "tdict.h"
-#include "wordid.h"
-#include "array2d.h"
-
-using namespace std;
-using namespace boost;
-
-namespace {
-  inline bool IsWhitespace(char c) { return c == ' ' || c == '\t'; }
-
-  inline void SkipWhitespace(const char* buf, int* ptr) {
-    while (buf[*ptr] && IsWhitespace(buf[*ptr])) { ++(*ptr); }
-  }
-}
-
-void AnnotatedParallelSentence::Reset() {
-  f.clear();
-  e.clear();
-  e_aligned.clear();
-  f_aligned.clear();
-  aligns_by_fword.clear();
-  aligned.clear();
-  span_types.clear();
-}
-
-void AnnotatedParallelSentence::AllocateForAlignment() {
-  f_len = f.size();
-  e_len = e.size();
-  aligned.resize(f_len, e_len, false);
-  f_aligned.resize(f_len, 0);
-  e_aligned.resize(e_len, 0);
-  aligns_by_fword.resize(f_len);
-}
-
-// read an alignment point of the form X-Y where X and Y are strings
-// of digits. if permit_col is true, the right edge will be determined
-// by the presence of a colon
-int AnnotatedParallelSentence::ReadAlignmentPoint(const char* buf,
-                                                  const int start,
-                                                  const int end,
-                                                  const bool permit_col,
-                                                  short* a, short* b, short* c, short* d) {
-  if (end - start < 3) {
-    cerr << "Alignment point badly formed 1: " << string(buf, start, end-start) << endl << buf << endl;
-    exit(1);
-  }
-  int ch = start;
-  *a = 0;
-  while(ch < end && buf[ch] != '-') {
-    if (buf[ch] < '0' || buf[ch] > '9') {
-      cerr << "Alignment point badly formed 2: " << string(buf, start, end-start) << endl << buf << endl;
-      exit(1);
-    }
-    (*a) *= 10;
-    (*a) += buf[ch] - '0';
-    ++ch;
-  }
-  ++ch;
-  if (ch >= end) {
-    cerr << "Alignment point badly formed 3: " << string(buf, start, end-start) << endl << buf << endl;
-    exit(1);
-  }
-  (*b) = 0;
-  while((ch < end) && (c == 0 && (!permit_col || (permit_col && buf[ch] != ':')) || c != 0 && buf[ch] != '-')) {
-    if ((buf[ch] < '0') || (buf[ch] > '9')) {
-      cerr << "Alignment point badly formed 4: " << string(buf, start, end-start) << endl << buf << endl << buf[ch] << endl;
-      exit(1);
-    }
-    (*b) *= 10;
-    (*b) += buf[ch] - '0';
-    ++ch;
-  }
-  if (c != 0)
-  {
-      ++ch;
-      if (ch >= end) {
-        cerr << "Alignment point badly formed 5: " << string(buf, start, end-start) << endl << buf << endl;
-        exit(1);
-      }
-      (*c) = 0;
-      while(ch < end && buf[ch] != '-') {
-        if (buf[ch] < '0' || buf[ch] > '9') {
-          cerr << "Alignment point badly formed 6: " << string(buf, start, end-start) << endl << buf << endl;
-          exit(1);
-        }
-        (*c) *= 10;
-        (*c) += buf[ch] - '0';
-        ++ch;
-      }
-      ++ch;
-      if (ch >= end) {
-        cerr << "Alignment point badly formed 7: " << string(buf, start, end-start) << endl << buf << endl;
-        exit(1);
-      }
-      (*d) = 0;
-      while(ch < end && (!permit_col || (permit_col && buf[ch] != ':'))) {
-        if (buf[ch] < '0' || buf[ch] > '9') {
-          cerr << "Alignment point badly formed 8: " << string(buf, start, end-start) << endl << buf << endl;
-          exit(1);
-        }
-        (*d) *= 10;
-        (*d) += buf[ch] - '0';
-        ++ch;
-      }
-  }
-  return ch;
-}
-
-void AnnotatedParallelSentence::Align(const short a, const short b) {
-  aligned(a,b) = true;
-  ++f_aligned[a];
-  ++e_aligned[b];
-  aligns_by_fword[a].push_back(make_pair(a,b));
-  // cerr << a << " " << b << endl;
-}
-
-void AnnotatedParallelSentence::ParseAlignmentPoint(const char* buf, int start, int end) {
-  short a, b;
-  ReadAlignmentPoint(buf, start, end, false, &a, &b, 0, 0);
-  if (a >= f_len || b >= e_len) {
-    cerr << "(" << a << ',' << b << ") is out of bounds. INPUT=\n" << buf << endl;
-    exit(1);
-  }
-  Align(a,b);
-}
-
-void AnnotatedParallelSentence::ParseSpanLabel(const char* buf, int start, int end) {
-  short a,b,c,d;
-  int ch = ReadAlignmentPoint(buf, start, end, true, &a, &b, &c, &d) + 1;
-  if (buf[ch-1] != ':' || ch >= end) {
-    cerr << "Span badly formed: " << string(buf, start, end-start) << endl << buf << endl;
-    exit(1);
-  }
-  if (a >= f_len || b > f_len) {
-    cerr << "(" << a << ',' << b << ") is out of bounds in labeled span. INPUT=\n" << buf << endl;
-    exit(1);
-  }
-  if (c >= e_len || d > e_len) {
-    cerr << "(" << c << ',' << d << ") is out of bounds in labeled span. INPUT=\n" << buf << endl;
-    exit(1);
-  }
-  // cerr << a << " " << b << " " << string(buf,c,end-c) << endl;
-  span_types[boost::make_tuple(a,b,c,d)].push_back(-TD::Convert(string(buf, ch, end-ch)));
-}
-
-// INPUT FORMAT
-// ein haus ||| a house ||| 0-0 1-1 ||| 0-0:DT 1-1:NN 0-1:NP
-void AnnotatedParallelSentence::ParseInputLine(const char* buf) {
-  Reset();
-  int ptr = 0;
-  SkipWhitespace(buf, &ptr);
-  int start = ptr;
-  int state = 0;  // 0 = French, 1 = English, 2 = Alignment, 3 = Spans
-  while(char c = buf[ptr]) {
-    if (!IsWhitespace(c)) { ++ptr; continue; } else {
-      if (ptr - start == 3 && buf[start] == '|' && buf[start+1] == '|' && buf[start+2] == '|') {
-        ++state;
-        if (state == 4) { cerr << "Too many fields (ignoring):\n  " << buf << endl; return; }
-        if (state == 2) {
-          // cerr << "FLEN=" << f->size() << " ELEN=" << e->size() << endl;
-          AllocateForAlignment();
-        }
-        SkipWhitespace(buf, &ptr);
-        start = ptr;
-        continue;
-      }
-      switch (state) {
-        case 0:  f.push_back(TD::Convert(string(buf, start, ptr-start))); break;
-        case 1:  e.push_back(TD::Convert(string(buf, start, ptr-start))); break;
-        case 2:  ParseAlignmentPoint(buf, start, ptr); break;
-        case 3:  ParseSpanLabel(buf, start, ptr); break;
-        default: cerr << "Can't happen\n"; abort();
-      }
-      SkipWhitespace(buf, &ptr);
-      start = ptr;
-    }
-  }
-  if (ptr > start) {
-    switch (state) {
-      case 0:  f.push_back(TD::Convert(string(buf, start, ptr-start))); break;
-      case 1:  e.push_back(TD::Convert(string(buf, start, ptr-start))); break;
-      case 2:  ParseAlignmentPoint(buf, start, ptr); break;
-      case 3:  ParseSpanLabel(buf, start, ptr); break;
-      default: cerr << "Can't happen\n"; abort();
-    }
-  }
-}
-
diff --git a/extools/sentence_pair.h b/extools/sentence_pair.h
deleted file mode 100644
index a05275e7..00000000
--- a/extools/sentence_pair.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef _SENTENCE_PAIR_H_
-#define _SENTENCE_PAIR_H_
-
-#include <map>
-#include <utility>
-#include <vector>
-#include <boost/tuple/tuple.hpp>
-#include "wordid.h"
-#include "array2d.h"
-
-// represents a parallel sentence with a word alignment and category
-// annotations over subspans (currently in terms of f)
-// you should read one using ParseInputLine and then use the public
-// member variables to query things about it
-struct AnnotatedParallelSentence {
-  // read annotated parallel sentence from string
-  void ParseInputLine(const char* buf);
-
-  std::vector<WordID> f, e;  // words in f and e
-
-  // word alignment information
-  std::vector<int> e_aligned, f_aligned; // counts the number of times column/row x is aligned
-  Array2D<bool> aligned;
-  std::vector<std::vector<std::pair<short, short> > > aligns_by_fword;
-
-  // span type information
-  std::map< boost::tuple<short,short,short,short>, std::vector<WordID> > span_types;
-  // span_types(i,j,k,l) is the list of category span (i,j) in source and (k,l) in the target language.
-
-  int f_len, e_len;
-
-  void Align(const short a, const short b);
-  void AllocateForAlignment();
-
-  static int ReadAlignmentPoint(const char* buf, int start, int end, bool permit_col, short* a, short* b, short* c, short* d);
-
- private:
-  void Reset();
-  void ParseAlignmentPoint(const char* buf, int start, int end);
-  void ParseSpanLabel(const char* buf, int start, int end);
-};
-
-#endif
diff --git a/extools/sg_lexer.l b/extools/sg_lexer.l
deleted file mode 100644
index c85cdea7..00000000
--- a/extools/sg_lexer.l
+++ /dev/null
@@ -1,294 +0,0 @@
-%{
-#include <string>
-#include <iostream>
-#include <sstream>
-#include <cstring>
-#include <cassert>
-#include "tdict.h"
-#include "fdict.h"
-#include "striped_grammar.h"
-
-int lex_line = 0;
-int read_contexts = 0;
-std::istream* sglex_stream = NULL;
-StripedGrammarLexer::GrammarCallback grammar_callback = NULL;
-StripedGrammarLexer::ContextCallback context_callback = NULL;
-void* grammar_callback_extra = NULL;
-void* context_callback_extra = NULL;
-
-#undef YY_INPUT
-#define YY_INPUT(buf, result, max_size) (result = sglex_stream->read(buf, max_size).gcount())
-
-#define YY_SKIP_YYWRAP 1
-int num_rules = 0;
-int yywrap() { return 1; }
-bool fl = true;
-#define MAX_TOKEN_SIZE 255
-std::string sglex_tmp_token(MAX_TOKEN_SIZE, '\0');
-
-#define MAX_RULE_SIZE 48
-WordID sglex_src_rhs[MAX_RULE_SIZE];
-WordID sglex_trg_rhs[MAX_RULE_SIZE];
-int sglex_src_rhs_size;
-int sglex_trg_rhs_size;
-WordID sglex_lhs;
-int sglex_src_arity;
-int sglex_trg_arity;
-
-#define MAX_FEATS 100
-int sglex_feat_ids[MAX_FEATS];
-double sglex_feat_vals[MAX_FEATS];
-int sglex_num_feats;
-
-#define MAX_ARITY 20
-int sglex_nt_sanity[MAX_ARITY];
-int sglex_src_nts[MAX_ARITY];
-float sglex_nt_size_means[MAX_ARITY];
-float sglex_nt_size_vars[MAX_ARITY];
-
-std::vector<WordID> cur_src_rhs;
-std::vector<WordID> cur_trg_rhs;
-ID2RuleStatistics cur_options;
-RuleStatistics* cur_stats = NULL;
-int sglex_cur_fid = 0;
-
-static void sanity_check_trg_index(int index) {
-  if (index > sglex_src_arity) {
-    std::cerr << "Target index " << index << " exceeds source arity " << sglex_src_arity << std::endl;
-    abort();
-  }
-  int& flag = sglex_nt_sanity[index - 1];
-  if (flag) {
-    std::cerr << "Target index " << index << " used multiple times!" << std::endl;
-    abort();
-  }
-  flag = 1;
-}
-
-static void sglex_reset() {
-  sglex_src_arity = 0;
-  sglex_trg_arity = 0;
-  sglex_num_feats = 0;
-  sglex_src_rhs_size = 0;
-  sglex_trg_rhs_size = 0;
-}
-
-%}
-
-REAL [\-+]?[0-9]+(\.[0-9]*([eE][-+]*[0-9]+)?)?|inf|[\-+]inf
-NT [^\t \[\],]+
-ALIGN [0-9]+-[0-9]+
-
-%x LHS_END SRC TRG FEATS FEATVAL ALIGNS
-%%
-
-<INITIAL>[ ]	;
-<INITIAL>[\t]	{
-		if (read_contexts) {
-			cur_options.clear();
-			BEGIN(TRG);
-		} else {
-			std::cerr << "Unexpected tab while reading striped grammar\n";
-			exit(1);
-		}
-		}
-
-<INITIAL>\[{NT}\]   {
-		if (read_contexts) {
-			sglex_tmp_token.assign(yytext, yyleng);
-			sglex_src_rhs[sglex_src_rhs_size] = TD::Convert(sglex_tmp_token);
-			++sglex_src_rhs_size;
-		} else {
-			sglex_tmp_token.assign(yytext + 1, yyleng - 2);
-			sglex_lhs = -TD::Convert(sglex_tmp_token);
-			// std::cerr << sglex_tmp_token << "\n";
-  			BEGIN(LHS_END);
-			}
-		}
-
-<INITIAL>[^ \t]+ {
-		if (read_contexts) {
-			// std::cerr << "Context: " << yytext << std::endl;
-			sglex_tmp_token.assign(yytext, yyleng);
-			sglex_src_rhs[sglex_src_rhs_size] = TD::Convert(sglex_tmp_token);
-			++sglex_src_rhs_size;
-		} else {
-			std::cerr << "Unexpected input: " << yytext << " when NT expected\n";
-			exit(1);
-		}
-		}
-
-<SRC>\[{NT}\]   {
-		sglex_tmp_token.assign(yytext + 1, yyleng - 2);
-		sglex_src_nts[sglex_src_arity] = sglex_src_rhs[sglex_src_rhs_size] = -TD::Convert(sglex_tmp_token);
-		++sglex_src_arity;
-		++sglex_src_rhs_size;
-		}
-
-<LHS_END>[ ] { ; }
-<LHS_END>\|\|\|	{
-		sglex_reset();
-		BEGIN(SRC);
-		}
-
-<LHS_END>.	{
-		std::cerr << "Line " << lex_line << ": unexpected input in LHS: " << yytext << std::endl;
-		exit(1);
-		}
-
-
-<SRC>\[{NT},[1-9][0-9]?\]   {
-		int index = yytext[yyleng - 2] - '0';
-		if (yytext[yyleng - 3] == ',') {
-		  sglex_tmp_token.assign(yytext + 1, yyleng - 4);
-		} else {
-		  sglex_tmp_token.assign(yytext + 1, yyleng - 5);
-		  index += 10 * (yytext[yyleng - 3] - '0');
-		}
-		if ((sglex_src_arity+1) != index) {
-			std::cerr << "Src indices must go in order: expected " << sglex_src_arity << " but got " << index << std::endl;
-			abort();
-		}
-		sglex_src_nts[sglex_src_arity] = sglex_src_rhs[sglex_src_rhs_size] = -TD::Convert(sglex_tmp_token);
-		++sglex_src_rhs_size;
-		++sglex_src_arity;
-		}
-
-<SRC>[^ \t]+	{ 
-		sglex_tmp_token.assign(yytext, yyleng);
-		sglex_src_rhs[sglex_src_rhs_size] = TD::Convert(sglex_tmp_token);
-		++sglex_src_rhs_size;
-		}
-<SRC>[ ]	{ ; }
-<SRC>\t		{
-		//std::cerr << "LHS=" << TD::Convert(-sglex_lhs) << " ";
-		//std::cerr << "  src_size: " << sglex_src_rhs_size << std::endl;
-		//std::cerr << "  src_arity: " << sglex_src_arity << std::endl;
-		cur_options.clear();
-		memset(sglex_nt_sanity, 0, sglex_src_arity * sizeof(int));
-		sglex_trg_rhs_size = 0;
-		BEGIN(TRG);
-		}
-
-<TRG>\[[1-9][0-9]?\]   {
-		if (read_contexts) {
-			sglex_tmp_token.assign(yytext, yyleng);
-			sglex_trg_rhs[sglex_trg_rhs_size] = TD::Convert(sglex_tmp_token);
-			++sglex_trg_rhs_size;
-		} else {
-			int index = yytext[yyleng - 2] - '0';
-			if (yyleng == 4) {
-			  index += 10 * (yytext[yyleng - 3] - '0');
-			}
-			++sglex_trg_arity;
-			sanity_check_trg_index(index);
-			sglex_trg_rhs[sglex_trg_rhs_size] = 1 - index;
-			++sglex_trg_rhs_size;
-		}
-}
-
-<TRG>\|\|\|	{
-		//std::cerr << "  trg_size: " << sglex_trg_rhs_size << std::endl;
-		//std::cerr << "  trg_arity: " << sglex_trg_arity << std::endl;
-		assert(sglex_trg_rhs_size > 0);
-		cur_trg_rhs.resize(sglex_trg_rhs_size);
-		for (int i = 0; i < sglex_trg_rhs_size; ++i)
-			cur_trg_rhs[i] = sglex_trg_rhs[i];
-		cur_stats = &cur_options[cur_trg_rhs];
-		BEGIN(FEATS);
-		}
-
-<TRG>[^ ]+	{
-		sglex_tmp_token.assign(yytext, yyleng);
-		sglex_trg_rhs[sglex_trg_rhs_size] = TD::Convert(sglex_tmp_token);
-		
-		++sglex_trg_rhs_size;
-		}
-<TRG>[ ]+	{ ; }
-
-<FEATS>\n	{
-		assert(sglex_src_rhs_size > 0);
-		cur_src_rhs.resize(sglex_src_rhs_size);
-		for (int i = 0; i < sglex_src_rhs_size; ++i)
-			cur_src_rhs[i] = sglex_src_rhs[i];
-		if (read_contexts) {
-			context_callback(cur_src_rhs, cur_options, context_callback_extra);
-		} else {
-			assert(sglex_lhs < 0);
-			grammar_callback(sglex_lhs, cur_src_rhs, cur_options, grammar_callback_extra);
-		}
-		cur_options.clear();
-		sglex_reset();
-		BEGIN(INITIAL);
-		}
-<FEATS>[ ]+	{ ; }
-<FEATS>\|\|\|	{
-		memset(sglex_nt_sanity, 0, sglex_src_arity * sizeof(int));
-		sglex_trg_rhs_size = 0;
-		BEGIN(TRG);
-		}
-<FEATS>[A-Z][A-Z_0-9]*=	{
-		// std::cerr << "FV: " << yytext << std::endl;
-		sglex_tmp_token.assign(yytext, yyleng - 1);
-		sglex_cur_fid = FD::Convert(sglex_tmp_token);
-		static const int Afid = FD::Convert("A");
-		if (sglex_cur_fid == Afid) {
-			BEGIN(ALIGNS);
-		} else {
-			BEGIN(FEATVAL);
-		}
-		}
-<FEATVAL>{REAL}	{
-		// std::cerr << "Feature val input: " << yytext << std::endl;
-		cur_stats->counts.add_value(sglex_cur_fid, strtod(yytext, NULL));
-		BEGIN(FEATS);
-		}
-<FEATVAL>.	{
-		std::cerr << "Feature val unexpected input: " << yytext << std::endl;
-		exit(1);
-		}
-<FEATS>.	{
-		std::cerr << "Features unexpected input: " << yytext << std::endl;
-		exit(1);
-		}
-<ALIGNS>{ALIGN}(,{ALIGN})*	{
-		assert(cur_stats->aligns.empty());
-		int i = 0;
-		while(i < yyleng) {
-			short a = 0;
-			short b = 0;
-			while (yytext[i] != '-') { a *= 10; a += yytext[i] - '0'; ++i; }
-			++i;
-			while (yytext[i] != ',' && i < yyleng) { b *= 10; b += yytext[i] - '0'; ++i; }
-			++i;
-			cur_stats->aligns.push_back(std::make_pair(a,b));
-		}
-		BEGIN(FEATS);
-		}
-<ALIGNS>.	{
-		std::cerr << "Aligns unexpected input: " << yytext << std::endl;
-		exit(1);
-		}
-%%
-
-#include "filelib.h"
-
-void StripedGrammarLexer::ReadStripedGrammar(std::istream* in, GrammarCallback func, void* extra) {
-  read_contexts = 0;
-  lex_line = 1;
-  sglex_stream = in;
-  grammar_callback_extra = extra;
-  grammar_callback = func;
-  yylex();
-}
-
-void StripedGrammarLexer::ReadContexts(std::istream* in, ContextCallback func, void* extra) {
-  read_contexts = 1;
-  lex_line = 1;
-  sglex_stream = in;
-  context_callback_extra = extra;
-  context_callback = func;
-  yylex();
-}
-
-
diff --git a/extools/simple-extract-context.sh b/extools/simple-extract-context.sh
deleted file mode 100755
index 17487b1c..00000000
--- a/extools/simple-extract-context.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-MYDIR=$(dirname $0)
-
-export LANG=C
-date 1>&2
-$MYDIR/extractor -i $1 -c 500000 -L 12 -C | sort -t $'\t' -k 1 | $MYDIR/mr_stripe_rule_reduce
-date 1>&2
-
diff --git a/extools/simple-extract.sh b/extools/simple-extract.sh
deleted file mode 100755
index ec5c5276..00000000
--- a/extools/simple-extract.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-
-export LANG=C
-date
-./extractor -i $1 -d X -c 500000 -L 12 -b | sort -t $'\t' -k 1 | gzip > ex.output.gz
-date
-# -p = compute phrase marginals
-# -b = bidirectional rules (starting with F or E) were extracted
-zcat ex.output.gz | ./mr_stripe_rule_reduce -p -b | sort -t $'\t' -k 1 | ./mr_stripe_rule_reduce | gzip > phrase-table.gz
-date
-
diff --git a/extools/striped_grammar.cc b/extools/striped_grammar.cc
deleted file mode 100644
index 785f4bbe..00000000
--- a/extools/striped_grammar.cc
+++ /dev/null
@@ -1,67 +0,0 @@
-#include "striped_grammar.h"
-
-#include <iostream>
-
-#include "sentence_pair.h"
-
-using namespace std;
-
-namespace {
-  inline bool IsWhitespace(char c) { return c == ' ' || c == '\t'; }
-
-  inline void SkipWhitespace(const char* buf, int* ptr) {
-    while (buf[*ptr] && IsWhitespace(buf[*ptr])) { ++(*ptr); }
-  }
-}
-
-void RuleStatistics::ParseRuleStatistics(const char* buf, int start, int end) {
-  int ptr = start;
-  counts.clear();
-  aligns.clear();
-  while (ptr < end) {
-    SkipWhitespace(buf, &ptr);
-    int vstart = ptr;
-    while(ptr < end && buf[ptr] != '=') ++ptr;
-    assert(buf[ptr] == '=');
-    assert(ptr > vstart);
-    if (buf[vstart] == 'A' && buf[vstart+1] == '=') {
-      ++ptr;
-      while (ptr < end && !IsWhitespace(buf[ptr])) {
-        while(ptr < end && buf[ptr] == ',') { ++ptr; }
-        assert(ptr < end);
-        vstart = ptr;
-        while(ptr < end && buf[ptr] != ',' && !IsWhitespace(buf[ptr])) { ++ptr; }
-        if (ptr > vstart) {
-          short a, b;
-          AnnotatedParallelSentence::ReadAlignmentPoint(buf, vstart, ptr, false, &a, &b, 0, 0);
-          aligns.push_back(make_pair(a,b));
-        }
-      }
-    } else {
-      int name = FD::Convert(string(buf,vstart,ptr-vstart));
-      ++ptr;
-      vstart = ptr;
-      while(ptr < end && !IsWhitespace(buf[ptr])) { ++ptr; }
-      assert(ptr > vstart);
-      counts.set_value(name, strtod(buf + vstart, NULL));
-    }
-  }
-}
-
-ostream& operator<<(ostream& os, const RuleStatistics& s) {
-  bool needspace = false;
-  for (SparseVector<float>::const_iterator it = s.counts.begin(); it != s.counts.end(); ++it) {
-    if (needspace) os << ' '; else needspace = true;
-    os << FD::Convert(it->first) << '=' << it->second;
-  }
-  if (s.aligns.size() > 0) {
-    os << " A=";
-    needspace = false;
-    for (int i = 0; i < s.aligns.size(); ++i) {
-      if (needspace) os << ','; else needspace = true;
-      os << s.aligns[i].first << '-' << s.aligns[i].second;
-    }
-  }
-  return os;
-}
-
diff --git a/extools/striped_grammar.h b/extools/striped_grammar.h
deleted file mode 100644
index bf3aec7d..00000000
--- a/extools/striped_grammar.h
+++ /dev/null
@@ -1,56 +0,0 @@
-#ifndef _STRIPED_GRAMMAR_H_
-#define _STRIPED_GRAMMAR_H_
-
-#include <iostream>
-#include <boost/functional/hash.hpp>
-#include <vector>
-#include <tr1/unordered_map>
-#include "sparse_vector.h"
-#include "wordid.h"
-#include "tdict.h"
-
-// represents statistics / information about a rule pair
-struct RuleStatistics {
-  SparseVector<float> counts;
-  std::vector<std::pair<short,short> > aligns;
-  RuleStatistics() {}
-  RuleStatistics(int name, float val, const std::vector<std::pair<short,short> >& al) :
-      aligns(al) {
-    counts.set_value(name, val);
-  }
-  void ParseRuleStatistics(const char* buf, int start, int end);
-  RuleStatistics& operator+=(const RuleStatistics& rhs) {
-    counts += rhs.counts;
-    return *this;
-  }
-};
-std::ostream& operator<<(std::ostream& os, const RuleStatistics& s);
-
-inline void WriteNamed(const std::vector<WordID>& v, std::ostream* os) {
-  bool first = true;
-  for (int i = 0; i < v.size(); ++i) {
-    if (first) { first = false; } else { (*os) << ' '; }
-    if (v[i] < 0) { (*os) << '[' << TD::Convert(-v[i]) << ']'; }
-    else (*os) << TD::Convert(v[i]);
-  }
-}
-
-inline void WriteAnonymous(const std::vector<WordID>& v, std::ostream* os) {
-  bool first = true;
-  for (int i = 0; i < v.size(); ++i) {
-    if (first) { first = false; } else { (*os) << ' '; }
-    if (v[i] <= 0) { (*os) << '[' << (1-v[i]) << ']'; }
-    else (*os) << TD::Convert(v[i]);
-  }
-}
-
-typedef std::tr1::unordered_map<std::vector<WordID>, RuleStatistics, boost::hash<std::vector<WordID> > > ID2RuleStatistics;
-
-struct StripedGrammarLexer {
-  typedef void (*GrammarCallback)(WordID lhs, const std::vector<WordID>& src_rhs, const ID2RuleStatistics& rules, void *extra);
-  static void ReadStripedGrammar(std::istream* in, GrammarCallback func, void* extra);
-  typedef void (*ContextCallback)(const std::vector<WordID>& phrase, const ID2RuleStatistics& rules, void *extra);
-  static void ReadContexts(std::istream* in, ContextCallback func, void* extra);
-};
-
-#endif
diff --git a/extools/suffix_tree.h b/extools/suffix_tree.h
deleted file mode 100644
index f62f53f4..00000000
--- a/extools/suffix_tree.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * suffix_tree.h
- *
- *  Created on: May 17, 2010
- *      Author: Vlad
-
-NOTE (graehl): this seems to be a (forward) trie of the suffixes (of sentences).
-so O(m*n^2) for m sentences of length n.
-
-For a real suffix tree (linear size/time), see:
-http://en.wikipedia.org/wiki/Suffix_tree
-http://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
-
- */
-
-#ifndef SUFFIX_TREE_H_
-#define SUFFIX_TREE_H_
-
-#include <string>
-#include <map>
-#include <vector>
-
-template <class T>
-class Node {
-	public:
-		std::map<T, Node> edge_list_;
-		int InsertPath(const std::vector<T>& p, int start, int end);
-		const Node* Extend(const T& e) const {
-			typename std::map<T, Node>::const_iterator it = edge_list_.find(e);
-			if (it == edge_list_.end()) return NULL;
-			return &it->second;
-		}
-};
-
-bool DEBUG = false;
-
-template <class T>
-int Node<T>::InsertPath(const std::vector<T>& p, int start, int end){
-	Node* currNode = this;
-	for(int i=start;i<= end; i++ ) {
-		currNode = &(currNode->edge_list_)[p[i]];
-	}
-	return 1;
-}
-
-#endif /* SUFFIX_TRIE_H_ */
diff --git a/extools/test_data/README b/extools/test_data/README
deleted file mode 100644
index e368cffc..00000000
--- a/extools/test_data/README
+++ /dev/null
@@ -1,10 +0,0 @@
-The following was used to create the test data.  The real inputs
-were corpus.fr, corpus.en, and corpus.aligned.  The generated files
-were corpus.len_cats and fr-en.al.len.
-
-
-  ./make_len_cats.pl corpus.en > corpus.len_cats
-
-  ../merge_lines.pl corpus.fr corpus.en corpus.aligned corpus.len_cats > fr-en.al.len
-
-
diff --git a/extools/test_data/corpus.aligned b/extools/test_data/corpus.aligned
deleted file mode 100644
index aa09e9ab..00000000
--- a/extools/test_data/corpus.aligned
+++ /dev/null
@@ -1,5 +0,0 @@
-0-0 1-2 2-1
-0-0 1-1
-0-0 0-1 1-0 1-1 2-0 2-1 3-2 4-3
-0-0
-0-0 1-1
diff --git a/extools/test_data/corpus.en b/extools/test_data/corpus.en
deleted file mode 100644
index 2d4751bf..00000000
--- a/extools/test_data/corpus.en
+++ /dev/null
@@ -1,5 +0,0 @@
-the blue house
-the hat
-there is a hat
-cap
-the cat
diff --git a/extools/test_data/corpus.fr b/extools/test_data/corpus.fr
deleted file mode 100644
index 75b5e127..00000000
--- a/extools/test_data/corpus.fr
+++ /dev/null
@@ -1,5 +0,0 @@
-la maison bleue
-le chapeau
-il y a un chapeau
-chapeau
-le chat
diff --git a/extools/test_data/corpus.len_cats b/extools/test_data/corpus.len_cats
deleted file mode 100644
index 18d321de..00000000
--- a/extools/test_data/corpus.len_cats
+++ /dev/null
@@ -1,5 +0,0 @@
-0-1:SHORT 0-2:SHORT 0-3:MID 1-2:SHORT 1-3:SHORT 2-3:SHORT
-0-1:SHORT 0-2:SHORT 1-2:SHORT
-0-1:SHORT 0-2:SHORT 0-3:MID 0-4:MID 1-2:SHORT 1-3:SHORT 1-4:MID 2-3:SHORT 2-4:SHORT 3-4:SHORT
-0-1:SHORT
-0-1:SHORT 0-2:SHORT 1-2:SHORT
diff --git a/extools/test_data/fr-en.al.len b/extools/test_data/fr-en.al.len
deleted file mode 100644
index 7ee6b85d..00000000
--- a/extools/test_data/fr-en.al.len
+++ /dev/null
@@ -1,5 +0,0 @@
-la maison bleue ||| the blue house ||| 0-0 1-2 2-1 ||| 0-1:SHORT 0-2:SHORT 0-3:MID 1-2:SHORT 1-3:SHORT 2-3:SHORT
-le chapeau ||| the hat ||| 0-0 1-1 ||| 0-1:SHORT 0-2:SHORT 1-2:SHORT
-il y a un chapeau ||| there is a hat ||| 0-0 0-1 1-0 1-1 2-0 2-1 3-2 4-3 ||| 0-1:SHORT 0-2:SHORT 0-3:MID 0-4:MID 1-2:SHORT 1-3:SHORT 1-4:MID 2-3:SHORT 2-4:SHORT 3-4:SHORT
-chapeau ||| cap ||| 0-0 ||| 0-1:SHORT
-le chat ||| the cat ||| 0-0 1-1 ||| 0-1:SHORT 0-2:SHORT 1-2:SHORT
diff --git a/extools/test_data/make_len_cats.pl b/extools/test_data/make_len_cats.pl
deleted file mode 100755
index 25ef75fa..00000000
--- a/extools/test_data/make_len_cats.pl
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-my $max_len = 15;
-my @cat_names = qw( NULL SHORT SHORT MID MID MID LONG LONG LONG LONG LONG VLONG VLONG VLONG VLONG VLONG );
-
-while(<>) {
-  chomp;
-  my @words = split /\s+/;
-  my $len = scalar @words;
-  my @spans;
-  for (my $i =0; $i < $len; $i++) {
-    for (my $k = 1; $k <= $max_len; $k++) {
-      my $j = $i + $k;
-      next if ($j > $len);
-      my $cat = $cat_names[$k];
-      die unless $cat;
-      push @spans, "$i-$j:$cat";
-    }
-  }
-  print "@spans\n";
-}
-