clean up names of feature functions, fix tagger, fix tests

author: Chris Dyer <cdyer@cs.cmu.edu> 2010-12-22 08:49:18 -0600
committer: Chris Dyer <cdyer@cs.cmu.edu> 2010-12-22 08:49:18 -0600
commit: 86805dcb8aaaa716fdc73725ad41e411be53f6a6 (patch)
tree: 4d3cf9d479388d65447c0bd25b012fc9f247e360
parent: 491848839c0340f6c629ebae7ed6b6dc1a3842ad (diff)
19 files changed, 142 insertions, 163 deletions
diff --git a/.gitignore b/.gitignore
index c2c20c55..3892891c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+klm/lm/build_binary
 extools/extractor_monolingual
 gi/posterior-regularisation/prjava/lib/*.jar
 klm/lm/libklm.a
diff --git a/decoder/bottom_up_parser.cc b/decoder/bottom_up_parser.cc
index 9504419c..aecf1cfa 100644
--- a/decoder/bottom_up_parser.cc
+++ b/decoder/bottom_up_parser.cc
@@ -14,20 +14,6 @@
 
 using namespace std;
 
-struct ParserStats {
-  ParserStats() : active_items(), passive_items() {}
-  void Reset() { active_items=0; passive_items=0; }
-  void Report() {
-    if (!SILENT) cerr << "  ACTIVE ITEMS: " << active_items << "\tPASSIVE ITEMS: " << passive_items << endl;
-  }
-  int active_items;
-  int passive_items;
-  void NotifyActive(int , int ) { ++active_items; }
-  void NotifyPassive(int , int ) { ++passive_items; }
-};
-
-ParserStats stats;
-
 class ActiveChart;
 class PassiveChart {
  public:
@@ -90,7 +76,6 @@ class ActiveChart {
     void ExtendTerminal(int symbol, float src_cost, vector<ActiveItem>* out_cell) const {
       const GrammarIter* ni = gptr_->Extend(symbol);
       if (ni) {
-        stats.NotifyActive(-1,-1);  // TRACKING STATS
         out_cell->push_back(ActiveItem(ni, ant_nodes_, lattice_cost + src_cost));
       }
     }
@@ -98,7 +83,6 @@ class ActiveChart {
       int symbol = hg->nodes_[node_index].cat_;
       const GrammarIter* ni = gptr_->Extend(symbol);
       if (!ni) return;
-      stats.NotifyActive(-1,-1);  // TRACKING STATS
       Hypergraph::TailNodeVector na(ant_nodes_.size() + 1);
       for (int i = 0; i < ant_nodes_.size(); ++i)
         na[i] = ant_nodes_[i];
@@ -181,7 +165,6 @@ void PassiveChart::ApplyRule(const int i,
                              const TRulePtr& r,
                              const Hypergraph::TailNodeVector& ant_nodes,
                              const float lattice_cost) {
-  stats.NotifyPassive(i,j);  // TRACKING STATS
   Hypergraph::Edge* new_edge = forest_->AddEdge(r, ant_nodes);
   new_edge->prev_i_ = r->prev_i;
   new_edge->prev_j_ = r->prev_j;
@@ -299,9 +282,7 @@ ExhaustiveBottomUpParser::ExhaustiveBottomUpParser(
 
 bool ExhaustiveBottomUpParser::Parse(const Lattice& input,
                                      Hypergraph* forest) const {
-  stats.Reset();
   PassiveChart chart(goal_sym_, grammars_, input, forest);
   const bool result = chart.Parse();
-  stats.Report();
   return result;
 }
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 729d1214..75591af8 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -53,15 +53,15 @@ void register_feature_functions() {
   ff_registry.Register("LexNullJump", new FFFactory<LexNullJump>);
   ff_registry.Register("NewJump", new FFFactory<NewJump>);
   ff_registry.Register("SourceBigram", new FFFactory<SourceBigram>);
+  ff_registry.Register("Fertility", new FFFactory<Fertility>);
   ff_registry.Register("BlunsomSynchronousParseHack", new FFFactory<BlunsomSynchronousParseHack>);
-  ff_registry.Register("AlignerResults", new FFFactory<AlignerResults>);
   ff_registry.Register("CSplit_BasicFeatures", new FFFactory<BasicCSplitFeatures>);
   ff_registry.Register("CSplit_ReverseCharLM", new FFFactory<ReverseCharLMCSplitFeature>);
-  ff_registry.Register("Tagger_BigramIdentity", new FFFactory<Tagger_BigramIdentity>);
-  ff_registry.Register("LexicalPairIdentity", new FFFactory<LexicalPairIdentity>);
-  ff_registry.Register("OutputIdentity", new FFFactory<OutputIdentity>);
+  ff_registry.Register("Tagger_BigramIndicator", new FFFactory<Tagger_BigramIndicator>);
+  ff_registry.Register("LexicalPairIndicator", new FFFactory<LexicalPairIndicator>);
+  ff_registry.Register("OutputIndicator", new FFFactory<OutputIndicator>);
   ff_registry.Register("IdentityCycleDetector", new FFFactory<IdentityCycleDetector>);
-  ff_registry.Register("InputIdentity", new FFFactory<InputIdentity>);
+  ff_registry.Register("InputIndicator", new FFFactory<InputIndicator>);
   ff_registry.Register("LexicalTranslationTrigger", new FFFactory<LexicalTranslationTrigger>);
   ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>);
   ff_registry.Register("WordSet", new FFFactory<WordSet>);
diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc
index f267f8e8..1485009b 100644
--- a/decoder/ff_csplit.cc
+++ b/decoder/ff_csplit.cc
@@ -208,9 +208,9 @@ void ReverseCharLMCSplitFeature::TraversalFeaturesImpl(
   if (edge.rule_->EWords() != 1) return;
   const double lpp = pimpl_->LeftPhonotacticProb(smeta.GetSourceLattice(), edge.i_);
   features->set_value(fid_, lpp);
-#if 0
   WordID neighbor_word = 0;
   const WordID word = edge.rule_->e_[1];
+#if 0
   if (chars > 4 && (sword[0] == 's' || sword[0] == 'n')) {
     neighbor_word = TD::Convert(string(&sword[1]));
   }
diff --git a/decoder/ff_tagger.cc b/decoder/ff_tagger.cc
index 21d0f812..46c85cf3 100644
--- a/decoder/ff_tagger.cc
+++ b/decoder/ff_tagger.cc
@@ -8,10 +8,10 @@
 
 using namespace std;
 
-Tagger_BigramIdentity::Tagger_BigramIdentity(const std::string& param) :
+Tagger_BigramIndicator::Tagger_BigramIndicator(const std::string& param) :
   FeatureFunction(sizeof(WordID)) {}
 
-void Tagger_BigramIdentity::FireFeature(const WordID& left,
+void Tagger_BigramIndicator::FireFeature(const WordID& left,
                                  const WordID& right,
                                  SparseVector<double>* features) const {
   int& fid = fmap_[left][right];
@@ -30,7 +30,7 @@ void Tagger_BigramIdentity::FireFeature(const WordID& left,
   features->set_value(fid, 1.0);
 }
 
-void Tagger_BigramIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+void Tagger_BigramIndicator::TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                      const Hypergraph::Edge& edge,
                                      const std::vector<const void*>& ant_contexts,
                                      SparseVector<double>* features,
@@ -53,18 +53,18 @@ void Tagger_BigramIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta,
   }
 }
 
-void LexicalPairIdentity::PrepareForInput(const SentenceMetadata& smeta) {
+void LexicalPairIndicator::PrepareForInput(const SentenceMetadata& smeta) {
   lexmap_->PrepareForInput(smeta);
 }
 
-LexicalPairIdentity::LexicalPairIdentity(const std::string& param) {
+LexicalPairIndicator::LexicalPairIndicator(const std::string& param) {
   name_ = "Id";
   if (param.size()) {
     // name corpus.f emap.txt
     vector<string> params;
     SplitOnWhitespace(param, &params);
     if (params.size() != 3) {
-      cerr << "LexicalPairIdentity takes 3 parameters: <name> <corpus.src.txt> <trgmap.txt>\n";
+      cerr << "LexicalPairIndicator takes 3 parameters: <name> <corpus.src.txt> <trgmap.txt>\n";
       cerr << " * may be used for corpus.src.txt or trgmap.txt to use surface forms\n";
       cerr << " Received: " << param << endl;
       abort();
@@ -76,7 +76,7 @@ LexicalPairIdentity::LexicalPairIdentity(const std::string& param) {
   }
 }
 
-void LexicalPairIdentity::FireFeature(WordID src,
+void LexicalPairIndicator::FireFeature(WordID src,
                                       WordID trg,
                                       SparseVector<double>* features) const {
   int& fid = fmap_[src][trg];
@@ -88,7 +88,7 @@ void LexicalPairIdentity::FireFeature(WordID src,
   features->set_value(fid, 1.0);
 }
 
-void LexicalPairIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+void LexicalPairIndicator::TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                      const Hypergraph::Edge& edge,
                                      const std::vector<const void*>& ant_contexts,
                                      SparseVector<double>* features,
@@ -105,9 +105,9 @@ void LexicalPairIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta,
   }
 }
 
-OutputIdentity::OutputIdentity(const std::string& param) {}
+OutputIndicator::OutputIndicator(const std::string& param) {}
 
-void OutputIdentity::FireFeature(WordID trg,
+void OutputIndicator::FireFeature(WordID trg,
                                  SparseVector<double>* features) const {
   int& fid = fmap_[trg];
   if (!fid) {
@@ -125,7 +125,7 @@ void OutputIdentity::FireFeature(WordID trg,
   features->set_value(fid, 1.0);
 }
 
-void OutputIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+void OutputIndicator::TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                      const Hypergraph::Edge& edge,
                                      const std::vector<const void*>& ant_contexts,
                                      SparseVector<double>* features,
diff --git a/decoder/ff_tagger.h b/decoder/ff_tagger.h
index 6adee5ab..3066866a 100644
--- a/decoder/ff_tagger.h
+++ b/decoder/ff_tagger.h
@@ -13,9 +13,9 @@ typedef std::map<WordID, Class2FID> Class2Class2FID;
 // the sequence unfolds from left to right, which means it doesn't
 // have to split states based on left context.
 // fires unigram features as well
-class Tagger_BigramIdentity : public FeatureFunction {
+class Tagger_BigramIndicator : public FeatureFunction {
  public:
-  Tagger_BigramIdentity(const std::string& param);
+  Tagger_BigramIndicator(const std::string& param);
  protected:
   virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                      const Hypergraph::Edge& edge,
@@ -32,9 +32,9 @@ class Tagger_BigramIdentity : public FeatureFunction {
 
 // for each pair of symbols cooccuring in a lexicalized rule, fire
 // a feature (mostly used for tagging, but could be used for any model)
-class LexicalPairIdentity : public FeatureFunction {
+class LexicalPairIndicator : public FeatureFunction {
  public:
-  LexicalPairIdentity(const std::string& param);
+  LexicalPairIndicator(const std::string& param);
   virtual void PrepareForInput(const SentenceMetadata& smeta);
  protected:
   virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
@@ -53,9 +53,9 @@ class LexicalPairIdentity : public FeatureFunction {
 };
 
 
-class OutputIdentity : public FeatureFunction {
+class OutputIndicator : public FeatureFunction {
  public:
-  OutputIdentity(const std::string& param);
+  OutputIndicator(const std::string& param);
  protected:
   virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                      const Hypergraph::Edge& edge,
diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc
index ef3310b4..cdb8662a 100644
--- a/decoder/ff_wordalign.cc
+++ b/decoder/ff_wordalign.cc
@@ -6,6 +6,7 @@
 #include <sstream>
 #include <string>
 #include <cmath>
+#include <bitset>
 #include <tr1/unordered_map>
 
 #include <boost/tuple/tuple.hpp>
@@ -443,58 +444,6 @@ void LexicalTranslationTrigger::TraversalFeaturesImpl(const SentenceMetadata& sm
   }
 }
 
-// state: src word used, number of trg words generated
-AlignerResults::AlignerResults(const std::string& param) :
-    cur_sent_(-1),
-    cur_grid_(NULL) {
-  vector<string> argv;
-  int argc = SplitOnWhitespace(param, &argv);
-  if (argc != 2) {
-    cerr << "Required format: AlignerResults [FeatureName] [file.pharaoh]\n";
-    exit(1);
-  }
-  cerr << "  feature: " << argv[0] << "\talignments: " << argv[1] << endl;
-  fid_ = FD::Convert(argv[0]);
-  ReadFile rf(argv[1]);
-  istream& in = *rf.stream(); int lc = 0;
-  while(in) {
-    string line;
-    getline(in, line);
-    if (!in) break;
-    ++lc;
-    is_aligned_.push_back(AlignmentPharaoh::ReadPharaohAlignmentGrid(line));
-  }
-  cerr << "  Loaded " << lc << " refs\n";
-}
-
-void AlignerResults::TraversalFeaturesImpl(const SentenceMetadata& smeta,
-                                           const Hypergraph::Edge& edge,
-                                           const vector<const void*>& /* ant_states */,
-                                           SparseVector<double>* features,
-                                           SparseVector<double>* /* estimated_features */,
-                                           void* /* state */) const {
-  if (edge.i_ == -1 || edge.prev_i_ == -1)
-    return;
-
-  if (cur_sent_ != smeta.GetSentenceID()) {
-    assert(smeta.HasReference());
-    cur_sent_ = smeta.GetSentenceID();
-    assert(cur_sent_ < is_aligned_.size());
-    cur_grid_ = is_aligned_[cur_sent_].get();
-  }
-
-  //cerr << edge.rule_->AsString() << endl;
-
-  int j = edge.i_;        // source side (f)
-  int i = edge.prev_i_;   // target side (e)
-  if (j < cur_grid_->height() && i < cur_grid_->width() && (*cur_grid_)(i, j)) {
-//    if (edge.rule_->e_[0] == smeta.GetReference()[i][0].label) {
-      features->set_value(fid_, 1.0);
-//      cerr << edge.rule_->AsString() << "   (" << i << "," << j << ")\n";
-//    }
-  }
-}
-
 BlunsomSynchronousParseHack::BlunsomSynchronousParseHack(const string& param) :
   FeatureFunction((100 / 8) + 1), fid_(FD::Convert("NotRef")), cur_sent_(-1) {
   ReadFile rf(param);
@@ -618,10 +567,10 @@ void IdentityCycleDetector::TraversalFeaturesImpl(const SentenceMetadata& smeta,
 }
 
 
-InputIdentity::InputIdentity(const std::string& param) {}
+InputIndicator::InputIndicator(const std::string& param) {}
 
-void InputIdentity::FireFeature(WordID src,
-                                SparseVector<double>* features) const {
+void InputIndicator::FireFeature(WordID src,
+                                 SparseVector<double>* features) const {
   int& fid = fmap_[src];
   if (!fid) {
     static map<WordID, WordID> escape;
@@ -638,7 +587,7 @@ void InputIdentity::FireFeature(WordID src,
   features->set_value(fid, 1.0);
 }
 
-void InputIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+void InputIndicator::TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                      const Hypergraph::Edge& edge,
                                      const std::vector<const void*>& ant_contexts,
                                      SparseVector<double>* features,
@@ -770,3 +719,40 @@ void WordPairFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
   }
 }
 
+struct PathFertility {  // plain counters; sizeof(PathFertility) is what Fertility passes to FeatureFunction
+  unsigned char null_fertility;        // set to 1 by Fertility for arity-0 edges with i_ < 0
+  unsigned char index_fertility[255];  // one counter per edge.i_ value in [0, 254]
+  static unsigned char Saturate(int v) { return static_cast<unsigned char>(v > 255 ? 255 : v); }
+  PathFertility& operator+=(const PathFertility& rhs) {  // element-wise sum, clamped at 255 instead of wrapping
+    null_fertility = Saturate(null_fertility + rhs.null_fertility);
+    for (int i = 0; i < 255; ++i) index_fertility[i] = Saturate(index_fertility[i] + rhs.index_fertility[i]);
+    return *this;
+  }
+};
+
+Fertility::Fertility(const string& config)  // config is accepted but never read
+    : FeatureFunction(sizeof(PathFertility)) {}
+
+void Fertility::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                      const Hypergraph::Edge& edge,
+                                      const std::vector<const void*>& ant_contexts,
+                                      SparseVector<double>* features,
+                                      SparseVector<double>* estimated_features,
+                                      void* context) const {  // fills `context` only; `features` is never written
+  PathFertility& out_fert = *static_cast<PathFertility*>(context);
+  out_fert = PathFertility();  // zero all counters; NOTE(review): caller may already do this, not visible here
+  const int kSlots = sizeof(out_fert.index_fertility) / sizeof(out_fert.index_fertility[0]);
+  if (edge.Arity() == 0) {
+    if (edge.i_ < 0) {
+      out_fert.null_fertility = 1;
+    } else if (edge.i_ < kSlots) {  // positions past the table are dropped rather than written out of bounds
+      out_fert.index_fertility[edge.i_] = 1;
+    }
+  } else if (edge.Arity() == 2) {  // sum both antecedent states in place (no 256-byte temporaries)
+    out_fert += *static_cast<const PathFertility*>(ant_contexts[0]);
+    out_fert += *static_cast<const PathFertility*>(ant_contexts[1]);
+  } else if (edge.Arity() == 1) {  // unary edge: carry the single antecedent's counters forward
+    out_fert += *static_cast<const PathFertility*>(ant_contexts[0]);
+  }
+}
+
diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h
index 8035000e..d7a2dda8 100644
--- a/decoder/ff_wordalign.h
+++ b/decoder/ff_wordalign.h
@@ -124,23 +124,6 @@ class LexicalTranslationTrigger : public FeatureFunction {
   std::vector<std::vector<WordID> > triggers_;
 };
 
-class AlignerResults : public FeatureFunction {
- public:
-  AlignerResults(const std::string& param);
- protected:
-  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
-                                     const Hypergraph::Edge& edge,
-                                     const std::vector<const void*>& ant_contexts,
-                                     SparseVector<double>* features,
-                                     SparseVector<double>* estimated_features,
-                                     void* out_context) const;
- private:
-  int fid_;
-  std::vector<boost::shared_ptr<Array2D<bool> > > is_aligned_;
-  mutable int cur_sent_;
-  const Array2D<bool> mutable* cur_grid_;
-};
-
 #include <tr1/unordered_map>
 #include <boost/functional/hash.hpp>
 #include <cassert>
@@ -254,9 +237,9 @@ class IdentityCycleDetector : public FeatureFunction {
   mutable std::map<WordID, bool> big_enough_;
 };
 
-class InputIdentity : public FeatureFunction {
+class InputIndicator : public FeatureFunction {
  public:
-  InputIdentity(const std::string& param);
+  InputIndicator(const std::string& param);
  protected:
   virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                      const Hypergraph::Edge& edge,
@@ -270,4 +253,18 @@ class InputIdentity : public FeatureFunction {
   mutable Class2FID fmap_;
 };
 
+class Fertility : public FeatureFunction {  // feature function whose per-edge state is built in ff_wordalign.cc
+ public:
+  Fertility(const std::string& param);  // param is not read by the constructor defined in ff_wordalign.cc
+ protected:
+  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                     const Hypergraph::Edge& edge,
+                                     const std::vector<const void*>& ant_contexts,
+                                     SparseVector<double>* features,
+                                     SparseVector<double>* estimated_features,
+                                     void* context) const;  // writes this edge's state into context
+ private:
+  mutable std::map<WordID, int> fids_;  // NOTE(review): not referenced by the implementation added in this commit
+};
+
 #endif
diff --git a/decoder/tagger.cc b/decoder/tagger.cc
index 4dded35f..54890e85 100644
--- a/decoder/tagger.cc
+++ b/decoder/tagger.cc
@@ -96,7 +96,7 @@ bool Tagger::TranslateImpl(const string& input,
                        SentenceMetadata* smeta,
                        const vector<double>& weights,
                        Hypergraph* forest) {
-  Lattice lattice;
+  Lattice& lattice = smeta->src_lattice_;
   LatticeTools::ConvertTextToLattice(input, &lattice);
   smeta->SetSourceLength(lattice.size());
   vector<WordID> sequence(lattice.size());
diff --git a/tests/system_tests/controlled_synparse/gold.statistics b/tests/system_tests/controlled_synparse/gold.statistics
index 0b8453f8..4cd93617 100644
--- a/tests/system_tests/controlled_synparse/gold.statistics
+++ b/tests/system_tests/controlled_synparse/gold.statistics
@@ -1,12 +1,6 @@
 -lm_nodes 11
 -lm_edges 18
 -lm_paths 18
-constr_nodes 8
-constr_edges 8
-constr_paths 1
 -lm_nodes 11
 -lm_edges 18
 -lm_paths 18
-constr_nodes 12
-constr_edges 14
-constr_paths 3
diff --git a/tests/system_tests/controlled_synparse/input.txt b/tests/system_tests/controlled_synparse/input.txt
index 2dbc09c8..4b2b2589 100644
--- a/tests/system_tests/controlled_synparse/input.txt
+++ b/tests/system_tests/controlled_synparse/input.txt
@@ -1,2 +1,2 @@
-A B C D ||| a d c b
-A B C D ||| a b c d
+A B C D
+A B C D
diff --git a/tests/system_tests/tagger/cdec.ini b/tests/system_tests/tagger/cdec.ini
index 15cc930d..e11e6878 100644
--- a/tests/system_tests/tagger/cdec.ini
+++ b/tests/system_tests/tagger/cdec.ini
@@ -1,5 +1,5 @@
 formalism=tagger
-feature_function=Tagger_BigramIdentity
-feature_function=LexicalPairIdentity
+feature_function=Tagger_BigramIndicator
+feature_function=LexicalPairIndicator
 intersection_strategy=full
-tagger_tagset=tagset
-\ No newline at end of file
+tagger_tagset=tagset
diff --git a/tests/system_tests/tagger/gold.statistics b/tests/system_tests/tagger/gold.statistics
new file mode 100644
index 00000000..a86584aa
--- /dev/null
+++ b/tests/system_tests/tagger/gold.statistics
@@ -0,0 +1,12 @@
+-lm_nodes 6
+-lm_edges 12
+-lm_paths 27
++lm_nodes 16
++lm_edges 30
++lm_paths 27
+-lm_nodes 2
+-lm_edges 4
+-lm_paths 3
++lm_nodes 4
++lm_edges 6
++lm_paths 3
diff --git a/tests/system_tests/tagger/gold.stdout b/tests/system_tests/tagger/gold.stdout
new file mode 100644
index 00000000..0dc9a4ec
--- /dev/null
+++ b/tests/system_tests/tagger/gold.stdout
@@ -0,0 +1,2 @@
+c c c
+c
diff --git a/tests/system_tests/tagger/input.txt b/tests/system_tests/tagger/input.txt
index fe5cb58d..b6cfc776 100644
--- a/tests/system_tests/tagger/input.txt
+++ b/tests/system_tests/tagger/input.txt
@@ -1,2 +1,2 @@
 A B C
-A
-\ No newline at end of file
+A
diff --git a/tests/system_tests/tagger/weights b/tests/system_tests/tagger/weights
index 5a035c26..df9adc7b 100644
--- a/tests/system_tests/tagger/weights
+++ b/tests/system_tests/tagger/weights
@@ -1,27 +1,27 @@
-Uni:a 0
-Id:A:a 0
-Uni:b 0
-Id:A:b 0
-Uni:c 0
-Id:A:c 0
-Id:B:a 0
-Id:B:b 0
-Id:B:c 0
-Bi:BOS_a 0
-Bi:a_a 0
-Bi:BOS_b 0
-Bi:b_a 0
-Bi:BOS_c 0
-Bi:c_a 0
-Bi:a_b 0
-Bi:b_b 0
-Bi:c_b 0
-Bi:a_c 0
-Bi:b_c 0
-Bi:c_c 0
-Id:C:a 0
-Id:C:b 0
-Id:C:c 0
-Bi:a_EOS 0
-Bi:b_EOS 0
-Bi:c_EOS 0
+Uni:a 0.1
+Id:A:a 0.2
+Uni:b 0.321
+Id:A:b -0.2
+Uni:c 0.54
+Id:A:c 0.7
+Id:B:a 0.13
+Id:B:b 0.14
+Id:B:c 0.15
+Bi:BOS_a 0.16
+Bi:a_a 0.27
+Bi:BOS_b 0.18
+Bi:b_a 0.19
+Bi:BOS_c 0.22
+Bi:c_a 0.23
+Bi:a_b 0.24
+Bi:b_b 0.25
+Bi:c_b 0.26
+Bi:a_c 0.27
+Bi:b_c 0.28
+Bi:c_c 0.29
+Id:C:a 0.41
+Id:C:b 0.42
+Id:C:c 0.43
+Bi:a_EOS 0.44
+Bi:b_EOS 0.45
+Bi:c_EOS 0.98
diff --git a/tests/system_tests/unsup-align/cdec.ini b/tests/system_tests/unsup-align/cdec.ini
index 885338a6..f591bcbe 100644
--- a/tests/system_tests/unsup-align/cdec.ini
+++ b/tests/system_tests/unsup-align/cdec.ini
@@ -3,4 +3,4 @@ grammar=unsup-align.lex-grammar
 intersection_strategy=full
 formalism=lextrans
 feature_function=RelativeSentencePosition
-feature_function=MarkovJump -b
+feature_function=NewJump
diff --git a/tests/system_tests/unsup-align/weights b/tests/system_tests/unsup-align/weights
index 7d9012c5..535ee67b 100644
--- a/tests/system_tests/unsup-align/weights
+++ b/tests/system_tests/unsup-align/weights
@@ -1,4 +1,9 @@
 RelativeSentencePosition -0.1
+J:R1 0.2
+J:R2 0.1
+J:S0 0
+J:L1 -0.1
+J:L2 -0.2
 MarkovJump -0.2
 F1000001 0.45280036748928199
 F1000002 -0.30603801277140658
diff --git a/word-aligner/aligner.pl b/word-aligner/aligner.pl
index fc2e7fcb..3a385a88 100755
--- a/word-aligner/aligner.pl
+++ b/word-aligner/aligner.pl
@@ -119,16 +119,17 @@ grammar=$align_dir/grammars/corpus.$direction.lex-grammar.gz
 # per_sentence_grammar_file=$align_dir/grammars/psg.$direction
 
 feature_function=WordPairFeatures $align_dir/grammars/wordpairs.$direction.features.gz
-feature_function=LexicalPairIdentity
+feature_function=LexicalPairIndicator
 # stem translation
-feature_function=LexicalPairIdentity S $align_dir/grammars/corpus.stemmed.$first $align_dir/grammars/${second}stem.map
+feature_function=LexicalPairIndicator S $align_dir/grammars/corpus.stemmed.$first $align_dir/grammars/${second}stem.map
 # POS translation
-feature_function=LexicalPairIdentity C $align_dir/grammars/corpus.class.$first $align_dir/grammars/voc2class.$second
-feature_function=InputIdentity
-feature_function=OutputIdentity
+feature_function=LexicalPairIndicator C $align_dir/grammars/corpus.class.$first $align_dir/grammars/voc2class.$second
+feature_function=InputIndicator
+feature_function=OutputIndicator
 feature_function=RelativeSentencePosition $align_dir/grammars/corpus.class.$first
 feature_function=LexNullJump
 feature_function=NewJump
+feature_function=IdentityCycleDetector
 feature_function=NewJump use_binned_log_lengths flen
 # jump distance and src and destination class type
 feature_function=NewJump use_binned_log_lengths f0 fprev f:$align_dir/grammars/corpus.class.$first
author	Chris Dyer <cdyer@cs.cmu.edu>	2010-12-22 08:49:18 -0600
committer	Chris Dyer <cdyer@cs.cmu.edu>	2010-12-22 08:49:18 -0600
commit	86805dcb8aaaa716fdc73725ad41e411be53f6a6 (patch)
tree	4d3cf9d479388d65447c0bd25b012fc9f247e360
parent	491848839c0340f6c629ebae7ed6b6dc1a3842ad (diff)