From d9cc1a6986188a97e09e4c8cef46c34eee5f9cd2 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@Chriss-MacBook-Air.local>
Date: Sun, 10 Nov 2013 00:58:44 -0500
Subject: guard against direct includes of tr1

---
 word-aligner/fast_align.cc |  8 ++++++--
 word-aligner/ttables.cc    |  1 -
 word-aligner/ttables.h     | 11 ++++++++---
 3 files changed, 14 insertions(+), 6 deletions(-)

(limited to 'word-aligner')
diff --git a/word-aligner/fast_align.cc b/word-aligner/fast_align.cc
index fddcba9c..589ca62d 100644
--- a/word-aligner/fast_align.cc
+++ b/word-aligner/fast_align.cc
@@ -1,7 +1,12 @@
 #include <iostream>
 #include <cmath>
 #include <utility>
-#include <tr1/unordered_map>
+#ifdef HAVE_CXX11
+# include <unordered_map>
+#else
+# include <tr1/unordered_map>
+namespace std { using std::tr1::unordered_map; }
+#endif
 
 #include <boost/functional/hash.hpp>
 #include <boost/program_options.hpp>
@@ -17,7 +22,6 @@
 
 namespace po = boost::program_options;
 using namespace std;
-using namespace std::tr1;
 
 bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
diff --git a/word-aligner/ttables.cc b/word-aligner/ttables.cc
index c177aa30..a56bbcef 100644
--- a/word-aligner/ttables.cc
+++ b/word-aligner/ttables.cc
@@ -5,7 +5,6 @@
 #include "dict.h"
 
 using namespace std;
-using namespace std::tr1;
 
 void TTable::DeserializeProbsFromText(std::istream* in) {
   int c = 0;
diff --git a/word-aligner/ttables.h b/word-aligner/ttables.h
index 507f591a..1785e064 100644
--- a/word-aligner/ttables.h
+++ b/word-aligner/ttables.h
@@ -2,7 +2,12 @@
 #define _TTABLES_H_
 
 #include <iostream>
-#include <tr1/unordered_map>
+#ifdef HAVE_CXX11
+# include <unordered_map>
+#else
+# include <tr1/unordered_map>
+namespace std { using std::tr1::unordered_map; }
+#endif
 
 #include "sparse_vector.h"
 #include "m.h"
@@ -12,8 +17,8 @@
 class TTable {
  public:
   TTable() {}
-  typedef std::tr1::unordered_map<WordID, double> Word2Double;
-  typedef std::tr1::unordered_map<WordID, Word2Double> Word2Word2Double;
+  typedef std::unordered_map<WordID, double> Word2Double;
+  typedef std::unordered_map<WordID, Word2Double> Word2Word2Double;
   inline double prob(const int& e, const int& f) const {
     const Word2Word2Double::const_iterator cit = ttable.find(e);
     if (cit != ttable.end()) {
-- 
cgit v1.2.3


From 1e9afb904a57ff0b03edd0e94d634ef98e7d4b2a Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>
Date: Sun, 10 Nov 2013 01:46:28 -0500
Subject: fix for c++11

---
 .gitignore                        |  2 +-
 decoder/apply_models.cc           |  2 +-
 decoder/decoder.cc                |  2 +-
 decoder/earley_composer.cc        |  2 +-
 decoder/ff_source_syntax.cc       |  2 +-
 decoder/ff_source_syntax2_p.cc    |  2 +-
 decoder/ff_source_syntax_p.cc     |  2 +-
 decoder/ff_wordalign.cc           |  2 +-
 decoder/ff_wordalign.h            |  2 +-
 decoder/ff_wordset.h              |  2 +-
 decoder/grammar.cc                |  2 +-
 decoder/hg_intersect.cc           |  2 +-
 decoder/kbest.h                   |  2 +-
 decoder/maxtrans_blunsom.cc       |  2 +-
 decoder/phrasebased_translator.cc |  2 +-
 python/cdec/sa/strmap.cc          |  2 +-
 python/setup.py.in                |  3 ++-
 training/latent_svm/latent_svm.cc | 13 ++++++-------
 training/mira/kbest_cut_mira.cc   |  7 -------
 training/utils/candidate_set.cc   |  2 +-
 word-aligner/fast_align.cc        |  2 +-
 word-aligner/ttables.h            |  2 +-
 22 files changed, 27 insertions(+), 34 deletions(-)

(limited to 'word-aligner')

diff --git a/.gitignore b/.gitignore
index 697a1a9d..5f573137 100644
--- a/.gitignore
+++ b/.gitignore
@@ -103,7 +103,7 @@ jam-files/bjam
 jam-files/engine/bin.*
 jam-files/engine/bootstrap/
 klm/lm/bin/
-klm/lm/builder/builder
+klm/lm/builder/lmplz
 klm/lm/build_binary
 klm/lm/ngram_query
 klm/lm/query
diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc
index 2e093d6a..4cd8b36f 100644
--- a/decoder/apply_models.cc
+++ b/decoder/apply_models.cc
@@ -8,7 +8,7 @@
 
 #include <vector>
 #include <algorithm>
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_map>
 # include <unordered_set>
 #else
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 2c0e07b7..da65713a 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -1,6 +1,6 @@
 #include "decoder.h"
 
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_map>
 #else
 # include <tr1/unordered_map>
diff --git a/decoder/earley_composer.cc b/decoder/earley_composer.cc
index 32c387d3..d47a6969 100644
--- a/decoder/earley_composer.cc
+++ b/decoder/earley_composer.cc
@@ -4,7 +4,7 @@
 #include <fstream>
 #include <map>
 #include <queue>
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_map>
 # include <unordered_set>
 #else
diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc
index 95709076..88f6714c 100644
--- a/decoder/ff_source_syntax.cc
+++ b/decoder/ff_source_syntax.cc
@@ -2,7 +2,7 @@
 
 #include <sstream>
 #include <stack>
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_set>
 #else
 # include <tr1/unordered_set>
diff --git a/decoder/ff_source_syntax2_p.cc b/decoder/ff_source_syntax2_p.cc
index 130144fa..6a2ae742 100644
--- a/decoder/ff_source_syntax2_p.cc
+++ b/decoder/ff_source_syntax2_p.cc
@@ -3,7 +3,7 @@
 #include <sstream>
 #include <stack>
 #include <string>
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_set>
 #else
 # include <tr1/unordered_set>
diff --git a/decoder/ff_source_syntax_p.cc b/decoder/ff_source_syntax_p.cc
index 1d3dc497..c094de59 100644
--- a/decoder/ff_source_syntax_p.cc
+++ b/decoder/ff_source_syntax_p.cc
@@ -2,7 +2,7 @@
 
 #include <sstream>
 #include <stack>
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_set>
 #else
 # include <tr1/unordered_set>
diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc
index 8ed053c2..dcb80110 100644
--- a/decoder/ff_wordalign.cc
+++ b/decoder/ff_wordalign.cc
@@ -7,7 +7,7 @@
 #include <string>
 #include <cmath>
 #include <bitset>
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_map>
 #else
 # include <tr1/unordered_map>
diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h
index 50f0dafa..0161f603 100644
--- a/decoder/ff_wordalign.h
+++ b/decoder/ff_wordalign.h
@@ -9,7 +9,7 @@
 #include <cassert>
 #include <boost/scoped_ptr.hpp>
 #include <boost/multi_array.hpp>
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_map>
 #else
 # include <tr1/unordered_map>
diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h
index affee2f4..e78cd2fb 100644
--- a/decoder/ff_wordset.h
+++ b/decoder/ff_wordset.h
@@ -9,7 +9,7 @@
 #include <iostream>
 #include <fstream>
 
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_set>
 #else
 # include <tr1/unordered_set>
diff --git a/decoder/grammar.cc b/decoder/grammar.cc
index f2530d35..160d00e6 100644
--- a/decoder/grammar.cc
+++ b/decoder/grammar.cc
@@ -3,7 +3,7 @@
 #include <algorithm>
 #include <utility>
 #include <map>
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_map>
 # include <unordered_set>
 #else
diff --git a/decoder/hg_intersect.cc b/decoder/hg_intersect.cc
index c5f1cc91..31a9a1ce 100644
--- a/decoder/hg_intersect.cc
+++ b/decoder/hg_intersect.cc
@@ -1,7 +1,7 @@
 #include "hg_intersect.h"
 
 #include <vector>
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_map>
 #else
 # include <tr1/unordered_map>
diff --git a/decoder/kbest.h b/decoder/kbest.h
index cd386aef..c7194c7e 100644
--- a/decoder/kbest.h
+++ b/decoder/kbest.h
@@ -3,7 +3,7 @@
 
 #include <vector>
 #include <utility>
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_set>
 #else
 # include <tr1/unordered_set>
diff --git a/decoder/maxtrans_blunsom.cc b/decoder/maxtrans_blunsom.cc
index 8d1d471c..a9f65fab 100644
--- a/decoder/maxtrans_blunsom.cc
+++ b/decoder/maxtrans_blunsom.cc
@@ -2,7 +2,7 @@
 
 #include <vector>
 #include <algorithm>
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_map>
 # include <unordered_set>
 #else
diff --git a/decoder/phrasebased_translator.cc b/decoder/phrasebased_translator.cc
index 321fb286..04b3e5d2 100644
--- a/decoder/phrasebased_translator.cc
+++ b/decoder/phrasebased_translator.cc
@@ -2,7 +2,7 @@
 
 #include <queue>
 #include <iostream>
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_map>
 # include <unordered_set>
 #else
diff --git a/python/cdec/sa/strmap.cc b/python/cdec/sa/strmap.cc
index d7c4f2a3..b6debfb0 100644
--- a/python/cdec/sa/strmap.cc
+++ b/python/cdec/sa/strmap.cc
@@ -4,7 +4,7 @@
 #include <string>
 #include <cstdint>
 
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_map>
 #else
 # include <tr1/unordered_map>
diff --git a/python/setup.py.in b/python/setup.py.in
index ce1eb2ed..8ed0b100 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -21,7 +21,8 @@ ext_modules = [
         extra_compile_args=CPPFLAGS,
         extra_link_args=LDFLAGS),
     Extension(name='cdec.sa._sa',
-        sources=['cdec/sa/_sa.c', 'cdec/sa/strmap.cc'])
+        sources=['cdec/sa/_sa.c', 'cdec/sa/strmap.cc'],
+        extra_compile_args=CPPFLAGS)
 ]
 
 setup(
diff --git a/training/latent_svm/latent_svm.cc b/training/latent_svm/latent_svm.cc
index ab9c1d5d..60e52550 100644
--- a/training/latent_svm/latent_svm.cc
+++ b/training/latent_svm/latent_svm.cc
@@ -32,7 +32,6 @@ total_loss and prev_loss actually refer not to loss, but the metric (usually BLE
 #include "sampler.h"
 
 using namespace std;
-using boost::shared_ptr;
 namespace po = boost::program_options;
 
 bool invert_score; 
@@ -128,7 +127,7 @@ struct HypothesisInfo {
 };
 
 struct GoodOracle {
-  shared_ptr<HypothesisInfo> good;
+  boost::shared_ptr<HypothesisInfo> good;
 };
 
 struct TrainingObserver : public DecoderObserver {
@@ -143,9 +142,9 @@ struct TrainingObserver : public DecoderObserver {
   const DocScorer& ds;
   const vector<weight_t>& feature_weights;
   vector<GoodOracle>& oracles;
-  shared_ptr<HypothesisInfo> cur_best;
-  shared_ptr<HypothesisInfo> cur_costaug_best;
-  shared_ptr<HypothesisInfo> cur_ref; 
+  boost::shared_ptr<HypothesisInfo> cur_best;
+  boost::shared_ptr<HypothesisInfo> cur_costaug_best;
+  boost::shared_ptr<HypothesisInfo> cur_ref; 
   const int kbest_size;
   const double mt_metric_scale;
   const double mu;
@@ -168,8 +167,8 @@ struct TrainingObserver : public DecoderObserver {
     UpdateOracles(smeta.GetSentenceID(), *hg);
   }
 
-  shared_ptr<HypothesisInfo> MakeHypothesisInfo(const SparseVector<double>& feats, const double metric) {
-    shared_ptr<HypothesisInfo> h(new HypothesisInfo);
+  boost::shared_ptr<HypothesisInfo> MakeHypothesisInfo(const SparseVector<double>& feats, const double metric) {
+    boost::shared_ptr<HypothesisInfo> h(new HypothesisInfo);
     h->features = feats;
     h->mt_metric_score = metric;
     return h;
diff --git a/training/mira/kbest_cut_mira.cc b/training/mira/kbest_cut_mira.cc
index 3b1108e0..990609d7 100644
--- a/training/mira/kbest_cut_mira.cc
+++ b/training/mira/kbest_cut_mira.cc
@@ -49,13 +49,6 @@ bool sent_approx;
 bool checkloss;
 bool stream;
 
-void SanityCheck(const vector<double>& w) {
-  for (int i = 0; i < w.size(); ++i) {
-    assert(!isnan(w[i]));
-    assert(!isinf(w[i]));
-  }
-}
-
 struct FComp {
   const vector<double>& w_;
   FComp(const vector<double>& w) : w_(w) {}
diff --git a/training/utils/candidate_set.cc b/training/utils/candidate_set.cc
index 1dec9609..33dae9a3 100644
--- a/training/utils/candidate_set.cc
+++ b/training/utils/candidate_set.cc
@@ -1,6 +1,6 @@
 #include "candidate_set.h"
 
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_set>
 #else
 # include <tr1/unordered_set>
diff --git a/word-aligner/fast_align.cc b/word-aligner/fast_align.cc
index 589ca62d..f54233eb 100644
--- a/word-aligner/fast_align.cc
+++ b/word-aligner/fast_align.cc
@@ -1,7 +1,7 @@
 #include <iostream>
 #include <cmath>
 #include <utility>
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_map>
 #else
 # include <tr1/unordered_map>
diff --git a/word-aligner/ttables.h b/word-aligner/ttables.h
index 1785e064..d82aff72 100644
--- a/word-aligner/ttables.h
+++ b/word-aligner/ttables.h
@@ -2,7 +2,7 @@
 #define _TTABLES_H_
 
 #include <iostream>
-#ifdef HAVE_CXX11
+#ifndef HAVE_OLD_CPP
 # include <unordered_map>
 #else
 # include <tr1/unordered_map>
-- 
cgit v1.2.3


From 40e8ba348b3a0af499a754e436fe960f780f4f7e Mon Sep 17 00:00:00 2001
From: Waleed Ammar <wammar@cs.cmu.edu>
Date: Wed, 13 Nov 2013 19:28:07 -0500
Subject: 1) fix the call to ibm model 1 aligner, 2) create a makefile target
 for generating wordpair features, 3) optionally generate sparse affix
 features (default behavior is still identical).

---
 word-aligner/aligner.pl                            |  7 +++
 word-aligner/makefiles/makefile.grammars           | 12 +++--
 .../support/generate_word_pair_features.pl         | 63 +++++++++++++++++++++-
 3 files changed, 76 insertions(+), 6 deletions(-)

(limited to 'word-aligner')

diff --git a/word-aligner/aligner.pl b/word-aligner/aligner.pl
index cbccb94a..08d95162 100755
--- a/word-aligner/aligner.pl
+++ b/word-aligner/aligner.pl
@@ -86,10 +86,17 @@ PTRAIN_PARAMS = --gaussian_prior --sigma_squared 1.0 --max_iteration 15
 #MPIRUN = mpirun -np $(MPIJOBS)
 MPIRUN=
 
+USE_AFFIXES = 0
+
 WALLTIME=90
 
 export
 
+generate-wordpair-features:
+	\@failcom='exit 1'; \\
+	(cd grammars &&	make USE_AFFIXES=\$(USE_AFFIXES) ) || eval \$\$failcom;
+	cd ..
+
 all:
 	\@failcom='exit 1'; \\
 	list='\$(TARGETS)'; for subdir in \$\$list; do \\
diff --git a/word-aligner/makefiles/makefile.grammars b/word-aligner/makefiles/makefile.grammars
index 8d3ea8cb..1db516f1 100644
--- a/word-aligner/makefiles/makefile.grammars
+++ b/word-aligner/makefiles/makefile.grammars
@@ -19,6 +19,8 @@ MAKE_LEX_GRAMMAR = $(SUPPORT_DIR)/make_lex_grammar.pl
 MODEL1 = $(SCRIPT_DIR)/fast_align
 MERGE_CORPUS = $(SUPPORT_DIR)/merge_corpus.pl
 
+USE_AFFIXES = 0
+
 e.voc: corpus.e
 	$(EXTRACT_VOCAB) < corpus.e > $@
 
@@ -66,20 +68,20 @@ corpus.e-f: corpus.f corpus.e
 	$(MERGE_CORPUS) corpus.e corpus.f > $@
 
 corpus.f-e.model1: corpus.f-e
-	$(MODEL1) -p -v -i corpus.f-e > $@
+	$(MODEL1) -p corpus.f-e.model1 -v -i corpus.f-e > $@
 
 corpus.e-f.model1: corpus.e-f
-	$(MODEL1) -p -v -V -i corpus.e-f > $@
+	$(MODEL1) -p corpus.e-f.model1 -v -V -i corpus.e-f > $@
 
 corpus.f-e.full-model1: corpus.f-e
-	$(MODEL1) -p -t -999999 -v -V -i corpus.f-e > $@
+	$(MODEL1) -p corpus.f-e.full-model1 -t -999999 -v -V -i corpus.f-e > $@
 
 corpus.e-f.full-model1: corpus.e-f
-	$(MODEL1) -p -t -999999 -v -V -i corpus.e-f > $@
+	$(MODEL1) -p corpus.e-f.full-model1 -t -999999 -v -V -i corpus.e-f > $@
 
 corpus.f-e.lex-grammar.gz: corpus.f-e corpus.f-e.model1 corpus.e-f.model1
 	$(MAKE_LEX_GRAMMAR) corpus.f-e corpus.f-e.model1 corpus.e-f.model1 | $(GZIP) -9 > $@
 
 wordpairs.f-e.features.gz: corpus.f-e corpus.f-e.full-model1 corpus.e-f.full-model1 orthonorm-dict.f orthonorm-dict.e voc2class.e voc2class.f corpus.f-e.model1
-	$(GENERATE_WORDPAIR_FEATURES) corpus.f-e corpus.f-e.full-model1 corpus.e-f.full-model1 orthonorm-dict.f orthonorm-dict.e voc2class.e voc2class.f corpus.f-e.model1 | $(GZIP) -9 > $@
+	$(GENERATE_WORDPAIR_FEATURES) corpus.f-e corpus.f-e.full-model1 corpus.e-f.full-model1 orthonorm-dict.f orthonorm-dict.e voc2class.e voc2class.f corpus.f-e.model1 $(USE_AFFIXES) $(USE_AFFIXES) | $(GZIP) -9 > $@
 
diff --git a/word-aligner/support/generate_word_pair_features.pl b/word-aligner/support/generate_word_pair_features.pl
index 54b89ce1..f3fdf149 100755
--- a/word-aligner/support/generate_word_pair_features.pl
+++ b/word-aligner/support/generate_word_pair_features.pl
@@ -2,7 +2,7 @@
 use utf8;
 use strict;
 
-my ($effile, $model1, $imodel1, $orthof, $orthoe, $class_e, $class_f, $sparse_m1) = @ARGV;
+my ($effile, $model1, $imodel1, $orthof, $orthoe, $class_e, $class_f, $sparse_m1, $use_prefixes, $use_suffixes) = @ARGV;
 die "Usage: $0 corpus.fr-en corpus.f-e.full-model1 corpus.e-f.full-model1 corpus.orthonorm-dict.f corpus.orthnorm-dict.e class.e class.f corpus.f-e.model1\n" unless $effile && -f $effile && $model1 && -f $model1 && $imodel1 && -f $imodel1 && $orthof && -f $orthof && $orthoe && -f $orthoe && -f $class_e && -f $class_f && $sparse_m1 && -f $sparse_m1;
 
 my %eclass = ();
@@ -253,10 +253,71 @@ for my $f (sort keys %fdict) {
         push @feats, "PuncMiss=1";
       }
     }
+    if ($use_prefixes) {
+      my $prefix1 = prefix_to_type($f, $e, 1);
+      if (length $prefix1 > 0 && !$is_null) { push @feats, $prefix1."=1";}
+      my $prefix2 = prefix_to_type($f, $e, 2);
+      if (length $prefix2 > 0 && !$is_null) { push @feats, $prefix2."=1";}
+      my $prefix3 = prefix_to_type($f, $e, 3);
+      if (length $prefix3 > 0 && !$is_null) { push @feats, $prefix3."=1";}
+      my $prefix1_reverse = prefix_to_type($e, $f, 1);
+      if (length $prefix1_reverse > 0 && !$is_null) { push @feats, $prefix1_reverse."=1";}
+      my $prefix2_reverse = prefix_to_type($e, $f, 2);
+      if (length $prefix2_reverse > 0 && !$is_null) { push @feats, $prefix2_reverse."=1";}
+      my $prefix3_reverse = prefix_to_type($e, $f, 3);
+      if (length $prefix3_reverse > 0 && !$is_null) { push @feats, $prefix3_reverse."=1";}
+    }
+    if ($use_suffixes) {
+      my $suffix1 = suffix_to_type($f, $e, 1);
+      if (length $suffix1 > 0 && !$is_null) { push @feats, $suffix1."=1";}
+      my $suffix2 = suffix_to_type($f, $e, 2);
+      if (length $suffix2 > 0 && !$is_null) { push @feats, $suffix2."=1";}
+      my $suffix3 = suffix_to_type($f, $e, 3);
+      if (length $suffix3 > 0 && !$is_null) { push @feats, $suffix3."=1";}
+      my $suffix1_reverse = suffix_to_type($e, $f, 1);
+      if (length $suffix1_reverse > 0 && !$is_null) { push @feats, $suffix1_reverse."=1";}
+      my $suffix2_reverse = suffix_to_type($e, $f, 2);
+      if (length $suffix2_reverse > 0 && !$is_null) { push @feats, $suffix2_reverse."=1";}
+      my $suffix3_reverse = suffix_to_type($e, $f, 3);
+      if (length $suffix3_reverse > 0 && !$is_null) { push @feats, $suffix3_reverse."=1";}
+    }
     print "$f ||| $e ||| @feats\n";
   }
 }
 
+# Returns the feature name "<first $len_prefix chars of $f>-<$e>", or "" when $f is not longer than $len_prefix or either token contains '='.
+sub prefix_to_type
+{
+    # $f => token the prefix is cut from; $e => token appended whole (callers pass the word pair in both orders)
+    # tokens containing '=' are rejected, presumably because the caller appends "=1" to the returned name
+    my ($f, $e, $len_prefix) = @_;
+    
+    if (length $f > $len_prefix && index($e.$f, '=') < 0)
+    {
+        return substr($f, 0, $len_prefix)."-".$e;
+    } 
+    else
+    {
+        return "";
+    }
+}
+
+# Returns the feature name "<last $len_prefix chars of $f>_<$e>", or "" when $f is not longer than $len_prefix or either token contains '='.
+sub suffix_to_type
+{
+    # $f => token the suffix is cut from; $e => token appended whole (callers pass the word pair in both orders)
+    # $len_prefix => number of trailing characters to keep (despite the name, this is the suffix length here)
+    my ($f, $e, $len_prefix) = @_;
+
+    if ( (length $f) > $len_prefix && index($e.$f, '=') < 0) 
+    {
+        return substr($f, (length $f)-$len_prefix, $len_prefix)."_".$e;
+    } 
+    else 
+    {
+        return "";
+    }
+}
 
 sub levenshtein
 {
-- 
cgit v1.2.3