From d9cc1a6986188a97e09e4c8cef46c34eee5f9cd2 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 10 Nov 2013 00:58:44 -0500 Subject: guard against direct includes of tr1 --- word-aligner/fast_align.cc | 8 ++++++-- word-aligner/ttables.cc | 1 - word-aligner/ttables.h | 11 ++++++++--- 3 files changed, 14 insertions(+), 6 deletions(-) (limited to 'word-aligner') diff --git a/word-aligner/fast_align.cc b/word-aligner/fast_align.cc index fddcba9c..589ca62d 100644 --- a/word-aligner/fast_align.cc +++ b/word-aligner/fast_align.cc @@ -1,7 +1,12 @@ #include #include #include -#include +#ifdef HAVE_CXX11 +# include +#else +# include +namespace std { using std::tr1::unordered_map; } +#endif #include #include @@ -17,7 +22,6 @@ namespace po = boost::program_options; using namespace std; -using namespace std::tr1; bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); diff --git a/word-aligner/ttables.cc b/word-aligner/ttables.cc index c177aa30..a56bbcef 100644 --- a/word-aligner/ttables.cc +++ b/word-aligner/ttables.cc @@ -5,7 +5,6 @@ #include "dict.h" using namespace std; -using namespace std::tr1; void TTable::DeserializeProbsFromText(std::istream* in) { int c = 0; diff --git a/word-aligner/ttables.h b/word-aligner/ttables.h index 507f591a..1785e064 100644 --- a/word-aligner/ttables.h +++ b/word-aligner/ttables.h @@ -2,7 +2,12 @@ #define _TTABLES_H_ #include -#include +#ifdef HAVE_CXX11 +# include +#else +# include +namespace std { using std::tr1::unordered_map; } +#endif #include "sparse_vector.h" #include "m.h" @@ -12,8 +17,8 @@ class TTable { public: TTable() {} - typedef std::tr1::unordered_map Word2Double; - typedef std::tr1::unordered_map Word2Word2Double; + typedef std::unordered_map Word2Double; + typedef std::unordered_map Word2Word2Double; inline double prob(const int& e, const int& f) const { const Word2Word2Double::const_iterator cit = ttable.find(e); if (cit != ttable.end()) { -- cgit v1.2.3 
From 1e9afb904a57ff0b03edd0e94d634ef98e7d4b2a Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 10 Nov 2013 01:46:28 -0500 Subject: fix for c++11 --- .gitignore | 2 +- decoder/apply_models.cc | 2 +- decoder/decoder.cc | 2 +- decoder/earley_composer.cc | 2 +- decoder/ff_source_syntax.cc | 2 +- decoder/ff_source_syntax2_p.cc | 2 +- decoder/ff_source_syntax_p.cc | 2 +- decoder/ff_wordalign.cc | 2 +- decoder/ff_wordalign.h | 2 +- decoder/ff_wordset.h | 2 +- decoder/grammar.cc | 2 +- decoder/hg_intersect.cc | 2 +- decoder/kbest.h | 2 +- decoder/maxtrans_blunsom.cc | 2 +- decoder/phrasebased_translator.cc | 2 +- python/cdec/sa/strmap.cc | 2 +- python/setup.py.in | 3 ++- training/latent_svm/latent_svm.cc | 13 ++++++------- training/mira/kbest_cut_mira.cc | 7 ------- training/utils/candidate_set.cc | 2 +- word-aligner/fast_align.cc | 2 +- word-aligner/ttables.h | 2 +- 22 files changed, 27 insertions(+), 34 deletions(-) (limited to 'word-aligner') diff --git a/.gitignore b/.gitignore index 697a1a9d..5f573137 100644 --- a/.gitignore +++ b/.gitignore @@ -103,7 +103,7 @@ jam-files/bjam jam-files/engine/bin.* jam-files/engine/bootstrap/ klm/lm/bin/ -klm/lm/builder/builder +klm/lm/builder/lmplz klm/lm/build_binary klm/lm/ngram_query klm/lm/query diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc index 2e093d6a..4cd8b36f 100644 --- a/decoder/apply_models.cc +++ b/decoder/apply_models.cc @@ -8,7 +8,7 @@ #include #include -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include # include #else diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 2c0e07b7..da65713a 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -1,6 +1,6 @@ #include "decoder.h" -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include #else # include diff --git a/decoder/earley_composer.cc b/decoder/earley_composer.cc index 32c387d3..d47a6969 100644 --- a/decoder/earley_composer.cc +++ b/decoder/earley_composer.cc @@ -4,7 +4,7 @@ #include #include #include -#ifdef HAVE_CXX11 +#ifndef 
HAVE_OLD_CPP # include # include #else diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc index 95709076..88f6714c 100644 --- a/decoder/ff_source_syntax.cc +++ b/decoder/ff_source_syntax.cc @@ -2,7 +2,7 @@ #include #include -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include #else # include diff --git a/decoder/ff_source_syntax2_p.cc b/decoder/ff_source_syntax2_p.cc index 130144fa..6a2ae742 100644 --- a/decoder/ff_source_syntax2_p.cc +++ b/decoder/ff_source_syntax2_p.cc @@ -3,7 +3,7 @@ #include #include #include -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include #else # include diff --git a/decoder/ff_source_syntax_p.cc b/decoder/ff_source_syntax_p.cc index 1d3dc497..c094de59 100644 --- a/decoder/ff_source_syntax_p.cc +++ b/decoder/ff_source_syntax_p.cc @@ -2,7 +2,7 @@ #include #include -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include #else # include diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc index 8ed053c2..dcb80110 100644 --- a/decoder/ff_wordalign.cc +++ b/decoder/ff_wordalign.cc @@ -7,7 +7,7 @@ #include #include #include -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include #else # include diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h index 50f0dafa..0161f603 100644 --- a/decoder/ff_wordalign.h +++ b/decoder/ff_wordalign.h @@ -9,7 +9,7 @@ #include #include #include -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include #else # include diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h index affee2f4..e78cd2fb 100644 --- a/decoder/ff_wordset.h +++ b/decoder/ff_wordset.h @@ -9,7 +9,7 @@ #include #include -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include #else # include diff --git a/decoder/grammar.cc b/decoder/grammar.cc index f2530d35..160d00e6 100644 --- a/decoder/grammar.cc +++ b/decoder/grammar.cc @@ -3,7 +3,7 @@ #include #include #include -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include # include #else diff --git a/decoder/hg_intersect.cc b/decoder/hg_intersect.cc index c5f1cc91..31a9a1ce 100644 --- 
a/decoder/hg_intersect.cc +++ b/decoder/hg_intersect.cc @@ -1,7 +1,7 @@ #include "hg_intersect.h" #include -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include #else # include diff --git a/decoder/kbest.h b/decoder/kbest.h index cd386aef..c7194c7e 100644 --- a/decoder/kbest.h +++ b/decoder/kbest.h @@ -3,7 +3,7 @@ #include #include -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include #else # include diff --git a/decoder/maxtrans_blunsom.cc b/decoder/maxtrans_blunsom.cc index 8d1d471c..a9f65fab 100644 --- a/decoder/maxtrans_blunsom.cc +++ b/decoder/maxtrans_blunsom.cc @@ -2,7 +2,7 @@ #include #include -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include # include #else diff --git a/decoder/phrasebased_translator.cc b/decoder/phrasebased_translator.cc index 321fb286..04b3e5d2 100644 --- a/decoder/phrasebased_translator.cc +++ b/decoder/phrasebased_translator.cc @@ -2,7 +2,7 @@ #include #include -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include # include #else diff --git a/python/cdec/sa/strmap.cc b/python/cdec/sa/strmap.cc index d7c4f2a3..b6debfb0 100644 --- a/python/cdec/sa/strmap.cc +++ b/python/cdec/sa/strmap.cc @@ -4,7 +4,7 @@ #include #include -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include #else # include diff --git a/python/setup.py.in b/python/setup.py.in index ce1eb2ed..8ed0b100 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -21,7 +21,8 @@ ext_modules = [ extra_compile_args=CPPFLAGS, extra_link_args=LDFLAGS), Extension(name='cdec.sa._sa', - sources=['cdec/sa/_sa.c', 'cdec/sa/strmap.cc']) + sources=['cdec/sa/_sa.c', 'cdec/sa/strmap.cc'], + extra_compile_args=CPPFLAGS) ] setup( diff --git a/training/latent_svm/latent_svm.cc b/training/latent_svm/latent_svm.cc index ab9c1d5d..60e52550 100644 --- a/training/latent_svm/latent_svm.cc +++ b/training/latent_svm/latent_svm.cc @@ -32,7 +32,6 @@ total_loss and prev_loss actually refer not to loss, but the metric (usually BLE #include "sampler.h" using namespace std; -using boost::shared_ptr; namespace po = 
boost::program_options; bool invert_score; @@ -128,7 +127,7 @@ struct HypothesisInfo { }; struct GoodOracle { - shared_ptr good; + boost::shared_ptr good; }; struct TrainingObserver : public DecoderObserver { @@ -143,9 +142,9 @@ struct TrainingObserver : public DecoderObserver { const DocScorer& ds; const vector& feature_weights; vector& oracles; - shared_ptr cur_best; - shared_ptr cur_costaug_best; - shared_ptr cur_ref; + boost::shared_ptr cur_best; + boost::shared_ptr cur_costaug_best; + boost::shared_ptr cur_ref; const int kbest_size; const double mt_metric_scale; const double mu; @@ -168,8 +167,8 @@ struct TrainingObserver : public DecoderObserver { UpdateOracles(smeta.GetSentenceID(), *hg); } - shared_ptr MakeHypothesisInfo(const SparseVector& feats, const double metric) { - shared_ptr h(new HypothesisInfo); + boost::shared_ptr MakeHypothesisInfo(const SparseVector& feats, const double metric) { + boost::shared_ptr h(new HypothesisInfo); h->features = feats; h->mt_metric_score = metric; return h; diff --git a/training/mira/kbest_cut_mira.cc b/training/mira/kbest_cut_mira.cc index 3b1108e0..990609d7 100644 --- a/training/mira/kbest_cut_mira.cc +++ b/training/mira/kbest_cut_mira.cc @@ -49,13 +49,6 @@ bool sent_approx; bool checkloss; bool stream; -void SanityCheck(const vector& w) { - for (int i = 0; i < w.size(); ++i) { - assert(!isnan(w[i])); - assert(!isinf(w[i])); - } -} - struct FComp { const vector& w_; FComp(const vector& w) : w_(w) {} diff --git a/training/utils/candidate_set.cc b/training/utils/candidate_set.cc index 1dec9609..33dae9a3 100644 --- a/training/utils/candidate_set.cc +++ b/training/utils/candidate_set.cc @@ -1,6 +1,6 @@ #include "candidate_set.h" -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include #else # include diff --git a/word-aligner/fast_align.cc b/word-aligner/fast_align.cc index 589ca62d..f54233eb 100644 --- a/word-aligner/fast_align.cc +++ b/word-aligner/fast_align.cc @@ -1,7 +1,7 @@ #include #include #include -#ifdef HAVE_CXX11 
+#ifndef HAVE_OLD_CPP # include #else # include diff --git a/word-aligner/ttables.h b/word-aligner/ttables.h index 1785e064..d82aff72 100644 --- a/word-aligner/ttables.h +++ b/word-aligner/ttables.h @@ -2,7 +2,7 @@ #define _TTABLES_H_ #include -#ifdef HAVE_CXX11 +#ifndef HAVE_OLD_CPP # include #else # include -- cgit v1.2.3 From 40e8ba348b3a0af499a754e436fe960f780f4f7e Mon Sep 17 00:00:00 2001 From: Waleed Ammar Date: Wed, 13 Nov 2013 19:28:07 -0500 Subject: 1) fix the call to ibm model 1 aligner, 2) create a makefile target for generating wordpair features, 3) optionally generate sparse affix features (default behavior is still identical). --- word-aligner/aligner.pl | 7 +++ word-aligner/makefiles/makefile.grammars | 12 +++-- .../support/generate_word_pair_features.pl | 63 +++++++++++++++++++++- 3 files changed, 76 insertions(+), 6 deletions(-) (limited to 'word-aligner') diff --git a/word-aligner/aligner.pl b/word-aligner/aligner.pl index cbccb94a..08d95162 100755 --- a/word-aligner/aligner.pl +++ b/word-aligner/aligner.pl @@ -86,10 +86,17 @@ PTRAIN_PARAMS = --gaussian_prior --sigma_squared 1.0 --max_iteration 15 #MPIRUN = mpirun -np $(MPIJOBS) MPIRUN= +USE_AFFIXES = 0 + WALLTIME=90 export +generate-wordpair-features: + \@failcom='exit 1'; \\ + (cd grammars && make USE_AFFIXES=\$(USE_AFFIXES) ) || eval \$\$failcom; + cd .. 
+ all: \@failcom='exit 1'; \\ list='\$(TARGETS)'; for subdir in \$\$list; do \\ diff --git a/word-aligner/makefiles/makefile.grammars b/word-aligner/makefiles/makefile.grammars index 8d3ea8cb..1db516f1 100644 --- a/word-aligner/makefiles/makefile.grammars +++ b/word-aligner/makefiles/makefile.grammars @@ -19,6 +19,8 @@ MAKE_LEX_GRAMMAR = $(SUPPORT_DIR)/make_lex_grammar.pl MODEL1 = $(SCRIPT_DIR)/fast_align MERGE_CORPUS = $(SUPPORT_DIR)/merge_corpus.pl +USE_AFFIXES = 0 + e.voc: corpus.e $(EXTRACT_VOCAB) < corpus.e > $@ @@ -66,20 +68,20 @@ corpus.e-f: corpus.f corpus.e $(MERGE_CORPUS) corpus.e corpus.f > $@ corpus.f-e.model1: corpus.f-e - $(MODEL1) -p -v -i corpus.f-e > $@ + $(MODEL1) -p corpus.f-e.model1 -v -i corpus.f-e > $@ corpus.e-f.model1: corpus.e-f - $(MODEL1) -p -v -V -i corpus.e-f > $@ + $(MODEL1) -p corpus.e-f.model1 -v -V -i corpus.e-f > $@ corpus.f-e.full-model1: corpus.f-e - $(MODEL1) -p -t -999999 -v -V -i corpus.f-e > $@ + $(MODEL1) -p corpus.f-e.full-model1 -t -999999 -v -V -i corpus.f-e > $@ corpus.e-f.full-model1: corpus.e-f - $(MODEL1) -p -t -999999 -v -V -i corpus.e-f > $@ + $(MODEL1) -p corpus.e-f.full-model1 -t -999999 -v -V -i corpus.e-f > $@ corpus.f-e.lex-grammar.gz: corpus.f-e corpus.f-e.model1 corpus.e-f.model1 $(MAKE_LEX_GRAMMAR) corpus.f-e corpus.f-e.model1 corpus.e-f.model1 | $(GZIP) -9 > $@ wordpairs.f-e.features.gz: corpus.f-e corpus.f-e.full-model1 corpus.e-f.full-model1 orthonorm-dict.f orthonorm-dict.e voc2class.e voc2class.f corpus.f-e.model1 - $(GENERATE_WORDPAIR_FEATURES) corpus.f-e corpus.f-e.full-model1 corpus.e-f.full-model1 orthonorm-dict.f orthonorm-dict.e voc2class.e voc2class.f corpus.f-e.model1 | $(GZIP) -9 > $@ + $(GENERATE_WORDPAIR_FEATURES) corpus.f-e corpus.f-e.full-model1 corpus.e-f.full-model1 orthonorm-dict.f orthonorm-dict.e voc2class.e voc2class.f corpus.f-e.model1 $(USE_AFFIXES) $(USE_AFFIXES) | $(GZIP) -9 > $@ diff --git a/word-aligner/support/generate_word_pair_features.pl 
b/word-aligner/support/generate_word_pair_features.pl index 54b89ce1..f3fdf149 100755 --- a/word-aligner/support/generate_word_pair_features.pl +++ b/word-aligner/support/generate_word_pair_features.pl @@ -2,7 +2,7 @@ use utf8; use strict; -my ($effile, $model1, $imodel1, $orthof, $orthoe, $class_e, $class_f, $sparse_m1) = @ARGV; +my ($effile, $model1, $imodel1, $orthof, $orthoe, $class_e, $class_f, $sparse_m1, $use_prefixes, $use_suffixes) = @ARGV; die "Usage: $0 corpus.fr-en corpus.f-e.full-model1 corpus.e-f.full-model1 corpus.orthonorm-dict.f corpus.orthnorm-dict.e class.e class.f corpus.f-e.model1\n" unless $effile && -f $effile && $model1 && -f $model1 && $imodel1 && -f $imodel1 && $orthof && -f $orthof && $orthoe && -f $orthoe && -f $class_e && -f $class_f && $sparse_m1 && -f $sparse_m1; my %eclass = (); @@ -253,10 +253,71 @@ for my $f (sort keys %fdict) { push @feats, "PuncMiss=1"; } } + if ($use_prefixes) { + my $prefix1 = prefix_to_type($f, $e, 1); + if (length $prefix1 > 0 && !$is_null) { push @feats, $prefix1."=1";} + my $prefix2 = prefix_to_type($f, $e, 2); + if (length $prefix2 > 0 && !$is_null) { push @feats, $prefix2."=1";} + my $prefix3 = prefix_to_type($f, $e, 3); + if (length $prefix3 > 0 && !$is_null) { push @feats, $prefix3."=1";} + my $prefix1_reverse = prefix_to_type($e, $f, 1); + if (length $prefix1_reverse > 0 && !$is_null) { push @feats, $prefix1_reverse."=1";} + my $prefix2_reverse = prefix_to_type($e, $f, 2); + if (length $prefix2_reverse > 0 && !$is_null) { push @feats, $prefix2_reverse."=1";} + my $prefix3_reverse = prefix_to_type($e, $f, 3); + if (length $prefix3_reverse > 0 && !$is_null) { push @feats, $prefix3_reverse."=1";} + } + if ($use_suffixes) { + my $suffix1 = suffix_to_type($f, $e, 1); + if (length $suffix1 > 0 && !$is_null) { push @feats, $suffix1."=1";} + my $suffix2 = suffix_to_type($f, $e, 2); + if (length $suffix2 > 0 && !$is_null) { push @feats, $suffix2."=1";} + my $suffix3 = suffix_to_type($f, $e, 3); + if (length 
$suffix3 > 0 && !$is_null) { push @feats, $suffix3."=1";} + my $suffix1_reverse = suffix_to_type($e, $f, 1); + if (length $suffix1_reverse > 0 && !$is_null) { push @feats, $suffix1_reverse."=1";} + my $suffix2_reverse = suffix_to_type($e, $f, 2); + if (length $suffix2_reverse > 0 && !$is_null) { push @feats, $suffix2_reverse."=1";} + my $suffix3_reverse = suffix_to_type($e, $f, 3); + if (length $suffix3_reverse > 0 && !$is_null) { push @feats, $suffix3_reverse."=1";} + } print "$f ||| $e ||| @feats\n"; } } +# returns a feature string instantiating the pattern "(source_prefix,target)" +sub prefix_to_type +{ + # $f => src token + # $e => tgt token + my ($f, $e, $len_prefix) = @_; + + if (length $f > $len_prefix && index($e.$f, '=') < 0) + { + return substr($f, 0, $len_prefix)."-".$e; + } + else + { + return ""; + } +} + +# returns a feature string instantiating the pattern "(source_suffix,target)" +sub suffix_to_type +{ + # $f => src token + # $e => tgt token + my ($f, $e, $len_prefix) = @_; + + if ( (length $f) > $len_prefix && index($e.$f, '=') < 0) + { + return substr($f, (length $f)-$len_prefix, $len_prefix)."_".$e; + } + else + { + return ""; + } +} sub levenshtein { -- cgit v1.2.3