From a50b6162b1c5b18473db9de98aa1fa73620b9af9 Mon Sep 17 00:00:00 2001 From: redpony Date: Sun, 5 Dec 2010 01:42:19 +0000 Subject: small changes git-svn-id: https://ws10smt.googlecode.com/svn/trunk@740 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/lextrans.cc | 2 +- rescore/rescore_inv_model1.pl | 14 +++++++++----- training/Makefile.am | 9 +++++---- training/mpi_online_optimize.cc | 11 +++++------ word-aligner/makefiles/makefile.grammars | 18 +++++++++--------- word-aligner/support/generate_word_pair_features.pl | 11 +++++------ 6 files changed, 34 insertions(+), 31 deletions(-) diff --git a/decoder/lextrans.cc b/decoder/lextrans.cc index 35d2d15d..149cd68d 100644 --- a/decoder/lextrans.cc +++ b/decoder/lextrans.cc @@ -81,7 +81,7 @@ struct LexicalTransImpl { for (int i = 0; i < ref.size(); ++i) { target_vocab.insert(ref[i][0].label); } - bool all_sources_to_all_targets_ = true; + bool all_sources_to_all_targets_ = false; set trgs_used; for (int i = 0; i < e_len; ++i) { // for each word in the *target* Hypergraph::Node* node = forest->AddNode(kXCAT); diff --git a/rescore/rescore_inv_model1.pl b/rescore/rescore_inv_model1.pl index 4fc3cfcc..780452f5 100755 --- a/rescore/rescore_inv_model1.pl +++ b/rescore/rescore_inv_model1.pl @@ -76,13 +76,17 @@ sub rescore { my @hyps = @$rh; my @feats = @$rf; my $nhyps = scalar @hyps; + my %cache = (); print STDERR "RESCORING SENTENCE id=$id (# hypotheses=$nhyps)...\n"; for (my $i=0; $i < $nhyps; $i++) { - my $score = 0; - if ($reverse_model) { - die "not implemented"; - } else { - $score = m1_prob($src, $hyps[$i]); + my $score = $cache{$hyps[$i]}; + if (!defined $score) { + if ($reverse_model) { + die "not implemented"; + } else { + $score = m1_prob($src, $hyps[$i]); + } + $cache{$hyps[$i]} = $score; } print "$id ||| $hyps[$i] ||| $feats[$i] $feature_name=$score\n"; } diff --git a/training/Makefile.am b/training/Makefile.am index 8218ff0a..cb17aeff 100644 --- a/training/Makefile.am +++ b/training/Makefile.am @@ -9,8 +9,9 @@ bin_PROGRAMS = \ plftools \ collapse_weights \ cllh_filter_grammar \ - mpi_online_optimize \ - mpi_batch_optimize + mpi_online_optimize + +# mpi_batch_optimize noinst_PROGRAMS = \ lbfgs_test \ @@ -21,8 +22,8 @@ TESTS = lbfgs_test optimize_test mpi_online_optimize_SOURCES = mpi_online_optimize.cc online_optimizer.cc mpi_online_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz -mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc optimize.cc -mpi_batch_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz +#mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc optimize.cc +#mpi_batch_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz if MPI bin_PROGRAMS += compute_cllh diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc index 0f994c59..325ba030 100644 --- a/training/mpi_online_optimize.cc +++ b/training/mpi_online_optimize.cc @@ -8,10 +8,6 @@ #include #include -#ifdef HAVE_MPI -#include -#include -#endif #include "verbose.h" #include "hg.h" @@ -26,6 +22,11 @@ #include "sparse_vector.h" #include "sampler.h" +#ifdef HAVE_MPI +#include +#include +namespace mpi = boost::mpi; +#endif using namespace std; namespace po = boost::program_options; @@ -197,8 +198,6 @@ struct TrainingObserver : public DecoderObserver { }; #ifdef HAVE_MPI -namespace mpi = boost::mpi; - namespace boost { namespace mpi { template<> struct is_commutative >, SparseVector > diff --git a/word-aligner/makefiles/makefile.grammars b/word-aligner/makefiles/makefile.grammars index 60417ec5..be0644df 100644 --- a/word-aligner/makefiles/makefile.grammars +++ b/word-aligner/makefiles/makefile.grammars @@ -54,28 +54,28 @@ voc2class.e: corpus.e $(MKCLS) voc2class.f: corpus.f $(MKCLS) $(MKCLS) -c$(NCLASSES) -n10 -pcorpus.f -Vvoc2class.f opt -corpus.class.e: corpus.e voc2class.e $(CLASSIFY) +corpus.class.e: corpus.e voc2class.e $(CLASSIFY) voc2class.e corpus.e > $@ -corpus.class.f: corpus.f voc2class.f $(CLASSIFY) +corpus.class.f: corpus.f voc2class.f $(CLASSIFY) voc2class.f corpus.f > $@ -corpus.f-e: corpus.f corpus.e $(MERGE_CORPUS) +corpus.f-e: corpus.f corpus.e $(MERGE_CORPUS) corpus.f corpus.e > $@ -corpus.e-f: corpus.f corpus.e $(MERGE_CORPUS) +corpus.e-f: corpus.f corpus.e $(MERGE_CORPUS) corpus.e corpus.f > $@ -corpus.f-e.model1: corpus.f-e $(MODEL1) - $(MODEL1) -v -V corpus.f-e > $@ +corpus.f-e.model1: corpus.f-e + $(MODEL1) -v corpus.f-e > $@ -corpus.e-f.model1: corpus.e-f $(MODEL1) +corpus.e-f.model1: corpus.e-f $(MODEL1) -v -V corpus.e-f > $@ -corpus.f-e.full-model1: corpus.f-e $(MODEL1) +corpus.f-e.full-model1: corpus.f-e $(MODEL1) -t -999999 -v -V corpus.f-e > $@ -corpus.e-f.full-model1: corpus.e-f $(MODEL1) +corpus.e-f.full-model1: corpus.e-f $(MODEL1) -t -999999 -v -V corpus.e-f > $@ corpus.f-e.lex-grammar.gz: corpus.f-e corpus.f-e.model1 corpus.e-f.model1 diff --git a/word-aligner/support/generate_word_pair_features.pl b/word-aligner/support/generate_word_pair_features.pl index b722ee49..b28f6feb 100755 --- a/word-aligner/support/generate_word_pair_features.pl +++ b/word-aligner/support/generate_word_pair_features.pl @@ -10,9 +10,7 @@ my %fclass = (); load_classes($class_e, \%eclass); load_classes($class_f, \%fclass); -our @IDENT_BINS = qw (Ident0 Ident1 Ident2 Ident3 Ident4 Ident5 Ident6 Ident7 Ident8_9 Ident8_9 Ident10_11 Ident10_11 Ident12_14 Ident12_14 Ident12_14); -die unless scalar @IDENT_BINS == 15; -our $MAX_IDENT_BIN = 'IdentGT' . scalar @IDENT_BINS; +our @IDENT_BINS = qw (Ident0 Ident1 Ident2 Ident3 Ident4 Ident5 Ident6 Ident7 Ident8 Ident9); my $MIN_MAGNITUDE = 0.001; # minimum value of a feature @@ -203,8 +201,8 @@ for my $f (sort keys %fdict) { } } } - my $f_num = ($of =~ /^-?\d[0-9\.\,]+%?$/ && (length($of) > 3)); - my $e_num = ($oe =~ /^-?\d[0-9\.\,]+%?$/ && (length($oe) > 3)); + my $f_num = ($of =~ /^-?\d[0-9\.\,]+%?$/ && (length($of) > 2)); + my $e_num = ($oe =~ /^-?\d[0-9\.\,]+%?$/ && (length($oe) > 2)); my $both_non_numeric = (!$e_num && !$f_num); unless ($total_eandf > 20) { @@ -425,7 +423,8 @@ sub dlenbin { sub identbin { my $x = shift; if ($x == 0) { die; } - if ($x > scalar @IDENT_BINS) { return $MAX_IDENT_BIN; } + $x = int(log($x + 1) / log(1.3)); + if ($x >= scalar @IDENT_BINS) { return $IDENT_BINS[-1]; } return $IDENT_BINS[$x]; } -- cgit v1.2.3