diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-12-05 01:42:19 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-12-05 01:42:19 +0000 |
commit | 670356efa26cd3bba3bf7047701e9fad1aeed0cb (patch) | |
tree | b8d5e3d1364933dca3301a9579ef7531dfd14c1b | |
parent | d52db01a2e224869c6ea72a4a234e888c6fd756c (diff) |
small changes
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@740 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r-- | decoder/lextrans.cc | 2 | ||||
-rwxr-xr-x | rescore/rescore_inv_model1.pl | 14 | ||||
-rw-r--r-- | training/Makefile.am | 9 | ||||
-rw-r--r-- | training/mpi_online_optimize.cc | 11 | ||||
-rw-r--r-- | word-aligner/makefiles/makefile.grammars | 18 | ||||
-rwxr-xr-x | word-aligner/support/generate_word_pair_features.pl | 11 |
6 files changed, 34 insertions(+), 31 deletions(-)
diff --git a/decoder/lextrans.cc b/decoder/lextrans.cc index 35d2d15d..149cd68d 100644 --- a/decoder/lextrans.cc +++ b/decoder/lextrans.cc @@ -81,7 +81,7 @@ struct LexicalTransImpl { for (int i = 0; i < ref.size(); ++i) { target_vocab.insert(ref[i][0].label); } - bool all_sources_to_all_targets_ = true; + bool all_sources_to_all_targets_ = false; set<WordID> trgs_used; for (int i = 0; i < e_len; ++i) { // for each word in the *target* Hypergraph::Node* node = forest->AddNode(kXCAT);
diff --git a/rescore/rescore_inv_model1.pl b/rescore/rescore_inv_model1.pl index 4fc3cfcc..780452f5 100755 --- a/rescore/rescore_inv_model1.pl +++ b/rescore/rescore_inv_model1.pl @@ -76,13 +76,17 @@ sub rescore { my @hyps = @$rh; my @feats = @$rf; my $nhyps = scalar @hyps; + my %cache = (); print STDERR "RESCORING SENTENCE id=$id (# hypotheses=$nhyps)...\n"; for (my $i=0; $i < $nhyps; $i++) { - my $score = 0; - if ($reverse_model) { - die "not implemented"; - } else { - $score = m1_prob($src, $hyps[$i]); + my $score = $cache{$hyps[$i]}; + if (!defined $score) { + if ($reverse_model) { + die "not implemented"; + } else { + $score = m1_prob($src, $hyps[$i]); + } + $cache{$hyps[$i]} = $score; } print "$id ||| $hyps[$i] ||| $feats[$i] $feature_name=$score\n"; }
diff --git a/training/Makefile.am b/training/Makefile.am index 8218ff0a..cb17aeff 100644 --- a/training/Makefile.am +++ b/training/Makefile.am @@ -9,8 +9,9 @@ bin_PROGRAMS = \ plftools \ collapse_weights \ cllh_filter_grammar \ - mpi_online_optimize \ - mpi_batch_optimize + mpi_online_optimize + +# mpi_batch_optimize noinst_PROGRAMS = \ lbfgs_test \ @@ -21,8 +22,8 @@ TESTS = lbfgs_test optimize_test mpi_online_optimize_SOURCES = mpi_online_optimize.cc online_optimizer.cc mpi_online_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz -mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc optimize.cc -mpi_batch_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz +#mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc optimize.cc +#mpi_batch_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz if MPI bin_PROGRAMS += compute_cllh
diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc index 0f994c59..325ba030 100644 --- a/training/mpi_online_optimize.cc +++ b/training/mpi_online_optimize.cc @@ -8,10 +8,6 @@ #include <boost/program_options.hpp> #include <boost/program_options/variables_map.hpp> -#ifdef HAVE_MPI -#include <boost/mpi/timer.hpp> -#include <boost/mpi.hpp> -#endif #include "verbose.h" #include "hg.h" @@ -26,6 +22,11 @@ #include "sparse_vector.h" #include "sampler.h" +#ifdef HAVE_MPI +#include <boost/mpi/timer.hpp> +#include <boost/mpi.hpp> +namespace mpi = boost::mpi; +#endif using namespace std; namespace po = boost::program_options; @@ -197,8 +198,6 @@ struct TrainingObserver : public DecoderObserver { }; #ifdef HAVE_MPI -namespace mpi = boost::mpi; - namespace boost { namespace mpi { template<> struct is_commutative<std::plus<SparseVector<double> >, SparseVector<double> >
diff --git a/word-aligner/makefiles/makefile.grammars b/word-aligner/makefiles/makefile.grammars index 60417ec5..be0644df 100644 --- a/word-aligner/makefiles/makefile.grammars +++ b/word-aligner/makefiles/makefile.grammars @@ -54,28 +54,28 @@ voc2class.e: corpus.e $(MKCLS) voc2class.f: corpus.f $(MKCLS) $(MKCLS) -c$(NCLASSES) -n10 -pcorpus.f -Vvoc2class.f opt -corpus.class.e: corpus.e voc2class.e $(CLASSIFY) +corpus.class.e: corpus.e voc2class.e $(CLASSIFY) voc2class.e corpus.e > $@ -corpus.class.f: corpus.f voc2class.f $(CLASSIFY) +corpus.class.f: corpus.f voc2class.f $(CLASSIFY) voc2class.f corpus.f > $@ -corpus.f-e: corpus.f corpus.e $(MERGE_CORPUS) +corpus.f-e: corpus.f corpus.e $(MERGE_CORPUS) corpus.f corpus.e > $@ -corpus.e-f: corpus.f corpus.e $(MERGE_CORPUS) +corpus.e-f: corpus.f corpus.e $(MERGE_CORPUS) corpus.e corpus.f > $@ -corpus.f-e.model1: corpus.f-e $(MODEL1) - $(MODEL1) -v -V corpus.f-e > $@ +corpus.f-e.model1: corpus.f-e + $(MODEL1) -v corpus.f-e > $@ -corpus.e-f.model1: corpus.e-f $(MODEL1) +corpus.e-f.model1: corpus.e-f $(MODEL1) -v -V corpus.e-f > $@ -corpus.f-e.full-model1: corpus.f-e $(MODEL1) +corpus.f-e.full-model1: corpus.f-e $(MODEL1) -t -999999 -v -V corpus.f-e > $@ -corpus.e-f.full-model1: corpus.e-f $(MODEL1) +corpus.e-f.full-model1: corpus.e-f $(MODEL1) -t -999999 -v -V corpus.e-f > $@ corpus.f-e.lex-grammar.gz: corpus.f-e corpus.f-e.model1 corpus.e-f.model1
diff --git a/word-aligner/support/generate_word_pair_features.pl b/word-aligner/support/generate_word_pair_features.pl index b722ee49..b28f6feb 100755 --- a/word-aligner/support/generate_word_pair_features.pl +++ b/word-aligner/support/generate_word_pair_features.pl @@ -10,9 +10,7 @@ my %fclass = (); load_classes($class_e, \%eclass); load_classes($class_f, \%fclass); -our @IDENT_BINS = qw (Ident0 Ident1 Ident2 Ident3 Ident4 Ident5 Ident6 Ident7 Ident8_9 Ident8_9 Ident10_11 Ident10_11 Ident12_14 Ident12_14 Ident12_14); -die unless scalar @IDENT_BINS == 15; -our $MAX_IDENT_BIN = 'IdentGT' . scalar @IDENT_BINS; +our @IDENT_BINS = qw (Ident0 Ident1 Ident2 Ident3 Ident4 Ident5 Ident6 Ident7 Ident8 Ident9); my $MIN_MAGNITUDE = 0.001; # minimum value of a feature @@ -203,8 +201,8 @@ for my $f (sort keys %fdict) { } } } - my $f_num = ($of =~ /^-?\d[0-9\.\,]+%?$/ && (length($of) > 3)); - my $e_num = ($oe =~ /^-?\d[0-9\.\,]+%?$/ && (length($oe) > 3)); + my $f_num = ($of =~ /^-?\d[0-9\.\,]+%?$/ && (length($of) > 2)); + my $e_num = ($oe =~ /^-?\d[0-9\.\,]+%?$/ && (length($oe) > 2)); my $both_non_numeric = (!$e_num && !$f_num); unless ($total_eandf > 20) { @@ -425,7 +423,8 @@ sub dlenbin { sub identbin { my $x = shift; if ($x == 0) { die; } - if ($x > scalar @IDENT_BINS) { return $MAX_IDENT_BIN; } + $x = int(log($x + 1) / log(1.3)); + if ($x >= scalar @IDENT_BINS) { return $IDENT_BINS[-1]; } return $IDENT_BINS[$x]; }