From eecf255ed7aaabb9ce2decd80dd115240e468327 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 29 Apr 2012 17:19:18 +0200 Subject: reverted changes in upstream --- compound-split/compound-split.pl | 2 +- decoder/scfg_translator.cc | 14 +++++++------- dtrain/dtrain.cc | 2 +- dtrain/test/example/dtrain.ini | 2 +- environment/LocalConfig.pm | 2 +- klm/compile.sh | 2 ++ mira/kbest_mira.cc | 2 +- utils/dict.h | 2 +- utils/fast_sparse_vector.h | 5 ++--- utils/sampler.h | 2 +- 10 files changed, 18 insertions(+), 17 deletions(-) mode change 100755 => 100644 klm/compile.sh diff --git a/compound-split/compound-split.pl b/compound-split/compound-split.pl index 807ddb0f..62259146 100755 --- a/compound-split/compound-split.pl +++ b/compound-split/compound-split.pl @@ -32,7 +32,7 @@ $LANG = lc $LANG; die "Can't find $CDEC\n" unless -f $CDEC; die "Can't execute $CDEC\n" unless -x $CDEC; die "Don't know about language: $LANG\n" unless -d "./$LANG"; -my $CONFIG="/mnt/proj/developer/simianer/cdec_head/compound-split/cdec-$LANG.ini"; +my $CONFIG="cdec-$LANG.ini"; die "Can't find $CONFIG" unless -f $CONFIG; die "--output must be '1best' or 'plf'\n" unless ($OUTPUT =~ /^(plf|1best)$/); print STDERR "(Run with --help for options)\n"; diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index 646d67fa..15abb600 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -107,15 +107,15 @@ struct SCFGTranslatorImpl { bool operator()(const GrammarPtr& x) const { return x->GetGrammarName() == name_; } const string name_; }; void SetSentenceGrammarFromString(const std::string& grammar_str) { - assert( grammar_str != "" ); + assert(grammar_str != ""); if (!SILENT) cerr << "Setting sentence grammar" << endl; usingSentenceGrammar = true; - istringstream in( grammar_str ); - TextGrammar* sent_grammar = new TextGrammar( &in ); - sent_grammar->SetMaxSpan( max_span_limit ); - sent_grammar->SetGrammarName( "__psg" ); - grammars.erase ( remove_if(grammars.begin(), grammars.end(), NameEquals("__psg")), grammars.end() ); - grammars.push_back( GrammarPtr(sent_grammar) ); + istringstream in(grammar_str); + TextGrammar* sent_grammar = new TextGrammar(&in); + sent_grammar->SetMaxSpan(max_span_limit); + sent_grammar->SetGrammarName("__psg"); + grammars.erase(remove_if(grammars.begin(), grammars.end(), NameEquals("__psg")), grammars.end()); + grammars.push_back(GrammarPtr(sent_grammar)); } bool Translate(const string& input, diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc index b662cd26..8b1fc953 100644 --- a/dtrain/dtrain.cc +++ b/dtrain/dtrain.cc @@ -518,7 +518,7 @@ main(int argc, char** argv) } unsigned nonz = 0; - if (!quiet || hstreaming) nonz = (unsigned)lambdas.size_nonzero(); + if (!quiet || hstreaming) nonz = (unsigned)lambdas.num_nonzero(); if (!quiet) { cerr << _p5 << _p << "WEIGHTS" << endl; diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini index e43d6b34..f87ee9cf 100644 --- a/dtrain/test/example/dtrain.ini +++ b/dtrain/test/example/dtrain.ini @@ -5,7 +5,7 @@ decoder_config=test/example/cdec.ini # config for cdec # weights for these features will be printed on each iteration print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough tmp=/tmp -stop_after=20 # stop epoch after 20 inputs +stop_after=10 # stop epoch after 20 inputs # interesting stuff epochs=3 # run over input 3 times diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm index ecabe75d..abae1e3b 100644 --- a/environment/LocalConfig.pm +++ b/environment/LocalConfig.pm @@ -66,7 +66,7 @@ my $CCONFIG = { 'QSubMemFlag' => ' ', 'JobControl' => 'fork', 'DefaultJobs' => 2, - } + }, }; our $senvironment_name = 'LOCAL'; diff --git a/klm/compile.sh b/klm/compile.sh old mode 100755 new mode 100644 index 8ca89da4..56f2e9b2 --- a/klm/compile.sh +++ b/klm/compile.sh @@ -3,6 +3,8 @@ #If your code uses ICU, edit util/string_piece.hh and uncomment #define USE_ICU #I use zlib by default. If you don't want to depend on zlib, remove #define USE_ZLIB from util/file_piece.hh +#don't need to use if compiling with moses Makefiles already + set -e for i in util/{bit_packing,ersatz_progress,exception,file_piece,murmur_hash,file,mmap} lm/{bhiksha,binary_format,config,lm_exception,model,quantize,read_arpa,search_hashed,search_trie,trie,trie_sort,virtual_interface,vocab}; do diff --git a/mira/kbest_mira.cc b/mira/kbest_mira.cc index 60c9ac2b..9d83c94a 100644 --- a/mira/kbest_mira.cc +++ b/mira/kbest_mira.cc @@ -52,7 +52,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { ("reference,r",po::value >(), "[REQD] Reference translation(s) (tokenized text file)") ("mt_metric,m",po::value()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)") ("max_step_size,C", po::value()->default_value(0.01), "regularization strength (C)") - //("mt_metric_scale,s", po::value()->default_value(1.0), "Amount to scale MT loss function by") + ("mt_metric_scale,s", po::value()->default_value(1.0), "Amount to scale MT loss function by") ("k_best_size,k", po::value()->default_value(250), "Size of hypothesis list to search for oracles") ("sample_forest,f", "Instead of a k-best list, sample k hypotheses from the decoder's forest") ("sample_forest_unit_weight_vector,x", "Before sampling (must use -f option), rescale the weight vector used so it has unit length; this may improve the quality of the samples") diff --git a/utils/dict.h b/utils/dict.h index a3400868..595e4f9f 100644 --- a/utils/dict.h +++ b/utils/dict.h @@ -73,7 +73,7 @@ class Dict { inline const std::string& Convert(const WordID& id) const { if (id == 0) return b0_; assert(id <= (int)words_.size()); - //if (id < 0 || id > (int)words_.size()) return b0_; + if (id < 0 || id > (int)words_.size()) return b0_; return words_[id-1]; } diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h index 2c49948c..af832950 100644 --- a/utils/fast_sparse_vector.h +++ b/utils/fast_sparse_vector.h @@ -196,7 +196,7 @@ class FastSparseVector { else return local_size_; } - size_t size_nonzero() const { + size_t num_nonzero() const { size_t sz = 0; const_iterator it = this->begin(); for (; it != this->end(); ++it) { @@ -229,12 +229,11 @@ class FastSparseVector { return *this; } template - inline FastSparseVector& plus_eq_v_times_s(const FastSparseVector& other, const O scalar) { + inline void plus_eq_v_times_s(const FastSparseVector& other, const O scalar) { const typename FastSparseVector::const_iterator end = other.end(); for (typename FastSparseVector::const_iterator it = other.begin(); it != end; ++it) { get_or_create_bin(it->first) += it->second * scalar; } - return *this; } inline FastSparseVector& operator-=(const FastSparseVector& other) { const typename FastSparseVector::const_iterator end = other.end(); diff --git a/utils/sampler.h b/utils/sampler.h index bdbc01b0..22c873d4 100644 --- a/utils/sampler.h +++ b/utils/sampler.h @@ -32,7 +32,7 @@ struct RandomNumberGenerator { std::cerr << "Warning: could not read from /dev/urandom. Seeding from clock" << std::endl; seed = std::time(NULL); } - //std::cerr << "Seeding random number sequence to " << seed << std::endl; + std::cerr << "Seeding random number sequence to " << seed << std::endl; return seed; } -- cgit v1.2.3