From 0a47dda0e980d1fb6222a1f548649914427555b2 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Fri, 23 Sep 2011 22:02:45 +0200
Subject: more renaming, random pair sampler uses boost rng
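
sample_hg.{cc,h} is renamed to hgsampler.{cc,h}, sample_all() to
sample_all_pairs(), and sample_rand() to sample_rand_pairs(). The
random pair sampler now takes a caller-owned MT19937 (cdec's wrapper
around the boost mersenne twister) and draws its coin flip from it
instead of from rand(). A sketch of the new test — next() returns a
uniform double in [0,1), so this replaces the old rand() % 2:

    if (prng->next() < .5) {
      // keep the pair (i, j)
    }

approx_bleu() now applies the brevity penalty and the 0.9 factor from
Chiang et al. '08 itself, and the scorer comments in score.cc carry
the paper references.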
---
dtrain/Makefile.am | 2 +-
dtrain/dtrain.cc | 10 +--
dtrain/hgsampler.cc | 74 ++++++++++++++++++++++
dtrain/hgsampler.h | 29 +++++++++
dtrain/ksampler.h | 2 +-
dtrain/pairsampling.h | 35 ++++++-----
dtrain/sample_hg.cc | 74 ----------------------
dtrain/sample_hg.h | 24 --------
dtrain/score.cc | 165 ++++++++++++++++++++++----------------------------
dtrain/score.h | 53 +++++++---------
10 files changed, 224 insertions(+), 244 deletions(-)
create mode 100644 dtrain/hgsampler.cc
create mode 100644 dtrain/hgsampler.h
delete mode 100644 dtrain/sample_hg.cc
delete mode 100644 dtrain/sample_hg.h
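A minimal usage sketch of the renamed sampler API (dump_samples is a
hypothetical helper, not part of this commit), assuming a
decoder-produced Hypergraph and a caller-owned MT19937 as in
dtrain.cc:

    #include <iostream>
    #include <vector>

    #include "hgsampler.h"
    #include "sampler.h"
    #include "tdict.h"

    void dump_samples(const Hypergraph& hg, unsigned k, MT19937* rng) {
      std::vector<HypergraphSampler::Hypothesis> samples;
      // draws k derivations top down, weighted by inside scores
      HypergraphSampler::sample_hypotheses(hg, k, rng, &samples);
      for (unsigned i = 0; i < samples.size(); ++i)
        std::cerr << samples[i].model_score << "\t"
                  << TD::GetString(samples[i].words) << std::endl;
    }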
diff --git a/dtrain/Makefile.am b/dtrain/Makefile.am
index 9b5df8bf..12084a70 100644
--- a/dtrain/Makefile.am
+++ b/dtrain/Makefile.am
@@ -1,7 +1,7 @@
# TODO I'm sure I can leave something out.
bin_PROGRAMS = dtrain
-dtrain_SOURCES = dtrain.cc score.cc sample_hg.cc
+dtrain_SOURCES = dtrain.cc score.cc hgsampler.cc
dtrain_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz -lboost_filesystem -lboost_iostreams
AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 01821b30..01119997 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -347,10 +347,9 @@ main( int argc, char** argv )
cand_len = kb->sents[i].size();
}
NgramCounts counts_tmp = global_counts + counts;
- // TODO as param
- score = 0.9 * scorer( counts_tmp,
- global_ref_len,
- global_hyp_len + cand_len, N, bleu_weights );
+ score = .9*scorer( counts_tmp,
+ global_ref_len,
+ global_hyp_len + cand_len, N, bleu_weights );
} else {
// other scorers
cand_len = kb->sents[i].size();
@@ -381,7 +380,8 @@ main( int argc, char** argv )
if ( !noup ) {
TrainingInstances pairs;
- sample_all( kb, pairs );
+ sample_all_pairs(kb, pairs);
+ //sample_rand_pairs( kb, pairs, &rng );
for ( TrainingInstances::iterator ti = pairs.begin();
ti != pairs.end(); ti++ ) {
diff --git a/dtrain/hgsampler.cc b/dtrain/hgsampler.cc
new file mode 100644
index 00000000..7a00a3d3
--- /dev/null
+++ b/dtrain/hgsampler.cc
@@ -0,0 +1,74 @@
+#include "hgsampler.h"
+
+#include <queue>
+
+#include "viterbi.h"
+#include "inside_outside.h"
+
+using namespace std;
+
+struct SampledDerivationWeightFunction {
+ typedef double Weight;
+ explicit SampledDerivationWeightFunction(const vector<bool>& sampled) : sampled_edges(sampled) {}
+ double operator()(const Hypergraph::Edge& e) const {
+ return static_cast<double>(sampled_edges[e.id_]);
+ }
+ const vector<bool>& sampled_edges;
+};
+
+void HypergraphSampler::sample_hypotheses(const Hypergraph& hg,
+ unsigned n,
+ MT19937* rng,
+ vector<Hypothesis>* hypos) {
+ hypos->clear();
+ hypos->resize(n);
+
+ // compute inside probabilities
+ vector<prob_t> node_probs;
+ Inside(hg, &node_probs, EdgeProb());
+
+ vector<bool> sampled_edges(hg.edges_.size());
+ queue<unsigned> q;
+ SampleSet<prob_t> ss;
+ for (unsigned i = 0; i < n; ++i) {
+ fill(sampled_edges.begin(), sampled_edges.end(), false);
+ // sample derivation top down
+ assert(q.empty());
+ Hypothesis& hyp = (*hypos)[i];
+ SparseVector<double>& deriv_features = hyp.fmap;
+ q.push(hg.nodes_.size() - 1);
+ prob_t& model_score = hyp.model_score;
+ model_score = prob_t::One();
+ while(!q.empty()) {
+ unsigned cur_node_id = q.front();
+ q.pop();
+ const Hypergraph::Node& node = hg.nodes_[cur_node_id];
+ const unsigned num_in_edges = node.in_edges_.size();
+ unsigned sampled_edge_idx = 0;
+ if (num_in_edges == 1) {
+ sampled_edge_idx = node.in_edges_[0];
+ } else {
+ assert(num_in_edges > 1);
+ ss.clear();
+ for (unsigned j = 0; j < num_in_edges; ++j) {
+ const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]];
+ prob_t p = edge.edge_prob_; // edge weight
+ for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k)
+ p *= node_probs[edge.tail_nodes_[k]]; // tail node inside weight
+ ss.add(p);
+ }
+ sampled_edge_idx = node.in_edges_[rng->SelectSample(ss)];
+ }
+ sampled_edges[sampled_edge_idx] = true;
+ const Hypergraph::Edge& sampled_edge = hg.edges_[sampled_edge_idx];
+ deriv_features += sampled_edge.feature_values_;
+ model_score *= sampled_edge.edge_prob_;
+ //sampled_deriv->push_back(sampled_edge_idx);
+ for (unsigned j = 0; j < sampled_edge.tail_nodes_.size(); ++j) {
+ q.push(sampled_edge.tail_nodes_[j]);
+ }
+ }
+ Viterbi(hg, &hyp.words, ESentenceTraversal(), SampledDerivationWeightFunction(sampled_edges));
+ }
+}
+
diff --git a/dtrain/hgsampler.h b/dtrain/hgsampler.h
new file mode 100644
index 00000000..b840c07f
--- /dev/null
+++ b/dtrain/hgsampler.h
@@ -0,0 +1,29 @@
+#ifndef _DTRAIN_HGSAMPLER_H_
+#define _DTRAIN_HGSAMPLER_H_
+
+
+#include <vector>
+#include "sparse_vector.h"
+#include "sampler.h"
+#include "wordid.h"
+
+class Hypergraph;
+
+struct HypergraphSampler {
+
+ struct Hypothesis {
+ std::vector<WordID> words;
+ SparseVector<double> fmap;
+ prob_t model_score;
+ };
+
+ static void
+ sample_hypotheses(const Hypergraph& hg,
+ unsigned n,
+ MT19937* rng,
+ std::vector<Hypothesis>* hypos);
+};
+
+
+#endif
+
diff --git a/dtrain/ksampler.h b/dtrain/ksampler.h
index a28b69c9..914e9723 100644
--- a/dtrain/ksampler.h
+++ b/dtrain/ksampler.h
@@ -2,7 +2,7 @@
#define _DTRAIN_KSAMPLER_H_
#include "kbest.h"
-#include "sample_hg.h"
+#include "hgsampler.h"
#include "sampler.h"
namespace dtrain
diff --git a/dtrain/pairsampling.h b/dtrain/pairsampling.h
index 502901af..9774ba4a 100644
--- a/dtrain/pairsampling.h
+++ b/dtrain/pairsampling.h
@@ -1,9 +1,8 @@
-#ifndef _DTRAIN_SAMPLE_H_
-#define _DTRAIN_SAMPLE_H_
-
+#ifndef _DTRAIN_PAIRSAMPLING_H_
+#define _DTRAIN_PAIRSAMPLING_H_
#include "kbestget.h"
-
+#include "sampler.h" // cdec MT19937
namespace dtrain
{
@@ -11,19 +10,18 @@ namespace dtrain
struct TPair
{
- SparseVector<double> first, second;
- size_t first_rank, second_rank;
- double first_score, second_score;
+ SparseVector<double> first, second;
+ size_t first_rank, second_rank;
+ double first_score, second_score;
};
typedef vector<TPair> TrainingInstances;
-
void
-sample_all( KBestList* kb, TrainingInstances &training )
+sample_all_pairs(KBestList* kb, TrainingInstances &training)
{
- for ( size_t i = 0; i < kb->GetSize()-1; i++ ) {
- for ( size_t j = i+1; j < kb->GetSize(); j++ ) {
+ for (size_t i = 0; i < kb->GetSize()-1; i++) {
+ for (size_t j = i+1; j < kb->GetSize(); j++) {
TPair p;
p.first = kb->feats[i];
p.second = kb->feats[j];
@@ -31,18 +29,18 @@ sample_all( KBestList* kb, TrainingInstances &training )
p.second_rank = j;
p.first_score = kb->scores[i];
p.second_score = kb->scores[j];
- training.push_back( p );
+ training.push_back(p);
}
}
}
void
-sample_rand( KBestList* kb, TrainingInstances &training )
+sample_rand_pairs(KBestList* kb, TrainingInstances &training, MT19937* prng)
{
- srand( time(NULL) );
- for ( size_t i = 0; i < kb->GetSize()-1; i++ ) {
- for ( size_t j = i+1; j < kb->GetSize(); j++ ) {
- if ( rand() % 2 ) {
+ srand(time(NULL));
+ for (size_t i = 0; i < kb->GetSize()-1; i++) {
+ for (size_t j = i+1; j < kb->GetSize(); j++) {
+ if (prng->next() < .5) {
TPair p;
p.first = kb->feats[i];
p.second = kb->feats[j];
@@ -50,10 +48,11 @@ sample_rand( KBestList* kb, TrainingInstances &training )
p.second_rank = j;
p.first_score = kb->scores[i];
p.second_score = kb->scores[j];
- training.push_back( p );
+ training.push_back(p);
}
}
}
+ cout << training.size() << " sampled" << endl;
}
diff --git a/dtrain/sample_hg.cc b/dtrain/sample_hg.cc
deleted file mode 100644
index 33872fb8..00000000
--- a/dtrain/sample_hg.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-#include "sample_hg.h"
-
-#include <queue>
-
-#include "viterbi.h"
-#include "inside_outside.h"
-
-using namespace std;
-
-struct SampledDerivationWeightFunction {
- typedef double Weight;
- explicit SampledDerivationWeightFunction(const vector<bool>& sampled) : sampled_edges(sampled) {}
- double operator()(const Hypergraph::Edge& e) const {
- return static_cast<double>(sampled_edges[e.id_]);
- }
- const vector<bool>& sampled_edges;
-};
-
-void HypergraphSampler::sample_hypotheses(const Hypergraph& hg,
- unsigned n,
- MT19937* rng,
- vector<Hypothesis>* hypos) {
- hypos->clear();
- hypos->resize(n);
-
- // compute inside probabilities
- vector<prob_t> node_probs;
- Inside(hg, &node_probs, EdgeProb());
-
- vector<bool> sampled_edges(hg.edges_.size());
- queue<unsigned> q;
- SampleSet<prob_t> ss;
- for (unsigned i = 0; i < n; ++i) {
- fill(sampled_edges.begin(), sampled_edges.end(), false);
- // sample derivation top down
- assert(q.empty());
- Hypothesis& hyp = (*hypos)[i];
- SparseVector<double>& deriv_features = hyp.fmap;
- q.push(hg.nodes_.size() - 1);
- prob_t& model_score = hyp.model_score;
- model_score = prob_t::One();
- while(!q.empty()) {
- unsigned cur_node_id = q.front();
- q.pop();
- const Hypergraph::Node& node = hg.nodes_[cur_node_id];
- const unsigned num_in_edges = node.in_edges_.size();
- unsigned sampled_edge_idx = 0;
- if (num_in_edges == 1) {
- sampled_edge_idx = node.in_edges_[0];
- } else {
- assert(num_in_edges > 1);
- ss.clear();
- for (unsigned j = 0; j < num_in_edges; ++j) {
- const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]];
- prob_t p = edge.edge_prob_; // edge weight
- for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k)
- p *= node_probs[edge.tail_nodes_[k]]; // tail node inside weight
- ss.add(p);
- }
- sampled_edge_idx = node.in_edges_[rng->SelectSample(ss)];
- }
- sampled_edges[sampled_edge_idx] = true;
- const Hypergraph::Edge& sampled_edge = hg.edges_[sampled_edge_idx];
- deriv_features += sampled_edge.feature_values_;
- model_score *= sampled_edge.edge_prob_;
- //sampled_deriv->push_back(sampled_edge_idx);
- for (unsigned j = 0; j < sampled_edge.tail_nodes_.size(); ++j) {
- q.push(sampled_edge.tail_nodes_[j]);
- }
- }
- Viterbi(hg, &hyp.words, ESentenceTraversal(), SampledDerivationWeightFunction(sampled_edges));
- }
-}
-
diff --git a/dtrain/sample_hg.h b/dtrain/sample_hg.h
deleted file mode 100644
index 932fd369..00000000
--- a/dtrain/sample_hg.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef _SAMPLE_HG_H_
-#define _SAMPLE_HG_H_
-
-#include <vector>
-#include "sparse_vector.h"
-#include "sampler.h"
-#include "wordid.h"
-
-class Hypergraph;
-
-struct HypergraphSampler {
- struct Hypothesis {
- std::vector<WordID> words;
- SparseVector<double> fmap;
- prob_t model_score;
- };
-
- static void sample_hypotheses(const Hypergraph& hg,
- unsigned n,
- MT19937* rng,
- std::vector<Hypothesis>* hypos);
-};
-
-#endif
diff --git a/dtrain/score.cc b/dtrain/score.cc
index 1e98c11d..d08e87f3 100644
--- a/dtrain/score.cc
+++ b/dtrain/score.cc
@@ -1,166 +1,149 @@
#include "score.h"
-
namespace dtrain
{
-/******************************************************************************
- * NGRAMS
- *
- *
- * make_ngrams
- *
- */
typedef map<vector<WordID>, size_t> Ngrams;
Ngrams
-make_ngrams( vector<WordID>& s, size_t N )
+make_ngrams(vector<WordID>& s, size_t N)
{
Ngrams ngrams;
vector<WordID> ng;
- for ( size_t i = 0; i < s.size(); i++ ) {
+ for (size_t i = 0; i < s.size(); i++) {
ng.clear();
- for ( size_t j = i; j < min( i+N, s.size() ); j++ ) {
- ng.push_back( s[j] );
+ for (size_t j = i; j < min(i+N, s.size()); j++) {
+ ng.push_back(s[j]);
ngrams[ng]++;
}
}
return ngrams;
}
-
-/*
- * ngram_matches
- *
- */
NgramCounts
-make_ngram_counts( vector<WordID> hyp, vector<WordID> ref, size_t N )
+make_ngram_counts(vector<WordID> hyp, vector<WordID> ref, size_t N)
{
- Ngrams hyp_ngrams = make_ngrams( hyp, N );
- Ngrams ref_ngrams = make_ngrams( ref, N );
- NgramCounts counts( N );
+ Ngrams hyp_ngrams = make_ngrams(hyp, N);
+ Ngrams ref_ngrams = make_ngrams(ref, N);
+ NgramCounts counts(N);
Ngrams::iterator it;
Ngrams::iterator ti;
- for ( it = hyp_ngrams.begin(); it != hyp_ngrams.end(); it++ ) {
- ti = ref_ngrams.find( it->first );
- if ( ti != ref_ngrams.end() ) {
- counts.add( it->second, ti->second, it->first.size() - 1 );
+ for (it = hyp_ngrams.begin(); it != hyp_ngrams.end(); it++) {
+ ti = ref_ngrams.find(it->first);
+ if (ti != ref_ngrams.end()) {
+ counts.add(it->second, ti->second, it->first.size() - 1);
} else {
- counts.add( it->second, 0, it->first.size() - 1 );
+ counts.add(it->second, 0, it->first.size() - 1);
}
}
return counts;
}
-
-/******************************************************************************
- * SCORERS
- *
+/*
+ * bleu
*
- * brevity_penaly
+ * as in "BLEU: a Method for Automatic Evaluation
+ * of Machine Translation"
+ * (Papineni et al. '02)
*
+ * NOTE: 0 if one n in {1..N} has 0 count
*/
double
-brevity_penaly( const size_t hyp_len, const size_t ref_len )
+brevity_penaly(const size_t hyp_len, const size_t ref_len)
{
- if ( hyp_len > ref_len ) return 1;
- return exp( 1 - (double)ref_len/(double)hyp_len );
+ if (hyp_len > ref_len) return 1;
+ return exp(1 - (double)ref_len/(double)hyp_len);
}
-
-
-/*
- * bleu
- * as in "BLEU: a Method for Automatic Evaluation of Machine Translation" (Papineni et al. '02)
- * page TODO
- * 0 if for N one of the counts = 0
- */
double
-bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len,
- size_t N, vector<float> weights )
+bleu(NgramCounts& counts, const size_t hyp_len, const size_t ref_len,
+ size_t N, vector<float> weights )
{
- if ( hyp_len == 0 || ref_len == 0 ) return 0;
- if ( ref_len < N ) N = ref_len;
+ if (hyp_len == 0 || ref_len == 0) return 0;
+ if (ref_len < N) N = ref_len;
float N_ = (float)N;
- if ( weights.empty() )
+ if (weights.empty())
{
- for ( size_t i = 0; i < N; i++ ) weights.push_back( 1/N_ );
+ for (size_t i = 0; i < N; i++) weights.push_back(1/N_);
}
double sum = 0;
- for ( size_t i = 0; i < N; i++ ) {
- if ( counts.clipped[i] == 0 || counts.sum[i] == 0 ) return 0;
- sum += weights[i] * log( (double)counts.clipped[i] / (double)counts.sum[i] );
+ for (size_t i = 0; i < N; i++) {
+ if (counts.clipped[i] == 0 || counts.sum[i] == 0) return 0;
+ sum += weights[i] * log((double)counts.clipped[i] / (double)counts.sum[i]);
}
- return brevity_penaly( hyp_len, ref_len ) * exp( sum );
+ return brevity_penaly(hyp_len, ref_len) * exp(sum);
}
-
/*
- * stupid_bleu
- * as in "ORANGE: a Method for Evaluating Automatic Evaluation Metrics for Machine Translation (Lin & Och '04)
- * page TODO
- * 0 iff no 1gram match
+ * 'stupid' bleu
+ *
+ * as in "ORANGE: a Method for Evaluating
+ * Automatic Evaluation Metrics
+ * for Machine Translation"
+ * (Lin & Och '04)
+ *
+ * NOTE: 0 iff no 1gram match
*/
double
-stupid_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len,
- size_t N, vector<float> weights )
+stupid_bleu(NgramCounts& counts, const size_t hyp_len, const size_t ref_len,
+ size_t N, vector<float> weights )
{
- if ( hyp_len == 0 || ref_len == 0 ) return 0;
- if ( ref_len < N ) N = ref_len;
+ if (hyp_len == 0 || ref_len == 0) return 0;
+ if (ref_len < N) N = ref_len;
float N_ = (float)N;
- if ( weights.empty() )
+ if (weights.empty())
{
- for ( size_t i = 0; i < N; i++ ) weights.push_back( 1/N_ );
+ for (size_t i = 0; i < N; i++) weights.push_back(1/N_);
}
double sum = 0;
float add = 0;
- for ( size_t i = 0; i < N; i++ ) {
- if ( i == 1 ) add = 1;
- sum += weights[i] * log( ((double)counts.clipped[i] + add) / ((double)counts.sum[i] + add) );
+ for (size_t i = 0; i < N; i++) {
+ if (i == 1) add = 1;
+ sum += weights[i] * log(((double)counts.clipped[i] + add) / ((double)counts.sum[i] + add));
}
- return brevity_penaly( hyp_len, ref_len ) * exp( sum );
+ return brevity_penaly(hyp_len, ref_len) * exp(sum);
}
-
/*
- * smooth_bleu
- * as in "An End-to-End Discriminative Approach to Machine Translation" (Liang et al. '06)
- * page TODO
- * max. 0.9375
+ * smooth bleu
+ *
+ * as in "An End-to-End Discriminative Approach
+ * to Machine Translation"
+ * (Liang et al. '06)
+ *
+ * NOTE: max is 0.9375
*/
double
-smooth_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len,
- const size_t N, vector<float> weights )
+smooth_bleu(NgramCounts& counts, const size_t hyp_len, const size_t ref_len,
+ const size_t N, vector<float> weights )
{
- if ( hyp_len == 0 || ref_len == 0 ) return 0;
+ if (hyp_len == 0 || ref_len == 0) return 0;
float N_ = (float)N;
- if ( weights.empty() )
+ if (weights.empty())
{
- for ( size_t i = 0; i < N; i++ ) weights.push_back( 1/N_ );
+ for (size_t i = 0; i < N; i++) weights.push_back(1/N_);
}
double sum = 0;
float j = 1;
- for ( size_t i = 0; i < N; i++ ) {
- if ( counts.clipped[i] == 0 || counts.sum[i] == 0) continue;
- sum += exp((weights[i] * log((double)counts.clipped[i]/(double)counts.sum[i]))) / pow( 2, N_-j+1 );
+ for (size_t i = 0; i < N; i++) {
+ if (counts.clipped[i] == 0 || counts.sum[i] == 0) continue;
+ sum += exp((weights[i] * log((double)counts.clipped[i]/(double)counts.sum[i]))) / pow(2, N_-j+1);
j++;
}
- return brevity_penaly( hyp_len, ref_len ) * sum;
+ return brevity_penaly(hyp_len, ref_len) * sum;
}
-
/*
- * approx_bleu
- * as in "Online Large-Margin Training for Statistical Machine Translation" (Watanabe et al. '07)
- * CHIANG, RESNIK, synt struct features
- * .9*
- * page TODO
+ * approx. bleu
*
+ * as in "Online Large-Margin Training of Syntactic
+ * and Structural Translation Features"
+ * (Chiang et al. '08)
*/
double
-approx_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len,
- const size_t N, vector<float> weights )
+approx_bleu(NgramCounts& counts, const size_t hyp_len, const size_t ref_len,
+ const size_t N, vector<float> weights)
{
- return bleu( counts, hyp_len, ref_len, N, weights );
+ return brevity_penaly(hyp_len, ref_len)
+ * 0.9 * bleu(counts, hyp_len, ref_len, N, weights);
}
diff --git a/dtrain/score.h b/dtrain/score.h
index e88387c5..0afb6237 100644
--- a/dtrain/score.h
+++ b/dtrain/score.h
@@ -1,29 +1,23 @@
#ifndef _DTRAIN_SCORE_H_
#define _DTRAIN_SCORE_H_
-
#include <iostream>
#include <vector>
#include <map>