1 files changed, 0 insertions, 330 deletions
diff --git a/dtrain/dcommon.cc b/dtrain/dcommon.cc
deleted file mode 100644
index 6657bed6..00000000
--- a/dtrain/dcommon.cc
+++ /dev/null
@@ -1,330 +0,0 @@
-#include "dcommon.h"
-
-
-
-
-/******************************************************************************
- * NGRAMS
- *
- *
- * make_ngrams
- *
- */
-typedef map<vector<WordID>, size_t> Ngrams;
-Ngrams
-make_ngrams( vector<WordID>& s, size_t N )
-{
-  Ngrams ngrams;
-  vector<WordID> ng;
-  for ( size_t i = 0; i < s.size(); i++ ) {
-    ng.clear();
-    for ( size_t j = i; j < min( i+N, s.size() ); j++ ) {
-      ng.push_back( s[j] );
-      ngrams[ng]++;
-    }
-  }
-  return ngrams;
-}
-
-
-/*
- * ngram_matches
- *
- */
-NgramCounts
-make_ngram_counts( vector<WordID> hyp, vector<WordID> ref, size_t N )
-{
-  Ngrams hyp_ngrams = make_ngrams( hyp, N );
-  Ngrams ref_ngrams = make_ngrams( ref, N );
-  NgramCounts counts( N );
-  Ngrams::iterator it;
-  Ngrams::iterator ti;
-  for ( it = hyp_ngrams.begin(); it != hyp_ngrams.end(); it++ ) {
-    ti = ref_ngrams.find( it->first );
-    if ( ti != ref_ngrams.end() ) {
-      counts.add( it->second, ti->second, it->first.size() - 1 );
-    } else {
-      counts.add( it->second, 0, it->first.size() - 1 );
-    }
-  }
-  return counts;
-}
-
-
-
-
-/******************************************************************************
- * SCORES
- *
- *
- * brevity_penaly
- *
- */
-double
-brevity_penaly( const size_t hyp_len, const size_t ref_len )
-{
-  if ( hyp_len > ref_len ) return 1;
-  return exp( 1 - (double)ref_len/(double)hyp_len );
-}
-
-
-/*
- * bleu
- * as in "BLEU: a Method for Automatic Evaluation of Machine Translation" (Papineni et al. '02)
- * page TODO
- * 0 if for N one of the counts = 0
- */
-double
-bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len,
-      size_t N, vector<float> weights  )
-{
-  if ( hyp_len == 0 || ref_len == 0 ) return 0;
-  if ( ref_len < N ) N = ref_len;
-  float N_ = (float)N;
-  if ( weights.empty() )
-  {
-    for ( size_t i = 0; i < N; i++ ) weights.push_back( 1/N_ );
-  }
-  double sum = 0;
-  for ( size_t i = 0; i < N; i++ ) {
-    if ( counts.clipped[i] == 0 || counts.sum[i] == 0 ) return 0;
-    sum += weights[i] * log( (double)counts.clipped[i] / (double)counts.sum[i] );
-  }
-  return brevity_penaly( hyp_len, ref_len ) * exp( sum );
-}
-
-
-/*
- * stupid_bleu
- * as in "ORANGE: a Method for Evaluating Automatic Evaluation Metrics for Machine Translation (Lin & Och '04)
- * page TODO
- * 0 iff no 1gram match
- */
-double
-stupid_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len,
-             size_t N, vector<float> weights  )
-{
-  if ( hyp_len == 0 || ref_len == 0 ) return 0;
-  if ( ref_len < N ) N = ref_len;
-  float N_ = (float)N;
-  if ( weights.empty() )
-  {
-    for ( size_t i = 0; i < N; i++ ) weights.push_back( 1/N_ );
-  }
-  double sum = 0;
-  float add = 0;
-  for ( size_t i = 0; i < N; i++ ) {
-    if ( i == 1 ) add = 1;
-    sum += weights[i] * log( ((double)counts.clipped[i] + add) / ((double)counts.sum[i] + add) );
-  }
-  return brevity_penaly( hyp_len, ref_len ) * exp( sum );
-}
-
-
-/*
- * smooth_bleu
- * as in "An End-to-End Discriminative Approach to Machine Translation" (Liang et al. '06)
- * page TODO
- * max. 0.9375
- */
-double
-smooth_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len,
-             const size_t N, vector<float> weights  )
-{
-  if ( hyp_len == 0 || ref_len == 0 ) return 0;
-  float N_ = (float)N;
-  if ( weights.empty() )
-  {
-    for ( size_t i = 0; i < N; i++ ) weights.push_back( 1/N_ );
-  }
-  double sum = 0;
-  float j = 1;
-  for ( size_t i = 0; i < N; i++ ) {
-    if ( counts.clipped[i] == 0 || counts.sum[i] == 0) continue;
-    sum += exp((weights[i] * log((double)counts.clipped[i]/(double)counts.sum[i]))) / pow( 2, N_-j+1 );
-    j++;
-  }
-  return brevity_penaly( hyp_len, ref_len ) * sum;
-}
-
-
-/*
- * approx_bleu
- * as in "Online Large-Margin Training for Statistical Machine Translation" (Watanabe et al. '07)
- * page TODO
- *
- */
-double
-approx_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len,
-     const size_t N, vector<float> weights )
-{
-  return bleu( counts, hyp_len, ref_len, N, weights );
-}
-
-
-
-
-/******************************************************************************
- * UTILS
- *
- *
- * register_and_convert
- *
- */
-void
-register_and_convert(const vector<string>& strs, vector<WordID>& ids)
-{
-  vector<string>::const_iterator it;
-  for ( it = strs.begin(); it < strs.end(); it++ ) {
-    ids.push_back( TD::Convert( *it ) );
-  }
-}
-
-
-/*
- * approx_equal
- *
- */
-double
-approx_equal( double x, double y )
-{
-  const double EPSILON = 1E-5;
-  if ( x == 0 ) return fabs( y ) <= EPSILON;
-  if ( y == 0 ) return fabs( x ) <= EPSILON;
-  return fabs( x - y ) / max( fabs(x), fabs(y) ) <= EPSILON;
-}
-
-
-/*
- * print_FD
- *
- */
-void
-print_FD()
-{
-  for ( size_t i = 0; i < FD::NumFeats(); i++ ) cout << FD::Convert(i)<< endl;
-}
-
-
-
-
-/******************************************************************************
- * TESTS
- *
- *
- * test_ngrams
- *
- */
-void
-test_ngrams()
-{
-  cout << "Testing ngrams..." << endl << endl;
-  size_t N = 5;
-  cout << "N = " << N << endl;
-  vector<int> a; // hyp
-  vector<int> b; // ref
-  cout << "a ";
-  for (size_t i = 1; i <= 8; i++) {
-    cout << i << " ";
-    a.push_back(i);
-  }
-  cout << endl << "b ";
-  for (size_t i = 1; i <= 4; i++) {
-    cout << i << " ";
-    b.push_back(i);
-  }
-  cout << endl << endl;
-  NgramCounts c = make_ngram_counts( a, b, N );
-  assert( c.clipped[N-1] == 0 );
-  assert( c.sum[N-1] == 4 );
-  c.print();
-  c += c;
-  cout << endl;
-  c.print();
-  cout << endl;
-}
-
-
-/*
- * test_metrics
- *
- */
-void
-test_metrics()
-{
-  cout << "Testing metrics..." << endl << endl;
-  using namespace boost::assign;
-  vector<string> a, b;
-  vector<double> expect_vanilla, expect_smooth, expect_stupid;
-  a +=              "a a a a", "a a a a", "a",   "a", "b",        "a a a a", "a a",  "a a a", "a b a"; // hyp
-  b +=              "b b b b", "a a a a", "a",   "b", "b b b b",  "a",       "a a",  "a a a", "a b b"; // ref
-  expect_vanilla += 0,         1,         1,      0,  0,          .25,       1,      1,       0;
-  expect_smooth  += 0,          .9375,     .0625, 0,   .00311169, .0441942,   .1875,  .4375,   .161587;
-  expect_stupid  += 0,         1,         1,      0,   .0497871,  .25,       1,      1,        .605707;
-  vector<string> aa, bb;
-  vector<WordID> aai, bbi;
-  double vanilla, smooth, stupid;
-  size_t N = 4;
-  cout << "N = " << N << endl << endl;
-  for ( size_t i = 0; i < a.size(); i++ ) {
-    cout << " hyp: " << a[i] << endl;
-    cout << " ref: " << b[i] << endl;
-    aa.clear(); bb.clear(); aai.clear(); bbi.clear();
-    boost::split( aa, a[i], boost::is_any_of(" ") );
-    boost::split( bb, b[i], boost::is_any_of(" ") );
-    register_and_convert( aa, aai );
-    register_and_convert( bb, bbi );
-    NgramCounts counts = make_ngram_counts( aai, bbi, N );
-    vanilla =        bleu( counts, aa.size(), bb.size(), N);
-    smooth  = smooth_bleu( counts, aa.size(), bb.size(), N);
-    stupid  = stupid_bleu( counts, aa.size(), bb.size(), N);
-    assert( approx_equal(vanilla, expect_vanilla[i]) );
-    assert( approx_equal(smooth, expect_smooth[i]) );
-    assert( approx_equal(stupid, expect_stupid[i]) );
-    cout << setw(14) << "bleu = "      << vanilla << endl;
-    cout << setw(14) << "smooth bleu = " << smooth << endl;
-    cout << setw(14) << "stupid bleu = " << stupid << endl << endl;
-  }
-  cout << endl;
-}
-
-
-/*
- * test_SetWeights
- *
- */
-void
-test_SetWeights()
-{
-  cout << "Testing Weights::SetWeight..." << endl << endl;
-  Weights weights;
-  SparseVector<double> lambdas;
-  weights.InitSparseVector( &lambdas );
-  weights.SetWeight( &lambdas, "test", 0 );
-  weights.SetWeight( &lambdas, "test1", 1 );
-  WordID fid = FD::Convert( "test2" );
-  weights.SetWeight( &lambdas, fid, 2 );
-  string fn = "weights-test";
-  cout << "FD::NumFeats() " << FD::NumFeats() << endl;
-  assert( FD::NumFeats() == 4 );
-  weights.WriteToFile( fn, true );
-  cout << endl;
-}
-
-
-/*
- * run_tests
- *
- */
-void
-run_tests()
-{
-  cout << endl;
-  test_ngrams();
-  cout << endl;
-  test_metrics();
-  cout << endl;
-  test_SetWeights();
-  exit(0);
-}
-