summaryrefslogtreecommitdiff
path: root/dtrain
diff options
context:
space:
mode:
Diffstat (limited to 'dtrain')
-rw-r--r--dtrain/README.md2
-rw-r--r--dtrain/dtrain.cc8
-rw-r--r--dtrain/dtrain.h2
-rw-r--r--dtrain/score.cc62
-rw-r--r--dtrain/score.h12
5 files changed, 78 insertions, 8 deletions
diff --git a/dtrain/README.md b/dtrain/README.md
index 7aefcc55..843874fa 100644
--- a/dtrain/README.md
+++ b/dtrain/README.md
@@ -58,6 +58,8 @@ Next
+ bigger LM, feats (target side Ng., word alignments etc.)
+ merge kbest lists
+ proper eval, pairwise ranking, forced transl
++ smooth bleu variants X
++ MMERT exp
Legal
-----
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 7f44d4cf..eea58393 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -165,8 +165,12 @@ main(int argc, char** argv)
scorer = dynamic_cast<StupidBleuScorer*>(new StupidBleuScorer);
} else if (scorer_str == "smooth_bleu") {
scorer = dynamic_cast<SmoothBleuScorer*>(new SmoothBleuScorer);
- } else if (scorer_str == "smooth_single_bleu") {
- scorer = dynamic_cast<SmoothSingleBleuScorer*>(new SmoothSingleBleuScorer);
+ } else if (scorer_str == "sum_bleu") {
+ scorer = dynamic_cast<SumBleuScorer*>(new SumBleuScorer);
+ } else if (scorer_str == "sumexp_bleu") {
+ scorer = dynamic_cast<SumExpBleuScorer*>(new SumExpBleuScorer);
+ } else if (scorer_str == "sumwhatever_bleu") {
+ scorer = dynamic_cast<SumWhateverBleuScorer*>(new SumWhateverBleuScorer);
} else if (scorer_str == "approx_bleu") {
scorer = dynamic_cast<ApproxBleuScorer*>(new ApproxBleuScorer(N, approx_bleu_d));
} else if (scorer_str == "lc_bleu") {
diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h
index d8dc14b6..7e084a79 100644
--- a/dtrain/dtrain.h
+++ b/dtrain/dtrain.h
@@ -3,7 +3,7 @@
#undef DTRAIN_FASTER_PERCEPTRON // only look at misranked pairs
// DO NOT USE WITH SVM!
-#undef DTRAIN_LOCAL
+#define DTRAIN_LOCAL
#define DTRAIN_DOTS 10 // after how many inputs to display a '.'
#define DTRAIN_GRAMMAR_DELIM "########EOS########"
#define DTRAIN_SCALE 100000
diff --git a/dtrain/score.cc b/dtrain/score.cc
index 5bb0bcaa..4a7cac6e 100644
--- a/dtrain/score.cc
+++ b/dtrain/score.cc
@@ -80,7 +80,7 @@ StupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
* to Machine Translation"
* (Liang et al. '06)
*
- * NOTE: max is 0.9375
+ * NOTE: max is 0.9375 (with N=4)
*/
score_t
SmoothBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
@@ -108,9 +108,13 @@ SmoothBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
return brevity_penalty(hyp_len, ref_len) * sum;
}
-// variant of smooth_bleu; i-Bleu scores only single 'i'
+/*
+ * 'sum' bleu
+ *
+ * sum up Ngram precisions
+ */
score_t
-SmoothSingleBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+SumBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
const unsigned /*rank*/, const unsigned /*src_len*/)
{
unsigned hyp_len = hyp.size(), ref_len = ref.size();
@@ -128,7 +132,57 @@ SmoothSingleBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
return brevity_penalty(hyp_len, ref_len) * sum;
}
-// TODO single variants!
+/*
+ * 'sum' (exp) bleu
+ *
+ * sum up exp(Ngram precisions)
+ */
+score_t
+SumExpBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+ const unsigned /*rank*/, const unsigned /*src_len*/)
+{
+ unsigned hyp_len = hyp.size(), ref_len = ref.size();
+ if (hyp_len == 0 || ref_len == 0) return 0.;
+ NgramCounts counts = make_ngram_counts(hyp, ref, N_);
+ unsigned M = N_;
+ if (ref_len < N_) M = ref_len;
+ score_t sum = 0.;
+ unsigned j = 1;
+ for (unsigned i = 0; i < M; i++) {
+ if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break;
+ sum += exp(((score_t)counts.clipped_[i]/counts.sum_[i]))/pow(2., N_-j+1);
+ j++;
+ }
+ return brevity_penalty(hyp_len, ref_len) * sum;
+}
+
+/*
+ * 'sum' (whatever) bleu
+ *
+ * sum up exp(weight * log(Ngram precisions))
+ */
+score_t
+SumWhateverBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+ const unsigned /*rank*/, const unsigned /*src_len*/)
+{
+ unsigned hyp_len = hyp.size(), ref_len = ref.size();
+ if (hyp_len == 0 || ref_len == 0) return 0.;
+ NgramCounts counts = make_ngram_counts(hyp, ref, N_);
+ unsigned M = N_;
+ vector<score_t> v = w_;
+ if (ref_len < N_) {
+ M = ref_len;
+ for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M);
+ }
+ score_t sum = 0.;
+ unsigned j = 1;
+ for (unsigned i = 0; i < M; i++) {
+ if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break;
+ sum += exp(v[i] * log(((score_t)counts.clipped_[i]/counts.sum_[i])))/pow(2., N_-j+1);
+ j++;
+ }
+ return brevity_penalty(hyp_len, ref_len) * sum;
+}
/*
* approx. bleu
diff --git a/dtrain/score.h b/dtrain/score.h
index c5be2829..f317c903 100644
--- a/dtrain/score.h
+++ b/dtrain/score.h
@@ -153,7 +153,17 @@ struct SmoothBleuScorer : public LocalScorer
score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
};
-struct SmoothSingleBleuScorer : public LocalScorer
+struct SumBleuScorer : public LocalScorer
+{
+ score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+};
+
+struct SumExpBleuScorer : public LocalScorer
+{
+ score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+};
+
+struct SumWhateverBleuScorer : public LocalScorer
{
score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
};