summaryrefslogtreecommitdiff
path: root/vest
diff options
context:
space:
mode:
author Chris Dyer <redpony@gmail.com> 2009-12-03 16:49:30 -0500
committer Chris Dyer <redpony@gmail.com> 2009-12-03 16:49:30 -0500
commit 9fae7b75c64f4dfb6e45ff8f2ec61b597f37d225 (patch)
tree 61a187c4fa6db8527f90f3060d18357f7b60b44a /vest
parent 671c21451542e2dd20e45b4033d44d8e8735f87b (diff)
fix build in new layout
Diffstat (limited to 'vest')
-rw-r--r-- vest/comb_scorer.h 17
-rw-r--r-- vest/error_surface.h 24
-rw-r--r-- vest/line_optimizer.h 44
-rw-r--r-- vest/scorer.h 51
-rw-r--r-- vest/ter.h 18
-rw-r--r-- vest/viterbi_envelope.h 79
6 files changed, 233 insertions, 0 deletions
diff --git a/vest/comb_scorer.h b/vest/comb_scorer.h
new file mode 100644
index 00000000..70b1ec75
--- /dev/null
+++ b/vest/comb_scorer.h
@@ -0,0 +1,17 @@
+#ifndef _COMB_SCORER_
+#define _COMB_SCORER_
+
+#include "scorer.h"
+
+class BLEUTERCombinationScorer : public SentenceScorer {  // SentenceScorer that holds two further SentenceScorers, bleu_ and ter_
+ public:
+ BLEUTERCombinationScorer(const std::vector<std::vector<WordID> >& refs);  // refs: presumably one WordID sequence per reference translation -- confirm in the .cc
+ ~BLEUTERCombinationScorer();  // defined out of line; NOTE(review): presumably releases bleu_ and ter_ -- confirm
+ Score* ScoreCandidate(const std::vector<WordID>& hyp) const;  // implements the pure virtual SentenceScorer::ScoreCandidate for hypothesis hyp
+ static Score* ScoreFromString(const std::string& in);  // builds a Score from text; NOTE(review): looks like the inverse of Score::Encode -- confirm
+ private:
+ SentenceScorer* bleu_;  // raw pointer; ownership is not expressed in the type
+ SentenceScorer* ter_;  // raw pointer; ownership is not expressed in the type
+};
+
+#endif
diff --git a/vest/error_surface.h b/vest/error_surface.h
new file mode 100644
index 00000000..a8734f54
--- /dev/null
+++ b/vest/error_surface.h
@@ -0,0 +1,24 @@
+#ifndef _ERROR_SURFACE_H_
+#define _ERROR_SURFACE_H_
+
+#include <vector>
+#include <string>
+
+#include "scorer.h"
+
+class Score;  // Score is already defined by "scorer.h" included above, so this forward declaration is redundant but harmless
+
+struct ErrorSegment {  // element type of ErrorSurface: a position x paired with a Score delta
+ double x;  // NOTE(review): presumably the point on the search line where the score changes -- confirm against ComputeErrorSurface
+ Score* delta;  // raw pointer, NULL until assigned; ownership is not expressed in the type
+ ErrorSegment() : x(0), delta(NULL) {}  // starts at x == 0 with no delta attached
+};
+
+class ErrorSurface : public std::vector<ErrorSegment> {  // a vector of ErrorSegments plus (de)serialization; std::vector has no virtual destructor, so do not delete an ErrorSurface through a std::vector pointer
+ public:
+ ~ErrorSurface();  // defined out of line; NOTE(review): presumably frees each segment's delta -- confirm
+ void Serialize(std::string* out) const;  // NOTE(review): presumably writes an encoding of this surface into *out -- confirm
+ void Deserialize(ScoreType type, const std::string& in);  // NOTE(review): presumably rebuilds the surface from in, with type selecting the kind of Score to create -- confirm
+};
+
+#endif
diff --git a/vest/line_optimizer.h b/vest/line_optimizer.h
new file mode 100644
index 00000000..43164360
--- /dev/null
+++ b/vest/line_optimizer.h
@@ -0,0 +1,44 @@
+#ifndef LINE_OPTIMIZER_H_
+#define LINE_OPTIMIZER_H_
+
+#include <vector>
+
+#include "error_surface.h"
+#include "sampler.h"
+
+template <typename T> class SparseVector;  // forward declaration; this header only uses SparseVector through pointers
+class Weights;  // forward declaration; not referenced anywhere else in this header
+
+struct LineOptimizer {  // groups static line-search helpers; declares no data members
+
+ // Use MINIMIZE_SCORE for things like TER, WER and
+ // MAXIMIZE_SCORE for things like BLEU.
+ enum ScoreType { MAXIMIZE_SCORE, MINIMIZE_SCORE };  // nested enum; distinct from the global ScoreType declared in scorer.h, which it hides inside this struct
+
+ // Merge all the error surfaces together into a global
+ // error surface and find (the middle of) the best segment.
+ static double LineOptimize(
+ const std::vector<ErrorSurface>& envs,  // the error surfaces to merge
+ const LineOptimizer::ScoreType type,  // whether the best segment is the one with the highest or the lowest score
+ float* best_score,  // out-parameter; NOTE(review): presumably receives the score of the chosen segment -- confirm
+ const double epsilon = 1.0/65536.0);  // defaults to 2^-16; NOTE(review): exact role not visible in this header -- confirm in the .cc
+
+ // Return a random vector of length 1 where all dimensions
+ // not listed in dimensions will be 0.
+ static void RandomUnitVector(const std::vector<int>& dimensions,
+ SparseVector<double>* axis,  // out-parameter receiving the generated vector
+ RandomNumberGenerator<boost::mt19937>* rng);  // no boost header is included directly here; NOTE(review): presumably both names come from "sampler.h" -- confirm
+
+ // Generate a list of directions to optimize; the list will
+ // contain the orthogonal vectors corresponding to the dimensions in
+ // primary and then additional_random_directions directions in those
+ // dimensions as well. All vectors will be length 1.
+ static void CreateOptimizationDirections(
+ const std::vector<int>& primary,
+ int additional_random_directions,
+ RandomNumberGenerator<boost::mt19937>* rng,
+ std::vector<SparseVector<double> >* dirs);  // out-parameter receiving the list of directions
+
+};
+
+#endif
diff --git a/vest/scorer.h b/vest/scorer.h
new file mode 100644
index 00000000..b0bba640
--- /dev/null
+++ b/vest/scorer.h
@@ -0,0 +1,51 @@
+#ifndef SCORER_H_
+#define SCORER_H_
+
+#include <vector>
+#include <string>
+
+#include "wordid.h"
+
+class ViterbiEnvelope;  // forward declaration; used only by reference in SentenceScorer::ComputeErrorSurface
+class ErrorSurface;  // forward declaration; used only by pointer in SentenceScorer::ComputeErrorSurface
+
+enum ScoreType { IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, BLEU_minus_TER_over_2, SER };  // identifiers for the supported evaluation metrics; unscoped, so the enumerators live in the global namespace
+ScoreType ScoreTypeFromString(const std::string& st);  // maps a textual metric name to a ScoreType; accepted spellings and handling of unknown names are defined in the .cc
+
+class Score {  // abstract interface for an evaluation score; every member except the destructor is pure virtual
+ public:
+ virtual ~Score();  // virtual, so a derived score can be destroyed through a Score*
+ virtual float ComputeScore() const = 0;  // NOTE(review): presumably reduces the held statistics to a single number -- confirm
+ virtual void ScoreDetails(std::string* details) const = 0;  // NOTE(review): presumably writes a human-readable breakdown into *details -- confirm
+ virtual void PlusEquals(const Score& rhs) = 0;  // the only non-const operation; NOTE(review): presumably accumulates rhs into this score -- confirm
+ virtual void Subtract(const Score& rhs, Score* res) const = 0;  // result goes through res instead of a return value; NOTE(review): presumably *res = *this - rhs -- confirm
+ virtual Score* GetZero() const = 0;  // NOTE(review): presumably returns a new zero-valued score of the same kind; ownership not stated here -- confirm
+ virtual bool IsAdditiveIdentity() const = 0; // returns true if adding this delta
+ // to another score results in no score change
+ // under any circumstances
+ virtual void Encode(std::string* out) const = 0;  // NOTE(review): presumably serializes into *out in the form read by SentenceScorer::CreateScoreFromString -- confirm
+};
+
+class SentenceScorer {  // abstract base for per-sentence scorers, plus static factories keyed on ScoreType
+ public:
+ virtual ~SentenceScorer();  // virtual, so a derived scorer can be destroyed through a SentenceScorer*
+ void ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface* es) const;  // non-virtual; fills *es from the envelope ve -- NOTE(review): presumably by calling ScoreCandidate per segment; confirm in the .cc
+ virtual Score* ScoreCandidate(const std::vector<WordID>& hyp) const = 0;  // the one operation subclasses must provide; ownership of the returned Score is not stated here
+ static Score* CreateScoreFromString(const ScoreType type, const std::string& in);  // factory: builds a Score of the kind selected by type from the text in
+ static SentenceScorer* CreateSentenceScorer(const ScoreType type,  // factory: builds the scorer selected by type
+ const std::vector<std::vector<WordID> >& refs);  // refs: presumably one WordID sequence per reference translation -- confirm
+};
+
+class DocScorer {  // indexable collection of SentenceScorer pointers; NOTE(review): copying is not disabled, so if the destructor deletes scorers_ a copy would double-delete -- confirm
+ public:
+ ~DocScorer();  // non-virtual and defined out of line; NOTE(review): presumably deletes every element of scorers_ -- confirm
+ DocScorer(
+ const ScoreType type,  // metric identifier; NOTE(review): presumably forwarded to SentenceScorer::CreateSentenceScorer -- confirm
+ const std::vector<std::string>& ref_files);  // reference file names; how they are read is defined in the .cc
+ int size() const { return scorers_.size(); }  // number of scorers held; narrows size_t to int
+ const SentenceScorer* operator[](size_t i) const { return scorers_[i]; }  // unchecked access: i must be less than size()
+ private:
+ std::vector<SentenceScorer*> scorers_;  // raw pointers; ownership is not expressed in the type
+};
+
+#endif
diff --git a/vest/ter.h b/vest/ter.h
new file mode 100644
index 00000000..fe4ba36c
--- /dev/null
+++ b/vest/ter.h
@@ -0,0 +1,18 @@
+#ifndef _TER_H_
+#define _TER_H_
+
+#include "scorer.h"
+
+class TERScorerImpl;  // forward declaration only; the implementation type is kept out of this header
+
+class TERScorer : public SentenceScorer {  // SentenceScorer backed by a vector of TERScorerImpl pointers
+ public:
+ TERScorer(const std::vector<std::vector<WordID> >& references);  // references: presumably one WordID sequence per reference translation -- confirm
+ ~TERScorer();  // defined out of line; NOTE(review): presumably deletes the elements of impl_ -- confirm
+ Score* ScoreCandidate(const std::vector<WordID>& hyp) const;  // implements the pure virtual SentenceScorer::ScoreCandidate for hypothesis hyp
+ static Score* ScoreFromString(const std::string& data);  // builds a Score from text; NOTE(review): looks like the inverse of Score::Encode -- confirm
+ private:
+ std::vector<TERScorerImpl*> impl_;  // raw pointers; NOTE(review): presumably one entry per reference -- confirm
+};
+
+#endif
diff --git a/vest/viterbi_envelope.h b/vest/viterbi_envelope.h
new file mode 100644
index 00000000..39d2b537
--- /dev/null
+++ b/vest/viterbi_envelope.h
@@ -0,0 +1,79 @@
+#ifndef _VITERBI_ENVELOPE_H_
+#define _VITERBI_ENVELOPE_H_
+
+#include <vector>
+#include <iostream>
+#include <boost/shared_ptr.hpp>
+
+#include "hg.h"
+#include "sparse_vector.h"
+
+static const double kMinusInfinity = -std::numeric_limits<double>::infinity();  // used as a Segment's x when it has no predecessor; NOTE(review): <limits> is not included directly above -- confirm it arrives transitively
+static const double kPlusInfinity = std::numeric_limits<double>::infinity();  // not used elsewhere in this header; static gives each including translation unit its own copy
+
+struct Segment {  // one line y = m*x + b of an envelope, with links used to rebuild its translation
+ Segment() : x(), m(), b() {}  // value-initializes x, m and b to 0; p1, p2 and rule are default-constructed
+ Segment(double _m, double _b) :  // line with no predecessor, no parents and no rule
+ x(kMinusInfinity), m(_m), b(_b) {}
+ Segment(double _x, double _m, double _b, const boost::shared_ptr<Segment>& p1_, const boost::shared_ptr<Segment>& p2_) :  // line starting at _x that records its two parents; rule stays default-constructed
+ x(_x), m(_m), b(_b), p1(p1_), p2(p2_) {}
+ Segment(double _m, double _b, TRulePtr _rule) :  // line with no predecessor that carries a rule; p1 and p2 stay empty
+ x(kMinusInfinity), m(_m), b(_b), rule(_rule) {}
+
+ double x; // x intersection with previous segment in env, or -inf if none
+ double m; // this line's slope
+ double b; // intercept with y-axis
+
+ // we keep a pointer to the "parents" of this segment so we can reconstruct
+ // the Viterbi translation corresponding to this segment
+ boost::shared_ptr<Segment> p1;
+ boost::shared_ptr<Segment> p2;
+
+ // only Segments created from an edge using the ViterbiEnvelopeWeightFunction
+ // have rules
+ TRulePtr rule;  // TRulePtr is not declared in this header; NOTE(review): presumably provided by "hg.h" -- confirm
+
+ // recursively recover the Viterbi translation that will result from setting
+ // the weights to origin + axis * x, where x is any value from this->x up
+ // until the next largest x in the containing ViterbiEnvelope
+ void ConstructTranslation(std::vector<WordID>* trans) const;  // output is written through trans
+};
+
+// this is the semiring value type,
+// it defines constructors for 0, 1, and the operations + and *
+struct ViterbiEnvelope {
+ // create semiring zero
+ ViterbiEnvelope() : is_sorted(true) {} // zero: no segments, trivially sorted
+ // for debugging:
+ ViterbiEnvelope(const std::vector<boost::shared_ptr<Segment> >& s) : segs(s) { Sort(); }  // copies s then sorts; is_sorted is not in the initializer list -- NOTE(review): relies on Sort() to set it, confirm in the .cc
+ // create semiring 1 or 0
+ explicit ViterbiEnvelope(int i);  // defined out of line; which values of i give 1 or 0 is decided in the .cc
+ ViterbiEnvelope(int n, Segment* seg) : is_sorted(true), segs(n, boost::shared_ptr<Segment>(seg)) {}  // takes ownership of seg: all n entries are copies of one shared_ptr to it
+ const ViterbiEnvelope& operator+=(const ViterbiEnvelope& other);  // semiring addition, defined out of line
+ const ViterbiEnvelope& operator*=(const ViterbiEnvelope& other);  // semiring multiplication, defined out of line
+ bool IsMultiplicativeIdentity() const {  // true for exactly one segment with m == 0, b == 0 and no rule
+ return size() == 1 && (segs[0]->b == 0.0 && segs[0]->m == 0.0) && (!segs[0]->rule); }
+ const std::vector<boost::shared_ptr<Segment> >& GetSortedSegs() const {  // sorts on first use, then returns the stored vector by reference
+ if (!is_sorted) Sort();
+ return segs;
+ }
+ size_t size() const { return segs.size(); }  // number of stored segments; does not trigger a sort
+
+ private:
+ bool IsEdgeEnvelope() const {  // true for exactly one segment that carries a rule
+ return segs.size() == 1 && segs[0]->rule; }
+ void Sort() const;  // const but may modify the mutable members below; defined out of line
+ mutable bool is_sorted;  // mutable so the const accessor GetSortedSegs() can sort lazily
+ mutable std::vector<boost::shared_ptr<Segment> > segs;  // the envelope's segments, shared with other envelopes via shared_ptr
+};
+std::ostream& operator<<(std::ostream& os, const ViterbiEnvelope& env);  // stream output for an envelope; format is defined in the .cc
+
+struct ViterbiEnvelopeWeightFunction {  // function object mapping a hypergraph edge to a ViterbiEnvelope for a fixed origin and direction
+ ViterbiEnvelopeWeightFunction(const SparseVector<double>& ori,
+ const SparseVector<double>& dir) : origin(ori), direction(dir) {}  // copies both vectors into the const members
+ ViterbiEnvelope operator()(const Hypergraph::Edge& e) const;  // defined out of line; NOTE(review): presumably yields a one-segment envelope carrying the edge's rule -- confirm
+ const SparseVector<double> origin;  // stored by value, immutable after construction
+ const SparseVector<double> direction;  // stored by value, immutable after construction
+};
+
+#endif