From 9fae7b75c64f4dfb6e45ff8f2ec61b597f37d225 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Thu, 3 Dec 2009 16:49:30 -0500
Subject: fix build in new layout

---
 vest/comb_scorer.h      | 17 +++++++++++
 vest/error_surface.h    | 24 +++++++++++++++
 vest/line_optimizer.h   | 44 +++++++++++++++++++++++++++
 vest/scorer.h           | 51 +++++++++++++++++++++++++++++++
 vest/ter.h              | 18 +++++++++++
 vest/viterbi_envelope.h | 79 +++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 233 insertions(+)
 create mode 100644 vest/comb_scorer.h
 create mode 100644 vest/error_surface.h
 create mode 100644 vest/line_optimizer.h
 create mode 100644 vest/scorer.h
 create mode 100644 vest/ter.h
 create mode 100644 vest/viterbi_envelope.h

(limited to 'vest')

diff --git a/vest/comb_scorer.h b/vest/comb_scorer.h
new file mode 100644
index 00000000..70b1ec75
--- /dev/null
+++ b/vest/comb_scorer.h
@@ -0,0 +1,17 @@
+#ifndef _COMB_SCORER_
+#define _COMB_SCORER_
+
+#include "scorer.h"
+
+class BLEUTERCombinationScorer : public SentenceScorer {
+ public:
+  BLEUTERCombinationScorer(const std::vector<std::vector<WordID> >& refs);
+  ~BLEUTERCombinationScorer();
+  Score* ScoreCandidate(const std::vector<WordID>& hyp) const;
+  static Score* ScoreFromString(const std::string& in);
+ private:
+  SentenceScorer* bleu_;
+  SentenceScorer* ter_;
+};
+
+#endif
diff --git a/vest/error_surface.h b/vest/error_surface.h
new file mode 100644
index 00000000..a8734f54
--- /dev/null
+++ b/vest/error_surface.h
@@ -0,0 +1,24 @@
+#ifndef _ERROR_SURFACE_H_
+#define _ERROR_SURFACE_H_
+
+#include <vector>
+#include <string>
+
+#include "scorer.h"
+
+class Score;
+
+struct ErrorSegment {
+  double x;
+  Score* delta;
+  ErrorSegment() : x(0), delta(NULL) {}
+};
+
+class ErrorSurface : public std::vector<ErrorSegment> {
+ public:
+  ~ErrorSurface();
+  void Serialize(std::string* out) const;
+  void Deserialize(ScoreType type, const std::string& in);
+};
+
+#endif
diff --git a/vest/line_optimizer.h b/vest/line_optimizer.h
new file mode 100644
index 00000000..43164360
--- /dev/null
+++ b/vest/line_optimizer.h
@@ -0,0 +1,44 @@
+#ifndef LINE_OPTIMIZER_H_
+#define LINE_OPTIMIZER_H_
+
+#include <vector>
+
+#include "error_surface.h"
+#include "sampler.h"
+
+template <typename T> class SparseVector;
+class Weights;
+
+struct LineOptimizer {
+
+  // use MINIMIZE_SCORE for things like TER, WER
+  // MAXIMIZE_SCORE for things like BLEU
+  enum ScoreType { MAXIMIZE_SCORE, MINIMIZE_SCORE };
+
+  // merge all the error surfaces together into a global
+  // error surface and find (the middle of) the best segment
+  static double LineOptimize(
+     const std::vector<ErrorSurface>& envs,
+     const LineOptimizer::ScoreType type,
+     float* best_score,
+     const double epsilon = 1.0/65536.0);
+
+  // return a random vector of length 1 where all dimensions
+  // not listed in dimensions will be 0.
+  static void RandomUnitVector(const std::vector<int>& dimensions,
+                               SparseVector<double>* axis,
+                               RandomNumberGenerator* rng);
+
+  // generate a list of directions to optimize; the list will
+  // contain the orthogonal vectors corresponding to the dimensions in
+  // primary and then additional_random_directions directions in those
+  // dimensions as well.  All vectors will be length 1.
+  static void CreateOptimizationDirections(
+     const std::vector<int>& primary,
+     int additional_random_directions,
+     RandomNumberGenerator* rng,
+     std::vector<SparseVector<double> >* dirs);
+
+};
+
+#endif
diff --git a/vest/scorer.h b/vest/scorer.h
new file mode 100644
index 00000000..b0bba640
--- /dev/null
+++ b/vest/scorer.h
@@ -0,0 +1,51 @@
+#ifndef SCORER_H_
+#define SCORER_H_
+
+#include <vector>
+#include <string>
+
+#include "wordid.h"
+
+class ViterbiEnvelope;
+class ErrorSurface;
+
+enum ScoreType { IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, BLEU_minus_TER_over_2, SER };
+ScoreType ScoreTypeFromString(const std::string& st);
+
+class Score {
+ public:
+  virtual ~Score();
+  virtual float ComputeScore() const = 0;
+  virtual void ScoreDetails(std::string* details) const = 0;
+  virtual void PlusEquals(const Score& rhs) = 0;
+  virtual void Subtract(const Score& rhs, Score* res) const = 0;
+  virtual Score* GetZero() const = 0;
+  virtual bool IsAdditiveIdentity() const = 0; // returns true if adding this delta
+                                      // to another score results in no score change
+                                      // under any circumstances
+  virtual void Encode(std::string* out) const = 0;
+};
+
+class SentenceScorer {
+ public:
+  virtual ~SentenceScorer();
+  void ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface* es) const;
+  virtual Score* ScoreCandidate(const std::vector<WordID>& hyp) const = 0;
+  static Score* CreateScoreFromString(const ScoreType type, const std::string& in);
+  static SentenceScorer* CreateSentenceScorer(const ScoreType type,
+    const std::vector<std::vector<WordID> >& refs);
+};
+
+class DocScorer {
+ public:
+  ~DocScorer();
+  DocScorer(
+    const ScoreType type,
+    const std::vector<std::string>& ref_files);
+  int size() const { return scorers_.size(); }
+  const SentenceScorer* operator[](size_t i) const { return scorers_[i]; }
+ private:
+  std::vector<SentenceScorer*> scorers_;
+};
+
+#endif
diff --git a/vest/ter.h b/vest/ter.h
new file mode 100644
index 00000000..fe4ba36c
--- /dev/null
+++ b/vest/ter.h
@@ -0,0 +1,18 @@
+#ifndef _TER_H_
+#define _TER_H_
+
+#include "scorer.h"
+
+class TERScorerImpl;
+
+class TERScorer : public SentenceScorer {
+ public:
+  TERScorer(const std::vector<std::vector<WordID> >& references);
+  ~TERScorer();
+  Score* ScoreCandidate(const std::vector<WordID>& hyp) const;
+  static Score* ScoreFromString(const std::string& data);
+ private:
+  std::vector<TERScorerImpl*> impl_;
+};
+
+#endif
diff --git a/vest/viterbi_envelope.h b/vest/viterbi_envelope.h
new file mode 100644
index 00000000..39d2b537
--- /dev/null
+++ b/vest/viterbi_envelope.h
@@ -0,0 +1,79 @@
+#ifndef _VITERBI_ENVELOPE_H_
+#define _VITERBI_ENVELOPE_H_
+
+#include <vector>
+#include <iostream>
+#include <boost/shared_ptr.hpp>
+
+#include "hg.h"
+#include "sparse_vector.h"
+
+static const double kMinusInfinity = -std::numeric_limits<double>::infinity();
+static const double kPlusInfinity = std::numeric_limits<double>::infinity();
+
+struct Segment {
+  Segment() : x(), m(), b() {}
+  Segment(double _m, double _b) :
+    x(kMinusInfinity), m(_m), b(_b) {}
+  Segment(double _x, double _m, double _b, const boost::shared_ptr<Segment>& p1_, const boost::shared_ptr<Segment>& p2_) :
+    x(_x), m(_m), b(_b), p1(p1_), p2(p2_) {}
+  Segment(double _m, double _b, TRulePtr _rule) :
+    x(kMinusInfinity), m(_m), b(_b), rule(_rule) {}
+
+  double x;                   // x intersection with previous segment in env, or -inf if none
+  double m;                   // this line's slope
+  double b;                   // intercept with y-axis
+
+  // we keep a pointer to the "parents" of this segment so we can reconstruct
+  // the Viterbi translation corresponding to this segment
+  boost::shared_ptr<Segment> p1;
+  boost::shared_ptr<Segment> p2;
+
+  // only Segments created from an edge using the ViterbiEnvelopeWeightFunction
+  // have rules
+  TRulePtr rule;
+
+  // recursively recover the Viterbi translation that will result from setting
+  // the weights to origin + axis * x, where x is any value from this->x up
+  // until the next largest x in the containing ViterbiEnvelope
+  void ConstructTranslation(std::vector<WordID>* trans) const;
+};
+
+// this is the semiring value type,
+// it defines constructors for 0, 1, and the operations + and *
+struct ViterbiEnvelope {
+  // create semiring zero
+  ViterbiEnvelope() : is_sorted(true) {}  // zero
+  // for debugging:
+  ViterbiEnvelope(const std::vector<boost::shared_ptr<Segment> >& s) : segs(s) { Sort(); }
+  // create semiring 1 or 0
+  explicit ViterbiEnvelope(int i);
+  ViterbiEnvelope(int n, Segment* seg) : is_sorted(true), segs(n, boost::shared_ptr<Segment>(seg)) {}
+  const ViterbiEnvelope& operator+=(const ViterbiEnvelope& other);
+  const ViterbiEnvelope& operator*=(const ViterbiEnvelope& other);
+  bool IsMultiplicativeIdentity() const {
+    return size() == 1 && (segs[0]->b == 0.0 && segs[0]->m == 0.0) && (!segs[0]->rule); }
+  const std::vector<boost::shared_ptr<Segment> >& GetSortedSegs() const {
+    if (!is_sorted) Sort();
+    return segs;
+  }
+  size_t size() const { return segs.size(); }
+
+ private:
+  bool IsEdgeEnvelope() const {
+    return segs.size() == 1 && segs[0]->rule; }
+  void Sort() const;
+  mutable bool is_sorted;
+  mutable std::vector<boost::shared_ptr<Segment> > segs;
+};
+std::ostream& operator<<(std::ostream& os, const ViterbiEnvelope& env);
+
+struct ViterbiEnvelopeWeightFunction {
+  ViterbiEnvelopeWeightFunction(const SparseVector<double>& ori,
+                                const SparseVector<double>& dir) : origin(ori), direction(dir) {}
+  ViterbiEnvelope operator()(const Hypergraph::Edge& e) const;
+  const SparseVector<double> origin;
+  const SparseVector<double> direction;
+};
+
+#endif
-- 
cgit v1.2.3