diff options
author | Chris Dyer <redpony@gmail.com> | 2009-12-03 16:49:30 -0500 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2009-12-03 16:49:30 -0500 |
commit | 9fae7b75c64f4dfb6e45ff8f2ec61b597f37d225 (patch) | |
tree | 61a187c4fa6db8527f90f3060d18357f7b60b44a /vest | |
parent | 671c21451542e2dd20e45b4033d44d8e8735f87b (diff) |
fix build in new layout
Diffstat (limited to 'vest')
-rw-r--r-- | vest/comb_scorer.h | 17 | ||||
-rw-r--r-- | vest/error_surface.h | 24 | ||||
-rw-r--r-- | vest/line_optimizer.h | 44 | ||||
-rw-r--r-- | vest/scorer.h | 51 | ||||
-rw-r--r-- | vest/ter.h | 18 | ||||
-rw-r--r-- | vest/viterbi_envelope.h | 79 |
6 files changed, 233 insertions, 0 deletions
diff --git a/vest/comb_scorer.h b/vest/comb_scorer.h new file mode 100644 index 00000000..70b1ec75 --- /dev/null +++ b/vest/comb_scorer.h @@ -0,0 +1,17 @@ +#ifndef _COMB_SCORER_ +#define _COMB_SCORER_ + +#include "scorer.h" + +class BLEUTERCombinationScorer : public SentenceScorer { + public: + BLEUTERCombinationScorer(const std::vector<std::vector<WordID> >& refs); + ~BLEUTERCombinationScorer(); + Score* ScoreCandidate(const std::vector<WordID>& hyp) const; + static Score* ScoreFromString(const std::string& in); + private: + SentenceScorer* bleu_; + SentenceScorer* ter_; +}; + +#endif diff --git a/vest/error_surface.h b/vest/error_surface.h new file mode 100644 index 00000000..a8734f54 --- /dev/null +++ b/vest/error_surface.h @@ -0,0 +1,24 @@ +#ifndef _ERROR_SURFACE_H_ +#define _ERROR_SURFACE_H_ + +#include <vector> +#include <string> + +#include "scorer.h" + +class Score; + +struct ErrorSegment { + double x; + Score* delta; + ErrorSegment() : x(0), delta(NULL) {} +}; + +class ErrorSurface : public std::vector<ErrorSegment> { + public: + ~ErrorSurface(); + void Serialize(std::string* out) const; + void Deserialize(ScoreType type, const std::string& in); +}; + +#endif diff --git a/vest/line_optimizer.h b/vest/line_optimizer.h new file mode 100644 index 00000000..43164360 --- /dev/null +++ b/vest/line_optimizer.h @@ -0,0 +1,44 @@ +#ifndef LINE_OPTIMIZER_H_ +#define LINE_OPTIMIZER_H_ + +#include <vector> + +#include "error_surface.h" +#include "sampler.h" + +template <typename T> class SparseVector; +class Weights; + +struct LineOptimizer { + + // use MINIMIZE_SCORE for things like TER, WER + // MAXIMIZE_SCORE for things like BLEU + enum ScoreType { MAXIMIZE_SCORE, MINIMIZE_SCORE }; + + // merge all the error surfaces together into a global + // error surface and find (the middle of) the best segment + static double LineOptimize( + const std::vector<ErrorSurface>& envs, + const LineOptimizer::ScoreType type, + float* best_score, + const double epsilon = 1.0/65536.0); + + // return a random vector of length 1 where all dimensions + // not listed in dimensions will be 0. + static void RandomUnitVector(const std::vector<int>& dimensions, + SparseVector<double>* axis, + RandomNumberGenerator<boost::mt19937>* rng); + + // generate a list of directions to optimize; the list will + // contain the orthogonal vectors corresponding to the dimensions in + // primary and then additional_random_directions directions in those + // dimensions as well. All vectors will be length 1. + static void CreateOptimizationDirections( + const std::vector<int>& primary, + int additional_random_directions, + RandomNumberGenerator<boost::mt19937>* rng, + std::vector<SparseVector<double> >* dirs); + +}; + +#endif diff --git a/vest/scorer.h b/vest/scorer.h new file mode 100644 index 00000000..b0bba640 --- /dev/null +++ b/vest/scorer.h @@ -0,0 +1,51 @@ +#ifndef SCORER_H_ +#define SCORER_H_ + +#include <vector> +#include <string> + +#include "wordid.h" + +class ViterbiEnvelope; +class ErrorSurface; + +enum ScoreType { IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, BLEU_minus_TER_over_2, SER }; +ScoreType ScoreTypeFromString(const std::string& st); + +class Score { + public: + virtual ~Score(); + virtual float ComputeScore() const = 0; + virtual void ScoreDetails(std::string* details) const = 0; + virtual void PlusEquals(const Score& rhs) = 0; + virtual void Subtract(const Score& rhs, Score* res) const = 0; + virtual Score* GetZero() const = 0; + virtual bool IsAdditiveIdentity() const = 0; // returns true if adding this delta + // to another score results in no score change + // under any circumstances + virtual void Encode(std::string* out) const = 0; +}; + +class SentenceScorer { + public: + virtual ~SentenceScorer(); + void ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface* es) const; + virtual Score* ScoreCandidate(const std::vector<WordID>& hyp) const = 0; + static Score* CreateScoreFromString(const ScoreType type, const std::string& in); + static SentenceScorer* CreateSentenceScorer(const ScoreType type, + const std::vector<std::vector<WordID> >& refs); +}; + +class DocScorer { + public: + ~DocScorer(); + DocScorer( + const ScoreType type, + const std::vector<std::string>& ref_files); + int size() const { return scorers_.size(); } + const SentenceScorer* operator[](size_t i) const { return scorers_[i]; } + private: + std::vector<SentenceScorer*> scorers_; +}; + +#endif diff --git a/vest/ter.h b/vest/ter.h new file mode 100644 index 00000000..fe4ba36c --- /dev/null +++ b/vest/ter.h @@ -0,0 +1,18 @@ +#ifndef _TER_H_ +#define _TER_H_ + +#include "scorer.h" + +class TERScorerImpl; + +class TERScorer : public SentenceScorer { + public: + TERScorer(const std::vector<std::vector<WordID> >& references); + ~TERScorer(); + Score* ScoreCandidate(const std::vector<WordID>& hyp) const; + static Score* ScoreFromString(const std::string& data); + private: + std::vector<TERScorerImpl*> impl_; +}; + +#endif diff --git a/vest/viterbi_envelope.h b/vest/viterbi_envelope.h new file mode 100644 index 00000000..39d2b537 --- /dev/null +++ b/vest/viterbi_envelope.h @@ -0,0 +1,79 @@ +#ifndef _VITERBI_ENVELOPE_H_ +#define _VITERBI_ENVELOPE_H_ + +#include <vector> +#include <iostream> +#include <boost/shared_ptr.hpp> + +#include "hg.h" +#include "sparse_vector.h" + +static const double kMinusInfinity = -std::numeric_limits<double>::infinity(); +static const double kPlusInfinity = std::numeric_limits<double>::infinity(); + +struct Segment { + Segment() : x(), m(), b() {} + Segment(double _m, double _b) : + x(kMinusInfinity), m(_m), b(_b) {} + Segment(double _x, double _m, double _b, const boost::shared_ptr<Segment>& p1_, const boost::shared_ptr<Segment>& p2_) : + x(_x), m(_m), b(_b), p1(p1_), p2(p2_) {} + Segment(double _m, double _b, TRulePtr _rule) : + x(kMinusInfinity), m(_m), b(_b), rule(_rule) {} + + double x; // x intersection with previous segment in env, or -inf if none + double m; // this line's slope + double b; // intercept with y-axis + + // we keep a pointer to the "parents" of this segment so we can reconstruct + // the Viterbi translation corresponding to this segment + boost::shared_ptr<Segment> p1; + boost::shared_ptr<Segment> p2; + + // only Segments created from an edge using the ViterbiEnvelopeWeightFunction + // have rules + TRulePtr rule; + + // recursively recover the Viterbi translation that will result from setting + // the weights to origin + axis * x, where x is any value from this->x up + // until the next largest x in the containing ViterbiEnvelope + void ConstructTranslation(std::vector<WordID>* trans) const; +}; + +// this is the semiring value type, +// it defines constructors for 0, 1, and the operations + and * +struct ViterbiEnvelope { + // create semiring zero + ViterbiEnvelope() : is_sorted(true) {} // zero + // for debugging: + ViterbiEnvelope(const std::vector<boost::shared_ptr<Segment> >& s) : segs(s) { Sort(); } + // create semiring 1 or 0 + explicit ViterbiEnvelope(int i); + ViterbiEnvelope(int n, Segment* seg) : is_sorted(true), segs(n, boost::shared_ptr<Segment>(seg)) {} + const ViterbiEnvelope& operator+=(const ViterbiEnvelope& other); + const ViterbiEnvelope& operator*=(const ViterbiEnvelope& other); + bool IsMultiplicativeIdentity() const { + return size() == 1 && (segs[0]->b == 0.0 && segs[0]->m == 0.0) && (!segs[0]->rule); } + const std::vector<boost::shared_ptr<Segment> >& GetSortedSegs() const { + if (!is_sorted) Sort(); + return segs; + } + size_t size() const { return segs.size(); } + + private: + bool IsEdgeEnvelope() const { + return segs.size() == 1 && segs[0]->rule; } + void Sort() const; + mutable bool is_sorted; + mutable std::vector<boost::shared_ptr<Segment> > segs; +}; +std::ostream& operator<<(std::ostream& os, const ViterbiEnvelope& env); + +struct ViterbiEnvelopeWeightFunction { + ViterbiEnvelopeWeightFunction(const SparseVector<double>& ori, + const SparseVector<double>& dir) : origin(ori), direction(dir) {} + ViterbiEnvelope operator()(const Hypergraph::Edge& e) const; + const SparseVector<double> origin; + const SparseVector<double> direction; +}; + +#endif |