diff options
Diffstat (limited to 'vest')
-rw-r--r-- | vest/aer_scorer.cc | 21 | ||||
-rw-r--r-- | vest/aer_scorer.h | 6 | ||||
-rw-r--r-- | vest/comb_scorer.cc | 35 | ||||
-rw-r--r-- | vest/comb_scorer.h | 9 | ||||
-rw-r--r-- | vest/error_surface.cc | 5 | ||||
-rw-r--r-- | vest/error_surface.h | 4 | ||||
-rw-r--r-- | vest/line_optimizer.cc | 5 | ||||
-rw-r--r-- | vest/mr_vest_generate_mapper_input.cc | 27 | ||||
-rw-r--r-- | vest/scorer.cc | 111 | ||||
-rw-r--r-- | vest/scorer.h | 44 | ||||
-rw-r--r-- | vest/ter.cc | 31 | ||||
-rw-r--r-- | vest/ter.h | 6 |
12 files changed, 142 insertions, 162 deletions
diff --git a/vest/aer_scorer.cc b/vest/aer_scorer.cc index d3f28804..253076c5 100644 --- a/vest/aer_scorer.cc +++ b/vest/aer_scorer.cc @@ -30,11 +30,11 @@ class AERScore : public Score { } - virtual Score* GetZero() const { - return new AERScore; + virtual ScoreP GetZero() const { + return ScoreP(new AERScore); } - virtual Score* GetOne() const { - return new AERScore; + virtual ScoreP GetOne() const { + return ScoreP(new AERScore); } virtual void Subtract(const Score& rhs, Score* out) const { AERScore* res = static_cast<AERScore*>(out); @@ -95,12 +95,11 @@ static inline bool Safe(const Array2D<bool>& a, int i, int j) { return false; } -Score* AERScorer::ScoreCCandidate(const vector<WordID>& shyp) const { - Score* a = NULL; - return a; +ScoreP AERScorer::ScoreCCandidate(const vector<WordID>& shyp) const { + return ScoreP(); } -Score* AERScorer::ScoreCandidate(const vector<WordID>& shyp) const { +ScoreP AERScorer::ScoreCandidate(const vector<WordID>& shyp) const { boost::shared_ptr<Array2D<bool> > hyp = AlignerTools::ReadPharaohAlignmentGrid(TD::GetString(shyp)); @@ -121,15 +120,15 @@ Score* AERScorer::ScoreCandidate(const vector<WordID>& shyp) const { for (int j = 0; j < hyp->height(); ++j) if ((*hyp)(i,j)) ++p; - return new AERScore(m,p,r); + return ScoreP(new AERScore(m,p,r)); } -Score* AERScorer::ScoreFromString(const string& in) { +ScoreP AERScorer::ScoreFromString(const string& in) { AERScore* res = new AERScore; res->num_matches = *(const int *)&in[sizeof(int) * 0]; res->num_predicted = *(const int *)&in[sizeof(int) * 1]; res->num_in_ref = *(const int *)&in[sizeof(int) * 2]; - return res; + return ScoreP(res); } const std::string* AERScorer::GetSource() const { return &src_; } diff --git a/vest/aer_scorer.h b/vest/aer_scorer.h index d0df35d5..6d53d359 100644 --- a/vest/aer_scorer.h +++ b/vest/aer_scorer.h @@ -11,9 +11,9 @@ class AERScorer : public SentenceScorer { // when constructing alignment strings from a hypergraph, the source // is necessary. AERScorer(const std::vector<std::vector<WordID> >& refs, const std::string& src = ""); - Score* ScoreCandidate(const std::vector<WordID>& hyp) const; - Score* ScoreCCandidate(const std::vector<WordID>& hyp) const; - static Score* ScoreFromString(const std::string& in); + ScoreP ScoreCandidate(const std::vector<WordID>& hyp) const; + ScoreP ScoreCCandidate(const std::vector<WordID>& hyp) const; + static ScoreP ScoreFromString(const std::string& in); const std::string* GetSource() const; private: std::string src_; diff --git a/vest/comb_scorer.cc b/vest/comb_scorer.cc index 3dd077a6..a921aa4d 100644 --- a/vest/comb_scorer.cc +++ b/vest/comb_scorer.cc @@ -14,7 +14,7 @@ class BLEUTERCombinationScore : public Score { } void ScoreDetails(string* details) const { char buf[160]; - sprintf(buf, "Combi = %.2f, BLEU = %.2f, TER = %.2f", + sprintf(buf, "Combi = %.2f, BLEU = %.2f, TER = %.2f", ComputeScore()*100.0f, bleu->ComputeScore()*100.0f, ter->ComputeScore()*100.0f); *details = buf; } @@ -31,23 +31,23 @@ class BLEUTERCombinationScore : public Score { - Score* GetOne() const { + ScoreP GetOne() const { BLEUTERCombinationScore* res = new BLEUTERCombinationScore; res->bleu = bleu->GetOne(); res->ter = ter->GetOne(); - return res; + return ScoreP(res); } - Score* GetZero() const { + ScoreP GetZero() const { BLEUTERCombinationScore* res = new BLEUTERCombinationScore; res->bleu = bleu->GetZero(); res->ter = ter->GetZero(); - return res; + return ScoreP(res); } void Subtract(const Score& rhs, Score* res) const { bleu->Subtract(*static_cast<const BLEUTERCombinationScore&>(rhs).bleu, - static_cast<BLEUTERCombinationScore*>(res)->bleu); + static_cast<BLEUTERCombinationScore*>(res)->bleu.get()); ter->Subtract(*static_cast<const BLEUTERCombinationScore&>(rhs).ter, - static_cast<BLEUTERCombinationScore*>(res)->ter); + static_cast<BLEUTERCombinationScore*>(res)->ter.get()); } void Encode(std::string* out) const { string bs, ts; @@ -62,13 +62,11 @@ class BLEUTERCombinationScore : public Score { return bleu->IsAdditiveIdentity() && ter->IsAdditiveIdentity(); } private: - Score* bleu; - Score* ter; + ScoreP bleu; + ScoreP ter; }; BLEUTERCombinationScore::~BLEUTERCombinationScore() { - delete bleu; - delete ter; } BLEUTERCombinationScorer::BLEUTERCombinationScorer(const vector<vector<WordID> >& refs) { @@ -77,26 +75,23 @@ BLEUTERCombinationScorer::BLEUTERCombinationScorer(const vector<vector<WordID> > } BLEUTERCombinationScorer::~BLEUTERCombinationScorer() { - delete bleu_; - delete ter_; } -Score* BLEUTERCombinationScorer::ScoreCCandidate(const vector<WordID>& hyp) const { - Score* a = NULL; - return a; +ScoreP BLEUTERCombinationScorer::ScoreCCandidate(const vector<WordID>& hyp) const { + return ScoreP(); } -Score* BLEUTERCombinationScorer::ScoreCandidate(const std::vector<WordID>& hyp) const { +ScoreP BLEUTERCombinationScorer::ScoreCandidate(const std::vector<WordID>& hyp) const { BLEUTERCombinationScore* res = new BLEUTERCombinationScore; res->bleu = bleu_->ScoreCandidate(hyp); res->ter = ter_->ScoreCandidate(hyp); - return res; + return ScoreP(res); } -Score* BLEUTERCombinationScorer::ScoreFromString(const std::string& in) { +ScoreP BLEUTERCombinationScorer::ScoreFromString(const std::string& in) { int bss = in[0]; BLEUTERCombinationScore* r = new BLEUTERCombinationScore; r->bleu = SentenceScorer::CreateScoreFromString(IBM_BLEU, in.substr(1, bss)); r->ter = SentenceScorer::CreateScoreFromString(TER, in.substr(1 + bss)); - return r; + return ScoreP(r); } diff --git a/vest/comb_scorer.h b/vest/comb_scorer.h index 1a4f3324..346be576 100644 --- a/vest/comb_scorer.h +++ b/vest/comb_scorer.h @@ -7,12 +7,11 @@ class BLEUTERCombinationScorer : public SentenceScorer { public: BLEUTERCombinationScorer(const std::vector<std::vector<WordID> >& refs); ~BLEUTERCombinationScorer(); - Score* ScoreCandidate(const std::vector<WordID>& hyp) const; - Score* ScoreCCandidate(const std::vector<WordID>& hyp) const; - static Score* ScoreFromString(const std::string& in); + ScoreP ScoreCandidate(const std::vector<WordID>& hyp) const; + ScoreP ScoreCCandidate(const std::vector<WordID>& hyp) const; + static ScoreP ScoreFromString(const std::string& in); private: - SentenceScorer* bleu_; - SentenceScorer* ter_; + ScorerP bleu_,ter_; }; #endif diff --git a/vest/error_surface.cc b/vest/error_surface.cc index 4e0af35c..754aa8de 100644 --- a/vest/error_surface.cc +++ b/vest/error_surface.cc @@ -6,9 +6,6 @@ using namespace std; ErrorSurface::~ErrorSurface() { - for (ErrorSurface::iterator i = begin(); i != end(); ++i) - //delete i->delta; - ; } void ErrorSurface::Serialize(std::string* out) const { @@ -29,7 +26,7 @@ void ErrorSurface::Serialize(std::string* out) const { } void ErrorSurface::Deserialize(ScoreType type, const std::string& in) { - istringstream is(in, ios::binary); + istringstream is(in, ios::binary); int segments; is.read((char*)&segments, sizeof(segments)); this->resize(segments); diff --git a/vest/error_surface.h b/vest/error_surface.h index a8734f54..ad728cfa 100644 --- a/vest/error_surface.h +++ b/vest/error_surface.h @@ -10,8 +10,8 @@ class Score; struct ErrorSegment { double x; - Score* delta; - ErrorSegment() : x(0), delta(NULL) {} + ScoreP delta; + ErrorSegment() : x(0), delta() {} }; class ErrorSurface : public std::vector<ErrorSegment> { diff --git a/vest/line_optimizer.cc b/vest/line_optimizer.cc index e8b40237..70a00cbc 100644 --- a/vest/line_optimizer.cc +++ b/vest/line_optimizer.cc @@ -32,7 +32,8 @@ double LineOptimizer::LineOptimize( } sort(all_ints.begin(), all_ints.end(), IntervalComp()); double last_boundary = all_ints.front()->x; - Score* acc = all_ints.front()->delta->GetZero(); + ScoreP accp = all_ints.front()->delta->GetZero(); + Score *acc=accp.get(); float& cur_best_score = *best_score; cur_best_score = (type == MAXIMIZE_SCORE ? -numeric_limits<float>::max() : numeric_limits<float>::max()); @@ -72,7 +73,6 @@ double LineOptimizer::LineOptimize( pos = last_boundary + 1000.0; } } - delete acc; return pos; } @@ -92,7 +92,6 @@ void LineOptimizer::CreateOptimizationDirections( vector<SparseVector<double> >* dirs , bool include_orthogonal ) { - const int num_directions = features_to_optimize.size() + additional_random_directions; dirs->clear(); typedef SparseVector<double> Dir; vector<Dir> &out=*dirs; diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc index 5b513f9b..f66b5082 100644 --- a/vest/mr_vest_generate_mapper_input.cc +++ b/vest/mr_vest_generate_mapper_input.cc @@ -99,7 +99,6 @@ struct oracle_directions { ("fear_to_hope,f",po::bool_switch(&fear_to_hope),"for each of the oracle_directions, also include a direction from fear to hope (as well as origin to hope)") ("no_old_to_hope","don't emit the usual old -> hope oracle") ("decoder_translations",po::value<string>(&decoder_translations_file)->default_value(""),"one per line decoder 1best translations for computing document BLEU vs. sentences-seen-so-far BLEU") - ("verbose",po::bool_switch(&verbose),"detailed logs") ; } void InitCommandLine(int argc, char *argv[], po::variables_map *conf) { @@ -133,6 +132,7 @@ struct oracle_directions { } UseConf(*conf); + verbose=oracle.verbose; return; bad_cmdline: cerr << dcmdline_options << endl; @@ -158,15 +158,6 @@ struct oracle_directions { vector<string> optimize_features; void UseConf(po::variables_map const& conf) { oracle.UseConf(conf); - // po::value<X>(&var) takes care of below: - // fear_to_hope=conf.count("fear_to_hope"); - // n_random=conf["random_directions"].as<unsigned int>(); - // forest_repository=conf["forest_repository"].as<string>(); - // dev_set_size=conf["dev_set_size"].as<unsigned int>(); - // n_oracle=conf["oracle_directions"].as<unsigned>(); - // oracle_batch=conf["oracle_batch"].as<unsigned>(); - // max_similarity=conf["max_similarity"].as<double>(); - // weights_file=conf["weights"].as<string>(); include_primary=!conf.count("no_primary"); old_to_hope=!conf.count("no_old_to_hope"); @@ -201,9 +192,11 @@ struct oracle_directions { model_scores.resize(model_hyps.size()); for (int i=0;i<model_hyps.size();++i) { //FIXME: what is scoreccand? with / without clipping? do without for consistency w/ oracle - Score *s=oracle.ds[i]->ScoreCandidate(model_hyps[i]); - model_scores[i].reset(s); - oracle.doc_score->PlusEquals(*s); + model_scores[i]=oracle.ds[i]->ScoreCandidate(model_hyps[i]); + if (verbose) cerr<<"Before model["<<i<<"]: "<<ds().ScoreDetails()<<endl; + if (verbose) cerr<<"model["<<i<<"]: "<<model_scores[i]->ScoreDetails()<<endl; + oracle.doc_score->PlusEquals(*model_scores[i]); + if (verbose) cerr<<"After model["<<i<<"]: "<<ds().ScoreDetails()<<endl; } //TODO: compute doc bleu stats for each sentence, then when getting oracle temporarily exclude stats for that sentence (skip regular score updating) } @@ -252,12 +245,12 @@ struct oracle_directions { Timer t("Loading forest from JSON "+forest_file(i)); HypergraphIO::ReadFromJSON(rf.stream(), &hg); } - if (verbose) cerr<<"Before oracle["<<i<<"]: "<<ds().ScoreDetails(); - o=oracle.ComputeOracle(oracle.MakeMetadata(hg,i),&hg,origin,&cerr); + if (verbose) cerr<<"Before oracle["<<i<<"]: "<<ds().ScoreDetails()<<endl; + o=oracle.ComputeOracle(oracle.MakeMetadata(hg,i),&hg,origin); if (verbose) { cerr << o; - cerr<<" ; after: "<<ds().ScoreDetails() - <<" oracle="<<oracle.GetScore(o.hope.sentence,i)->ScoreDetails() + cerr<<"After oracle: "<<ds().ScoreDetails()<<endl + <<" oracle="<<oracle.GetScore(o.hope.sentence,i)->ScoreDetails()<<endl <<" model="<<oracle.GetScore(o.model.sentence,i)->ScoreDetails()<<endl; if (have_doc) cerr<<" doc (should = model): "<<model_scores[i]->ScoreDetails()<<endl; diff --git a/vest/scorer.cc b/vest/scorer.cc index d8628418..5cad948d 100644 --- a/vest/scorer.cc +++ b/vest/scorer.cc @@ -1,6 +1,4 @@ #include "scorer.h" -#define DEBUG_SCORER - #include <boost/lexical_cast.hpp> #include <map> @@ -24,6 +22,7 @@ #include "stringlib.h" #include "lattice.h" + using boost::shared_ptr; using namespace std; @@ -107,8 +106,8 @@ class SERScore : public Score { correct += static_cast<const SERScore&>(delta).correct; total += static_cast<const SERScore&>(delta).total; } - Score* GetZero() const { return new SERScore; } - Score* GetOne() const { return new SERScore; } + ScoreP GetZero() const { return ScoreP(new SERScore); } + ScoreP GetOne() const { return ScoreP(new SERScore); } void Subtract(const Score& rhs, Score* res) const { SERScore* r = static_cast<SERScore*>(res); r->correct = correct - static_cast<const SERScore&>(rhs).correct; @@ -131,18 +130,17 @@ std::string SentenceScorer::verbose_desc() const { class SERScorer : public SentenceScorer { public: SERScorer(const vector<vector<WordID> >& references) : SentenceScorer("SERScorer",references),refs_(references) {} - Score* ScoreCCandidate(const vector<WordID>& /* hyp */) const { - Score* a = NULL; - return a; + ScoreP ScoreCCandidate(const vector<WordID>& /* hyp */) const { + return ScoreP(); } - Score* ScoreCandidate(const vector<WordID>& hyp) const { + ScoreP ScoreCandidate(const vector<WordID>& hyp) const { SERScore* res = new SERScore; res->total = 1; for (int i = 0; i < refs_.size(); ++i) if (refs_[i] == hyp) res->correct = 1; - return res; + return ScoreP(res); } - static Score* ScoreFromString(const string& data) { + static ScoreP ScoreFromString(const string& data) { assert(!"Not implemented"); } private: @@ -164,8 +162,8 @@ class BLEUScore : public Score { void PlusEquals(const Score& delta); void PlusEquals(const Score& delta, const float scale); void PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len); - Score* GetZero() const; - Score* GetOne() const; + ScoreP GetZero() const; + ScoreP GetOne() const; void Subtract(const Score& rhs, Score* res) const; void Encode(string* out) const; bool IsAdditiveIdentity() const { @@ -189,9 +187,9 @@ class BLEUScorerBase : public SentenceScorer { BLEUScorerBase(const vector<vector<WordID> >& references, int n ); - Score* ScoreCandidate(const vector<WordID>& hyp) const; - Score* ScoreCCandidate(const vector<WordID>& hyp) const; - static Score* ScoreFromString(const string& in); + ScoreP ScoreCandidate(const vector<WordID>& hyp) const; + ScoreP ScoreCCandidate(const vector<WordID>& hyp) const; + static ScoreP ScoreFromString(const string& in); virtual float ComputeRefLength(const vector<WordID>& hyp) const = 0; private: @@ -272,7 +270,7 @@ class BLEUScorerBase : public SentenceScorer { vector<int> lengths_; }; -Score* BLEUScorerBase::ScoreFromString(const string& in) { +ScoreP BLEUScorerBase::ScoreFromString(const string& in) { istringstream is(in); int n; is >> n; @@ -283,7 +281,7 @@ Score* BLEUScorerBase::ScoreFromString(const string& in) { is >> r->correct_ngram_hit_counts[i]; is >> r->hyp_ngram_counts[i]; } - return r; + return ScoreP(r); } class IBM_BLEUScorer : public BLEUScorerBase { @@ -343,51 +341,48 @@ class Koehn_BLEUScorer : public BLEUScorerBase { float avg_; }; -SentenceScorer* SentenceScorer::CreateSentenceScorer(const ScoreType type, +ScorerP SentenceScorer::CreateSentenceScorer(const ScoreType type, const vector<vector<WordID> >& refs, - const string& src) { + const string& src) +{ + SentenceScorer *r=0; switch (type) { - case IBM_BLEU: return new IBM_BLEUScorer(refs, 4); - case IBM_BLEU_3 : return new IBM_BLEUScorer(refs,3); - case NIST_BLEU: return new NIST_BLEUScorer(refs, 4); - case Koehn_BLEU: return new Koehn_BLEUScorer(refs, 4); - case AER: return new AERScorer(refs, src); - case TER: return new TERScorer(refs); - case SER: return new SERScorer(refs); - case BLEU_minus_TER_over_2: return new BLEUTERCombinationScorer(refs); + case IBM_BLEU: r = new IBM_BLEUScorer(refs, 4);break; + case IBM_BLEU_3 : r = new IBM_BLEUScorer(refs,3);break; + case NIST_BLEU: r = new NIST_BLEUScorer(refs, 4);break; + case Koehn_BLEU: r = new Koehn_BLEUScorer(refs, 4);break; + case AER: r = new AERScorer(refs, src);break; + case TER: r = new TERScorer(refs);break; + case SER: r = new SERScorer(refs);break; + case BLEU_minus_TER_over_2: r = new BLEUTERCombinationScorer(refs);break; default: assert(!"Not implemented!"); } + return ScorerP(r); } -Score* SentenceScorer::GetOne() const { +ScoreP SentenceScorer::GetOne() const { Sentence s; return ScoreCCandidate(s)->GetOne(); } -Score* SentenceScorer::GetZero() const { +ScoreP SentenceScorer::GetZero() const { Sentence s; return ScoreCCandidate(s)->GetZero(); } -Score* Score::GetOne(ScoreType type) { +ScoreP Score::GetOne(ScoreType type) { std::vector<SentenceScorer::Sentence > refs; - SentenceScorer *ps=SentenceScorer::CreateSentenceScorer(type,refs); - Score *s=ps->GetOne(); - delete ps; - return s; + return SentenceScorer::CreateSentenceScorer(type,refs)->GetOne(); } -Score* Score::GetZero(ScoreType type) { +ScoreP Score::GetZero(ScoreType type) { std::vector<SentenceScorer::Sentence > refs; - SentenceScorer *ps=SentenceScorer::CreateSentenceScorer(type,refs); - Score *s=ps->GetZero(); - delete ps; - return s; + return SentenceScorer::CreateSentenceScorer(type,refs)->GetZero(); } -Score* SentenceScorer::CreateScoreFromString(const ScoreType type, const string& in) { +ScoreP SentenceScorer::CreateScoreFromString(const ScoreType type, const string& in) { switch (type) { case IBM_BLEU: case IBM_BLEU_3: @@ -411,7 +406,7 @@ void SentenceScorer::ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface vector<WordID> prev_trans; const vector<shared_ptr<Segment> >& ienv = ve.GetSortedSegs(); env->resize(ienv.size()); - Score* prev_score = NULL; + ScoreP prev_score; int j = 0; for (int i = 0; i < ienv.size(); ++i) { const Segment& seg = *ienv[i]; @@ -453,26 +448,25 @@ void SentenceScorer::ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface } // cerr << "Identical translation, skipping scoring\n"; } else { - Score* score = ScoreCandidate(trans); + ScoreP score = ScoreCandidate(trans); // cerr << "score= " << score->ComputeScore() << "\n"; - Score* cur_delta = score->GetZero(); + ScoreP cur_delta_p = score->GetZero(); + Score* cur_delta = cur_delta_p.get(); // just record the score diffs if (!prev_score) prev_score = score->GetZero(); score->Subtract(*prev_score, cur_delta); - delete prev_score; prev_trans.swap(trans); prev_score = score; if ((!minimize_segments) || (!cur_delta->IsAdditiveIdentity())) { ErrorSegment& out = (*env)[j]; - out.delta = cur_delta; + out.delta = cur_delta_p; out.x = seg.x; - ++j; + ++j; } } } - delete prev_score; // cerr << " In segments: " << ienv.size() << endl; // cerr << "Out segments: " << j << endl; assert(j > 0); @@ -588,12 +582,12 @@ void BLEUScore::PlusPartialEquals(const Score& delta, int oracle_e_cover, int or } -Score* BLEUScore::GetZero() const { - return new BLEUScore(hyp_ngram_counts.size()); +ScoreP BLEUScore::GetZero() const { + return ScoreP(new BLEUScore(hyp_ngram_counts.size())); } -Score* BLEUScore::GetOne() const { - return new BLEUScore(hyp_ngram_counts.size(),1); +ScoreP BLEUScore::GetOne() const { + return ScoreP(new BLEUScore(hyp_ngram_counts.size(),1)); } @@ -615,17 +609,17 @@ BLEUScorerBase::BLEUScorerBase(const vector<vector<WordID> >& references, } } -Score* BLEUScorerBase::ScoreCandidate(const vector<WordID>& hyp) const { +ScoreP BLEUScorerBase::ScoreCandidate(const vector<WordID>& hyp) const { BLEUScore* bs = new BLEUScore(n_); for (NGramCountMap::iterator i=ngrams_.begin(); i != ngrams_.end(); ++i) i->second.second = 0; ComputeNgramStats(hyp, &bs->correct_ngram_hit_counts, &bs->hyp_ngram_counts, true); bs->ref_len = ComputeRefLength(hyp); bs->hyp_len = hyp.size(); - return bs; + return ScoreP(bs); } -Score* BLEUScorerBase::ScoreCCandidate(const vector<WordID>& hyp) const { +ScoreP BLEUScorerBase::ScoreCCandidate(const vector<WordID>& hyp) const { BLEUScore* bs = new BLEUScore(n_); for (NGramCountMap::iterator i=ngrams_.begin(); i != ngrams_.end(); ++i) i->second.second = 0; @@ -633,7 +627,7 @@ Score* BLEUScorerBase::ScoreCCandidate(const vector<WordID>& hyp) const { ComputeNgramStats(hyp, &bs->correct_ngram_hit_counts, &bs->hyp_ngram_counts,clip); bs->ref_len = ComputeRefLength(hyp); bs->hyp_len = hyp.size(); - return bs; + return ScoreP(bs); } @@ -643,7 +637,7 @@ DocScorer::~DocScorer() { void DocScorer::Init( const ScoreType type, const vector<string>& ref_files, - const string& src_file) { + const string& src_file, bool verbose) { scorers_.clear(); // TODO stop using valarray, start using ReadFile cerr << "Loading references (" << ref_files.size() << " files)\n"; @@ -686,9 +680,8 @@ void DocScorer::Init( ProcessAndStripSGML(&src_line, &dummy); } scorers_.push_back(ScorerP(SentenceScorer::CreateSentenceScorer(type, refs, src_line))); -#ifdef DEBUG_SCORER - cerr<<"doc_scorer["<<line<<"] = "<<scorers_.back()->verbose_desc()<<endl; -#endif + if (verbose) + cerr<<"doc_scorer["<<line<<"] = "<<scorers_.back()->verbose_desc()<<endl; ++line; } } diff --git a/vest/scorer.h b/vest/scorer.h index cc6b7335..29ba5377 100644 --- a/vest/scorer.h +++ b/vest/scorer.h @@ -3,9 +3,14 @@ #include <vector> #include <string> #include <boost/shared_ptr.hpp> - +//TODO: use intrusive shared_ptr in Score (because there are many of them on ErrorSurfaces) #include "wordid.h" +class Score; +class SentenceScorer; +typedef boost::shared_ptr<Score> ScoreP; +typedef boost::shared_ptr<SentenceScorer> ScorerP; + class ViterbiEnvelope; class ErrorSurface; class Hypergraph; // needed for alignment @@ -16,7 +21,6 @@ std::string StringFromScoreType(ScoreType st); class Score { public: - typedef boost::shared_ptr<Score> ScoreP; virtual ~Score(); virtual float ComputeScore() const = 0; virtual float ComputePartialScore() const =0; @@ -29,21 +33,19 @@ class Score { virtual void PlusEquals(const Score& rhs, const float scale) = 0; virtual void PlusEquals(const Score& rhs) = 0; virtual void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len) = 0; - virtual void Subtract(const Score& rhs, Score* res) const = 0; - virtual Score* GetZero() const = 0; - virtual Score* GetOne() const = 0; + virtual void Subtract(const Score& rhs, Score *res) const = 0; + virtual ScoreP GetZero() const = 0; + virtual ScoreP GetOne() const = 0; virtual bool IsAdditiveIdentity() const = 0; // returns true if adding this delta // to another score results in no score change // under any circumstances virtual void Encode(std::string* out) const = 0; - static Score* GetZero(ScoreType type); - static Score* GetOne(ScoreType type); + static ScoreP GetZero(ScoreType type); + static ScoreP GetOne(ScoreType type); }; class SentenceScorer { public: - typedef boost::shared_ptr<Score> ScoreP; - typedef boost::shared_ptr<SentenceScorer> ScorerP; typedef std::vector<WordID> Sentence; typedef std::vector<Sentence> Sentences; std::string desc; @@ -52,14 +54,14 @@ class SentenceScorer { std::string verbose_desc() const; virtual float ComputeRefLength(const Sentence& hyp) const; // default: avg of refs.length virtual ~SentenceScorer(); - virtual Score* GetOne() const; - virtual Score* GetZero() const; + virtual ScoreP GetOne() const; + virtual ScoreP GetZero() const; void ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface* es, const ScoreType type, const Hypergraph& hg) const; - virtual Score* ScoreCandidate(const Sentence& hyp) const = 0; - virtual Score* ScoreCCandidate(const Sentence& hyp) const =0; + virtual ScoreP ScoreCandidate(const Sentence& hyp) const = 0; + virtual ScoreP ScoreCCandidate(const Sentence& hyp) const =0; virtual const std::string* GetSource() const; - static Score* CreateScoreFromString(const ScoreType type, const std::string& in); - static SentenceScorer* CreateSentenceScorer(const ScoreType type, + static ScoreP CreateScoreFromString(const ScoreType type, const std::string& in); + static ScorerP CreateSentenceScorer(const ScoreType type, const std::vector<Sentence >& refs, const std::string& src = ""); }; @@ -71,19 +73,23 @@ class DocScorer { DocScorer() { } void Init(const ScoreType type, const std::vector<std::string>& ref_files, - const std::string& src_file = ""); + const std::string& src_file = "", + bool verbose=false + ); DocScorer(const ScoreType type, const std::vector<std::string>& ref_files, - const std::string& src_file = "") + const std::string& src_file = "", + bool verbose=false + ) { - Init(type,ref_files,src_file); + Init(type,ref_files,src_file,verbose); } int size() const { return scorers_.size(); } - typedef boost::shared_ptr<SentenceScorer> ScorerP; ScorerP operator[](size_t i) const { return scorers_[i]; } private: std::vector<ScorerP> scorers_; }; + #endif diff --git a/vest/ter.cc b/vest/ter.cc index 6e16e1cf..b4ebc4f5 100644 --- a/vest/ter.cc +++ b/vest/ter.cc @@ -91,7 +91,7 @@ class TERScorerImpl { typedef unordered_map<vector<WordID>, set<int>, boost::hash<vector<WordID> > > NgramToIntsMap; mutable NgramToIntsMap nmap_; - + static float MinimumEditDistance( const vector<WordID>& hyp, const vector<WordID>& ref, @@ -128,7 +128,7 @@ class TERScorerImpl { } } } - + // trace back along the best path and record the transition types path->clear(); int i = hyp.size(); @@ -220,7 +220,7 @@ class TERScorerImpl { cerr << "in=" << TD::GetString(in) << endl; cerr << "out=" << TD::GetString(*out) << endl; } - assert(out->size() == in.size()); + assert(out->size() == in.size()); // cerr << "ps: " << TD::GetString(*out) << endl; } @@ -338,7 +338,7 @@ class TERScorerImpl { *newerr = curerr; vector<TransType> cur_best_path; vector<WordID> cur_best_hyp; - + bool res = false; for (int i = shifts.size() - 1; i >=0; --i) { float curfix = curerr - (cur_best_shift_cost + *newerr); @@ -438,11 +438,11 @@ class TERScore : public Score { stats += static_cast<const TERScore&>(delta).stats; } - Score* GetZero() const { - return new TERScore; + ScoreP GetZero() const { + return ScoreP(new TERScore); } - Score* GetOne() const { - return new TERScore; + ScoreP GetOne() const { + return ScoreP(new TERScore); } void Subtract(const Score& rhs, Score* res) const { static_cast<TERScore*>(res)->stats = stats - static_cast<const TERScore&>(rhs).stats; @@ -465,7 +465,7 @@ class TERScore : public Score { valarray<int> stats; }; -Score* TERScorer::ScoreFromString(const std::string& data) { +ScoreP TERScorer::ScoreFromString(const std::string& data) { istringstream is(data); TERScore* r = new TERScore; is >> r->stats[TERScore::kINSERTIONS] @@ -473,13 +473,13 @@ Score* TERScorer::ScoreFromString(const std::string& data) { >> r->stats[TERScore::kSUBSTITUTIONS] >> r->stats[TERScore::kSHIFTS] >> r->stats[TERScore::kREF_WORDCOUNT]; - return r; + return ScoreP(r); } void TERScore::ScoreDetails(std::string* details) const { char buf[200]; sprintf(buf, "TER = %.2f, %3d|%3d|%3d|%3d (len=%d)", - ComputeScore() * 100.0f, + ComputeScore() * 100.0f, stats[kINSERTIONS], stats[kDELETIONS], stats[kSUBSTITUTIONS], @@ -498,12 +498,11 @@ TERScorer::TERScorer(const vector<vector<WordID> >& refs) : impl_(refs.size()) { impl_[i] = new TERScorerImpl(refs[i]); } -Score* TERScorer::ScoreCCandidate(const vector<WordID>& hyp) const { - Score* a = NULL; - return a; +ScoreP TERScorer::ScoreCCandidate(const vector<WordID>& hyp) const { + return ScoreP(); } -Score* TERScorer::ScoreCandidate(const std::vector<WordID>& hyp) const { +ScoreP TERScorer::ScoreCandidate(const std::vector<WordID>& hyp) const { float best_score = numeric_limits<float>::max(); TERScore* res = new TERScore; int avg_len = 0; @@ -528,5 +527,5 @@ Score* TERScorer::ScoreCandidate(const std::vector<WordID>& hyp) const { best_score = score; } } - return res; + return ScoreP(res); } @@ -9,9 +9,9 @@ class TERScorer : public SentenceScorer { public: TERScorer(const std::vector<std::vector<WordID> >& references); ~TERScorer(); - Score* ScoreCandidate(const std::vector<WordID>& hyp) const; - Score* ScoreCCandidate(const std::vector<WordID>& hyp) const; - static Score* ScoreFromString(const std::string& data); + ScoreP ScoreCandidate(const std::vector<WordID>& hyp) const; + ScoreP ScoreCCandidate(const std::vector<WordID>& hyp) const; + static ScoreP ScoreFromString(const std::string& data); private: std::vector<TERScorerImpl*> impl_; }; |