diff options
-rw-r--r-- | decoder/apply_models.cc | 3 | ||||
-rw-r--r-- | decoder/cdec.cc | 2 | ||||
-rw-r--r-- | decoder/ff_bleu.cc | 6 | ||||
-rwxr-xr-x | decoder/oracle_bleu.h | 22 | ||||
-rw-r--r-- | decoder/sparse_vector.h | 8 | ||||
-rw-r--r-- | vest/aer_scorer.cc | 21 | ||||
-rw-r--r-- | vest/aer_scorer.h | 6 | ||||
-rw-r--r-- | vest/comb_scorer.cc | 35 | ||||
-rw-r--r-- | vest/comb_scorer.h | 9 | ||||
-rw-r--r-- | vest/error_surface.cc | 5 | ||||
-rw-r--r-- | vest/error_surface.h | 4 | ||||
-rw-r--r-- | vest/line_optimizer.cc | 5 | ||||
-rw-r--r-- | vest/mr_vest_generate_mapper_input.cc | 27 | ||||
-rw-r--r-- | vest/scorer.cc | 111 | ||||
-rw-r--r-- | vest/scorer.h | 44 | ||||
-rw-r--r-- | vest/ter.cc | 31 | ||||
-rw-r--r-- | vest/ter.h | 6 |
17 files changed, 170 insertions, 175 deletions
diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc index ba573984..0e83582f 100644 --- a/decoder/apply_models.cc +++ b/decoder/apply_models.cc @@ -409,7 +409,8 @@ void ApplyModelSet(const Hypergraph& in, const ModelSet& models, const IntersectionConfiguration& config, Hypergraph* out) { - if (models.stateless() && config.algorithm == 0) { + //force exhaustive if there's no state req. for model + if (models.stateless() || config.algorithm == 0) { NoPruningRescorer ma(models, smeta, in, out); // avoid overhead of best-first when no state ma.Apply(); } else if (config.algorithm == 1) { diff --git a/decoder/cdec.cc b/decoder/cdec.cc index a9c1cb3b..be554774 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -613,7 +613,7 @@ int main(int argc, char** argv) { /*Oracle Rescoring*/ if(get_oracle_forest) { - Oracle o=oracle.ComputeOracle(smeta,&forest,FeatureVector(feature_weights),&cerr,10,conf["forest_output"].as<std::string>()); + Oracle o=oracle.ComputeOracle(smeta,&forest,FeatureVector(feature_weights),10,conf["forest_output"].as<std::string>()); cerr << " +Oracle BLEU forest (nodes/edges): " << forest.nodes_.size() << '/' << forest.edges_.size() << endl; cerr << " +Oracle BLEU (paths): " << forest.NumberOfPaths() << endl; o.hope.Print(cerr," +Oracle BLEU"); diff --git a/decoder/ff_bleu.cc b/decoder/ff_bleu.cc index f8d62aa2..19564bd0 100644 --- a/decoder/ff_bleu.cc +++ b/decoder/ff_bleu.cc @@ -182,7 +182,8 @@ class BLEUModelImpl { cerr << ")\n"; */ - Score *node_score = smeta.GetDocScorer()[smeta.GetSentenceID()]->ScoreCCandidate(vs); + ScoreP node_score_p = smeta.GetDocScorer()[smeta.GetSentenceID()]->ScoreCCandidate(vs); + Score *node_score=node_score_p.get(); string details; node_score->ScoreDetails(&details); const Score *base_score= &smeta.GetScore(); @@ -194,6 +195,7 @@ class BLEUModelImpl { //how it seems to be done in code //TODO: might need to reverse the -1/+1 of the oracle/neg examples + //TO VLADIMIR: the polarity would be reversed if you switched error (1-BLEU) for BLEU. approx_bleu = ( rule.FWords() * oracledoc_factor ) * node_score->ComputeScore(); //how I thought it was done from the paper //approx_bleu = ( rule.FWords()+ smeta.GetDocLen() ) * node_score->ComputeScore(); @@ -277,7 +279,7 @@ void BLEUModel::TraversalFeaturesImpl(const SentenceMetadata& smeta, const DocScorer *ds = &smeta.GetDocScorer(); */ - cerr<< "Loading sentence " << smeta.GetSentenceID() << endl; +// cerr<< "ff_bleu loading sentence " << smeta.GetSentenceID() << endl; //} features->set_value(fid_, pimpl_->LookupWords(*edge.rule_, ant_states, state, smeta)); //cerr << "FID" << fid_ << " " << DebugStateToString(state) << endl; diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h index 4800e9c1..470d311d 100755 --- a/decoder/oracle_bleu.h +++ b/decoder/oracle_bleu.h @@ -37,9 +37,12 @@ struct Translation { ViterbiESentence(hg,&sentence); features=ViterbiFeatures(hg,feature_weights,true); } - void Print(std::ostream &out,std::string pre=" +Oracle BLEU ") const { + void Print(std::ostream &out,std::string pre=" +Oracle BLEU ",bool include_0_fid=true) const { out<<pre<<"Viterbi: "<<TD::GetString(sentence)<<"\n"; - out<<pre<<"features: "<<features<<std::endl; + out<<pre<<"features: "<<features; + if (include_0_fid && features.nonzero(0)) + out<< " dummy-feature(0)="<<features[0]; + out<<std::endl; } bool is_null() { return features.empty() /* && sentence.empty() */; @@ -91,6 +94,7 @@ struct OracleBleu { ("references,R", value<Refs >(&refs), "Translation reference files") ("oracle_loss", value<string>(&loss_name)->default_value("IBM_BLEU_3"), "IBM_BLEU_3 (default), IBM_BLEU etc") ("bleu_weight", value<double>(&bleu_weight)->default_value(1.), "weight to give the hope/fear loss function vs. model score") + ("verbose",bool_switch(&verbose),"detailed logs") ; } int order; @@ -122,6 +126,7 @@ struct OracleBleu { double bleu_weight; // you have to call notify(conf) yourself, once, in main or similar + bool verbose; void UseConf(boost::program_options::variables_map const& /* conf */) { using namespace std; // bleu_weight=conf["bleu_weight"].as<double>(); @@ -162,12 +167,12 @@ struct OracleBleu { return; } assert(refs.size()); - ds.Init(loss,refs); + ds.Init(loss,refs,"",verbose); ensure_doc_score(); -// doc_score.reset(); std::cerr << "Loaded " << ds.size() << " references for scoring with " << StringFromScoreType(loss) << std::endl; } + // metadata has plain pointer, not shared, so we need to exist as long as it does SentenceMetadata MakeMetadata(Hypergraph const& forest,int sent_id) { std::vector<WordID> srcsent; ViterbiFSentence(forest,&srcsent); @@ -180,7 +185,7 @@ struct OracleBleu { } // destroys forest (replaces it w/ rescored oracle one) - Oracle ComputeOracle(SentenceMetadata const& smeta,Hypergraph *forest_in_out,WeightVector const& feature_weights,std::ostream *log=0,unsigned kbest=0,std::string const& forest_output="") { + Oracle ComputeOracle(SentenceMetadata const& smeta,Hypergraph *forest_in_out,WeightVector const& feature_weights,unsigned kbest=0,std::string const& forest_output="") { Hypergraph &forest=*forest_in_out; Oracle r; int sent_id=smeta.GetSentenceID(); @@ -189,7 +194,7 @@ struct OracleBleu { { Timer t("Forest Oracle rescoring:"); Hypergraph oracle_forest; - Rescore(smeta,forest,&oracle_forest,feature_weights,bleu_weight,log); + Rescore(smeta,forest,&oracle_forest,feature_weights,bleu_weight); forest.swap(oracle_forest); } r.hope=Translation(forest); @@ -202,10 +207,10 @@ struct OracleBleu { // if doc_score wasn't init, add 1 counts to ngram acc. void ensure_doc_score() { - if (!doc_score) { doc_score.reset(Score::GetOne(loss)); } + if (!doc_score) { doc_score=Score::GetOne(loss); } } - void Rescore(SentenceMetadata const& smeta,Hypergraph const& forest,Hypergraph *dest_forest,WeightVector const& feature_weights,double bleu_weight=1.0,std::ostream *log=&std::cerr) { + void Rescore(SentenceMetadata const& smeta,Hypergraph const& forest,Hypergraph *dest_forest,WeightVector const& feature_weights,double bleu_weight=1.0) { // the sentence bleu stats will get added to doc only if you call IncludeLastScore ensure_doc_score(); sentscore=GetScore(forest,smeta.GetSentenceID()); @@ -216,7 +221,6 @@ struct OracleBleu { feature_weights_.set_value(0,bleu_weight); feature_weights.init_vector(&w); ModelSet oracle_models(w,vector<FeatureFunction const*>(1,pff.get())); - if (log) *log << "Going to call Apply Model " << endl; ApplyModelSet(forest, smeta, oracle_models, diff --git a/decoder/sparse_vector.h b/decoder/sparse_vector.h index f41bedf5..5e785210 100644 --- a/decoder/sparse_vector.h +++ b/decoder/sparse_vector.h @@ -1,5 +1,7 @@ #ifndef _SPARSE_VECTOR_H_ #define _SPARSE_VECTOR_H_ +/* hack: index 0 never gets printed because cdyer is creative and efficient. features which have no weight got feature dict id 0, see, and the models all clobered that value. nobody wants to see it. except that vlad is also creative and efficient and stored the oracle bleu there. */ + // this is a modified version of code originally written // by Phil Blunsom @@ -54,6 +56,12 @@ public: } + // warning: exploits the fact that 0 values are always removed from map. change this if you change that. + bool nonzero(int index) const { + return values_.find(index) != values_.end(); + } + + const T operator[](int index) const { typename MapType::const_iterator found = values_.find(index); if (found == values_.end()) diff --git a/vest/aer_scorer.cc b/vest/aer_scorer.cc index d3f28804..253076c5 100644 --- a/vest/aer_scorer.cc +++ b/vest/aer_scorer.cc @@ -30,11 +30,11 @@ class AERScore : public Score { } - virtual Score* GetZero() const { - return new AERScore; + virtual ScoreP GetZero() const { + return ScoreP(new AERScore); } - virtual Score* GetOne() const { - return new AERScore; + virtual ScoreP GetOne() const { + return ScoreP(new AERScore); } virtual void Subtract(const Score& rhs, Score* out) const { AERScore* res = static_cast<AERScore*>(out); @@ -95,12 +95,11 @@ static inline bool Safe(const Array2D<bool>& a, int i, int j) { return false; } -Score* AERScorer::ScoreCCandidate(const vector<WordID>& shyp) const { - Score* a = NULL; - return a; +ScoreP AERScorer::ScoreCCandidate(const vector<WordID>& shyp) const { + return ScoreP(); } -Score* AERScorer::ScoreCandidate(const vector<WordID>& shyp) const { +ScoreP AERScorer::ScoreCandidate(const vector<WordID>& shyp) const { boost::shared_ptr<Array2D<bool> > hyp = AlignerTools::ReadPharaohAlignmentGrid(TD::GetString(shyp)); @@ -121,15 +120,15 @@ Score* AERScorer::ScoreCandidate(const vector<WordID>& shyp) const { for (int j = 0; j < hyp->height(); ++j) if ((*hyp)(i,j)) ++p; - return new AERScore(m,p,r); + return ScoreP(new AERScore(m,p,r)); } -Score* AERScorer::ScoreFromString(const string& in) { +ScoreP AERScorer::ScoreFromString(const string& in) { AERScore* res = new AERScore; res->num_matches = *(const int *)&in[sizeof(int) * 0]; res->num_predicted = *(const int *)&in[sizeof(int) * 1]; res->num_in_ref = *(const int *)&in[sizeof(int) * 2]; - return res; + return ScoreP(res); } const std::string* AERScorer::GetSource() const { return &src_; } diff --git a/vest/aer_scorer.h b/vest/aer_scorer.h index d0df35d5..6d53d359 100644 --- a/vest/aer_scorer.h +++ b/vest/aer_scorer.h @@ -11,9 +11,9 @@ class AERScorer : public SentenceScorer { // when constructing alignment strings from a hypergraph, the source // is necessary. AERScorer(const std::vector<std::vector<WordID> >& refs, const std::string& src = ""); - Score* ScoreCandidate(const std::vector<WordID>& hyp) const; - Score* ScoreCCandidate(const std::vector<WordID>& hyp) const; - static Score* ScoreFromString(const std::string& in); + ScoreP ScoreCandidate(const std::vector<WordID>& hyp) const; + ScoreP ScoreCCandidate(const std::vector<WordID>& hyp) const; + static ScoreP ScoreFromString(const std::string& in); const std::string* GetSource() const; private: std::string src_; diff --git a/vest/comb_scorer.cc b/vest/comb_scorer.cc index 3dd077a6..a921aa4d 100644 --- a/vest/comb_scorer.cc +++ b/vest/comb_scorer.cc @@ -14,7 +14,7 @@ class BLEUTERCombinationScore : public Score { } void ScoreDetails(string* details) const { char buf[160]; - sprintf(buf, "Combi = %.2f, BLEU = %.2f, TER = %.2f", + sprintf(buf, "Combi = %.2f, BLEU = %.2f, TER = %.2f", ComputeScore()*100.0f, bleu->ComputeScore()*100.0f, ter->ComputeScore()*100.0f); *details = buf; } @@ -31,23 +31,23 @@ class BLEUTERCombinationScore : public Score { - Score* GetOne() const { + ScoreP GetOne() const { BLEUTERCombinationScore* res = new BLEUTERCombinationScore; res->bleu = bleu->GetOne(); res->ter = ter->GetOne(); - return res; + return ScoreP(res); } - Score* GetZero() const { + ScoreP GetZero() const { BLEUTERCombinationScore* res = new BLEUTERCombinationScore; res->bleu = bleu->GetZero(); res->ter = ter->GetZero(); - return res; + return ScoreP(res); } void Subtract(const Score& rhs, Score* res) const { bleu->Subtract(*static_cast<const BLEUTERCombinationScore&>(rhs).bleu, - static_cast<BLEUTERCombinationScore*>(res)->bleu); + static_cast<BLEUTERCombinationScore*>(res)->bleu.get()); ter->Subtract(*static_cast<const BLEUTERCombinationScore&>(rhs).ter, - static_cast<BLEUTERCombinationScore*>(res)->ter); + static_cast<BLEUTERCombinationScore*>(res)->ter.get()); } void Encode(std::string* out) const { string bs, ts; @@ -62,13 +62,11 @@ class BLEUTERCombinationScore : public Score { return bleu->IsAdditiveIdentity() && ter->IsAdditiveIdentity(); } private: - Score* bleu; - Score* ter; + ScoreP bleu; + ScoreP ter; }; BLEUTERCombinationScore::~BLEUTERCombinationScore() { - delete bleu; - delete ter; } BLEUTERCombinationScorer::BLEUTERCombinationScorer(const vector<vector<WordID> >& refs) { @@ -77,26 +75,23 @@ BLEUTERCombinationScorer::BLEUTERCombinationScorer(const vector<vector<WordID> > } BLEUTERCombinationScorer::~BLEUTERCombinationScorer() { - delete bleu_; - delete ter_; } -Score* BLEUTERCombinationScorer::ScoreCCandidate(const vector<WordID>& hyp) const { - Score* a = NULL; - return a; +ScoreP BLEUTERCombinationScorer::ScoreCCandidate(const vector<WordID>& hyp) const { + return ScoreP(); } -Score* BLEUTERCombinationScorer::ScoreCandidate(const std::vector<WordID>& hyp) const { +ScoreP BLEUTERCombinationScorer::ScoreCandidate(const std::vector<WordID>& hyp) const { BLEUTERCombinationScore* res = new BLEUTERCombinationScore; res->bleu = bleu_->ScoreCandidate(hyp); res->ter = ter_->ScoreCandidate(hyp); - return res; + return ScoreP(res); } -Score* BLEUTERCombinationScorer::ScoreFromString(const std::string& in) { +ScoreP BLEUTERCombinationScorer::ScoreFromString(const std::string& in) { int bss = in[0]; BLEUTERCombinationScore* r = new BLEUTERCombinationScore; r->bleu = SentenceScorer::CreateScoreFromString(IBM_BLEU, in.substr(1, bss)); r->ter = SentenceScorer::CreateScoreFromString(TER, in.substr(1 + bss)); - return r; + return ScoreP(r); } diff --git a/vest/comb_scorer.h b/vest/comb_scorer.h index 1a4f3324..346be576 100644 --- a/vest/comb_scorer.h +++ b/vest/comb_scorer.h @@ -7,12 +7,11 @@ class BLEUTERCombinationScorer : public SentenceScorer { public: BLEUTERCombinationScorer(const std::vector<std::vector<WordID> >& refs); ~BLEUTERCombinationScorer(); - Score* ScoreCandidate(const std::vector<WordID>& hyp) const; - Score* ScoreCCandidate(const std::vector<WordID>& hyp) const; - static Score* ScoreFromString(const std::string& in); + ScoreP ScoreCandidate(const std::vector<WordID>& hyp) const; + ScoreP ScoreCCandidate(const std::vector<WordID>& hyp) const; + static ScoreP ScoreFromString(const std::string& in); private: - SentenceScorer* bleu_; - SentenceScorer* ter_; + ScorerP bleu_,ter_; }; #endif diff --git a/vest/error_surface.cc b/vest/error_surface.cc index 4e0af35c..754aa8de 100644 --- a/vest/error_surface.cc +++ b/vest/error_surface.cc @@ -6,9 +6,6 @@ using namespace std; ErrorSurface::~ErrorSurface() { - for (ErrorSurface::iterator i = begin(); i != end(); ++i) - //delete i->delta; - ; } void ErrorSurface::Serialize(std::string* out) const { @@ -29,7 +26,7 @@ void ErrorSurface::Serialize(std::string* out) const { } void ErrorSurface::Deserialize(ScoreType type, const std::string& in) { - istringstream is(in, ios::binary); + istringstream is(in, ios::binary); int segments; is.read((char*)&segments, sizeof(segments)); this->resize(segments); diff --git a/vest/error_surface.h b/vest/error_surface.h index a8734f54..ad728cfa 100644 --- a/vest/error_surface.h +++ b/vest/error_surface.h @@ -10,8 +10,8 @@ class Score; struct ErrorSegment { double x; - Score* delta; - ErrorSegment() : x(0), delta(NULL) {} + ScoreP delta; + ErrorSegment() : x(0), delta() {} }; class ErrorSurface : public std::vector<ErrorSegment> { diff --git a/vest/line_optimizer.cc b/vest/line_optimizer.cc index e8b40237..70a00cbc 100644 --- a/vest/line_optimizer.cc +++ b/vest/line_optimizer.cc @@ -32,7 +32,8 @@ double LineOptimizer::LineOptimize( } sort(all_ints.begin(), all_ints.end(), IntervalComp()); double last_boundary = all_ints.front()->x; - Score* acc = all_ints.front()->delta->GetZero(); + ScoreP accp = all_ints.front()->delta->GetZero(); + Score *acc=accp.get(); float& cur_best_score = *best_score; cur_best_score = (type == MAXIMIZE_SCORE ? -numeric_limits<float>::max() : numeric_limits<float>::max()); @@ -72,7 +73,6 @@ double LineOptimizer::LineOptimize( pos = last_boundary + 1000.0; } } - delete acc; return pos; } @@ -92,7 +92,6 @@ void LineOptimizer::CreateOptimizationDirections( vector<SparseVector<double> >* dirs , bool include_orthogonal ) { - const int num_directions = features_to_optimize.size() + additional_random_directions; dirs->clear(); typedef SparseVector<double> Dir; vector<Dir> &out=*dirs; diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc index 5b513f9b..f66b5082 100644 --- a/vest/mr_vest_generate_mapper_input.cc +++ b/vest/mr_vest_generate_mapper_input.cc @@ -99,7 +99,6 @@ struct oracle_directions { ("fear_to_hope,f",po::bool_switch(&fear_to_hope),"for each of the oracle_directions, also include a direction from fear to hope (as well as origin to hope)") ("no_old_to_hope","don't emit the usual old -> hope oracle") ("decoder_translations",po::value<string>(&decoder_translations_file)->default_value(""),"one per line decoder 1best translations for computing document BLEU vs. sentences-seen-so-far BLEU") - ("verbose",po::bool_switch(&verbose),"detailed logs") ; } void InitCommandLine(int argc, char *argv[], po::variables_map *conf) { @@ -133,6 +132,7 @@ struct oracle_directions { } UseConf(*conf); + verbose=oracle.verbose; return; bad_cmdline: cerr << dcmdline_options << endl; @@ -158,15 +158,6 @@ struct oracle_directions { vector<string> optimize_features; void UseConf(po::variables_map const& conf) { oracle.UseConf(conf); - // po::value<X>(&var) takes care of below: - // fear_to_hope=conf.count("fear_to_hope"); - // n_random=conf["random_directions"].as<unsigned int>(); - // forest_repository=conf["forest_repository"].as<string>(); - // dev_set_size=conf["dev_set_size"].as<unsigned int>(); - // n_oracle=conf["oracle_directions"].as<unsigned>(); - // oracle_batch=conf["oracle_batch"].as<unsigned>(); - // max_similarity=conf["max_similarity"].as<double>(); - // weights_file=conf["weights"].as<string>(); include_primary=!conf.count("no_primary"); old_to_hope=!conf.count("no_old_to_hope"); @@ -201,9 +192,11 @@ struct oracle_directions { model_scores.resize(model_hyps.size()); for (int i=0;i<model_hyps.size();++i) { //FIXME: what is scoreccand? with / without clipping? do without for consistency w/ oracle - Score *s=oracle.ds[i]->ScoreCandidate(model_hyps[i]); - model_scores[i].reset(s); - oracle.doc_score->PlusEquals(*s); + model_scores[i]=oracle.ds[i]->ScoreCandidate(model_hyps[i]); + if (verbose) cerr<<"Before model["<<i<<"]: "<<ds().ScoreDetails()<<endl; + if (verbose) cerr<<"model["<<i<<"]: "<<model_scores[i]->ScoreDetails()<<endl; + oracle.doc_score->PlusEquals(*model_scores[i]); + if (verbose) cerr<<"After model["<<i<<"]: "<<ds().ScoreDetails()<<endl; } //TODO: compute doc bleu stats for each sentence, then when getting oracle temporarily exclude stats for that sentence (skip regular score updating) } @@ -252,12 +245,12 @@ struct oracle_directions { Timer t("Loading forest from JSON "+forest_file(i)); HypergraphIO::ReadFromJSON(rf.stream(), &hg); } - if (verbose) cerr<<"Before oracle["<<i<<"]: "<<ds().ScoreDetails(); - o=oracle.ComputeOracle(oracle.MakeMetadata(hg,i),&hg,origin,&cerr); + if (verbose) cerr<<"Before oracle["<<i<<"]: "<<ds().ScoreDetails()<<endl; + o=oracle.ComputeOracle(oracle.MakeMetadata(hg,i),&hg,origin); if (verbose) { cerr << o; - cerr<<" ; after: "<<ds().ScoreDetails() - <<" oracle="<<oracle.GetScore(o.hope.sentence,i)->ScoreDetails() + cerr<<"After oracle: "<<ds().ScoreDetails()<<endl + <<" oracle="<<oracle.GetScore(o.hope.sentence,i)->ScoreDetails()<<endl <<" model="<<oracle.GetScore(o.model.sentence,i)->ScoreDetails()<<endl; if (have_doc) cerr<<" doc (should = model): "<<model_scores[i]->ScoreDetails()<<endl; diff --git a/vest/scorer.cc b/vest/scorer.cc index d8628418..5cad948d 100644 --- a/vest/scorer.cc +++ b/vest/scorer.cc @@ -1,6 +1,4 @@ #include "scorer.h" -#define DEBUG_SCORER - #include <boost/lexical_cast.hpp> #include <map> @@ -24,6 +22,7 @@ #include "stringlib.h" #include "lattice.h" + using boost::shared_ptr; using namespace std; @@ -107,8 +106,8 @@ class SERScore : public Score { correct += static_cast<const SERScore&>(delta).correct; total += static_cast<const SERScore&>(delta).total; } - Score* GetZero() const { return new SERScore; } - Score* GetOne() const { return new SERScore; } + ScoreP GetZero() const { return ScoreP(new SERScore); } + ScoreP GetOne() const { return ScoreP(new SERScore); } void Subtract(const Score& rhs, Score* res) const { SERScore* r = static_cast<SERScore*>(res); r->correct = correct - static_cast<const SERScore&>(rhs).correct; @@ -131,18 +130,17 @@ std::string SentenceScorer::verbose_desc() const { class SERScorer : public SentenceScorer { public: SERScorer(const vector<vector<WordID> >& references) : SentenceScorer("SERScorer",references),refs_(references) {} - Score* ScoreCCandidate(const vector<WordID>& /* hyp */) const { - Score* a = NULL; - return a; + ScoreP ScoreCCandidate(const vector<WordID>& /* hyp */) const { + return ScoreP(); } - Score* ScoreCandidate(const vector<WordID>& hyp) const { + ScoreP ScoreCandidate(const vector<WordID>& hyp) const { SERScore* res = new SERScore; res->total = 1; for (int i = 0; i < refs_.size(); ++i) if (refs_[i] == hyp) res->correct = 1; - return res; + return ScoreP(res); } - static Score* ScoreFromString(const string& data) { + static ScoreP ScoreFromString(const string& data) { assert(!"Not implemented"); } private: @@ -164,8 +162,8 @@ class BLEUScore : public Score { void PlusEquals(const Score& delta); void PlusEquals(const Score& delta, const float scale); void PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len); - Score* GetZero() const; - Score* GetOne() const; + ScoreP GetZero() const; + ScoreP GetOne() const; void Subtract(const Score& rhs, Score* res) const; void Encode(string* out) const; bool IsAdditiveIdentity() const { @@ -189,9 +187,9 @@ class BLEUScorerBase : public SentenceScorer { BLEUScorerBase(const vector<vector<WordID> >& references, int n ); - Score* ScoreCandidate(const vector<WordID>& hyp) const; - Score* ScoreCCandidate(const vector<WordID>& hyp) const; - static Score* ScoreFromString(const string& in); + ScoreP ScoreCandidate(const vector<WordID>& hyp) const; + ScoreP ScoreCCandidate(const vector<WordID>& hyp) const; + static ScoreP ScoreFromString(const string& in); virtual float ComputeRefLength(const vector<WordID>& hyp) const = 0; private: @@ -272,7 +270,7 @@ class BLEUScorerBase : public SentenceScorer { vector<int> lengths_; }; -Score* BLEUScorerBase::ScoreFromString(const string& in) { +ScoreP BLEUScorerBase::ScoreFromString(const string& in) { istringstream is(in); int n; is >> n; @@ -283,7 +281,7 @@ Score* BLEUScorerBase::ScoreFromString(const string& in) { is >> r->correct_ngram_hit_counts[i]; is >> r->hyp_ngram_counts[i]; } - return r; + return ScoreP(r); } class IBM_BLEUScorer : public BLEUScorerBase { @@ -343,51 +341,48 @@ class Koehn_BLEUScorer : public BLEUScorerBase { float avg_; }; -SentenceScorer* SentenceScorer::CreateSentenceScorer(const ScoreType type, +ScorerP SentenceScorer::CreateSentenceScorer(const ScoreType type, const vector<vector<WordID> >& refs, - const string& src) { + const string& src) +{ + SentenceScorer *r=0; switch (type) { - case IBM_BLEU: return new IBM_BLEUScorer(refs, 4); - case IBM_BLEU_3 : return new IBM_BLEUScorer(refs,3); - case NIST_BLEU: return new NIST_BLEUScorer(refs, 4); - case Koehn_BLEU: return new Koehn_BLEUScorer(refs, 4); - case AER: return new AERScorer(refs, src); - case TER: return new TERScorer(refs); - case SER: return new SERScorer(refs); - case BLEU_minus_TER_over_2: return new BLEUTERCombinationScorer(refs); + case IBM_BLEU: r = new IBM_BLEUScorer(refs, 4);break; + case IBM_BLEU_3 : r = new IBM_BLEUScorer(refs,3);break; + case NIST_BLEU: r = new NIST_BLEUScorer(refs, 4);break; + case Koehn_BLEU: r = new Koehn_BLEUScorer(refs, 4);break; + case AER: r = new AERScorer(refs, src);break; + case TER: r = new TERScorer(refs);break; + case SER: r = new SERScorer(refs);break; + case BLEU_minus_TER_over_2: r = new BLEUTERCombinationScorer(refs);break; default: assert(!"Not implemented!"); } + return ScorerP(r); } -Score* SentenceScorer::GetOne() const { +ScoreP SentenceScorer::GetOne() const { Sentence s; return ScoreCCandidate(s)->GetOne(); } -Score* SentenceScorer::GetZero() const { +ScoreP SentenceScorer::GetZero() const { Sentence s; return ScoreCCandidate(s)->GetZero(); } -Score* Score::GetOne(ScoreType type) { +ScoreP Score::GetOne(ScoreType type) { std::vector<SentenceScorer::Sentence > refs; - SentenceScorer *ps=SentenceScorer::CreateSentenceScorer(type,refs); - Score *s=ps->GetOne(); - delete ps; - return s; + return SentenceScorer::CreateSentenceScorer(type,refs)->GetOne(); } -Score* Score::GetZero(ScoreType type) { +ScoreP Score::GetZero(ScoreType type) { std::vector<SentenceScorer::Sentence > refs; - SentenceScorer *ps=SentenceScorer::CreateSentenceScorer(type,refs); - Score *s=ps->GetZero(); - delete ps; - return s; + return SentenceScorer::CreateSentenceScorer(type,refs)->GetZero(); } -Score* SentenceScorer::CreateScoreFromString(const ScoreType type, const string& in) { +ScoreP SentenceScorer::CreateScoreFromString(const ScoreType type, const string& in) { switch (type) { case IBM_BLEU: case IBM_BLEU_3: @@ -411,7 +406,7 @@ void SentenceScorer::ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface vector<WordID> prev_trans; const vector<shared_ptr<Segment> >& ienv = ve.GetSortedSegs(); env->resize(ienv.size()); - Score* prev_score = NULL; + ScoreP prev_score; int j = 0; for (int i = 0; i < ienv.size(); ++i) { const Segment& seg = *ienv[i]; @@ -453,26 +448,25 @@ void SentenceScorer::ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface } // cerr << "Identical translation, skipping scoring\n"; } else { - Score* score = ScoreCandidate(trans); + ScoreP score = ScoreCandidate(trans); // cerr << "score= " << score->ComputeScore() << "\n"; - Score* cur_delta = score->GetZero(); + ScoreP cur_delta_p = score->GetZero(); + Score* cur_delta = cur_delta_p.get(); // just record the score diffs if (!prev_score) prev_score = score->GetZero(); score->Subtract(*prev_score, cur_delta); - delete prev_score; prev_trans.swap(trans); prev_score = score; if ((!minimize_segments) || (!cur_delta->IsAdditiveIdentity())) { ErrorSegment& out = (*env)[j]; - out.delta = cur_delta; + out.delta = cur_delta_p; out.x = seg.x; - ++j; + ++j; } } } - delete prev_score; // cerr << " In segments: " << ienv.size() << endl; // cerr << "Out segments: " << j << endl; assert(j > 0); @@ -588,12 +582,12 @@ void BLEUScore::PlusPartialEquals(const Score& delta, int oracle_e_cover, int or } -Score* BLEUScore::GetZero() const { - return new BLEUScore(hyp_ngram_counts.size()); +ScoreP BLEUScore::GetZero() const { + return ScoreP(new BLEUScore(hyp_ngram_counts.size())); } -Score* BLEUScore::GetOne() const { - return new BLEUScore(hyp_ngram_counts.size(),1); +ScoreP BLEUScore::GetOne() const { + return ScoreP(new BLEUScore(hyp_ngram_counts.size(),1)); } @@ -615,17 +609,17 @@ BLEUScorerBase::BLEUScorerBase(const vector<vector<WordID> >& references, } } -Score* BLEUScorerBase::ScoreCandidate(const vector<WordID>& hyp) const { +ScoreP BLEUScorerBase::ScoreCandidate(const vector<WordID>& hyp) const { BLEUScore* bs = new BLEUScore(n_); for (NGramCountMap::iterator i=ngrams_.begin(); i != ngrams_.end(); ++i) i->second.second = 0; ComputeNgramStats(hyp, &bs->correct_ngram_hit_counts, &bs->hyp_ngram_counts, true); bs->ref_len = ComputeRefLength(hyp); bs->hyp_len = hyp.size(); - return bs; + return ScoreP(bs); } -Score* BLEUScorerBase::ScoreCCandidate(const vector<WordID>& hyp) const { +ScoreP BLEUScorerBase::ScoreCCandidate(const vector<WordID>& hyp) const { BLEUScore* bs = new BLEUScore(n_); for (NGramCountMap::iterator i=ngrams_.begin(); i != ngrams_.end(); ++i) i->second.second = 0; @@ -633,7 +627,7 @@ Score* BLEUScorerBase::ScoreCCandidate(const vector<WordID>& hyp) const { ComputeNgramStats(hyp, &bs->correct_ngram_hit_counts, &bs->hyp_ngram_counts,clip); bs->ref_len = ComputeRefLength(hyp); bs->hyp_len = hyp.size(); - return bs; + return ScoreP(bs); } @@ -643,7 +637,7 @@ DocScorer::~DocScorer() { void DocScorer::Init( const ScoreType type, const vector<string>& ref_files, - const string& src_file) { + const string& src_file, bool verbose) { scorers_.clear(); // TODO stop using valarray, start using ReadFile cerr << "Loading references (" << ref_files.size() << " files)\n"; @@ -686,9 +680,8 @@ void DocScorer::Init( ProcessAndStripSGML(&src_line, &dummy); } scorers_.push_back(ScorerP(SentenceScorer::CreateSentenceScorer(type, refs, src_line))); -#ifdef DEBUG_SCORER - cerr<<"doc_scorer["<<line<<"] = "<<scorers_.back()->verbose_desc()<<endl; -#endif + if (verbose) + cerr<<"doc_scorer["<<line<<"] = "<<scorers_.back()->verbose_desc()<<endl; ++line; } } diff --git a/vest/scorer.h b/vest/scorer.h index cc6b7335..29ba5377 100644 --- a/vest/scorer.h +++ b/vest/scorer.h @@ -3,9 +3,14 @@ #include <vector> #include <string> #include <boost/shared_ptr.hpp> - +//TODO: use intrusive shared_ptr in Score (because there are many of them on ErrorSurfaces) #include "wordid.h" +class Score; +class SentenceScorer; +typedef boost::shared_ptr<Score> ScoreP; +typedef boost::shared_ptr<SentenceScorer> ScorerP; + class ViterbiEnvelope; class ErrorSurface; class Hypergraph; // needed for alignment @@ -16,7 +21,6 @@ std::string StringFromScoreType(ScoreType st); class Score { public: - typedef boost::shared_ptr<Score> ScoreP; virtual ~Score(); virtual float ComputeScore() const = 0; virtual float ComputePartialScore() const =0; @@ -29,21 +33,19 @@ class Score { virtual void PlusEquals(const Score& rhs, const float scale) = 0; virtual void PlusEquals(const Score& rhs) = 0; virtual void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len) = 0; - virtual void Subtract(const Score& rhs, Score* res) const = 0; - virtual Score* GetZero() const = 0; - virtual Score* GetOne() const = 0; + virtual void Subtract(const Score& rhs, Score *res) const = 0; + virtual ScoreP GetZero() const = 0; + virtual ScoreP GetOne() const = 0; virtual bool IsAdditiveIdentity() const = 0; // returns true if adding this delta // to another score results in no score change // under any circumstances virtual void Encode(std::string* out) const = 0; - static Score* GetZero(ScoreType type); - static Score* GetOne(ScoreType type); + static ScoreP GetZero(ScoreType type); + static ScoreP GetOne(ScoreType type); }; class SentenceScorer { public: - typedef boost::shared_ptr<Score> ScoreP; - typedef boost::shared_ptr<SentenceScorer> ScorerP; typedef std::vector<WordID> Sentence; typedef std::vector<Sentence> Sentences; std::string desc; @@ -52,14 +54,14 @@ class SentenceScorer { std::string verbose_desc() const; virtual float ComputeRefLength(const Sentence& hyp) const; // default: avg of refs.length virtual ~SentenceScorer(); - virtual Score* GetOne() const; - virtual Score* GetZero() const; + virtual ScoreP GetOne() const; + virtual ScoreP GetZero() const; void ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface* es, const ScoreType type, const Hypergraph& hg) const; - virtual Score* ScoreCandidate(const Sentence& hyp) const = 0; - virtual Score* ScoreCCandidate(const Sentence& hyp) const =0; + virtual ScoreP ScoreCandidate(const Sentence& hyp) const = 0; + virtual ScoreP ScoreCCandidate(const Sentence& hyp) const =0; virtual const std::string* GetSource() const; - static Score* CreateScoreFromString(const ScoreType type, const std::string& in); - static SentenceScorer* CreateSentenceScorer(const ScoreType type, + static ScoreP CreateScoreFromString(const ScoreType type, const std::string& in); + static ScorerP CreateSentenceScorer(const ScoreType type, const std::vector<Sentence >& refs, const std::string& src = ""); }; @@ -71,19 +73,23 @@ class DocScorer { DocScorer() { } void Init(const ScoreType type, const std::vector<std::string>& ref_files, - const std::string& src_file = ""); + const std::string& src_file = "", + bool verbose=false + ); DocScorer(const ScoreType type, const std::vector<std::string>& ref_files, - const std::string& src_file = "") + const std::string& src_file = "", + bool verbose=false + ) { - Init(type,ref_files,src_file); + Init(type,ref_files,src_file,verbose); } int size() const { return scorers_.size(); } - typedef boost::shared_ptr<SentenceScorer> ScorerP; ScorerP operator[](size_t i) const { return scorers_[i]; } private: std::vector<ScorerP> scorers_; }; + #endif diff --git a/vest/ter.cc b/vest/ter.cc index 6e16e1cf..b4ebc4f5 100644 --- a/vest/ter.cc +++ b/vest/ter.cc @@ -91,7 +91,7 @@ class TERScorerImpl { typedef unordered_map<vector<WordID>, set<int>, boost::hash<vector<WordID> > > NgramToIntsMap; mutable NgramToIntsMap nmap_; - + static float MinimumEditDistance( const vector<WordID>& hyp, const vector<WordID>& ref, @@ -128,7 +128,7 @@ class TERScorerImpl { } } } - + // trace back along the best path and record the transition types path->clear(); int i = hyp.size(); @@ -220,7 +220,7 @@ class TERScorerImpl { cerr << "in=" << TD::GetString(in) << endl; cerr << "out=" << TD::GetString(*out) << endl; } - assert(out->size() == in.size()); + assert(out->size() == in.size()); // cerr << "ps: " << TD::GetString(*out) << endl; } @@ -338,7 +338,7 @@ class TERScorerImpl { *newerr = curerr; vector<TransType> cur_best_path; vector<WordID> cur_best_hyp; - + bool res = false; for (int i = shifts.size() - 1; i >=0; --i) { float curfix = curerr - (cur_best_shift_cost + *newerr); @@ -438,11 +438,11 @@ class TERScore : public Score { stats += static_cast<const TERScore&>(delta).stats; } - Score* GetZero() const { - return new TERScore; + ScoreP GetZero() const { + return ScoreP(new TERScore); } - Score* GetOne() const { - return new TERScore; + ScoreP GetOne() const { + return ScoreP(new TERScore); } void Subtract(const Score& rhs, Score* res) const { static_cast<TERScore*>(res)->stats = stats - static_cast<const TERScore&>(rhs).stats; @@ -465,7 +465,7 @@ class TERScore : public Score { valarray<int> stats; }; -Score* TERScorer::ScoreFromString(const std::string& data) { +ScoreP TERScorer::ScoreFromString(const std::string& data) { istringstream is(data); TERScore* r = new TERScore; is >> r->stats[TERScore::kINSERTIONS] @@ -473,13 +473,13 @@ Score* TERScorer::ScoreFromString(const std::string& data) { >> r->stats[TERScore::kSUBSTITUTIONS] >> r->stats[TERScore::kSHIFTS] >> r->stats[TERScore::kREF_WORDCOUNT]; - return r; + return ScoreP(r); } void TERScore::ScoreDetails(std::string* details) const { char buf[200]; sprintf(buf, "TER = %.2f, %3d|%3d|%3d|%3d (len=%d)", - ComputeScore() * 100.0f, + ComputeScore() * 100.0f, stats[kINSERTIONS], stats[kDELETIONS], stats[kSUBSTITUTIONS], @@ -498,12 +498,11 @@ TERScorer::TERScorer(const vector<vector<WordID> >& refs) : impl_(refs.size()) { impl_[i] = new TERScorerImpl(refs[i]); } -Score* TERScorer::ScoreCCandidate(const vector<WordID>& hyp) const { - Score* a = NULL; - return a; +ScoreP TERScorer::ScoreCCandidate(const vector<WordID>& hyp) const { + return ScoreP(); } -Score* TERScorer::ScoreCandidate(const std::vector<WordID>& hyp) const { +ScoreP TERScorer::ScoreCandidate(const std::vector<WordID>& hyp) const { float best_score = numeric_limits<float>::max(); TERScore* res = new TERScore; int avg_len = 0; @@ -528,5 +527,5 @@ Score* TERScorer::ScoreCandidate(const std::vector<WordID>& hyp) const { best_score = score; } } - return res; + return ScoreP(res); } @@ -9,9 +9,9 @@ class TERScorer : public SentenceScorer { public: TERScorer(const std::vector<std::vector<WordID> >& references); ~TERScorer(); - Score* ScoreCandidate(const std::vector<WordID>& hyp) const; - Score* ScoreCCandidate(const std::vector<WordID>& hyp) const; - static Score* ScoreFromString(const std::string& data); + ScoreP ScoreCandidate(const std::vector<WordID>& hyp) const; + ScoreP ScoreCCandidate(const std::vector<WordID>& hyp) const; + static ScoreP ScoreFromString(const std::string& data); private: std::vector<TERScorerImpl*> impl_; }; |