summaryrefslogtreecommitdiff
path: root/vest
diff options
context:
space:
mode:
authorgraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-19 22:51:33 +0000
committergraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-19 22:51:33 +0000
commit904130a611019d3ecb7878e21035f6915b4b5702 (patch)
tree34d4e44d8cc1edb83f0e5ee4021c5c8e11d2de0f /vest
parenta0206fd7c45da0b31501713ac65834f69745b696 (diff)
shared_ptr for scores. todo: intrusive.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@327 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'vest')
-rw-r--r--vest/aer_scorer.cc21
-rw-r--r--vest/aer_scorer.h6
-rw-r--r--vest/comb_scorer.cc35
-rw-r--r--vest/comb_scorer.h9
-rw-r--r--vest/error_surface.cc5
-rw-r--r--vest/error_surface.h4
-rw-r--r--vest/line_optimizer.cc5
-rw-r--r--vest/mr_vest_generate_mapper_input.cc27
-rw-r--r--vest/scorer.cc111
-rw-r--r--vest/scorer.h44
-rw-r--r--vest/ter.cc31
-rw-r--r--vest/ter.h6
12 files changed, 142 insertions, 162 deletions
diff --git a/vest/aer_scorer.cc b/vest/aer_scorer.cc
index d3f28804..253076c5 100644
--- a/vest/aer_scorer.cc
+++ b/vest/aer_scorer.cc
@@ -30,11 +30,11 @@ class AERScore : public Score {
}
- virtual Score* GetZero() const {
- return new AERScore;
+ virtual ScoreP GetZero() const {
+ return ScoreP(new AERScore);
}
- virtual Score* GetOne() const {
- return new AERScore;
+ virtual ScoreP GetOne() const {
+ return ScoreP(new AERScore);
}
virtual void Subtract(const Score& rhs, Score* out) const {
AERScore* res = static_cast<AERScore*>(out);
@@ -95,12 +95,11 @@ static inline bool Safe(const Array2D<bool>& a, int i, int j) {
return false;
}
-Score* AERScorer::ScoreCCandidate(const vector<WordID>& shyp) const {
- Score* a = NULL;
- return a;
+ScoreP AERScorer::ScoreCCandidate(const vector<WordID>& shyp) const {
+ return ScoreP();
}
-Score* AERScorer::ScoreCandidate(const vector<WordID>& shyp) const {
+ScoreP AERScorer::ScoreCandidate(const vector<WordID>& shyp) const {
boost::shared_ptr<Array2D<bool> > hyp =
AlignerTools::ReadPharaohAlignmentGrid(TD::GetString(shyp));
@@ -121,15 +120,15 @@ Score* AERScorer::ScoreCandidate(const vector<WordID>& shyp) const {
for (int j = 0; j < hyp->height(); ++j)
if ((*hyp)(i,j)) ++p;
- return new AERScore(m,p,r);
+ return ScoreP(new AERScore(m,p,r));
}
-Score* AERScorer::ScoreFromString(const string& in) {
+ScoreP AERScorer::ScoreFromString(const string& in) {
AERScore* res = new AERScore;
res->num_matches = *(const int *)&in[sizeof(int) * 0];
res->num_predicted = *(const int *)&in[sizeof(int) * 1];
res->num_in_ref = *(const int *)&in[sizeof(int) * 2];
- return res;
+ return ScoreP(res);
}
const std::string* AERScorer::GetSource() const { return &src_; }
diff --git a/vest/aer_scorer.h b/vest/aer_scorer.h
index d0df35d5..6d53d359 100644
--- a/vest/aer_scorer.h
+++ b/vest/aer_scorer.h
@@ -11,9 +11,9 @@ class AERScorer : public SentenceScorer {
// when constructing alignment strings from a hypergraph, the source
// is necessary.
AERScorer(const std::vector<std::vector<WordID> >& refs, const std::string& src = "");
- Score* ScoreCandidate(const std::vector<WordID>& hyp) const;
- Score* ScoreCCandidate(const std::vector<WordID>& hyp) const;
- static Score* ScoreFromString(const std::string& in);
+ ScoreP ScoreCandidate(const std::vector<WordID>& hyp) const;
+ ScoreP ScoreCCandidate(const std::vector<WordID>& hyp) const;
+ static ScoreP ScoreFromString(const std::string& in);
const std::string* GetSource() const;
private:
std::string src_;
diff --git a/vest/comb_scorer.cc b/vest/comb_scorer.cc
index 3dd077a6..a921aa4d 100644
--- a/vest/comb_scorer.cc
+++ b/vest/comb_scorer.cc
@@ -14,7 +14,7 @@ class BLEUTERCombinationScore : public Score {
}
void ScoreDetails(string* details) const {
char buf[160];
- sprintf(buf, "Combi = %.2f, BLEU = %.2f, TER = %.2f",
+ sprintf(buf, "Combi = %.2f, BLEU = %.2f, TER = %.2f",
ComputeScore()*100.0f, bleu->ComputeScore()*100.0f, ter->ComputeScore()*100.0f);
*details = buf;
}
@@ -31,23 +31,23 @@ class BLEUTERCombinationScore : public Score {
- Score* GetOne() const {
+ ScoreP GetOne() const {
BLEUTERCombinationScore* res = new BLEUTERCombinationScore;
res->bleu = bleu->GetOne();
res->ter = ter->GetOne();
- return res;
+ return ScoreP(res);
}
- Score* GetZero() const {
+ ScoreP GetZero() const {
BLEUTERCombinationScore* res = new BLEUTERCombinationScore;
res->bleu = bleu->GetZero();
res->ter = ter->GetZero();
- return res;
+ return ScoreP(res);
}
void Subtract(const Score& rhs, Score* res) const {
bleu->Subtract(*static_cast<const BLEUTERCombinationScore&>(rhs).bleu,
- static_cast<BLEUTERCombinationScore*>(res)->bleu);
+ static_cast<BLEUTERCombinationScore*>(res)->bleu.get());
ter->Subtract(*static_cast<const BLEUTERCombinationScore&>(rhs).ter,
- static_cast<BLEUTERCombinationScore*>(res)->ter);
+ static_cast<BLEUTERCombinationScore*>(res)->ter.get());
}
void Encode(std::string* out) const {
string bs, ts;
@@ -62,13 +62,11 @@ class BLEUTERCombinationScore : public Score {
return bleu->IsAdditiveIdentity() && ter->IsAdditiveIdentity();
}
private:
- Score* bleu;
- Score* ter;
+ ScoreP bleu;
+ ScoreP ter;
};
BLEUTERCombinationScore::~BLEUTERCombinationScore() {
- delete bleu;
- delete ter;
}
BLEUTERCombinationScorer::BLEUTERCombinationScorer(const vector<vector<WordID> >& refs) {
@@ -77,26 +75,23 @@ BLEUTERCombinationScorer::BLEUTERCombinationScorer(const vector<vector<WordID> >
}
BLEUTERCombinationScorer::~BLEUTERCombinationScorer() {
- delete bleu_;
- delete ter_;
}
-Score* BLEUTERCombinationScorer::ScoreCCandidate(const vector<WordID>& hyp) const {
- Score* a = NULL;
- return a;
+ScoreP BLEUTERCombinationScorer::ScoreCCandidate(const vector<WordID>& hyp) const {
+ return ScoreP();
}
-Score* BLEUTERCombinationScorer::ScoreCandidate(const std::vector<WordID>& hyp) const {
+ScoreP BLEUTERCombinationScorer::ScoreCandidate(const std::vector<WordID>& hyp) const {
BLEUTERCombinationScore* res = new BLEUTERCombinationScore;
res->bleu = bleu_->ScoreCandidate(hyp);
res->ter = ter_->ScoreCandidate(hyp);
- return res;
+ return ScoreP(res);
}
-Score* BLEUTERCombinationScorer::ScoreFromString(const std::string& in) {
+ScoreP BLEUTERCombinationScorer::ScoreFromString(const std::string& in) {
int bss = in[0];
BLEUTERCombinationScore* r = new BLEUTERCombinationScore;
r->bleu = SentenceScorer::CreateScoreFromString(IBM_BLEU, in.substr(1, bss));
r->ter = SentenceScorer::CreateScoreFromString(TER, in.substr(1 + bss));
- return r;
+ return ScoreP(r);
}
diff --git a/vest/comb_scorer.h b/vest/comb_scorer.h
index 1a4f3324..346be576 100644
--- a/vest/comb_scorer.h
+++ b/vest/comb_scorer.h
@@ -7,12 +7,11 @@ class BLEUTERCombinationScorer : public SentenceScorer {
public:
BLEUTERCombinationScorer(const std::vector<std::vector<WordID> >& refs);
~BLEUTERCombinationScorer();
- Score* ScoreCandidate(const std::vector<WordID>& hyp) const;
- Score* ScoreCCandidate(const std::vector<WordID>& hyp) const;
- static Score* ScoreFromString(const std::string& in);
+ ScoreP ScoreCandidate(const std::vector<WordID>& hyp) const;
+ ScoreP ScoreCCandidate(const std::vector<WordID>& hyp) const;
+ static ScoreP ScoreFromString(const std::string& in);
private:
- SentenceScorer* bleu_;
- SentenceScorer* ter_;
+ ScorerP bleu_,ter_;
};
#endif
diff --git a/vest/error_surface.cc b/vest/error_surface.cc
index 4e0af35c..754aa8de 100644
--- a/vest/error_surface.cc
+++ b/vest/error_surface.cc
@@ -6,9 +6,6 @@
using namespace std;
ErrorSurface::~ErrorSurface() {
- for (ErrorSurface::iterator i = begin(); i != end(); ++i)
- //delete i->delta;
- ;
}
void ErrorSurface::Serialize(std::string* out) const {
@@ -29,7 +26,7 @@ void ErrorSurface::Serialize(std::string* out) const {
}
void ErrorSurface::Deserialize(ScoreType type, const std::string& in) {
- istringstream is(in, ios::binary);
+ istringstream is(in, ios::binary);
int segments;
is.read((char*)&segments, sizeof(segments));
this->resize(segments);
diff --git a/vest/error_surface.h b/vest/error_surface.h
index a8734f54..ad728cfa 100644
--- a/vest/error_surface.h
+++ b/vest/error_surface.h
@@ -10,8 +10,8 @@ class Score;
struct ErrorSegment {
double x;
- Score* delta;
- ErrorSegment() : x(0), delta(NULL) {}
+ ScoreP delta;
+ ErrorSegment() : x(0), delta() {}
};
class ErrorSurface : public std::vector<ErrorSegment> {
diff --git a/vest/line_optimizer.cc b/vest/line_optimizer.cc
index e8b40237..70a00cbc 100644
--- a/vest/line_optimizer.cc
+++ b/vest/line_optimizer.cc
@@ -32,7 +32,8 @@ double LineOptimizer::LineOptimize(
}
sort(all_ints.begin(), all_ints.end(), IntervalComp());
double last_boundary = all_ints.front()->x;
- Score* acc = all_ints.front()->delta->GetZero();
+ ScoreP accp = all_ints.front()->delta->GetZero();
+ Score *acc=accp.get();
float& cur_best_score = *best_score;
cur_best_score = (type == MAXIMIZE_SCORE ?
-numeric_limits<float>::max() : numeric_limits<float>::max());
@@ -72,7 +73,6 @@ double LineOptimizer::LineOptimize(
pos = last_boundary + 1000.0;
}
}
- delete acc;
return pos;
}
@@ -92,7 +92,6 @@ void LineOptimizer::CreateOptimizationDirections(
vector<SparseVector<double> >* dirs
, bool include_orthogonal
) {
- const int num_directions = features_to_optimize.size() + additional_random_directions;
dirs->clear();
typedef SparseVector<double> Dir;
vector<Dir> &out=*dirs;
diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc
index 5b513f9b..f66b5082 100644
--- a/vest/mr_vest_generate_mapper_input.cc
+++ b/vest/mr_vest_generate_mapper_input.cc
@@ -99,7 +99,6 @@ struct oracle_directions {
("fear_to_hope,f",po::bool_switch(&fear_to_hope),"for each of the oracle_directions, also include a direction from fear to hope (as well as origin to hope)")
("no_old_to_hope","don't emit the usual old -> hope oracle")
("decoder_translations",po::value<string>(&decoder_translations_file)->default_value(""),"one per line decoder 1best translations for computing document BLEU vs. sentences-seen-so-far BLEU")
- ("verbose",po::bool_switch(&verbose),"detailed logs")
;
}
void InitCommandLine(int argc, char *argv[], po::variables_map *conf) {
@@ -133,6 +132,7 @@ struct oracle_directions {
}
UseConf(*conf);
+ verbose=oracle.verbose;
return;
bad_cmdline:
cerr << dcmdline_options << endl;
@@ -158,15 +158,6 @@ struct oracle_directions {
vector<string> optimize_features;
void UseConf(po::variables_map const& conf) {
oracle.UseConf(conf);
- // po::value<X>(&var) takes care of below:
- // fear_to_hope=conf.count("fear_to_hope");
- // n_random=conf["random_directions"].as<unsigned int>();
- // forest_repository=conf["forest_repository"].as<string>();
- // dev_set_size=conf["dev_set_size"].as<unsigned int>();
- // n_oracle=conf["oracle_directions"].as<unsigned>();
- // oracle_batch=conf["oracle_batch"].as<unsigned>();
- // max_similarity=conf["max_similarity"].as<double>();
- // weights_file=conf["weights"].as<string>();
include_primary=!conf.count("no_primary");
old_to_hope=!conf.count("no_old_to_hope");
@@ -201,9 +192,11 @@ struct oracle_directions {
model_scores.resize(model_hyps.size());
for (int i=0;i<model_hyps.size();++i) {
//FIXME: what is scoreccand? with / without clipping? do without for consistency w/ oracle
- Score *s=oracle.ds[i]->ScoreCandidate(model_hyps[i]);
- model_scores[i].reset(s);
- oracle.doc_score->PlusEquals(*s);
+ model_scores[i]=oracle.ds[i]->ScoreCandidate(model_hyps[i]);
+ if (verbose) cerr<<"Before model["<<i<<"]: "<<ds().ScoreDetails()<<endl;
+ if (verbose) cerr<<"model["<<i<<"]: "<<model_scores[i]->ScoreDetails()<<endl;
+ oracle.doc_score->PlusEquals(*model_scores[i]);
+ if (verbose) cerr<<"After model["<<i<<"]: "<<ds().ScoreDetails()<<endl;
}
//TODO: compute doc bleu stats for each sentence, then when getting oracle temporarily exclude stats for that sentence (skip regular score updating)
}
@@ -252,12 +245,12 @@ struct oracle_directions {
Timer t("Loading forest from JSON "+forest_file(i));
HypergraphIO::ReadFromJSON(rf.stream(), &hg);
}
- if (verbose) cerr<<"Before oracle["<<i<<"]: "<<ds().ScoreDetails();
- o=oracle.ComputeOracle(oracle.MakeMetadata(hg,i),&hg,origin,&cerr);
+ if (verbose) cerr<<"Before oracle["<<i<<"]: "<<ds().ScoreDetails()<<endl;
+ o=oracle.ComputeOracle(oracle.MakeMetadata(hg,i),&hg,origin);
if (verbose) {
cerr << o;
- cerr<<" ; after: "<<ds().ScoreDetails()
- <<" oracle="<<oracle.GetScore(o.hope.sentence,i)->ScoreDetails()
+ cerr<<"After oracle: "<<ds().ScoreDetails()<<endl
+ <<" oracle="<<oracle.GetScore(o.hope.sentence,i)->ScoreDetails()<<endl
<<" model="<<oracle.GetScore(o.model.sentence,i)->ScoreDetails()<<endl;
if (have_doc)
cerr<<" doc (should = model): "<<model_scores[i]->ScoreDetails()<<endl;
diff --git a/vest/scorer.cc b/vest/scorer.cc
index d8628418..5cad948d 100644
--- a/vest/scorer.cc
+++ b/vest/scorer.cc
@@ -1,6 +1,4 @@
#include "scorer.h"
-#define DEBUG_SCORER
-
#include <boost/lexical_cast.hpp>
#include <map>
@@ -24,6 +22,7 @@
#include "stringlib.h"
#include "lattice.h"
+
using boost::shared_ptr;
using namespace std;
@@ -107,8 +106,8 @@ class SERScore : public Score {
correct += static_cast<const SERScore&>(delta).correct;
total += static_cast<const SERScore&>(delta).total;
}
- Score* GetZero() const { return new SERScore; }
- Score* GetOne() const { return new SERScore; }
+ ScoreP GetZero() const { return ScoreP(new SERScore); }
+ ScoreP GetOne() const { return ScoreP(new SERScore); }
void Subtract(const Score& rhs, Score* res) const {
SERScore* r = static_cast<SERScore*>(res);
r->correct = correct - static_cast<const SERScore&>(rhs).correct;
@@ -131,18 +130,17 @@ std::string SentenceScorer::verbose_desc() const {
class SERScorer : public SentenceScorer {
public:
SERScorer(const vector<vector<WordID> >& references) : SentenceScorer("SERScorer",references),refs_(references) {}
- Score* ScoreCCandidate(const vector<WordID>& /* hyp */) const {
- Score* a = NULL;
- return a;
+ ScoreP ScoreCCandidate(const vector<WordID>& /* hyp */) const {
+ return ScoreP();
}
- Score* ScoreCandidate(const vector<WordID>& hyp) const {
+ ScoreP ScoreCandidate(const vector<WordID>& hyp) const {
SERScore* res = new SERScore;
res->total = 1;
for (int i = 0; i < refs_.size(); ++i)
if (refs_[i] == hyp) res->correct = 1;
- return res;
+ return ScoreP(res);
}
- static Score* ScoreFromString(const string& data) {
+ static ScoreP ScoreFromString(const string& data) {
assert(!"Not implemented");
}
private:
@@ -164,8 +162,8 @@ class BLEUScore : public Score {
void PlusEquals(const Score& delta);
void PlusEquals(const Score& delta, const float scale);
void PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len);
- Score* GetZero() const;
- Score* GetOne() const;
+ ScoreP GetZero() const;
+ ScoreP GetOne() const;
void Subtract(const Score& rhs, Score* res) const;
void Encode(string* out) const;
bool IsAdditiveIdentity() const {
@@ -189,9 +187,9 @@ class BLEUScorerBase : public SentenceScorer {
BLEUScorerBase(const vector<vector<WordID> >& references,
int n
);
- Score* ScoreCandidate(const vector<WordID>& hyp) const;
- Score* ScoreCCandidate(const vector<WordID>& hyp) const;
- static Score* ScoreFromString(const string& in);
+ ScoreP ScoreCandidate(const vector<WordID>& hyp) const;
+ ScoreP ScoreCCandidate(const vector<WordID>& hyp) const;
+ static ScoreP ScoreFromString(const string& in);
virtual float ComputeRefLength(const vector<WordID>& hyp) const = 0;
private:
@@ -272,7 +270,7 @@ class BLEUScorerBase : public SentenceScorer {
vector<int> lengths_;
};
-Score* BLEUScorerBase::ScoreFromString(const string& in) {
+ScoreP BLEUScorerBase::ScoreFromString(const string& in) {
istringstream is(in);
int n;
is >> n;
@@ -283,7 +281,7 @@ Score* BLEUScorerBase::ScoreFromString(const string& in) {
is >> r->correct_ngram_hit_counts[i];
is >> r->hyp_ngram_counts[i];
}
- return r;
+ return ScoreP(r);
}
class IBM_BLEUScorer : public BLEUScorerBase {
@@ -343,51 +341,48 @@ class Koehn_BLEUScorer : public BLEUScorerBase {
float avg_;
};
-SentenceScorer* SentenceScorer::CreateSentenceScorer(const ScoreType type,
+ScorerP SentenceScorer::CreateSentenceScorer(const ScoreType type,
const vector<vector<WordID> >& refs,
- const string& src) {
+ const string& src)
+{
+ SentenceScorer *r=0;
switch (type) {
- case IBM_BLEU: return new IBM_BLEUScorer(refs, 4);
- case IBM_BLEU_3 : return new IBM_BLEUScorer(refs,3);
- case NIST_BLEU: return new NIST_BLEUScorer(refs, 4);
- case Koehn_BLEU: return new Koehn_BLEUScorer(refs, 4);
- case AER: return new AERScorer(refs, src);
- case TER: return new TERScorer(refs);
- case SER: return new SERScorer(refs);
- case BLEU_minus_TER_over_2: return new BLEUTERCombinationScorer(refs);
+ case IBM_BLEU: r = new IBM_BLEUScorer(refs, 4);break;
+ case IBM_BLEU_3 : r = new IBM_BLEUScorer(refs,3);break;
+ case NIST_BLEU: r = new NIST_BLEUScorer(refs, 4);break;
+ case Koehn_BLEU: r = new Koehn_BLEUScorer(refs, 4);break;
+ case AER: r = new AERScorer(refs, src);break;
+ case TER: r = new TERScorer(refs);break;
+ case SER: r = new SERScorer(refs);break;
+ case BLEU_minus_TER_over_2: r = new BLEUTERCombinationScorer(refs);break;
default:
assert(!"Not implemented!");
}
+ return ScorerP(r);
}
-Score* SentenceScorer::GetOne() const {
+ScoreP SentenceScorer::GetOne() const {
Sentence s;
return ScoreCCandidate(s)->GetOne();
}
-Score* SentenceScorer::GetZero() const {
+ScoreP SentenceScorer::GetZero() const {
Sentence s;
return ScoreCCandidate(s)->GetZero();
}
-Score* Score::GetOne(ScoreType type) {
+ScoreP Score::GetOne(ScoreType type) {
std::vector<SentenceScorer::Sentence > refs;
- SentenceScorer *ps=SentenceScorer::CreateSentenceScorer(type,refs);
- Score *s=ps->GetOne();
- delete ps;
- return s;
+ return SentenceScorer::CreateSentenceScorer(type,refs)->GetOne();
}
-Score* Score::GetZero(ScoreType type) {
+ScoreP Score::GetZero(ScoreType type) {
std::vector<SentenceScorer::Sentence > refs;
- SentenceScorer *ps=SentenceScorer::CreateSentenceScorer(type,refs);
- Score *s=ps->GetZero();
- delete ps;
- return s;
+ return SentenceScorer::CreateSentenceScorer(type,refs)->GetZero();
}
-Score* SentenceScorer::CreateScoreFromString(const ScoreType type, const string& in) {
+ScoreP SentenceScorer::CreateScoreFromString(const ScoreType type, const string& in) {
switch (type) {
case IBM_BLEU:
case IBM_BLEU_3:
@@ -411,7 +406,7 @@ void SentenceScorer::ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface
vector<WordID> prev_trans;
const vector<shared_ptr<Segment> >& ienv = ve.GetSortedSegs();
env->resize(ienv.size());
- Score* prev_score = NULL;
+ ScoreP prev_score;
int j = 0;
for (int i = 0; i < ienv.size(); ++i) {
const Segment& seg = *ienv[i];
@@ -453,26 +448,25 @@ void SentenceScorer::ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface
}
// cerr << "Identical translation, skipping scoring\n";
} else {
- Score* score = ScoreCandidate(trans);
+ ScoreP score = ScoreCandidate(trans);
// cerr << "score= " << score->ComputeScore() << "\n";
- Score* cur_delta = score->GetZero();
+ ScoreP cur_delta_p = score->GetZero();
+ Score* cur_delta = cur_delta_p.get();
// just record the score diffs
if (!prev_score)
prev_score = score->GetZero();
score->Subtract(*prev_score, cur_delta);
- delete prev_score;
prev_trans.swap(trans);
prev_score = score;
if ((!minimize_segments) || (!cur_delta->IsAdditiveIdentity())) {
ErrorSegment& out = (*env)[j];
- out.delta = cur_delta;
+ out.delta = cur_delta_p;
out.x = seg.x;
- ++j;
+ ++j;
}
}
}
- delete prev_score;
// cerr << " In segments: " << ienv.size() << endl;
// cerr << "Out segments: " << j << endl;
assert(j > 0);
@@ -588,12 +582,12 @@ void BLEUScore::PlusPartialEquals(const Score& delta, int oracle_e_cover, int or
}
-Score* BLEUScore::GetZero() const {
- return new BLEUScore(hyp_ngram_counts.size());
+ScoreP BLEUScore::GetZero() const {
+ return ScoreP(new BLEUScore(hyp_ngram_counts.size()));
}
-Score* BLEUScore::GetOne() const {
- return new BLEUScore(hyp_ngram_counts.size(),1);
+ScoreP BLEUScore::GetOne() const {
+ return ScoreP(new BLEUScore(hyp_ngram_counts.size(),1));
}
@@ -615,17 +609,17 @@ BLEUScorerBase::BLEUScorerBase(const vector<vector<WordID> >& references,
}
}
-Score* BLEUScorerBase::ScoreCandidate(const vector<WordID>& hyp) const {
+ScoreP BLEUScorerBase::ScoreCandidate(const vector<WordID>& hyp) const {
BLEUScore* bs = new BLEUScore(n_);
for (NGramCountMap::iterator i=ngrams_.begin(); i != ngrams_.end(); ++i)
i->second.second = 0;
ComputeNgramStats(hyp, &bs->correct_ngram_hit_counts, &bs->hyp_ngram_counts, true);
bs->ref_len = ComputeRefLength(hyp);
bs->hyp_len = hyp.size();
- return bs;
+ return ScoreP(bs);
}
-Score* BLEUScorerBase::ScoreCCandidate(const vector<WordID>& hyp) const {
+ScoreP BLEUScorerBase::ScoreCCandidate(const vector<WordID>& hyp) const {
BLEUScore* bs = new BLEUScore(n_);
for (NGramCountMap::iterator i=ngrams_.begin(); i != ngrams_.end(); ++i)
i->second.second = 0;
@@ -633,7 +627,7 @@ Score* BLEUScorerBase::ScoreCCandidate(const vector<WordID>& hyp) const {
ComputeNgramStats(hyp, &bs->correct_ngram_hit_counts, &bs->hyp_ngram_counts,clip);
bs->ref_len = ComputeRefLength(hyp);
bs->hyp_len = hyp.size();
- return bs;
+ return ScoreP(bs);
}
@@ -643,7 +637,7 @@ DocScorer::~DocScorer() {
void DocScorer::Init(
const ScoreType type,
const vector<string>& ref_files,
- const string& src_file) {
+ const string& src_file, bool verbose) {
scorers_.clear();
// TODO stop using valarray, start using ReadFile
cerr << "Loading references (" << ref_files.size() << " files)\n";
@@ -686,9 +680,8 @@ void DocScorer::Init(
ProcessAndStripSGML(&src_line, &dummy);
}
scorers_.push_back(ScorerP(SentenceScorer::CreateSentenceScorer(type, refs, src_line)));
-#ifdef DEBUG_SCORER
- cerr<<"doc_scorer["<<line<<"] = "<<scorers_.back()->verbose_desc()<<endl;
-#endif
+ if (verbose)
+ cerr<<"doc_scorer["<<line<<"] = "<<scorers_.back()->verbose_desc()<<endl;
++line;
}
}
diff --git a/vest/scorer.h b/vest/scorer.h
index cc6b7335..29ba5377 100644
--- a/vest/scorer.h
+++ b/vest/scorer.h
@@ -3,9 +3,14 @@
#include <vector>
#include <string>
#include <boost/shared_ptr.hpp>
-
+//TODO: use intrusive shared_ptr in Score (because there are many of them on ErrorSurfaces)
#include "wordid.h"
+class Score;
+class SentenceScorer;
+typedef boost::shared_ptr<Score> ScoreP;
+typedef boost::shared_ptr<SentenceScorer> ScorerP;
+
class ViterbiEnvelope;
class ErrorSurface;
class Hypergraph; // needed for alignment
@@ -16,7 +21,6 @@ std::string StringFromScoreType(ScoreType st);
class Score {
public:
- typedef boost::shared_ptr<Score> ScoreP;
virtual ~Score();
virtual float ComputeScore() const = 0;
virtual float ComputePartialScore() const =0;
@@ -29,21 +33,19 @@ class Score {
virtual void PlusEquals(const Score& rhs, const float scale) = 0;
virtual void PlusEquals(const Score& rhs) = 0;
virtual void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len) = 0;
- virtual void Subtract(const Score& rhs, Score* res) const = 0;
- virtual Score* GetZero() const = 0;
- virtual Score* GetOne() const = 0;
+ virtual void Subtract(const Score& rhs, Score *res) const = 0;
+ virtual ScoreP GetZero() const = 0;
+ virtual ScoreP GetOne() const = 0;
virtual bool IsAdditiveIdentity() const = 0; // returns true if adding this delta
// to another score results in no score change
// under any circumstances
virtual void Encode(std::string* out) const = 0;
- static Score* GetZero(ScoreType type);
- static Score* GetOne(ScoreType type);
+ static ScoreP GetZero(ScoreType type);
+ static ScoreP GetOne(ScoreType type);
};
class SentenceScorer {
public:
- typedef boost::shared_ptr<Score> ScoreP;
- typedef boost::shared_ptr<SentenceScorer> ScorerP;
typedef std::vector<WordID> Sentence;
typedef std::vector<Sentence> Sentences;
std::string desc;
@@ -52,14 +54,14 @@ class SentenceScorer {
std::string verbose_desc() const;
virtual float ComputeRefLength(const Sentence& hyp) const; // default: avg of refs.length
virtual ~SentenceScorer();
- virtual Score* GetOne() const;
- virtual Score* GetZero() const;
+ virtual ScoreP GetOne() const;
+ virtual ScoreP GetZero() const;
void ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface* es, const ScoreType type, const Hypergraph& hg) const;
- virtual Score* ScoreCandidate(const Sentence& hyp) const = 0;
- virtual Score* ScoreCCandidate(const Sentence& hyp) const =0;
+ virtual ScoreP ScoreCandidate(const Sentence& hyp) const = 0;
+ virtual ScoreP ScoreCCandidate(const Sentence& hyp) const =0;
virtual const std::string* GetSource() const;
- static Score* CreateScoreFromString(const ScoreType type, const std::string& in);
- static SentenceScorer* CreateSentenceScorer(const ScoreType type,
+ static ScoreP CreateScoreFromString(const ScoreType type, const std::string& in);
+ static ScorerP CreateSentenceScorer(const ScoreType type,
const std::vector<Sentence >& refs,
const std::string& src = "");
};
@@ -71,19 +73,23 @@ class DocScorer {
DocScorer() { }
void Init(const ScoreType type,
const std::vector<std::string>& ref_files,
- const std::string& src_file = "");
+ const std::string& src_file = "",
+ bool verbose=false
+ );
DocScorer(const ScoreType type,
const std::vector<std::string>& ref_files,
- const std::string& src_file = "")
+ const std::string& src_file = "",
+ bool verbose=false
+ )
{
- Init(type,ref_files,src_file);
+ Init(type,ref_files,src_file,verbose);
}
int size() const { return scorers_.size(); }
- typedef boost::shared_ptr<SentenceScorer> ScorerP;
ScorerP operator[](size_t i) const { return scorers_[i]; }
private:
std::vector<ScorerP> scorers_;
};
+
#endif
diff --git a/vest/ter.cc b/vest/ter.cc
index 6e16e1cf..b4ebc4f5 100644
--- a/vest/ter.cc
+++ b/vest/ter.cc
@@ -91,7 +91,7 @@ class TERScorerImpl {
typedef unordered_map<vector<WordID>, set<int>, boost::hash<vector<WordID> > > NgramToIntsMap;
mutable NgramToIntsMap nmap_;
-
+
static float MinimumEditDistance(
const vector<WordID>& hyp,
const vector<WordID>& ref,
@@ -128,7 +128,7 @@ class TERScorerImpl {
}
}
}
-
+
// trace back along the best path and record the transition types
path->clear();
int i = hyp.size();
@@ -220,7 +220,7 @@ class TERScorerImpl {
cerr << "in=" << TD::GetString(in) << endl;
cerr << "out=" << TD::GetString(*out) << endl;
}
- assert(out->size() == in.size());
+ assert(out->size() == in.size());
// cerr << "ps: " << TD::GetString(*out) << endl;
}
@@ -338,7 +338,7 @@ class TERScorerImpl {
*newerr = curerr;
vector<TransType> cur_best_path;
vector<WordID> cur_best_hyp;
-
+
bool res = false;
for (int i = shifts.size() - 1; i >=0; --i) {
float curfix = curerr - (cur_best_shift_cost + *newerr);
@@ -438,11 +438,11 @@ class TERScore : public Score {
stats += static_cast<const TERScore&>(delta).stats;
}
- Score* GetZero() const {
- return new TERScore;
+ ScoreP GetZero() const {
+ return ScoreP(new TERScore);
}
- Score* GetOne() const {
- return new TERScore;
+ ScoreP GetOne() const {
+ return ScoreP(new TERScore);
}
void Subtract(const Score& rhs, Score* res) const {
static_cast<TERScore*>(res)->stats = stats - static_cast<const TERScore&>(rhs).stats;
@@ -465,7 +465,7 @@ class TERScore : public Score {
valarray<int> stats;
};
-Score* TERScorer::ScoreFromString(const std::string& data) {
+ScoreP TERScorer::ScoreFromString(const std::string& data) {
istringstream is(data);
TERScore* r = new TERScore;
is >> r->stats[TERScore::kINSERTIONS]
@@ -473,13 +473,13 @@ Score* TERScorer::ScoreFromString(const std::string& data) {
>> r->stats[TERScore::kSUBSTITUTIONS]
>> r->stats[TERScore::kSHIFTS]
>> r->stats[TERScore::kREF_WORDCOUNT];
- return r;
+ return ScoreP(r);
}
void TERScore::ScoreDetails(std::string* details) const {
char buf[200];
sprintf(buf, "TER = %.2f, %3d|%3d|%3d|%3d (len=%d)",
- ComputeScore() * 100.0f,
+ ComputeScore() * 100.0f,
stats[kINSERTIONS],
stats[kDELETIONS],
stats[kSUBSTITUTIONS],
@@ -498,12 +498,11 @@ TERScorer::TERScorer(const vector<vector<WordID> >& refs) : impl_(refs.size()) {
impl_[i] = new TERScorerImpl(refs[i]);
}
-Score* TERScorer::ScoreCCandidate(const vector<WordID>& hyp) const {
- Score* a = NULL;
- return a;
+ScoreP TERScorer::ScoreCCandidate(const vector<WordID>& hyp) const {
+ return ScoreP();
}
-Score* TERScorer::ScoreCandidate(const std::vector<WordID>& hyp) const {
+ScoreP TERScorer::ScoreCandidate(const std::vector<WordID>& hyp) const {
float best_score = numeric_limits<float>::max();
TERScore* res = new TERScore;
int avg_len = 0;
@@ -528,5 +527,5 @@ Score* TERScorer::ScoreCandidate(const std::vector<WordID>& hyp) const {
best_score = score;
}
}
- return res;
+ return ScoreP(res);
}
diff --git a/vest/ter.h b/vest/ter.h
index 21007874..43314791 100644
--- a/vest/ter.h
+++ b/vest/ter.h
@@ -9,9 +9,9 @@ class TERScorer : public SentenceScorer {
public:
TERScorer(const std::vector<std::vector<WordID> >& references);
~TERScorer();
- Score* ScoreCandidate(const std::vector<WordID>& hyp) const;
- Score* ScoreCCandidate(const std::vector<WordID>& hyp) const;
- static Score* ScoreFromString(const std::string& data);
+ ScoreP ScoreCandidate(const std::vector<WordID>& hyp) const;
+ ScoreP ScoreCCandidate(const std::vector<WordID>& hyp) const;
+ static ScoreP ScoreFromString(const std::string& data);
private:
std::vector<TERScorerImpl*> impl_;
};