summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-08-17 18:45:16 +0000
committer redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-08-17 18:45:16 +0000
commit 2435d1443a7c846fab9d9f3d187af2e98fcae2e2 (patch)
tree ce48f645ddc6000bdb1a928c902fc8ab91a5a0be
parent 41e6416bcc72c069cd04a9fb09a897ae867d4a6d (diff)
fix bug when vest starts with an origin = 0
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@578 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r-- vest/lo_test.cc 35
-rw-r--r-- vest/scorer_test.cc 182
-rw-r--r-- vest/viterbi_envelope.cc 1
-rw-r--r-- vest/viterbi_envelope.h 2
4 files changed, 37 insertions, 183 deletions
diff --git a/vest/lo_test.cc b/vest/lo_test.cc
index 9200eb34..f5638600 100644
--- a/vest/lo_test.cc
+++ b/vest/lo_test.cc
@@ -193,6 +193,41 @@ TEST_F(OptTest, TestS1) {
cerr << TD::GetString(t2) << endl;
}
+TEST_F(OptTest,TestZeroOrigin) {
+ const string json = "{\"rules\":[1,\"[X7] ||| blA ||| without ||| LHSProb=3.92173 LexE2F=2.90799 LexF2E=1.85003 GenerativeProb=10.5381 RulePenalty=1 XFE=2.77259 XEF=0.441833 LabelledEF=2.63906 LabelledFE=4.96981 LogRuleCount=0.693147\",2,\"[X7] ||| blA ||| except ||| LHSProb=4.92173 LexE2F=3.90799 LexF2E=1.85003 GenerativeProb=11.5381 RulePenalty=1 XFE=2.77259 XEF=1.44183 LabelledEF=2.63906 LabelledFE=4.96981 LogRuleCount=1.69315\",3,\"[S] ||| [X7,1] ||| [1] ||| GlueTop=1\",4,\"[X28] ||| EnwAn ||| title ||| LHSProb=3.96802 LexE2F=2.22462 LexF2E=1.83258 GenerativeProb=10.0863 RulePenalty=1 XFE=0 XEF=1.20397 LabelledEF=1.20397 LabelledFE=-1.98341e-08 LogRuleCount=1.09861\",5,\"[X0] ||| EnwAn ||| funny ||| LHSProb=3.98479 LexE2F=1.79176 LexF2E=3.21888 GenerativeProb=11.1681 RulePenalty=1 XFE=0 XEF=2.30259 LabelledEF=2.30259 LabelledFE=0 LogRuleCount=0 SingletonRule=1\",6,\"[X8] ||| [X7,1] EnwAn ||| entitled [1] ||| LHSProb=3.82533 LexE2F=3.21888 LexF2E=2.52573 GenerativeProb=11.3276 RulePenalty=1 XFE=1.20397 XEF=1.20397 LabelledEF=2.30259 LabelledFE=2.30259 LogRuleCount=0 SingletonRule=1\",7,\"[S] ||| [S,1] [X28,2] ||| [1] [2] ||| Glue=1\",8,\"[S] ||| [S,1] [X0,2] ||| [1] [2] ||| Glue=1\",9,\"[S] ||| [X8,1] ||| [1] ||| GlueTop=1\",10,\"[Goal] ||| [S,1] ||| 
[1]\"],\"features\":[\"PassThrough\",\"Glue\",\"GlueTop\",\"LanguageModel\",\"WordPenalty\",\"LHSProb\",\"LexE2F\",\"LexF2E\",\"GenerativeProb\",\"RulePenalty\",\"XFE\",\"XEF\",\"LabelledEF\",\"LabelledFE\",\"LogRuleCount\",\"SingletonRule\"],\"edges\":[{\"tail\":[],\"spans\":[0,1,-1,-1],\"feats\":[5,3.92173,6,2.90799,7,1.85003,8,10.5381,9,1,10,2.77259,11,0.441833,12,2.63906,13,4.96981,14,0.693147],\"rule\":1},{\"tail\":[],\"spans\":[0,1,-1,-1],\"feats\":[5,4.92173,6,3.90799,7,1.85003,8,11.5381,9,1,10,2.77259,11,1.44183,12,2.63906,13,4.96981,14,1.69315],\"rule\":2}],\"node\":{\"in_edges\":[0,1],\"cat\":\"X7\"},\"edges\":[{\"tail\":[0],\"spans\":[0,1,-1,-1],\"feats\":[2,1],\"rule\":3}],\"node\":{\"in_edges\":[2],\"cat\":\"S\"},\"edges\":[{\"tail\":[],\"spans\":[1,2,-1,-1],\"feats\":[5,3.96802,6,2.22462,7,1.83258,8,10.0863,9,1,11,1.20397,12,1.20397,13,-1.98341e-08,14,1.09861],\"rule\":4}],\"node\":{\"in_edges\":[3],\"cat\":\"X28\"},\"edges\":[{\"tail\":[],\"spans\":[1,2,-1,-1],\"feats\":[5,3.98479,6,1.79176,7,3.21888,8,11.1681,9,1,11,2.30259,12,2.30259,15,1],\"rule\":5}],\"node\":{\"in_edges\":[4],\"cat\":\"X0\"},\"edges\":[{\"tail\":[0],\"spans\":[0,2,-1,-1],\"feats\":[5,3.82533,6,3.21888,7,2.52573,8,11.3276,9,1,10,1.20397,11,1.20397,12,2.30259,13,2.30259,15,1],\"rule\":6}],\"node\":{\"in_edges\":[5],\"cat\":\"X8\"},\"edges\":[{\"tail\":[1,2],\"spans\":[0,2,-1,-1],\"feats\":[1,1],\"rule\":7},{\"tail\":[1,3],\"spans\":[0,2,-1,-1],\"feats\":[1,1],\"rule\":8},{\"tail\":[4],\"spans\":[0,2,-1,-1],\"feats\":[2,1],\"rule\":9}],\"node\":{\"in_edges\":[6,7,8],\"cat\":\"S\"},\"edges\":[{\"tail\":[5],\"spans\":[0,2,-1,-1],\"feats\":[],\"rule\":10}],\"node\":{\"in_edges\":[9],\"cat\":\"Goal\"}}";
+ Hypergraph hg;
+ istringstream instr(json);
+ HypergraphIO::ReadFromJSON(&instr, &hg);
+ SparseVector<double> wts;
+ wts.set_value(FD::Convert("PassThrough"), -0.929201533002898);
+ hg.Reweight(wts);
+
+ vector<pair<vector<WordID>, prob_t> > list;
+ std::vector<SparseVector<double> > features;
+ KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(hg, 10);
+ for (int i = 0; i < 10; ++i) {
+ const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
+ kbest.LazyKthBest(hg.nodes_.size() - 1, i);
+ if (!d) break;
+ cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl;
+ }
+
+ SparseVector<double> axis; axis.set_value(FD::Convert("Glue"),1.0);
+ ViterbiEnvelopeWeightFunction wf(wts, axis); // wts = starting point, axis = search direction
+ vector<ViterbiEnvelope> envs(1);
+ envs[0] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
+
+ ScoreType type = ScoreTypeFromString("ibm_bleu");
+ vector<vector<WordID> > mr(4);
+ TD::ConvertSentence("untitled", &mr[0]);
+ TD::ConvertSentence("with no title", &mr[1]);
+ TD::ConvertSentence("without a title", &mr[2]);
+ TD::ConvertSentence("without title", &mr[3]);
+ ScorerP scorer1 = SentenceScorer::CreateSentenceScorer(type, mr);
+ vector<ErrorSurface> es(1);
+ ComputeErrorSurface(*scorer1, envs[0], &es[0], IBM_BLEU, hg);
+}
+
int main(int argc, char **argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
diff --git a/vest/scorer_test.cc b/vest/scorer_test.cc
deleted file mode 100644
index a07a8c4b..00000000
--- a/vest/scorer_test.cc
+++ /dev/null
@@ -1,182 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <valarray>
-#include <gtest/gtest.h>
-
-#include "tdict.h"
-#include "scorer.h"
-#include "aer_scorer.h"
-
-using namespace std;
-
-class ScorerTest : public testing::Test {
- protected:
- virtual void SetUp() {
- refs0.resize(4);
- refs1.resize(4);
- TD::ConvertSentence("export of high-tech products in guangdong in first two months this year reached 3.76 billion us dollars", &refs0[0]);
- TD::ConvertSentence("guangdong's export of new high technology products amounts to us $ 3.76 billion in first two months of this year", &refs0[1]);
- TD::ConvertSentence("guangdong exports us $ 3.76 billion worth of high technology products in the first two months of this year", &refs0[2]);
- TD::ConvertSentence("in the first 2 months this year , the export volume of new hi-tech products in guangdong province reached 3.76 billion us dollars .", &refs0[3]);
- TD::ConvertSentence("xinhua news agency , guangzhou , march 16 ( reporter chen ji ) the latest statistics show that from january through february this year , the export of high-tech products in guangdong province reached 3.76 billion us dollars , up 34.8 \% over the same period last year and accounted for 25.5 \% of the total export in the province .", &refs1[0]);
- TD::ConvertSentence("xinhua news agency , guangzhou , march 16 ( reporter : chen ji ) -- latest statistic indicates that guangdong's export of new high technology products amounts to us $ 3.76 billion , up 34.8 \% over corresponding period and accounts for 25.5 \% of the total exports of the province .", &refs1[1]);
- TD::ConvertSentence("xinhua news agency report of march 16 from guangzhou ( by staff reporter chen ji ) - latest statistics indicate guangdong province exported us $ 3.76 billion worth of high technology products , up 34.8 percent from the same period last year , which account for 25.5 percent of the total exports of the province .", &refs1[2]);
- TD::ConvertSentence("guangdong , march 16 , ( xinhua ) -- ( chen ji reports ) as the newest statistics shows , in january and feberuary this year , the export volume of new hi-tech products in guangdong province reached 3.76 billion us dollars , up 34.8 \% than last year , making up 25.5 \% of the province's total .", &refs1[3]);
- TD::ConvertSentence("one guangdong province will next export us $ 3.76 high-tech product two months first this year 3.76 billion us dollars", &hyp1);
- TD::ConvertSentence("xinhua news agency , guangzhou , 16th of march ( reporter chen ) -- latest statistics suggest that guangdong exports new advanced technology product totals $ 3.76 million , 34.8 percent last corresponding period and accounts for 25.5 percent of the total export province .", &hyp2);
- }
-
- virtual void TearDown() { }
-
- vector<vector<WordID> > refs0;
- vector<vector<WordID> > refs1;
- vector<WordID> hyp1;
- vector<WordID> hyp2;
-};
-
-TEST_F(ScorerTest, TestCreateFromFiles) {
- vector<string> files;
- files.push_back("test_data/re.txt.0");
- files.push_back("test_data/re.txt.1");
- files.push_back("test_data/re.txt.2");
- files.push_back("test_data/re.txt.3");
- DocScorer ds(IBM_BLEU, files);
-}
-
-TEST_F(ScorerTest, TestBLEUScorer) {
- ScorerP s1 = SentenceScorer::CreateSentenceScorer(IBM_BLEU, refs0);
- ScorerP s2 = SentenceScorer::CreateSentenceScorer(IBM_BLEU, refs1);
- ScoreP b1 = s1->ScoreCandidate(hyp1);
- EXPECT_FLOAT_EQ(0.23185077, b1->ComputeScore());
- ScoreP b2 = s2->ScoreCandidate(hyp2);
- EXPECT_FLOAT_EQ(0.38101241, b2->ComputeScore());
- b1->PlusEquals(*b2);
- EXPECT_FLOAT_EQ(0.348854, b1->ComputeScore());
- EXPECT_FALSE(b1->IsAdditiveIdentity());
- string details;
- b1->ScoreDetails(&details);
- EXPECT_EQ("BLEU = 34.89, 81.5|50.8|29.5|18.6 (brev=0.898)", details);
- cerr << details << endl;
- string enc;
- b1->Encode(&enc);
- ScoreP b3 = SentenceScorer::CreateScoreFromString(IBM_BLEU, enc);
- details.clear();
- cerr << "Encoded BLEU score size: " << enc.size() << endl;
- b3->ScoreDetails(&details);
- cerr << details << endl;
- EXPECT_FALSE(b3->IsAdditiveIdentity());
- EXPECT_EQ("BLEU = 34.89, 81.5|50.8|29.5|18.6 (brev=0.898)", details);
- ScoreP bz = b3->GetZero();
- EXPECT_TRUE(bz->IsAdditiveIdentity());
-}
-
-TEST_F(ScorerTest, TestTERScorer) {
- ScorerP s1 = SentenceScorer::CreateSentenceScorer(TER, refs0);
- ScorerP s2 = SentenceScorer::CreateSentenceScorer(TER, refs1);
- string details;
- ScoreP t1 = s1->ScoreCandidate(hyp1);
- t1->ScoreDetails(&details);
- cerr << "DETAILS: " << details << endl;
- cerr << t1->ComputeScore() << endl;
- ScoreP t2 = s2->ScoreCandidate(hyp2);
- t2->ScoreDetails(&details);
- cerr << "DETAILS: " << details << endl;
- cerr << t2->ComputeScore() << endl;
- t1->PlusEquals(*t2);
- cerr << t1->ComputeScore() << endl;
- t1->ScoreDetails(&details);
- cerr << "DETAILS: " << details << endl;
- EXPECT_EQ("TER = 44.16, 4| 8| 16| 6 (len=77)", details);
- string enc;
- t1->Encode(&enc);
- ScoreP t3 = SentenceScorer::CreateScoreFromString(TER, enc);
- details.clear();
- t3->ScoreDetails(&details);
- EXPECT_EQ("TER = 44.16, 4| 8| 16| 6 (len=77)", details);
- EXPECT_FALSE(t3->IsAdditiveIdentity());
- ScoreP tz = t3->GetZero();
- EXPECT_TRUE(tz->IsAdditiveIdentity());
-}
-
-TEST_F(ScorerTest, TestTERScorerSimple) {
- vector<vector<WordID> > ref(1);
- TD::ConvertSentence("1 2 3 A B", &ref[0]);
- vector<WordID> hyp;
- TD::ConvertSentence("A B 1 2 3", &hyp);
- ScorerP s1 = SentenceScorer::CreateSentenceScorer(TER, ref);
- string details;
- ScoreP t1 = s1->ScoreCandidate(hyp);
- t1->ScoreDetails(&details);
- cerr << "DETAILS: " << details << endl;
-}
-
-TEST_F(ScorerTest, TestSERScorerSimple) {
- vector<vector<WordID> > ref(1);
- TD::ConvertSentence("A B C D", &ref[0]);
- vector<WordID> hyp1;
- TD::ConvertSentence("A B C", &hyp1);
- vector<WordID> hyp2;
- TD::ConvertSentence("A B C D", &hyp2);
- ScorerP s1 = SentenceScorer::CreateSentenceScorer(SER, ref);
- string details;
- ScoreP t1 = s1->ScoreCandidate(hyp1);
- t1->ScoreDetails(&details);
- cerr << "DETAILS: " << details << endl;
- ScoreP t2 = s1->ScoreCandidate(hyp2);
- t2->ScoreDetails(&details);
- cerr << "DETAILS: " << details << endl;
- t2->PlusEquals(*t1);
- t2->ScoreDetails(&details);
- cerr << "DETAILS: " << details << endl;
-}
-
-TEST_F(ScorerTest, TestCombiScorer) {
- ScorerP s1 = SentenceScorer::CreateSentenceScorer(BLEU_minus_TER_over_2, refs0);
- string details;
- ScoreP t1 = s1->ScoreCandidate(hyp1);
- t1->ScoreDetails(&details);
- cerr << "DETAILS: " << details << endl;
- cerr << t1->ComputeScore() << endl;
- string enc;
- t1->Encode(&enc);
- ScoreP t2 = SentenceScorer::CreateScoreFromString(BLEU_minus_TER_over_2, enc);
- details.clear();
- t2->ScoreDetails(&details);
- cerr << "DETAILS: " << details << endl;
- ScoreP cz = t2->GetZero();
- EXPECT_FALSE(t2->IsAdditiveIdentity());
- EXPECT_TRUE(cz->IsAdditiveIdentity());
- cz->PlusEquals(*t2);
- EXPECT_FALSE(cz->IsAdditiveIdentity());
- string d2;
- cz->ScoreDetails(&d2);
- EXPECT_EQ(d2, details);
-}
-
-TEST_F(ScorerTest, AERTest) {
- vector<vector<WordID> > refs0(1);
- TD::ConvertSentence("0-0 2-1 1-2 3-3", &refs0[0]);
-
- vector<WordID> hyp;
- TD::ConvertSentence("0-0 1-1", &hyp);
- AERScorer* as = new AERScorer(refs0);
- ScoreP x = as->ScoreCandidate(hyp);
- string details;
- x->ScoreDetails(&details);
- cerr << details << endl;
- string enc;
- x->Encode(&enc);
- delete as;
- cerr << "ENC size: " << enc.size() << endl;
- ScoreP y = SentenceScorer::CreateScoreFromString(AER, enc);
- string d2;
- y->ScoreDetails(&d2);
- cerr << d2 << endl;
- EXPECT_EQ(d2, details);
-}
-
-int main(int argc, char **argv) {
- testing::InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
-}
-
diff --git a/vest/viterbi_envelope.cc b/vest/viterbi_envelope.cc
index 5c24c018..9fcf75a0 100644
--- a/vest/viterbi_envelope.cc
+++ b/vest/viterbi_envelope.cc
@@ -153,6 +153,7 @@ void Segment::ConstructTranslation(vector<WordID>* trans) const {
}
size_t ant_size = ant_trans.size();
vector<const vector<WordID>*> pants(ant_size);
+ assert(ant_size == cur->edge->tail_nodes_.size());
--ant_size;
for (int i = 0; i < pants.size(); ++i) pants[ant_size - i] = &ant_trans[i];
cur->edge->rule_->ESubstitute(pants, trans);
diff --git a/vest/viterbi_envelope.h b/vest/viterbi_envelope.h
index 1689a00e..60ad82d8 100644
--- a/vest/viterbi_envelope.h
+++ b/vest/viterbi_envelope.h
@@ -54,7 +54,7 @@ struct ViterbiEnvelope {
const ViterbiEnvelope& operator+=(const ViterbiEnvelope& other);
const ViterbiEnvelope& operator*=(const ViterbiEnvelope& other);
bool IsMultiplicativeIdentity() const {
- return size() == 1 && (segs[0]->b == 0.0 && segs[0]->m == 0.0) && (!segs[0]->edge); }
+ return size() == 1 && (segs[0]->b == 0.0 && segs[0]->m == 0.0) && (!segs[0]->edge) && (!segs[0]->p1) && (!segs[0]->p2); }
const std::vector<boost::shared_ptr<Segment> >& GetSortedSegs() const {
if (!is_sorted) Sort();
return segs;