summaryrefslogtreecommitdiff
path: root/decoder
diff options
context:
space:
mode:
Diffstat (limited to 'decoder')
-rw-r--r--decoder/bottom_up_parser.cc2
-rw-r--r--decoder/grammar_test.cc6
-rw-r--r--decoder/hg_test.cc60
-rw-r--r--decoder/hg_test.h69
-rw-r--r--decoder/rule_lexer.h1
-rw-r--r--decoder/rule_lexer.ll47
-rw-r--r--decoder/scfg_translator.cc2
-rw-r--r--decoder/test_data/hg_test.hg1
-rw-r--r--decoder/test_data/hg_test.hg_balanced1
-rw-r--r--decoder/test_data/hg_test.hg_int1
-rw-r--r--decoder/test_data/hg_test.lattice1
-rw-r--r--decoder/test_data/hg_test.tiny1
-rw-r--r--decoder/test_data/hg_test.tiny_lattice1
-rw-r--r--decoder/test_data/small.json.gzbin1561 -> 1733 bytes
-rw-r--r--decoder/tree2string_translator.cc1
-rw-r--r--decoder/trule.cc202
-rw-r--r--decoder/trule.h22
17 files changed, 141 insertions, 277 deletions
diff --git a/decoder/bottom_up_parser.cc b/decoder/bottom_up_parser.cc
index 8738c8f1..ff4c7a90 100644
--- a/decoder/bottom_up_parser.cc
+++ b/decoder/bottom_up_parser.cc
@@ -159,7 +159,7 @@ PassiveChart::PassiveChart(const string& goal,
chart_(input.size()+1, input.size()+1),
nodemap_(input.size()+1, input.size()+1),
goal_cat_(TD::Convert(goal) * -1),
- goal_rule_(new TRule("[Goal] ||| [" + goal + ",1] ||| [" + goal + ",1]")),
+ goal_rule_(new TRule("[Goal] ||| [" + goal + "] ||| [1]")),
goal_idx_(-1),
lc_fid_(FD::Convert("LatticeCost")),
unaries_() {
diff --git a/decoder/grammar_test.cc b/decoder/grammar_test.cc
index 6d2c6e67..69240139 100644
--- a/decoder/grammar_test.cc
+++ b/decoder/grammar_test.cc
@@ -33,9 +33,9 @@ BOOST_AUTO_TEST_CASE(TestTextGrammar) {
ModelSet models(w, ms);
TextGrammar g;
- TRulePtr r1(new TRule("[X] ||| a b c ||| A B C ||| 0.1 0.2 0.3", true));
- TRulePtr r2(new TRule("[X] ||| a b c ||| 1 2 3 ||| 0.2 0.3 0.4", true));
- TRulePtr r3(new TRule("[X] ||| a b c d ||| A B C D ||| 0.1 0.2 0.3", true));
+ TRulePtr r1(new TRule("[X] ||| a b c ||| A B C ||| 0.1 0.2 0.3"));
+ TRulePtr r2(new TRule("[X] ||| a b c ||| 1 2 3 ||| 0.2 0.3 0.4"));
+ TRulePtr r3(new TRule("[X] ||| a b c d ||| A B C D ||| 0.1 0.2 0.3"));
cerr << r1->AsString() << endl;
g.AddRule(r1);
g.AddRule(r2);
diff --git a/decoder/hg_test.cc b/decoder/hg_test.cc
index 8519e559..95cfae51 100644
--- a/decoder/hg_test.cc
+++ b/decoder/hg_test.cc
@@ -18,8 +18,10 @@ using namespace std;
BOOST_FIXTURE_TEST_SUITE( s, HGSetup );
BOOST_AUTO_TEST_CASE(Controlled) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
+ cerr << "PATH: " << path << "/hg.tiny\n";
Hypergraph hg;
- CreateHG_tiny(&hg);
+ CreateHG_tiny(path, &hg);
SparseVector<double> wts;
wts.set_value(FD::Convert("f1"), 0.4);
wts.set_value(FD::Convert("f2"), 0.8);
@@ -37,10 +39,11 @@ BOOST_AUTO_TEST_CASE(Controlled) {
}
BOOST_AUTO_TEST_CASE(Union) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg1;
Hypergraph hg2;
- CreateHG_tiny(&hg1);
- CreateHG(&hg2);
+ CreateHG_tiny(path, &hg1);
+ CreateHG(path, &hg2);
SparseVector<double> wts;
wts.set_value(FD::Convert("f1"), 0.4);
wts.set_value(FD::Convert("f2"), 1.0);
@@ -84,8 +87,9 @@ BOOST_AUTO_TEST_CASE(Union) {
}
BOOST_AUTO_TEST_CASE(ControlledKBest) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg;
- CreateHG(&hg);
+ CreateHG(path, &hg);
vector<double> w(2); w[0]=0.4; w[1]=0.8;
hg.Reweight(w);
vector<WordID> trans;
@@ -107,10 +111,11 @@ BOOST_AUTO_TEST_CASE(ControlledKBest) {
BOOST_AUTO_TEST_CASE(InsideScore) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
SparseVector<double> wts;
wts.set_value(FD::Convert("f1"), 1.0);
Hypergraph hg;
- CreateTinyLatticeHG(&hg);
+ CreateTinyLatticeHG(path, &hg);
hg.Reweight(wts);
vector<WordID> trans;
prob_t cost = ViterbiESentence(hg, &trans);
@@ -130,10 +135,11 @@ BOOST_AUTO_TEST_CASE(InsideScore) {
BOOST_AUTO_TEST_CASE(PruneInsideOutside) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
SparseVector<double> wts;
wts.set_value(FD::Convert("Feature_1"), 1.0);
Hypergraph hg;
- CreateLatticeHG(&hg);
+ CreateLatticeHG(path, &hg);
hg.Reweight(wts);
vector<WordID> trans;
prob_t cost = ViterbiESentence(hg, &trans);
@@ -152,8 +158,9 @@ BOOST_AUTO_TEST_CASE(PruneInsideOutside) {
}
BOOST_AUTO_TEST_CASE(TestPruneEdges) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg;
- CreateLatticeHG(&hg);
+ CreateLatticeHG(path, &hg);
SparseVector<double> wts;
wts.set_value(FD::Convert("f1"), 1.0);
hg.Reweight(wts);
@@ -166,8 +173,9 @@ BOOST_AUTO_TEST_CASE(TestPruneEdges) {
}
BOOST_AUTO_TEST_CASE(TestIntersect) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg;
- CreateHG_int(&hg);
+ CreateHG_int(path, &hg);
SparseVector<double> wts;
wts.set_value(FD::Convert("f1"), 1.0);
hg.Reweight(wts);
@@ -192,8 +200,9 @@ BOOST_AUTO_TEST_CASE(TestIntersect) {
}
BOOST_AUTO_TEST_CASE(TestPrune2) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg;
- CreateHG_int(&hg);
+ CreateHG_int(path, &hg);
SparseVector<double> wts;
wts.set_value(FD::Convert("f1"), 1.0);
hg.Reweight(wts);
@@ -207,8 +216,9 @@ BOOST_AUTO_TEST_CASE(TestPrune2) {
}
BOOST_AUTO_TEST_CASE(Sample) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg;
- CreateLatticeHG(&hg);
+ CreateLatticeHG(path, &hg);
SparseVector<double> wts;
wts.set_value(FD::Convert("Feature_1"), 0.0);
hg.Reweight(wts);
@@ -220,6 +230,7 @@ BOOST_AUTO_TEST_CASE(Sample) {
}
BOOST_AUTO_TEST_CASE(PLF) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg;
string inplf = "((('haupt',-2.06655,1),('hauptgrund',-5.71033,2),),(('grund',-1.78709,1),),(('für\\'',0.1,1),),)";
HypergraphIO::ReadFromPLF(inplf, &hg);
@@ -234,8 +245,9 @@ BOOST_AUTO_TEST_CASE(PLF) {
}
BOOST_AUTO_TEST_CASE(PushWeightsToGoal) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg;
- CreateHG(&hg);
+ CreateHG(path, &hg);
vector<double> w(2); w[0]=0.4; w[1]=0.8;
hg.Reweight(w);
vector<WordID> trans;
@@ -248,8 +260,9 @@ BOOST_AUTO_TEST_CASE(PushWeightsToGoal) {
}
BOOST_AUTO_TEST_CASE(TestSpecialKBest) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg;
- CreateHGBalanced(&hg);
+ CreateHGBalanced(path, &hg);
vector<double> w(1); w[0]=0;
hg.Reweight(w);
vector<pair<vector<WordID>, prob_t> > list;
@@ -264,8 +277,9 @@ BOOST_AUTO_TEST_CASE(TestSpecialKBest) {
}
BOOST_AUTO_TEST_CASE(TestGenericViterbi) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg;
- CreateHG_tiny(&hg);
+ CreateHG_tiny(path, &hg);
SparseVector<double> wts;
wts.set_value(FD::Convert("f1"), 0.4);
wts.set_value(FD::Convert("f2"), 0.8);
@@ -279,8 +293,9 @@ BOOST_AUTO_TEST_CASE(TestGenericViterbi) {
}
BOOST_AUTO_TEST_CASE(TestGenericInside) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg;
- CreateTinyLatticeHG(&hg);
+ CreateTinyLatticeHG(path, &hg);
SparseVector<double> wts;
wts.set_value(FD::Convert("f1"), 1.0);
hg.Reweight(wts);
@@ -296,8 +311,9 @@ BOOST_AUTO_TEST_CASE(TestGenericInside) {
}
BOOST_AUTO_TEST_CASE(TestGenericInside2) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg;
- CreateHG(&hg);
+ CreateHG(path, &hg);
SparseVector<double> wts;
wts.set_value(FD::Convert("f1"), 0.4);
wts.set_value(FD::Convert("f2"), 0.8);
@@ -322,8 +338,9 @@ BOOST_AUTO_TEST_CASE(TestGenericInside2) {
}
BOOST_AUTO_TEST_CASE(TestAddExpectations) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg;
- CreateHG(&hg);
+ CreateHG(path, &hg);
SparseVector<double> wts;
wts.set_value(FD::Convert("f1"), 0.4);
wts.set_value(FD::Convert("f2"), 0.8);
@@ -338,8 +355,8 @@ BOOST_AUTO_TEST_CASE(TestAddExpectations) {
}
BOOST_AUTO_TEST_CASE(Small) {
- Hypergraph hg;
std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
+ Hypergraph hg;
CreateSmallHG(&hg, path);
SparseVector<double> wts;
wts.set_value(FD::Convert("Model_0"), -2.0);
@@ -361,6 +378,7 @@ BOOST_AUTO_TEST_CASE(Small) {
}
BOOST_AUTO_TEST_CASE(JSONTest) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
ostringstream os;
JSONParser::WriteEscapedString("\"I don't know\", she said.", &os);
BOOST_CHECK_EQUAL("\"\\\"I don't know\\\", she said.\"", os.str());
@@ -370,9 +388,10 @@ BOOST_AUTO_TEST_CASE(JSONTest) {
}
BOOST_AUTO_TEST_CASE(TestGenericKBest) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg;
- CreateHG(&hg);
- //CreateHGBalanced(&hg);
+ CreateHG(path, &hg);
+ //CreateHGBalanced(path, &hg);
SparseVector<double> wts;
wts.set_value(FD::Convert("f1"), 0.4);
wts.set_value(FD::Convert("f2"), 1.0);
@@ -392,8 +411,9 @@ BOOST_AUTO_TEST_CASE(TestGenericKBest) {
}
BOOST_AUTO_TEST_CASE(TestReadWriteHG) {
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA);
Hypergraph hg,hg2;
- CreateHG(&hg);
+ CreateHG(path, &hg);
hg.edges_.front().j_ = 23;
hg.edges_.back().prev_i_ = 99;
ostringstream os;
diff --git a/decoder/hg_test.h b/decoder/hg_test.h
index e96cb0b1..b7bab3c2 100644
--- a/decoder/hg_test.h
+++ b/decoder/hg_test.h
@@ -23,25 +23,13 @@ Name perro_wts="SameFirstLetter 1 LongerThanPrev 1 ShorterThanPrev 1 GlueTop 0.0
// you can inherit from this or just use the static methods
struct HGSetup {
- enum {
- HG,
- HG_int,
- HG_tiny,
- HGBalanced,
- LatticeHG,
- TinyLatticeHG,
- };
- static void CreateHG(Hypergraph* hg);
- static void CreateHG_int(Hypergraph* hg);
- static void CreateHG_tiny(Hypergraph* hg);
- static void CreateHGBalanced(Hypergraph* hg);
- static void CreateLatticeHG(Hypergraph* hg);
- static void CreateTinyLatticeHG(Hypergraph* hg);
-
- static void Json(Hypergraph *hg,std::string const& json) {
- std::istringstream i(json);
- HypergraphIO::ReadFromJSON(&i, hg);
- }
+ static void CreateHG(const std::string& path,Hypergraph* hg);
+ static void CreateHG_int(const std::string& path,Hypergraph* hg);
+ static void CreateHG_tiny(const std::string& path, Hypergraph* hg);
+ static void CreateHGBalanced(const std::string& path,Hypergraph* hg);
+ static void CreateLatticeHG(const std::string& path,Hypergraph* hg);
+ static void CreateTinyLatticeHG(const std::string& path,Hypergraph* hg);
+
static void JsonFile(Hypergraph *hg,std::string f) {
ReadFile rf(f);
HypergraphIO::ReadFromJSON(rf.stream(), hg);
@@ -52,18 +40,6 @@ struct HGSetup {
static void CreateSmallHG(Hypergraph *hg, std::string path) { JsonTestFile(hg,path,small_json); }
};
-namespace {
-Name HGjsons[]= {
- "{\"rules\":[1,\"[X] ||| a\",2,\"[X] ||| A [1]\",3,\"[X] ||| c\",4,\"[X] ||| C [1]\",5,\"[X] ||| [1] B [2]\",6,\"[X] ||| [1] b [2]\",7,\"[X] ||| X [1]\",8,\"[X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.8,1,-0.1],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[1,-1],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[2],\"feats\":[0,-0.2,1,-0.1],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[1,3],\"feats\":[0,-1.2,1,-0.2],\"rule\":5},{\"tail\":[1,3],\"feats\":[0,-0.5,1,-1.3],\"rule\":6}],\"node\":{\"in_edges\":[4,5]},\"edges\":[{\"tail\":[4],\"feats\":[0,-0.5,1,-0.8],\"rule\":7},{\"tail\":[4],\"feats\":[0,-0.7,1,-0.9],\"rule\":8}],\"node\":{\"in_edges\":[6,7]}}",
-"{\"rules\":[1,\"[X] ||| a\",2,\"[X] ||| b\",3,\"[X] ||| a [1]\",4,\"[X] ||| [1] b\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[0,0.1],\"rule\":1},{\"tail\":[],\"feats\":[0,0.1],\"rule\":2}],\"node\":{\"in_edges\":[0,1],\"cat\":\"X\"},\"edges\":[{\"tail\":[0],\"feats\":[0,0.3],\"rule\":3},{\"tail\":[0],\"feats\":[0,0.2],\"rule\":4}],\"node\":{\"in_edges\":[2,3],\"cat\":\"Goal\"}}",
- "{\"rules\":[1,\"[X] ||| <s>\",2,\"[X] ||| X [1]\",3,\"[X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[0,-2,1,-99],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.5,1,-0.8],\"rule\":2},{\"tail\":[0],\"feats\":[0,-0.7,1,-0.9],\"rule\":3}],\"node\":{\"in_edges\":[1,2]}}",
- "{\"rules\":[1,\"[X] ||| i\",2,\"[X] ||| a\",3,\"[X] ||| b\",4,\"[X] ||| [1] [2]\",5,\"[X] ||| [1] [2]\",6,\"[X] ||| c\",7,\"[X] ||| d\",8,\"[X] ||| [1] [2]\",9,\"[X] ||| [1] [2]\",10,\"[X] ||| [1] [2]\",11,\"[X] ||| [1] [2]\",12,\"[X] ||| [1] [2]\",13,\"[X] ||| [1] [2]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[1,2],\"feats\":[],\"rule\":4},{\"tail\":[2,1],\"feats\":[],\"rule\":5}],\"node\":{\"in_edges\":[3,4]},\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":6}],\"node\":{\"in_edges\":[5]},\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":7}],\"node\":{\"in_edges\":[6]},\"edges\":[{\"tail\":[4,5],\"feats\":[],\"rule\":8},{\"tail\":[5,4],\"feats\":[],\"rule\":9}],\"node\":{\"in_edges\":[7,8]},\"edges\":[{\"tail\":[3,6],\"feats\":[],\"rule\":10},{\"tail\":[6,3],\"feats\":[],\"rule\":11}],\"node\":{\"in_edges\":[9,10]},\"edges\":[{\"tail\":[7,0],\"feats\":[],\"rule\":12},{\"tail\":[0,7],\"feats\":[],\"rule\":13}],\"node\":{\"in_edges\":[11,12]}}",
- "{\"rules\":[1,\"[X] ||| [1] a\",2,\"[X] ||| [1] A\",3,\"[X] ||| [1] A A\",4,\"[X] ||| [1] b\",5,\"[X] ||| [1] c\",6,\"[X] ||| [1] B C\",7,\"[X] ||| [1] A B C\",8,\"[X] ||| [1] CC\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[],\"node\":{\"in_edges\":[]},\"edges\":[{\"tail\":[0],\"feats\":[2,-0.3],\"rule\":1},{\"tail\":[0],\"feats\":[2,-0.6],\"rule\":2},{\"tail\":[0],\"feats\":[2,-1.7],\"rule\":3}],\"node\":{\"in_edges\":[0,1,2]},\"edges\":[{\"tail\":[1],\"feats\":[2,-0.5],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[2],\"feats\":[2,-0.6],\"rule\":5},{\"tail\":[1],\"feats\":[2,-0.8],\"rule\":6},{\"tail\":[0],\"feats\":[2,-0.01],\"rule\":7},{\"tail\":[2],\"feats\":[2,-0.8],\"rule\":8}],\"node\":{\"in_edges\":[4,5,6,7]}}",
- "{\"rules\":[1,\"[X] ||| [1] a\",2,\"[X] ||| [1] A\",3,\"[X] ||| [1] b\",4,\"[X] ||| [1] B'\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[],\"node\":{\"in_edges\":[]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.2],\"rule\":1},{\"tail\":[0],\"feats\":[0,-0.6],\"rule\":2}],\"node\":{\"in_edges\":[0,1]},\"edges\":[{\"tail\":[1],\"feats\":[0,-0.1],\"rule\":3},{\"tail\":[1],\"feats\":[0,-0.9],\"rule\":4}],\"node\":{\"in_edges\":[2,3]}}",
-};
-
-}
-
void AddNullEdge(Hypergraph* hg) {
TRule x;
x.arity_ = 0;
@@ -71,31 +47,36 @@ void AddNullEdge(Hypergraph* hg) {
hg->edges_.back().head_node_ = 0;
}
-void HGSetup::CreateTinyLatticeHG(Hypergraph* hg) {
- Json(hg,HGjsons[TinyLatticeHG]);
+void HGSetup::CreateTinyLatticeHG(const std::string& path,Hypergraph* hg) {
+ ReadFile rf(path + "/hg_test.tiny_lattice");
+ HypergraphIO::ReadFromJSON(rf.stream(), hg);
AddNullEdge(hg);
}
-void HGSetup::CreateLatticeHG(Hypergraph* hg) {
- Json(hg,HGjsons[LatticeHG]);
+void HGSetup::CreateLatticeHG(const std::string& path,Hypergraph* hg) {
+ ReadFile rf(path + "/hg_test.lattice");
+ HypergraphIO::ReadFromJSON(rf.stream(), hg);
AddNullEdge(hg);
}
-void HGSetup::CreateHG_tiny(Hypergraph* hg) {
- Json(hg,HGjsons[HG_tiny]);
+void HGSetup::CreateHG_tiny(const std::string& path, Hypergraph* hg) {
+ ReadFile rf(path + "/hg_test.tiny");
+ HypergraphIO::ReadFromJSON(rf.stream(), hg);
}
-void HGSetup::CreateHG_int(Hypergraph* hg) {
- Json(hg,HGjsons[HG_int]);
+void HGSetup::CreateHG_int(const std::string& path,Hypergraph* hg) {
+ ReadFile rf(path + "/hg_test.hg_int");
+ HypergraphIO::ReadFromJSON(rf.stream(), hg);
}
-void HGSetup::CreateHG(Hypergraph* hg) {
- Json(hg,HGjsons[HG]);
+void HGSetup::CreateHG(const std::string& path,Hypergraph* hg) {
+ ReadFile rf(path + "/hg_test.hg");
+ HypergraphIO::ReadFromJSON(rf.stream(), hg);
}
-void HGSetup::CreateHGBalanced(Hypergraph* hg) {
- Json(hg,HGjsons[HGBalanced]);
+void HGSetup::CreateHGBalanced(const std::string& path,Hypergraph* hg) {
+ ReadFile rf(path + "/hg_test.hg_balanced");
+ HypergraphIO::ReadFromJSON(rf.stream(), hg);
}
-
#endif
diff --git a/decoder/rule_lexer.h b/decoder/rule_lexer.h
index f844e5b2..e15c056d 100644
--- a/decoder/rule_lexer.h
+++ b/decoder/rule_lexer.h
@@ -9,6 +9,7 @@
struct RuleLexer {
typedef void (*RuleCallback)(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra);
static void ReadRules(std::istream* in, RuleCallback func, const std::string& fname, void* extra);
+ static void ReadRule(const std::string&, RuleCallback func, bool mono_rule, void* extra);
};
#endif
diff --git a/decoder/rule_lexer.ll b/decoder/rule_lexer.ll
index cc73c079..d4a8d86b 100644
--- a/decoder/rule_lexer.ll
+++ b/decoder/rule_lexer.ll
@@ -14,6 +14,7 @@
#include "verbose.h"
#include "tree_fragment.h"
+bool lex_mono_rules = false;
int lex_line = 0;
std::istream* scfglex_stream = NULL;
RuleLexer::RuleCallback rule_callback = NULL;
@@ -120,7 +121,7 @@ void check_and_update_ctf_stack(const TRulePtr& rp) {
%}
REAL [\-+]?[0-9]+(\.[0-9]*)?([eE][-+]*[0-9]+)?
-NT [^\t \[\],]+
+NT ([^\t \n\r\[\],]+|Goal)
%x LHS_END SRC TRG FEATS FEATVAL ALIGNS TREE
%%
@@ -132,7 +133,7 @@ NT [^\t \[\],]+
<INITIAL>\[{NT}\] {
scfglex_tmp_token.assign(yytext + 1, yyleng - 2);
scfglex_lhs = -TD::Convert(scfglex_tmp_token);
- // std::cerr << scfglex_tmp_token << "\n";
+ //std::cerr << "LHS: " << scfglex_tmp_token << "\n";
BEGIN(LHS_END);
}
@@ -199,9 +200,9 @@ NT [^\t \[\],]+
<SRC>\|\|\| {
memset(scfglex_nt_sanity, 0, scfglex_src_arity * sizeof(int));
- BEGIN(TRG);
+ if (lex_mono_rules) { BEGIN(FEATS); } else { BEGIN(TRG); }
}
-<SRC>[^ \t]+ {
+<SRC>[^ \t\n\r]+ {
scfglex_tmp_token.assign(yytext, yyleng);
scfglex_src_rhs[scfglex_src_rhs_size] = TD::Convert(scfglex_tmp_token);
++scfglex_src_rhs_size;
@@ -217,14 +218,28 @@ NT [^\t \[\],]+
<TRG>\|\|\| {
BEGIN(FEATS);
}
-<TRG>[^ \t]+ {
+<TRG>[^ \t\n\r]+ {
scfglex_tmp_token.assign(yytext, yyleng);
scfglex_trg_rhs[scfglex_trg_rhs_size] = TD::Convert(scfglex_tmp_token);
++scfglex_trg_rhs_size;
}
<TRG>[ \t]+ { ; }
-<TRG,FEATS,ALIGNS,TREE>\n {
+<SRC,TRG,FEATS,ALIGNS,TREE>\n {
+ if (lex_mono_rules) {
+ if (scfglex_trg_rhs_size != 0) {
+ std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": expected monolingual rule\n";
+ abort();
+ }
+ scfglex_trg_arity = scfglex_src_arity;
+ scfglex_trg_rhs_size = scfglex_src_rhs_size;
+ int ntc = 0;
+ for (int i = 0; i < scfglex_src_rhs_size; ++i)
+ if (scfglex_trg_rhs[i] <= 0)
+ scfglex_trg_rhs[i] = ntc--;
+ else
+ scfglex_trg_rhs[i] = scfglex_src_rhs[i];
+ }
if (scfglex_src_arity != scfglex_trg_arity) {
std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": LHS and RHS arity mismatch!\n";
abort();
@@ -243,7 +258,7 @@ NT [^\t \[\],]+
TRulePtr coarse_rp = ((ctf_level == 0) ? TRulePtr() : ctf_rule_stack.top());
rule_callback(rp, ctf_level, coarse_rp, rule_callback_extra);
ctf_rule_stack.push(rp);
- // std::cerr << rp->AsString() << std::endl;
+ //std::cerr << "RULE: " << rp->AsString() << std::endl;
num_rules++;
lex_line++;
if (!SILENT) {
@@ -317,7 +332,7 @@ NT [^\t \[\],]+
#include "filelib.h"
-void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, const std::string& fname, void* extra) {
+static void init_default_feature_names() {
if (scfglex_phrase_fnames.empty()) {
scfglex_phrase_fnames.resize(100);
for (int i = 0; i < scfglex_phrase_fnames.size(); ++i) {
@@ -326,6 +341,11 @@ void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, const
scfglex_phrase_fnames[i] = FD::Convert(os.str());
}
}
+}
+
+void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, const std::string& fname, void* extra) {
+ init_default_feature_names();
+ lex_mono_rules = false;
lex_line = 1;
scfglex_fname = fname;
scfglex_stream = in;
@@ -334,3 +354,14 @@ void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, const
yylex();
}
+void RuleLexer::ReadRule(const std::string& srule, RuleCallback func, bool mono, void* extra) {
+ init_default_feature_names();
+ lex_mono_rules = mono;
+ lex_line = 1;
+ rule_callback_extra = extra;
+ rule_callback = func;
+ yy_scan_string(srule.c_str());
+ yylex();
+ yylex_destroy();
+}
+
diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc
index 159a1d60..88f62769 100644
--- a/decoder/scfg_translator.cc
+++ b/decoder/scfg_translator.cc
@@ -47,7 +47,7 @@ GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt, const
TRulePtr stop_glue(new TRule("[" + goal_nt + "] ||| [" + default_nt + ",1] ||| [1]"));
AddRule(stop_glue);
RefineRule(stop_glue, ctf_level);
- TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] ["+ default_nt + ",2] ||| [1] [2] ||| Glue=1"));
+ TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + "] ["+ default_nt + "] ||| [1] [2] ||| Glue=1"));
AddRule(glue);
RefineRule(glue, ctf_level);
}
diff --git a/decoder/test_data/hg_test.hg b/decoder/test_data/hg_test.hg
new file mode 100644
index 00000000..ef98e9d4
--- /dev/null
+++ b/decoder/test_data/hg_test.hg
@@ -0,0 +1 @@
+{"rules":[1,"[X] ||| a ||| a",2,"[X] ||| A [X] ||| A [1]",3,"[X] ||| c ||| c",4,"[X] ||| C [X] ||| C [1]",5,"[X] ||| [X] B [X] ||| [1] B [2]",6,"[X] ||| [X] b [X] ||| [1] b [2]",7,"[X] ||| X [X] ||| X [1]",8,"[X] ||| Z [X] ||| Z [1]"],"features":["f1","f2","Feature_1","Feature_0","Model_0","Model_1","Model_2","Model_3","Model_4","Model_5","Model_6","Model_7","LatticeCost"],"edges":[{"tail":[],"spans":[24568,32767,24568,32767],"feats":[],"rule":1}],"node":{"in_edges":[0],"cat":"X"},"edges":[{"tail":[0],"spans":[24568,32767,24568,32767],"feats":[0,-0.8,1,-0.1],"rule":2}],"node":{"in_edges":[1],"cat":"X"},"edges":[{"tail":[],"spans":[24568,32767,24568,32767],"feats":[1,-1],"rule":3}],"node":{"in_edges":[2],"cat":"X"},"edges":[{"tail":[2],"spans":[24568,32767,24568,32767],"feats":[0,-0.2,1,-0.1],"rule":4}],"node":{"in_edges":[3],"cat":"X"},"edges":[{"tail":[1,3],"spans":[24568,32767,24568,32767],"feats":[0,-1.2,1,-0.2],"rule":5},{"tail":[1,3],"spans":[24568,32767,24568,32767],"feats":[0,-0.5,1,-1.3],"rule":6}],"node":{"in_edges":[4,5],"cat":"X"},"edges":[{"tail":[4],"spans":[24568,32767,24568,32767],"feats":[0,-0.5,1,-0.8],"rule":7},{"tail":[4],"spans":[24568,32767,24568,32767],"feats":[0,-0.7,1,-0.9],"rule":8}],"node":{"in_edges":[6,7],"cat":"X"}}
diff --git a/decoder/test_data/hg_test.hg_balanced b/decoder/test_data/hg_test.hg_balanced
new file mode 100644
index 00000000..0f0f499f
--- /dev/null
+++ b/decoder/test_data/hg_test.hg_balanced
@@ -0,0 +1 @@
+{"rules":[1,"[X] ||| i ||| i",2,"[X] ||| a ||| a",3,"[X] ||| b ||| b",4,"[X] ||| [X] [X] ||| [1] [2]",5,"[X] ||| [X] [X] ||| [1] [2]",6,"[X] ||| c ||| c",7,"[X] ||| d ||| d",8,"[X] ||| [X] [X] ||| [1] [2]",9,"[X] ||| [X] [X] ||| [1] [2]",10,"[X] ||| [X] [X] ||| [1] [2]",11,"[X] ||| [X] [X] ||| [1] [2]",12,"[X] ||| [X] [X] ||| [1] [2]",13,"[X] ||| [X] [X] ||| [1] [2]"],"features":["f1","f2","Feature_1","Feature_0","Model_0","Model_1","Model_2","Model_3","Model_4","Model_5","Model_6","Model_7","LatticeCost"],"edges":[{"tail":[],"spans":[32760,32767,32760,32767],"feats":[],"rule":1}],"node":{"in_edges":[0],"cat":"X"},"edges":[{"tail":[],"spans":[32760,32767,32760,32767],"feats":[],"rule":2}],"node":{"in_edges":[1],"cat":"X"},"edges":[{"tail":[],"spans":[32760,32767,32760,32767],"feats":[],"rule":3}],"node":{"in_edges":[2],"cat":"X"},"edges":[{"tail":[1,2],"spans":[32760,32767,32760,32767],"feats":[],"rule":4},{"tail":[2,1],"spans":[32760,32767,32760,32767],"feats":[],"rule":5}],"node":{"in_edges":[3,4],"cat":"X"},"edges":[{"tail":[],"spans":[32760,32767,32760,32767],"feats":[],"rule":6}],"node":{"in_edges":[5],"cat":"X"},"edges":[{"tail":[],"spans":[32760,32767,32760,32767],"feats":[],"rule":7}],"node":{"in_edges":[6],"cat":"X"},"edges":[{"tail":[4,5],"spans":[32760,32767,32760,32767],"feats":[],"rule":8},{"tail":[5,4],"spans":[32760,32767,32760,32767],"feats":[],"rule":9}],"node":{"in_edges":[7,8],"cat":"X"},"edges":[{"tail":[3,6],"spans":[32760,32767,32760,32767],"feats":[],"rule":10},{"tail":[6,3],"spans":[32760,32767,32760,32767],"feats":[],"rule":11}],"node":{"in_edges":[9,10],"cat":"X"},"edges":[{"tail":[7,0],"spans":[32760,32767,32760,32767],"feats":[],"rule":12},{"tail":[0,7],"spans":[32760,32767,32760,32767],"feats":[],"rule":13}],"node":{"in_edges":[11,12],"cat":"X"}}
diff --git a/decoder/test_data/hg_test.hg_int b/decoder/test_data/hg_test.hg_int
new file mode 100644
index 00000000..9c4603bc
--- /dev/null
+++ b/decoder/test_data/hg_test.hg_int
@@ -0,0 +1 @@
+{"rules":[1,"[X] ||| a ||| a",2,"[X] ||| b ||| b",3,"[X] ||| a [X] ||| a [1]",4,"[X] ||| [X] b ||| [1] b"],"features":["f1","f2","Feature_1","Feature_0","Model_0","Model_1","Model_2","Model_3","Model_4","Model_5","Model_6","Model_7","LatticeCost"],"edges":[{"tail":[],"spans":[-8200,32767,-8200,32767],"feats":[0,0.1],"rule":1},{"tail":[],"spans":[-8200,32767,-8200,32767],"feats":[0,0.1],"rule":2}],"node":{"in_edges":[0,1],"cat":"X"},"edges":[{"tail":[0],"spans":[-8200,32767,-8200,32767],"feats":[0,0.3],"rule":3},{"tail":[0],"spans":[-8200,32767,-8200,32767],"feats":[0,0.2],"rule":4}],"node":{"in_edges":[2,3],"cat":"Goal"}}
diff --git a/decoder/test_data/hg_test.lattice b/decoder/test_data/hg_test.lattice
new file mode 100644
index 00000000..29e021c5
--- /dev/null
+++ b/decoder/test_data/hg_test.lattice
@@ -0,0 +1 @@
+{"rules":[1,"[X] ||| [X] a ||| [1] a",2,"[X] ||| [X] A ||| [1] A",3,"[X] ||| [X] A A ||| [1] A A",4,"[X] ||| [X] b ||| [1] b",5,"[X] ||| [X] c ||| [1] c",6,"[X] ||| [X] B C ||| [1] B C",7,"[X] ||| [X] A B C ||| [1] A B C",8,"[X] ||| [X] CC ||| [1] CC"],"features":["f1","f2","Feature_1","Feature_0","Model_0","Model_1","Model_2","Model_3","Model_4","Model_5","Model_6","Model_7"],"edges":[],"node":{"in_edges":[]},"edges":[{"tail":[0],"feats":[2,-0.3],"rule":1},{"tail":[0],"feats":[2,-0.6],"rule":2},{"tail":[0],"feats":[2,-1.7],"rule":3}],"node":{"in_edges":[0,1,2]},"edges":[{"tail":[1],"feats":[2,-0.5],"rule":4}],"node":{"in_edges":[3]},"edges":[{"tail":[2],"feats":[2,-0.6],"rule":5},{"tail":[1],"feats":[2,-0.8],"rule":6},{"tail":[0],"feats":[2,-0.01],"rule":7},{"tail":[2],"feats":[2,-0.8],"rule":8}],"node":{"in_edges":[4,5,6,7]}}"
diff --git a/decoder/test_data/hg_test.tiny b/decoder/test_data/hg_test.tiny
new file mode 100644
index 00000000..101b96e9
--- /dev/null
+++ b/decoder/test_data/hg_test.tiny
@@ -0,0 +1 @@
+{"rules":[1,"[X] ||| <s> ||| <s>",2,"[X] ||| X [X] ||| X [1]",3,"[X] ||| Z [X] ||| Z [1]"],"features":["f1","f2","Feature_1","Feature_0","Model_0","Model_1","Model_2","Model_3","Model_4","Model_5","Model_6","Model_7","LatticeCost"],"edges":[{"tail":[],"spans":[25080,32767,25080,32767],"feats":[0,-2,1,-99],"rule":1}],"node":{"in_edges":[0],"cat":"X"},"edges":[{"tail":[0],"spans":[25080,32767,25080,32767],"feats":[0,-0.5,1,-0.8],"rule":2},{"tail":[0],"spans":[25080,32767,25080,32767],"feats":[0,-0.7,1,-0.9],"rule":3}],"node":{"in_edges":[1,2],"cat":"X"}}
diff --git a/decoder/test_data/hg_test.tiny_lattice b/decoder/test_data/hg_test.tiny_lattice
new file mode 100644
index 00000000..b9adf3cd
--- /dev/null
+++ b/decoder/test_data/hg_test.tiny_lattice
@@ -0,0 +1 @@
+{"rules":[1,"[X] ||| [X] a ||| [1] a",2,"[X] ||| [X] A ||| [1] A",3,"[X] ||| [X] b ||| [1] b",4,"[X] ||| [X] B' ||| [1] B'"],"features":["f1","f2","Feature_1","Feature_0","Model_0","Model_1","Model_2","Model_3","Model_4","Model_5","Model_6","Model_7"],"edges":[],"node":{"in_edges":[]},"edges":[{"tail":[0],"feats":[0,-0.2],"rule":1},{"tail":[0],"feats":[0,-0.6],"rule":2}],"node":{"in_edges":[0,1]},"edges":[{"tail":[1],"feats":[0,-0.1],"rule":3},{"tail":[1],"feats":[0,-0.9],"rule":4}],"node":{"in_edges":[2,3]}}
diff --git a/decoder/test_data/small.json.gz b/decoder/test_data/small.json.gz
index 892ba360..f6f37293 100644
--- a/decoder/test_data/small.json.gz
+++ b/decoder/test_data/small.json.gz
Binary files differ
diff --git a/decoder/tree2string_translator.cc b/decoder/tree2string_translator.cc
index 4cd584fb..f288ab4e 100644
--- a/decoder/tree2string_translator.cc
+++ b/decoder/tree2string_translator.cc
@@ -174,6 +174,7 @@ struct Tree2StringTranslatorImpl {
q.push(ParserState(input_tree.begin(), g.get()));
unique.insert(q.back());
}
+ if (q.size() == 0) return false;
unsigned tree_top = q.front().input_node_idx;
while(!q.empty()) {
ParserState& s = q.front();
diff --git a/decoder/trule.cc b/decoder/trule.cc
index c22baae3..1bd5425f 100644
--- a/decoder/trule.cc
+++ b/decoder/trule.cc
@@ -17,73 +17,16 @@ bool TRule::IsGoal() const {
return GetLHS() == kGOAL;
}
-static WordID ConvertTrgString(const string& w) {
- const unsigned len = w.size();
- WordID id = 0;
- // [X,0] or [0]
- // for target rules, we ignore the category, just keep the index
- if (len > 2 && w[0]=='[' && w[len-1]==']' && w[len-2] > '0' && w[len-2] <= '9' &&
- (len == 3 || (len > 4 && w[len-3] == ','))) {
- id = w[len-2] - '0';
- id = 1 - id;
- } else {
- id = TD::Convert(w);
- }
- return id;
-}
-
-static WordID ConvertSrcString(const string& w, bool mono = false) {
- const unsigned len = w.size();
- // [X,0]
- // for source rules, we keep the category and ignore the index (source rules are
- // always numbered 1, 2, 3...
- if (mono) {
- if (len > 2 && w[0]=='[' && w[len-1]==']') {
- if (len > 4 && w[len-3] == ',') {
- cerr << "[ERROR] Monolingual rules mut not have non-terminal indices:\n "
- << w << endl;
- exit(1);
- }
- // TODO check that source indices go 1,2,3,etc.
- return TD::Convert(w.substr(1, len-2)) * -1;
- } else {
- return TD::Convert(w);
- }
- } else {
- if (len > 4 && w[0]=='[' && w[len-1]==']' && w[len-3] == ',' && w[len-2] > '0' && w[len-2] <= '9') {
- return TD::Convert(w.substr(1, len-4)) * -1;
- } else {
- return TD::Convert(w);
- }
- }
-}
-
-static WordID ConvertLHS(const string& w) {
- if (w[0] == '[') {
- const unsigned len = w.size();
- if (len < 3) { cerr << "Format error: " << w << endl; exit(1); }
- return TD::Convert(w.substr(1, len-2)) * -1;
- } else {
- return TD::Convert(w) * -1;
- }
-}
-
TRule* TRule::CreateRuleSynchronous(const string& rule) {
TRule* res = new TRule;
- if (res->ReadFromString(rule, true, false)) return res;
+ if (res->ReadFromString(rule)) return res;
cerr << "[ERROR] Failed to creating rule from: " << rule << endl;
delete res;
return NULL;
}
TRule* TRule::CreateRulePhrasetable(const string& rule) {
- // TODO make this faster
- // TODO add configuration for default NT type
- if (rule[0] == '[') {
- cerr << "Phrasetable rules shouldn't have a LHS / non-terminals:\n " << rule << endl;
- return NULL;
- }
- TRule* res = new TRule("[X] ||| " + rule, true, false);
+ TRule* res = new TRule("[X] ||| " + rule);
if (res->Arity() != 0) {
cerr << "Phrasetable rules should have arity 0:\n " << rule << endl;
delete res;
@@ -93,138 +36,27 @@ TRule* TRule::CreateRulePhrasetable(const string& rule) {
}
TRule* TRule::CreateRuleMonolingual(const string& rule) {
- return new TRule(rule, false, true);
+ return new TRule(rule, true);
}
namespace {
-// callback for lexer
+// callback for single rule lexer
int n_assigned=0;
-void assign_trule(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra) {
- (void) ctf_level;
- (void) coarse_rule;
- TRule *assignto=(TRule *)extra;
- *assignto=*new_rule;
- ++n_assigned;
-}
-
-}
-
-bool TRule::ReadFromString(const string& line, bool strict, bool mono) {
- if (!is_single_line_stripped(line))
- cerr<<"\nWARNING: building rule from multi-line string "<<line<<".\n";
- // backed off of this: it's failing to parse TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] ["+ default_nt + ",2] ||| [1] [2] ||| Glue=1")); thinks [1] is the features!
- if (false && !(mono||strict)) {
- // use lexer
- istringstream il(line);
- n_assigned=0;
- RuleLexer::ReadRules(&il,assign_trule,"STRING",this);
- if (n_assigned>1)
- cerr<<"\nWARNING: more than one rule parsed from multi-line string; kept last: "<<line<<".\n";
- return n_assigned;
- }
-
- e_.clear();
- f_.clear();
- scores_.clear();
-
- string w;
- istringstream is(line);
- int format = CountSubstrings(line, "|||");
- if (strict && format < 2) {
- cerr << "Bad rule format in strict mode:\n" << line << endl;
- return false;
+ void assign_trule(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra) {
+ (void) ctf_level;
+ (void) coarse_rule;
+ *static_cast<TRule*>(extra) = *new_rule;
+ ++n_assigned;
}
- if (format >= 2 || (mono && format == 1)) {
- while(is>>w && w!="|||") { lhs_ = ConvertLHS(w); }
- while(is>>w && w!="|||") { f_.push_back(ConvertSrcString(w, mono)); }
- if (!mono) {
- while(is>>w && w!="|||") { e_.push_back(ConvertTrgString(w)); }
- }
- int fv = 0;
- if (is) {
- string ss;
- getline(is, ss);
- //cerr << "L: " << ss << endl;
- unsigned start = 0;
- unsigned len = ss.size();
- const size_t ppos = ss.find(" |||");
- if (ppos != string::npos) { len = ppos; }
- while (start < len) {
- while(start < len && (ss[start] == ' ' || ss[start] == ';'))
- ++start;
- if (start == len) break;
- unsigned end = start + 1;
- while(end < len && (ss[end] != '=' && ss[end] != ' ' && ss[end] != ';'))
- ++end;
- if (end == len || ss[end] == ' ' || ss[end] == ';') {
- //cerr << "PROC: '" << ss.substr(start, end - start) << "'\n";
- // non-named features
- if (end != len) { ss[end] = 0; }
- string fname = "PhraseModel_X";
- if (fv > 9) { cerr << "Too many phrasetable scores - used named format\n"; abort(); }
- fname[12]='0' + fv;
- ++fv;
- // if the feature set is frozen, this may return zero, indicating an
- // undefined feature
- const int fid = FD::Convert(fname);
- if (fid)
- scores_.set_value(fid, atof(&ss[start]));
- //cerr << "F: " << fname << " VAL=" << scores_.value(FD::Convert(fname)) << endl;
- } else {
- const int fid = FD::Convert(ss.substr(start, end - start));
- start = end + 1;
- end = start + 1;
- while(end < len && (ss[end] != ' ' && ss[end] != ';'))
- ++end;
- if (end < len) { ss[end] = 0; }
- assert(start < len);
- if (fid)
- scores_.set_value(fid, atof(&ss[start]));
- //cerr << "F: " << FD::Convert(fid) << " VAL=" << scores_.value(fid) << endl;
- }
- start = end + 1;
- }
- }
- } else if (format == 1) {
- while(is>>w && w!="|||") { lhs_ = ConvertLHS(w); }
- while(is>>w && w!="|||") { e_.push_back(ConvertTrgString(w)); }
- f_ = e_;
- int x = ConvertLHS("[X]");
- for (unsigned i = 0; i < f_.size(); ++i)
- if (f_[i] <= 0) { f_[i] = x; }
- } else {
- cerr << "F: " << format << endl;
- cerr << "[ERROR] Don't know how to read:\n" << line << endl;
- }
- if (mono) {
- e_ = f_;
- int ci = 0;
- for (unsigned i = 0; i < e_.size(); ++i)
- if (e_[i] < 0)
- e_[i] = ci--;
- }
- ComputeArity();
- return SanityCheck();
}
-bool TRule::SanityCheck() const {
- vector<int> used(f_.size(), 0);
- int ac = 0;
- for (unsigned i = 0; i < e_.size(); ++i) {
- int ind = e_[i];
- if (ind > 0) continue;
- ind = -ind;
- if ((++used[ind]) != 1) {
- cerr << "[ERROR] e-side variable index " << (ind+1) << " used more than once!\n";
- return false;
- }
- ac++;
- }
- if (ac != Arity()) {
- cerr << "[ERROR] e-side arity mismatches f-side\n";
- return false;
- }
- return true;
+bool TRule::ReadFromString(const string& line, bool mono) {
+ n_assigned = 0;
+ //cerr << "LINE: " << line << " -- mono=" << mono << endl;
+ RuleLexer::ReadRule(line + '\n', assign_trule, mono, this);
+ if (n_assigned > 1)
+ cerr<<"\nWARNING: more than one rule parsed from multi-line string; kept last: "<<line<<".\n";
+ return n_assigned;
}
void TRule::ComputeArity() {
@@ -245,7 +77,7 @@ string TRule::AsString(bool verbose) const {
if (w < 0) {
int wi = w * -1;
++idx;
- os << " [" << TD::Convert(wi) << ',' << idx << ']';
+ os << " [" << TD::Convert(wi) << ']';
} else {
os << ' ' << TD::Convert(w);
}
diff --git a/decoder/trule.h b/decoder/trule.h
index e9a10bea..7dced5a1 100644
--- a/decoder/trule.h
+++ b/decoder/trule.h
@@ -51,23 +51,18 @@ class TRule {
TRule(const TRule& other) :
e_(other.e_), f_(other.f_), lhs_(other.lhs_), scores_(other.scores_), arity_(other.arity_), prev_i(-1), prev_j(-1), a_(other.a_) {}
- // if mono or strict is true, then lexer won't be used, and //FIXME: > 9 variables won't work
- explicit TRule(const std::string& text, bool strict = false, bool mono = false) : prev_i(-1), prev_j(-1) {
- ReadFromString(text, strict, mono);
+ explicit TRule(const std::string& text, bool mono = false) : prev_i(-1), prev_j(-1) {
+ ReadFromString(text, mono);
}
- // deprecated, use lexer
// make a rule from a hiero-like rule table, e.g.
// [X] ||| [X,1] DE [X,2] ||| [X,2] of the [X,1]
- // if misformatted, returns NULL
static TRule* CreateRuleSynchronous(const std::string& rule);
- // deprecated, use lexer
// make a rule from a phrasetable entry (i.e., one that has no LHS type), e.g:
// el gato ||| the cat ||| Feature_2=0.34
static TRule* CreateRulePhrasetable(const std::string& rule);
- // deprecated, use lexer
// make a rule from a non-synchrnous CFG representation, e.g.:
// [LHS] ||| term1 [NT] term2 [OTHER_NT] [YET_ANOTHER_NT]
static TRule* CreateRuleMonolingual(const std::string& rule);
@@ -80,11 +75,10 @@ class TRule {
std::vector<WordID>* result) const {
unsigned vc = 0;
result->clear();
- for (std::vector<WordID>::const_iterator i = e_.begin(); i != e_.end(); ++i) {
- const WordID& c = *i;
+ for (const auto& c : e_) {
if (c < 1) {
++vc;
- const std::vector<WordID>& var_value = *var_values[-c];
+ const auto& var_value = *var_values[-c];
std::copy(var_value.begin(),
var_value.end(),
std::back_inserter(*result));
@@ -99,10 +93,9 @@ class TRule {
std::vector<WordID>* result) const {
unsigned vc = 0;
result->clear();
- for (std::vector<WordID>::const_iterator i = f_.begin(); i != f_.end(); ++i) {
- const WordID& c = *i;
+ for (const auto& c : f_) {
if (c < 1) {
- const std::vector<WordID>& var_value = *var_values[vc++];
+ const auto& var_value = *var_values[vc++];
std::copy(var_value.begin(),
var_value.end(),
std::back_inserter(*result));
@@ -113,7 +106,7 @@ class TRule {
assert(vc == var_values.size());
}
- bool ReadFromString(const std::string& line, bool strict = false, bool monolingual = false);
+ bool ReadFromString(const std::string& line, bool monolingual = false);
bool Initialized() const { return e_.size(); }
@@ -166,7 +159,6 @@ class TRule {
private:
TRule(const WordID& src, const WordID& trg) : e_(1, trg), f_(1, src), lhs_(), arity_(), prev_i(), prev_j() {}
- bool SanityCheck() const;
};
inline size_t hash_value(const TRule& r) {