diff options
author | Chris Dyer <redpony@gmail.com> | 2014-04-07 00:54:52 -0400 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2014-04-07 00:54:52 -0400 |
commit | cb85bfbe908f6001a92b0af99ea03f369416059e (patch) | |
tree | d4635f2fb73b0991d593c59f68b0de0773f3d499 | |
parent | 54e527dbce04fdac6a5dc30c491b53d405d42931 (diff) |
clean up dead TRule code
-rw-r--r-- | decoder/bottom_up_parser.cc | 2 | ||||
-rw-r--r-- | decoder/grammar_test.cc | 6 | ||||
-rw-r--r-- | decoder/hg_test.cc | 60 | ||||
-rw-r--r-- | decoder/hg_test.h | 69 | ||||
-rw-r--r-- | decoder/rule_lexer.h | 1 | ||||
-rw-r--r-- | decoder/rule_lexer.ll | 47 | ||||
-rw-r--r-- | decoder/scfg_translator.cc | 2 | ||||
-rw-r--r-- | decoder/test_data/hg_test.hg | 1 | ||||
-rw-r--r-- | decoder/test_data/hg_test.hg_balanced | 1 | ||||
-rw-r--r-- | decoder/test_data/hg_test.hg_int | 1 | ||||
-rw-r--r-- | decoder/test_data/hg_test.lattice | 1 | ||||
-rw-r--r-- | decoder/test_data/hg_test.tiny | 1 | ||||
-rw-r--r-- | decoder/test_data/hg_test.tiny_lattice | 1 | ||||
-rw-r--r-- | decoder/test_data/small.json.gz | bin | 1561 -> 1733 bytes | |||
-rw-r--r-- | decoder/tree2string_translator.cc | 1 | ||||
-rw-r--r-- | decoder/trule.cc | 202 | ||||
-rw-r--r-- | decoder/trule.h | 22 | ||||
-rw-r--r-- | training/dpmert/lo_test.cc | 2 |
18 files changed, 142 insertions, 278 deletions
diff --git a/decoder/bottom_up_parser.cc b/decoder/bottom_up_parser.cc index 8738c8f1..ff4c7a90 100644 --- a/decoder/bottom_up_parser.cc +++ b/decoder/bottom_up_parser.cc @@ -159,7 +159,7 @@ PassiveChart::PassiveChart(const string& goal, chart_(input.size()+1, input.size()+1), nodemap_(input.size()+1, input.size()+1), goal_cat_(TD::Convert(goal) * -1), - goal_rule_(new TRule("[Goal] ||| [" + goal + ",1] ||| [" + goal + ",1]")), + goal_rule_(new TRule("[Goal] ||| [" + goal + "] ||| [1]")), goal_idx_(-1), lc_fid_(FD::Convert("LatticeCost")), unaries_() { diff --git a/decoder/grammar_test.cc b/decoder/grammar_test.cc index 6d2c6e67..69240139 100644 --- a/decoder/grammar_test.cc +++ b/decoder/grammar_test.cc @@ -33,9 +33,9 @@ BOOST_AUTO_TEST_CASE(TestTextGrammar) { ModelSet models(w, ms); TextGrammar g; - TRulePtr r1(new TRule("[X] ||| a b c ||| A B C ||| 0.1 0.2 0.3", true)); - TRulePtr r2(new TRule("[X] ||| a b c ||| 1 2 3 ||| 0.2 0.3 0.4", true)); - TRulePtr r3(new TRule("[X] ||| a b c d ||| A B C D ||| 0.1 0.2 0.3", true)); + TRulePtr r1(new TRule("[X] ||| a b c ||| A B C ||| 0.1 0.2 0.3")); + TRulePtr r2(new TRule("[X] ||| a b c ||| 1 2 3 ||| 0.2 0.3 0.4")); + TRulePtr r3(new TRule("[X] ||| a b c d ||| A B C D ||| 0.1 0.2 0.3")); cerr << r1->AsString() << endl; g.AddRule(r1); g.AddRule(r2); diff --git a/decoder/hg_test.cc b/decoder/hg_test.cc index 8519e559..95cfae51 100644 --- a/decoder/hg_test.cc +++ b/decoder/hg_test.cc @@ -18,8 +18,10 @@ using namespace std; BOOST_FIXTURE_TEST_SUITE( s, HGSetup ); BOOST_AUTO_TEST_CASE(Controlled) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); + cerr << "PATH: " << path << "/hg.tiny\n"; Hypergraph hg; - CreateHG_tiny(&hg); + CreateHG_tiny(path, &hg); SparseVector<double> wts; wts.set_value(FD::Convert("f1"), 0.4); wts.set_value(FD::Convert("f2"), 0.8); @@ -37,10 +39,11 @@ BOOST_AUTO_TEST_CASE(Controlled) { } BOOST_AUTO_TEST_CASE(Union) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg1; Hypergraph hg2; - CreateHG_tiny(&hg1); - CreateHG(&hg2); + CreateHG_tiny(path, &hg1); + CreateHG(path, &hg2); SparseVector<double> wts; wts.set_value(FD::Convert("f1"), 0.4); wts.set_value(FD::Convert("f2"), 1.0); @@ -84,8 +87,9 @@ BOOST_AUTO_TEST_CASE(Union) { } BOOST_AUTO_TEST_CASE(ControlledKBest) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG(&hg); + CreateHG(path, &hg); vector<double> w(2); w[0]=0.4; w[1]=0.8; hg.Reweight(w); vector<WordID> trans; @@ -107,10 +111,11 @@ BOOST_AUTO_TEST_CASE(ControlledKBest) { BOOST_AUTO_TEST_CASE(InsideScore) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); SparseVector<double> wts; wts.set_value(FD::Convert("f1"), 1.0); Hypergraph hg; - CreateTinyLatticeHG(&hg); + CreateTinyLatticeHG(path, &hg); hg.Reweight(wts); vector<WordID> trans; prob_t cost = ViterbiESentence(hg, &trans); @@ -130,10 +135,11 @@ BOOST_AUTO_TEST_CASE(InsideScore) { BOOST_AUTO_TEST_CASE(PruneInsideOutside) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); SparseVector<double> wts; wts.set_value(FD::Convert("Feature_1"), 1.0); Hypergraph hg; - CreateLatticeHG(&hg); + CreateLatticeHG(path, &hg); hg.Reweight(wts); vector<WordID> trans; prob_t cost = ViterbiESentence(hg, &trans); @@ -152,8 +158,9 @@ BOOST_AUTO_TEST_CASE(PruneInsideOutside) { } BOOST_AUTO_TEST_CASE(TestPruneEdges) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateLatticeHG(&hg); + CreateLatticeHG(path, &hg); SparseVector<double> wts; wts.set_value(FD::Convert("f1"), 1.0); hg.Reweight(wts); @@ -166,8 +173,9 @@ BOOST_AUTO_TEST_CASE(TestPruneEdges) { } BOOST_AUTO_TEST_CASE(TestIntersect) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG_int(&hg); + CreateHG_int(path, &hg); SparseVector<double> wts; wts.set_value(FD::Convert("f1"), 1.0); hg.Reweight(wts); @@ -192,8 +200,9 @@ BOOST_AUTO_TEST_CASE(TestIntersect) { } BOOST_AUTO_TEST_CASE(TestPrune2) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG_int(&hg); + CreateHG_int(path, &hg); SparseVector<double> wts; wts.set_value(FD::Convert("f1"), 1.0); hg.Reweight(wts); @@ -207,8 +216,9 @@ BOOST_AUTO_TEST_CASE(TestPrune2) { } BOOST_AUTO_TEST_CASE(Sample) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateLatticeHG(&hg); + CreateLatticeHG(path, &hg); SparseVector<double> wts; wts.set_value(FD::Convert("Feature_1"), 0.0); hg.Reweight(wts); @@ -220,6 +230,7 @@ BOOST_AUTO_TEST_CASE(Sample) { } BOOST_AUTO_TEST_CASE(PLF) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; string inplf = "((('haupt',-2.06655,1),('hauptgrund',-5.71033,2),),(('grund',-1.78709,1),),(('für\\'',0.1,1),),)"; HypergraphIO::ReadFromPLF(inplf, &hg); @@ -234,8 +245,9 @@ BOOST_AUTO_TEST_CASE(PLF) { } BOOST_AUTO_TEST_CASE(PushWeightsToGoal) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG(&hg); + CreateHG(path, &hg); vector<double> w(2); w[0]=0.4; w[1]=0.8; hg.Reweight(w); vector<WordID> trans; @@ -248,8 +260,9 @@ BOOST_AUTO_TEST_CASE(PushWeightsToGoal) { } BOOST_AUTO_TEST_CASE(TestSpecialKBest) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHGBalanced(&hg); + CreateHGBalanced(path, &hg); vector<double> w(1); w[0]=0; hg.Reweight(w); vector<pair<vector<WordID>, prob_t> > list; @@ -264,8 +277,9 @@ BOOST_AUTO_TEST_CASE(TestSpecialKBest) { } BOOST_AUTO_TEST_CASE(TestGenericViterbi) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG_tiny(&hg); + CreateHG_tiny(path, &hg); SparseVector<double> wts; wts.set_value(FD::Convert("f1"), 0.4); wts.set_value(FD::Convert("f2"), 0.8); @@ -279,8 +293,9 @@ BOOST_AUTO_TEST_CASE(TestGenericViterbi) { } BOOST_AUTO_TEST_CASE(TestGenericInside) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateTinyLatticeHG(&hg); + CreateTinyLatticeHG(path, &hg); SparseVector<double> wts; wts.set_value(FD::Convert("f1"), 1.0); hg.Reweight(wts); @@ -296,8 +311,9 @@ BOOST_AUTO_TEST_CASE(TestGenericInside) { } BOOST_AUTO_TEST_CASE(TestGenericInside2) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG(&hg); + CreateHG(path, &hg); SparseVector<double> wts; wts.set_value(FD::Convert("f1"), 0.4); wts.set_value(FD::Convert("f2"), 0.8); @@ -322,8 +338,9 @@ BOOST_AUTO_TEST_CASE(TestGenericInside2) { } BOOST_AUTO_TEST_CASE(TestAddExpectations) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG(&hg); + CreateHG(path, &hg); SparseVector<double> wts; wts.set_value(FD::Convert("f1"), 0.4); wts.set_value(FD::Convert("f2"), 0.8); @@ -338,8 +355,8 @@ BOOST_AUTO_TEST_CASE(TestAddExpectations) { } BOOST_AUTO_TEST_CASE(Small) { - Hypergraph hg; std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); + Hypergraph hg; CreateSmallHG(&hg, path); SparseVector<double> wts; wts.set_value(FD::Convert("Model_0"), -2.0); @@ -361,6 +378,7 @@ BOOST_AUTO_TEST_CASE(Small) { } BOOST_AUTO_TEST_CASE(JSONTest) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); ostringstream os; JSONParser::WriteEscapedString("\"I don't know\", she said.", &os); BOOST_CHECK_EQUAL("\"\\\"I don't know\\\", she said.\"", os.str()); @@ -370,9 +388,10 @@ BOOST_AUTO_TEST_CASE(JSONTest) { } BOOST_AUTO_TEST_CASE(TestGenericKBest) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG(&hg); - //CreateHGBalanced(&hg); + CreateHG(path, &hg); + //CreateHGBalanced(path, &hg); SparseVector<double> wts; wts.set_value(FD::Convert("f1"), 0.4); wts.set_value(FD::Convert("f2"), 1.0); @@ -392,8 +411,9 @@ BOOST_AUTO_TEST_CASE(TestGenericKBest) { } BOOST_AUTO_TEST_CASE(TestReadWriteHG) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg,hg2; - CreateHG(&hg); + CreateHG(path, &hg); hg.edges_.front().j_ = 23; hg.edges_.back().prev_i_ = 99; ostringstream os; diff --git a/decoder/hg_test.h b/decoder/hg_test.h index e96cb0b1..b7bab3c2 100644 --- a/decoder/hg_test.h +++ b/decoder/hg_test.h @@ -23,25 +23,13 @@ Name perro_wts="SameFirstLetter 1 LongerThanPrev 1 ShorterThanPrev 1 GlueTop 0.0 // you can inherit from this or just use the static methods struct HGSetup { - enum { - HG, - HG_int, - HG_tiny, - HGBalanced, - LatticeHG, - TinyLatticeHG, - }; - static void CreateHG(Hypergraph* hg); - static void CreateHG_int(Hypergraph* hg); - static void CreateHG_tiny(Hypergraph* hg); - static void CreateHGBalanced(Hypergraph* hg); - static void CreateLatticeHG(Hypergraph* hg); - static void CreateTinyLatticeHG(Hypergraph* hg); - - static void Json(Hypergraph *hg,std::string const& json) { - std::istringstream i(json); - HypergraphIO::ReadFromJSON(&i, hg); - } + static void CreateHG(const std::string& path,Hypergraph* hg); + static void CreateHG_int(const std::string& path,Hypergraph* hg); + static void CreateHG_tiny(const std::string& path, Hypergraph* hg); + static void CreateHGBalanced(const std::string& path,Hypergraph* hg); + static void CreateLatticeHG(const std::string& path,Hypergraph* hg); + static void CreateTinyLatticeHG(const std::string& path,Hypergraph* hg); + static void JsonFile(Hypergraph *hg,std::string f) { ReadFile rf(f); HypergraphIO::ReadFromJSON(rf.stream(), hg); @@ -52,18 +40,6 @@ struct HGSetup { static void CreateSmallHG(Hypergraph *hg, std::string path) { JsonTestFile(hg,path,small_json); } }; -namespace { -Name HGjsons[]= { - "{\"rules\":[1,\"[X] ||| a\",2,\"[X] ||| A [1]\",3,\"[X] ||| c\",4,\"[X] ||| C [1]\",5,\"[X] ||| [1] B [2]\",6,\"[X] ||| [1] b [2]\",7,\"[X] ||| X [1]\",8,\"[X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.8,1,-0.1],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[1,-1],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[2],\"feats\":[0,-0.2,1,-0.1],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[1,3],\"feats\":[0,-1.2,1,-0.2],\"rule\":5},{\"tail\":[1,3],\"feats\":[0,-0.5,1,-1.3],\"rule\":6}],\"node\":{\"in_edges\":[4,5]},\"edges\":[{\"tail\":[4],\"feats\":[0,-0.5,1,-0.8],\"rule\":7},{\"tail\":[4],\"feats\":[0,-0.7,1,-0.9],\"rule\":8}],\"node\":{\"in_edges\":[6,7]}}", -"{\"rules\":[1,\"[X] ||| a\",2,\"[X] ||| b\",3,\"[X] ||| a [1]\",4,\"[X] ||| [1] b\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[0,0.1],\"rule\":1},{\"tail\":[],\"feats\":[0,0.1],\"rule\":2}],\"node\":{\"in_edges\":[0,1],\"cat\":\"X\"},\"edges\":[{\"tail\":[0],\"feats\":[0,0.3],\"rule\":3},{\"tail\":[0],\"feats\":[0,0.2],\"rule\":4}],\"node\":{\"in_edges\":[2,3],\"cat\":\"Goal\"}}", - "{\"rules\":[1,\"[X] ||| <s>\",2,\"[X] ||| X [1]\",3,\"[X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[0,-2,1,-99],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.5,1,-0.8],\"rule\":2},{\"tail\":[0],\"feats\":[0,-0.7,1,-0.9],\"rule\":3}],\"node\":{\"in_edges\":[1,2]}}", - "{\"rules\":[1,\"[X] ||| i\",2,\"[X] ||| a\",3,\"[X] ||| b\",4,\"[X] ||| [1] [2]\",5,\"[X] ||| [1] [2]\",6,\"[X] ||| c\",7,\"[X] ||| d\",8,\"[X] ||| [1] [2]\",9,\"[X] ||| [1] [2]\",10,\"[X] ||| [1] [2]\",11,\"[X] ||| [1] [2]\",12,\"[X] ||| [1] [2]\",13,\"[X] ||| [1] [2]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[1,2],\"feats\":[],\"rule\":4},{\"tail\":[2,1],\"feats\":[],\"rule\":5}],\"node\":{\"in_edges\":[3,4]},\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":6}],\"node\":{\"in_edges\":[5]},\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":7}],\"node\":{\"in_edges\":[6]},\"edges\":[{\"tail\":[4,5],\"feats\":[],\"rule\":8},{\"tail\":[5,4],\"feats\":[],\"rule\":9}],\"node\":{\"in_edges\":[7,8]},\"edges\":[{\"tail\":[3,6],\"feats\":[],\"rule\":10},{\"tail\":[6,3],\"feats\":[],\"rule\":11}],\"node\":{\"in_edges\":[9,10]},\"edges\":[{\"tail\":[7,0],\"feats\":[],\"rule\":12},{\"tail\":[0,7],\"feats\":[],\"rule\":13}],\"node\":{\"in_edges\":[11,12]}}", - "{\"rules\":[1,\"[X] ||| [1] a\",2,\"[X] ||| [1] A\",3,\"[X] ||| [1] A A\",4,\"[X] ||| [1] b\",5,\"[X] ||| [1] c\",6,\"[X] ||| [1] B C\",7,\"[X] ||| [1] A B C\",8,\"[X] ||| [1] CC\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[],\"node\":{\"in_edges\":[]},\"edges\":[{\"tail\":[0],\"feats\":[2,-0.3],\"rule\":1},{\"tail\":[0],\"feats\":[2,-0.6],\"rule\":2},{\"tail\":[0],\"feats\":[2,-1.7],\"rule\":3}],\"node\":{\"in_edges\":[0,1,2]},\"edges\":[{\"tail\":[1],\"feats\":[2,-0.5],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[2],\"feats\":[2,-0.6],\"rule\":5},{\"tail\":[1],\"feats\":[2,-0.8],\"rule\":6},{\"tail\":[0],\"feats\":[2,-0.01],\"rule\":7},{\"tail\":[2],\"feats\":[2,-0.8],\"rule\":8}],\"node\":{\"in_edges\":[4,5,6,7]}}", - "{\"rules\":[1,\"[X] ||| [1] a\",2,\"[X] ||| [1] A\",3,\"[X] ||| [1] b\",4,\"[X] ||| [1] B'\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[],\"node\":{\"in_edges\":[]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.2],\"rule\":1},{\"tail\":[0],\"feats\":[0,-0.6],\"rule\":2}],\"node\":{\"in_edges\":[0,1]},\"edges\":[{\"tail\":[1],\"feats\":[0,-0.1],\"rule\":3},{\"tail\":[1],\"feats\":[0,-0.9],\"rule\":4}],\"node\":{\"in_edges\":[2,3]}}", -}; - -} - void AddNullEdge(Hypergraph* hg) { TRule x; x.arity_ = 0; @@ -71,31 +47,36 @@ void AddNullEdge(Hypergraph* hg) { hg->edges_.back().head_node_ = 0; } -void HGSetup::CreateTinyLatticeHG(Hypergraph* hg) { - Json(hg,HGjsons[TinyLatticeHG]); +void HGSetup::CreateTinyLatticeHG(const std::string& path,Hypergraph* hg) { + ReadFile rf(path + "/hg_test.tiny_lattice"); + HypergraphIO::ReadFromJSON(rf.stream(), hg); AddNullEdge(hg); } -void HGSetup::CreateLatticeHG(Hypergraph* hg) { - Json(hg,HGjsons[LatticeHG]); +void HGSetup::CreateLatticeHG(const std::string& path,Hypergraph* hg) { + ReadFile rf(path + "/hg_test.lattice"); + HypergraphIO::ReadFromJSON(rf.stream(), hg); AddNullEdge(hg); } -void HGSetup::CreateHG_tiny(Hypergraph* hg) { - Json(hg,HGjsons[HG_tiny]); +void HGSetup::CreateHG_tiny(const std::string& path, Hypergraph* hg) { + ReadFile rf(path + "/hg_test.tiny"); + HypergraphIO::ReadFromJSON(rf.stream(), hg); } -void HGSetup::CreateHG_int(Hypergraph* hg) { - Json(hg,HGjsons[HG_int]); +void HGSetup::CreateHG_int(const std::string& path,Hypergraph* hg) { + ReadFile rf(path + "/hg_test.hg_int"); + HypergraphIO::ReadFromJSON(rf.stream(), hg); } -void HGSetup::CreateHG(Hypergraph* hg) { - Json(hg,HGjsons[HG]); +void HGSetup::CreateHG(const std::string& path,Hypergraph* hg) { + ReadFile rf(path + "/hg_test.hg"); + HypergraphIO::ReadFromJSON(rf.stream(), hg); } -void HGSetup::CreateHGBalanced(Hypergraph* hg) { - Json(hg,HGjsons[HGBalanced]); +void HGSetup::CreateHGBalanced(const std::string& path,Hypergraph* hg) { + ReadFile rf(path + "/hg_test.hg_balanced"); + HypergraphIO::ReadFromJSON(rf.stream(), hg); } - #endif diff --git a/decoder/rule_lexer.h b/decoder/rule_lexer.h index f844e5b2..e15c056d 100644 --- a/decoder/rule_lexer.h +++ b/decoder/rule_lexer.h @@ -9,6 +9,7 @@ struct RuleLexer { typedef void (*RuleCallback)(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra); static void ReadRules(std::istream* in, RuleCallback func, const std::string& fname, void* extra); + static void ReadRule(const std::string&, RuleCallback func, bool mono_rule, void* extra); }; #endif diff --git a/decoder/rule_lexer.ll b/decoder/rule_lexer.ll index cc73c079..d4a8d86b 100644 --- a/decoder/rule_lexer.ll +++ b/decoder/rule_lexer.ll @@ -14,6 +14,7 @@ #include "verbose.h" #include "tree_fragment.h" +bool lex_mono_rules = false; int lex_line = 0; std::istream* scfglex_stream = NULL; RuleLexer::RuleCallback rule_callback = NULL; @@ -120,7 +121,7 @@ void check_and_update_ctf_stack(const TRulePtr& rp) { %} REAL [\-+]?[0-9]+(\.[0-9]*)?([eE][-+]*[0-9]+)? -NT [^\t \[\],]+ +NT ([^\t \n\r\[\],]+|Goal) %x LHS_END SRC TRG FEATS FEATVAL ALIGNS TREE %% @@ -132,7 +133,7 @@ NT [^\t \[\],]+ <INITIAL>\[{NT}\] { scfglex_tmp_token.assign(yytext + 1, yyleng - 2); scfglex_lhs = -TD::Convert(scfglex_tmp_token); - // std::cerr << scfglex_tmp_token << "\n"; + //std::cerr << "LHS: " << scfglex_tmp_token << "\n"; BEGIN(LHS_END); } @@ -199,9 +200,9 @@ NT [^\t \[\],]+ <SRC>\|\|\| { memset(scfglex_nt_sanity, 0, scfglex_src_arity * sizeof(int)); - BEGIN(TRG); + if (lex_mono_rules) { BEGIN(FEATS); } else { BEGIN(TRG); } } -<SRC>[^ \t]+ { +<SRC>[^ \t\n\r]+ { scfglex_tmp_token.assign(yytext, yyleng); scfglex_src_rhs[scfglex_src_rhs_size] = TD::Convert(scfglex_tmp_token); ++scfglex_src_rhs_size; @@ -217,14 +218,28 @@ NT [^\t \[\],]+ <TRG>\|\|\| { BEGIN(FEATS); } -<TRG>[^ \t]+ { +<TRG>[^ \t\n\r]+ { scfglex_tmp_token.assign(yytext, yyleng); scfglex_trg_rhs[scfglex_trg_rhs_size] = TD::Convert(scfglex_tmp_token); ++scfglex_trg_rhs_size; } <TRG>[ \t]+ { ; } -<TRG,FEATS,ALIGNS,TREE>\n { +<SRC,TRG,FEATS,ALIGNS,TREE>\n { + if (lex_mono_rules) { + if (scfglex_trg_rhs_size != 0) { + std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": expected monolingual rule\n"; + abort(); + } + scfglex_trg_arity = scfglex_src_arity; + scfglex_trg_rhs_size = scfglex_src_rhs_size; + int ntc = 0; + for (int i = 0; i < scfglex_src_rhs_size; ++i) + if (scfglex_trg_rhs[i] <= 0) + scfglex_trg_rhs[i] = ntc--; + else + scfglex_trg_rhs[i] = scfglex_src_rhs[i]; + } if (scfglex_src_arity != scfglex_trg_arity) { std::cerr << "Grammar " << scfglex_fname << " line " << lex_line << ": LHS and RHS arity mismatch!\n"; abort(); @@ -243,7 +258,7 @@ NT [^\t \[\],]+ TRulePtr coarse_rp = ((ctf_level == 0) ? TRulePtr() : ctf_rule_stack.top()); rule_callback(rp, ctf_level, coarse_rp, rule_callback_extra); ctf_rule_stack.push(rp); - // std::cerr << rp->AsString() << std::endl; + //std::cerr << "RULE: " << rp->AsString() << std::endl; num_rules++; lex_line++; if (!SILENT) { @@ -317,7 +332,7 @@ NT [^\t \[\],]+ #include "filelib.h" -void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, const std::string& fname, void* extra) { +static void init_default_feature_names() { if (scfglex_phrase_fnames.empty()) { scfglex_phrase_fnames.resize(100); for (int i = 0; i < scfglex_phrase_fnames.size(); ++i) { @@ -326,6 +341,11 @@ void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, const scfglex_phrase_fnames[i] = FD::Convert(os.str()); } } +} + +void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, const std::string& fname, void* extra) { + init_default_feature_names(); + lex_mono_rules = false; lex_line = 1; scfglex_fname = fname; scfglex_stream = in; @@ -334,3 +354,14 @@ void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, const yylex(); } +void RuleLexer::ReadRule(const std::string& srule, RuleCallback func, bool mono, void* extra) { + init_default_feature_names(); + lex_mono_rules = mono; + lex_line = 1; + rule_callback_extra = extra; + rule_callback = func; + yy_scan_string(srule.c_str()); + yylex(); + yylex_destroy(); +} + diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index 159a1d60..88f62769 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -47,7 +47,7 @@ GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt, const TRulePtr stop_glue(new TRule("[" + goal_nt + "] ||| [" + default_nt + ",1] ||| [1]")); AddRule(stop_glue); RefineRule(stop_glue, ctf_level); - TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] ["+ default_nt + ",2] ||| [1] [2] ||| Glue=1")); + TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + "] ["+ default_nt + "] ||| [1] [2] ||| Glue=1")); AddRule(glue); RefineRule(glue, ctf_level); } diff --git a/decoder/test_data/hg_test.hg b/decoder/test_data/hg_test.hg new file mode 100644 index 00000000..ef98e9d4 --- /dev/null +++ b/decoder/test_data/hg_test.hg @@ -0,0 +1 @@ +{"rules":[1,"[X] ||| a ||| a",2,"[X] ||| A [X] ||| A [1]",3,"[X] ||| c ||| c",4,"[X] ||| C [X] ||| C [1]",5,"[X] ||| [X] B [X] ||| [1] B [2]",6,"[X] ||| [X] b [X] ||| [1] b [2]",7,"[X] ||| X [X] ||| X [1]",8,"[X] ||| Z [X] ||| Z [1]"],"features":["f1","f2","Feature_1","Feature_0","Model_0","Model_1","Model_2","Model_3","Model_4","Model_5","Model_6","Model_7","LatticeCost"],"edges":[{"tail":[],"spans":[24568,32767,24568,32767],"feats":[],"rule":1}],"node":{"in_edges":[0],"cat":"X"},"edges":[{"tail":[0],"spans":[24568,32767,24568,32767],"feats":[0,-0.8,1,-0.1],"rule":2}],"node":{"in_edges":[1],"cat":"X"},"edges":[{"tail":[],"spans":[24568,32767,24568,32767],"feats":[1,-1],"rule":3}],"node":{"in_edges":[2],"cat":"X"},"edges":[{"tail":[2],"spans":[24568,32767,24568,32767],"feats":[0,-0.2,1,-0.1],"rule":4}],"node":{"in_edges":[3],"cat":"X"},"edges":[{"tail":[1,3],"spans":[24568,32767,24568,32767],"feats":[0,-1.2,1,-0.2],"rule":5},{"tail":[1,3],"spans":[24568,32767,24568,32767],"feats":[0,-0.5,1,-1.3],"rule":6}],"node":{"in_edges":[4,5],"cat":"X"},"edges":[{"tail":[4],"spans":[24568,32767,24568,32767],"feats":[0,-0.5,1,-0.8],"rule":7},{"tail":[4],"spans":[24568,32767,24568,32767],"feats":[0,-0.7,1,-0.9],"rule":8}],"node":{"in_edges":[6,7],"cat":"X"}} diff --git a/decoder/test_data/hg_test.hg_balanced b/decoder/test_data/hg_test.hg_balanced new file mode 100644 index 00000000..0f0f499f --- /dev/null +++ b/decoder/test_data/hg_test.hg_balanced @@ -0,0 +1 @@ +{"rules":[1,"[X] ||| i ||| i",2,"[X] ||| a ||| a",3,"[X] ||| b ||| b",4,"[X] ||| [X] [X] ||| [1] [2]",5,"[X] ||| [X] [X] ||| [1] [2]",6,"[X] ||| c ||| c",7,"[X] ||| d ||| d",8,"[X] ||| [X] [X] ||| [1] [2]",9,"[X] ||| [X] [X] ||| [1] [2]",10,"[X] ||| [X] [X] ||| [1] [2]",11,"[X] ||| [X] [X] ||| [1] [2]",12,"[X] ||| [X] [X] ||| [1] [2]",13,"[X] ||| [X] [X] ||| [1] [2]"],"features":["f1","f2","Feature_1","Feature_0","Model_0","Model_1","Model_2","Model_3","Model_4","Model_5","Model_6","Model_7","LatticeCost"],"edges":[{"tail":[],"spans":[32760,32767,32760,32767],"feats":[],"rule":1}],"node":{"in_edges":[0],"cat":"X"},"edges":[{"tail":[],"spans":[32760,32767,32760,32767],"feats":[],"rule":2}],"node":{"in_edges":[1],"cat":"X"},"edges":[{"tail":[],"spans":[32760,32767,32760,32767],"feats":[],"rule":3}],"node":{"in_edges":[2],"cat":"X"},"edges":[{"tail":[1,2],"spans":[32760,32767,32760,32767],"feats":[],"rule":4},{"tail":[2,1],"spans":[32760,32767,32760,32767],"feats":[],"rule":5}],"node":{"in_edges":[3,4],"cat":"X"},"edges":[{"tail":[],"spans":[32760,32767,32760,32767],"feats":[],"rule":6}],"node":{"in_edges":[5],"cat":"X"},"edges":[{"tail":[],"spans":[32760,32767,32760,32767],"feats":[],"rule":7}],"node":{"in_edges":[6],"cat":"X"},"edges":[{"tail":[4,5],"spans":[32760,32767,32760,32767],"feats":[],"rule":8},{"tail":[5,4],"spans":[32760,32767,32760,32767],"feats":[],"rule":9}],"node":{"in_edges":[7,8],"cat":"X"},"edges":[{"tail":[3,6],"spans":[32760,32767,32760,32767],"feats":[],"rule":10},{"tail":[6,3],"spans":[32760,32767,32760,32767],"feats":[],"rule":11}],"node":{"in_edges":[9,10],"cat":"X"},"edges":[{"tail":[7,0],"spans":[32760,32767,32760,32767],"feats":[],"rule":12},{"tail":[0,7],"spans":[32760,32767,32760,32767],"feats":[],"rule":13}],"node":{"in_edges":[11,12],"cat":"X"}} diff --git a/decoder/test_data/hg_test.hg_int b/decoder/test_data/hg_test.hg_int new file mode 100644 index 00000000..9c4603bc --- /dev/null +++ b/decoder/test_data/hg_test.hg_int @@ -0,0 +1 @@ +{"rules":[1,"[X] ||| a ||| a",2,"[X] ||| b ||| b",3,"[X] ||| a [X] ||| a [1]",4,"[X] ||| [X] b ||| [1] b"],"features":["f1","f2","Feature_1","Feature_0","Model_0","Model_1","Model_2","Model_3","Model_4","Model_5","Model_6","Model_7","LatticeCost"],"edges":[{"tail":[],"spans":[-8200,32767,-8200,32767],"feats":[0,0.1],"rule":1},{"tail":[],"spans":[-8200,32767,-8200,32767],"feats":[0,0.1],"rule":2}],"node":{"in_edges":[0,1],"cat":"X"},"edges":[{"tail":[0],"spans":[-8200,32767,-8200,32767],"feats":[0,0.3],"rule":3},{"tail":[0],"spans":[-8200,32767,-8200,32767],"feats":[0,0.2],"rule":4}],"node":{"in_edges":[2,3],"cat":"Goal"}} diff --git a/decoder/test_data/hg_test.lattice b/decoder/test_data/hg_test.lattice new file mode 100644 index 00000000..29e021c5 --- /dev/null +++ b/decoder/test_data/hg_test.lattice @@ -0,0 +1 @@ +{"rules":[1,"[X] ||| [X] a ||| [1] a",2,"[X] ||| [X] A ||| [1] A",3,"[X] ||| [X] A A ||| [1] A A",4,"[X] ||| [X] b ||| [1] b",5,"[X] ||| [X] c ||| [1] c",6,"[X] ||| [X] B C ||| [1] B C",7,"[X] ||| [X] A B C ||| [1] A B C",8,"[X] ||| [X] CC ||| [1] CC"],"features":["f1","f2","Feature_1","Feature_0","Model_0","Model_1","Model_2","Model_3","Model_4","Model_5","Model_6","Model_7"],"edges":[],"node":{"in_edges":[]},"edges":[{"tail":[0],"feats":[2,-0.3],"rule":1},{"tail":[0],"feats":[2,-0.6],"rule":2},{"tail":[0],"feats":[2,-1.7],"rule":3}],"node":{"in_edges":[0,1,2]},"edges":[{"tail":[1],"feats":[2,-0.5],"rule":4}],"node":{"in_edges":[3]},"edges":[{"tail":[2],"feats":[2,-0.6],"rule":5},{"tail":[1],"feats":[2,-0.8],"rule":6},{"tail":[0],"feats":[2,-0.01],"rule":7},{"tail":[2],"feats":[2,-0.8],"rule":8}],"node":{"in_edges":[4,5,6,7]}}" diff --git a/decoder/test_data/hg_test.tiny b/decoder/test_data/hg_test.tiny new file mode 100644 index 00000000..101b96e9 --- /dev/null +++ b/decoder/test_data/hg_test.tiny @@ -0,0 +1 @@ +{"rules":[1,"[X] ||| <s> ||| <s>",2,"[X] ||| X [X] ||| X [1]",3,"[X] ||| Z [X] ||| Z [1]"],"features":["f1","f2","Feature_1","Feature_0","Model_0","Model_1","Model_2","Model_3","Model_4","Model_5","Model_6","Model_7","LatticeCost"],"edges":[{"tail":[],"spans":[25080,32767,25080,32767],"feats":[0,-2,1,-99],"rule":1}],"node":{"in_edges":[0],"cat":"X"},"edges":[{"tail":[0],"spans":[25080,32767,25080,32767],"feats":[0,-0.5,1,-0.8],"rule":2},{"tail":[0],"spans":[25080,32767,25080,32767],"feats":[0,-0.7,1,-0.9],"rule":3}],"node":{"in_edges":[1,2],"cat":"X"}} diff --git a/decoder/test_data/hg_test.tiny_lattice b/decoder/test_data/hg_test.tiny_lattice new file mode 100644 index 00000000..b9adf3cd --- /dev/null +++ b/decoder/test_data/hg_test.tiny_lattice @@ -0,0 +1 @@ +{"rules":[1,"[X] ||| [X] a ||| [1] a",2,"[X] ||| [X] A ||| [1] A",3,"[X] ||| [X] b ||| [1] b",4,"[X] ||| [X] B' ||| [1] B'"],"features":["f1","f2","Feature_1","Feature_0","Model_0","Model_1","Model_2","Model_3","Model_4","Model_5","Model_6","Model_7"],"edges":[],"node":{"in_edges":[]},"edges":[{"tail":[0],"feats":[0,-0.2],"rule":1},{"tail":[0],"feats":[0,-0.6],"rule":2}],"node":{"in_edges":[0,1]},"edges":[{"tail":[1],"feats":[0,-0.1],"rule":3},{"tail":[1],"feats":[0,-0.9],"rule":4}],"node":{"in_edges":[2,3]}} diff --git a/decoder/test_data/small.json.gz b/decoder/test_data/small.json.gz Binary files differindex 892ba360..f6f37293 100644 --- a/decoder/test_data/small.json.gz +++ b/decoder/test_data/small.json.gz diff --git a/decoder/tree2string_translator.cc b/decoder/tree2string_translator.cc index 4cd584fb..f288ab4e 100644 --- a/decoder/tree2string_translator.cc +++ b/decoder/tree2string_translator.cc @@ -174,6 +174,7 @@ struct Tree2StringTranslatorImpl { q.push(ParserState(input_tree.begin(), g.get())); unique.insert(q.back()); } + if (q.size() == 0) return false; unsigned tree_top = q.front().input_node_idx; while(!q.empty()) { ParserState& s = q.front(); diff --git a/decoder/trule.cc b/decoder/trule.cc index c22baae3..1bd5425f 100644 --- a/decoder/trule.cc +++ b/decoder/trule.cc @@ -17,73 +17,16 @@ bool TRule::IsGoal() const { return GetLHS() == kGOAL; } -static WordID ConvertTrgString(const string& w) { - const unsigned len = w.size(); - WordID id = 0; - // [X,0] or [0] - // for target rules, we ignore the category, just keep the index - if (len > 2 && w[0]=='[' && w[len-1]==']' && w[len-2] > '0' && w[len-2] <= '9' && - (len == 3 || (len > 4 && w[len-3] == ','))) { - id = w[len-2] - '0'; - id = 1 - id; - } else { - id = TD::Convert(w); - } - return id; -} - -static WordID ConvertSrcString(const string& w, bool mono = false) { - const unsigned len = w.size(); - // [X,0] - // for source rules, we keep the category and ignore the index (source rules are - // always numbered 1, 2, 3... - if (mono) { - if (len > 2 && w[0]=='[' && w[len-1]==']') { - if (len > 4 && w[len-3] == ',') { - cerr << "[ERROR] Monolingual rules mut not have non-terminal indices:\n " - << w << endl; - exit(1); - } - // TODO check that source indices go 1,2,3,etc. - return TD::Convert(w.substr(1, len-2)) * -1; - } else { - return TD::Convert(w); - } - } else { - if (len > 4 && w[0]=='[' && w[len-1]==']' && w[len-3] == ',' && w[len-2] > '0' && w[len-2] <= '9') { - return TD::Convert(w.substr(1, len-4)) * -1; - } else { - return TD::Convert(w); - } - } -} - -static WordID ConvertLHS(const string& w) { - if (w[0] == '[') { - const unsigned len = w.size(); - if (len < 3) { cerr << "Format error: " << w << endl; exit(1); } - return TD::Convert(w.substr(1, len-2)) * -1; - } else { - return TD::Convert(w) * -1; - } -} - TRule* TRule::CreateRuleSynchronous(const string& rule) { TRule* res = new TRule; - if (res->ReadFromString(rule, true, false)) return res; + if (res->ReadFromString(rule)) return res; cerr << "[ERROR] Failed to creating rule from: " << rule << endl; delete res; return NULL; } TRule* TRule::CreateRulePhrasetable(const string& rule) { - // TODO make this faster - // TODO add configuration for default NT type - if (rule[0] == '[') { - cerr << "Phrasetable rules shouldn't have a LHS / non-terminals:\n " << rule << endl; - return NULL; - } - TRule* res = new TRule("[X] ||| " + rule, true, false); + TRule* res = new TRule("[X] ||| " + rule); if (res->Arity() != 0) { cerr << "Phrasetable rules should have arity 0:\n " << rule << endl; delete res; @@ -93,138 +36,27 @@ TRule* TRule::CreateRulePhrasetable(const string& rule) { } TRule* TRule::CreateRuleMonolingual(const string& rule) { - return new TRule(rule, false, true); + return new TRule(rule, true); } namespace { -// callback for lexer +// callback for single rule lexer int n_assigned=0; -void assign_trule(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra) { - (void) ctf_level; - (void) coarse_rule; - TRule *assignto=(TRule *)extra; - *assignto=*new_rule; - ++n_assigned; -} - -} - -bool TRule::ReadFromString(const string& line, bool strict, bool mono) { - if (!is_single_line_stripped(line)) - cerr<<"\nWARNING: building rule from multi-line string "<<line<<".\n"; - // backed off of this: it's failing to parse TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] ["+ default_nt + ",2] ||| [1] [2] ||| Glue=1")); thinks [1] is the features! - if (false && !(mono||strict)) { - // use lexer - istringstream il(line); - n_assigned=0; - RuleLexer::ReadRules(&il,assign_trule,"STRING",this); - if (n_assigned>1) - cerr<<"\nWARNING: more than one rule parsed from multi-line string; kept last: "<<line<<".\n"; - return n_assigned; - } - - e_.clear(); - f_.clear(); - scores_.clear(); - - string w; - istringstream is(line); - int format = CountSubstrings(line, "|||"); - if (strict && format < 2) { - cerr << "Bad rule format in strict mode:\n" << line << endl; - return false; + void assign_trule(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra) { + (void) ctf_level; + (void) coarse_rule; + *static_cast<TRule*>(extra) = *new_rule; + ++n_assigned; } - if (format >= 2 || (mono && format == 1)) { - while(is>>w && w!="|||") { lhs_ = ConvertLHS(w); } - while(is>>w && w!="|||") { f_.push_back(ConvertSrcString(w, mono)); } - if (!mono) { - while(is>>w && w!="|||") { e_.push_back(ConvertTrgString(w)); } - } - int fv = 0; - if (is) { - string ss; - getline(is, ss); - //cerr << "L: " << ss << endl; - unsigned start = 0; - unsigned len = ss.size(); - const size_t ppos = ss.find(" |||"); - if (ppos != string::npos) { len = ppos; } - while (start < len) { - while(start < len && (ss[start] == ' ' || ss[start] == ';')) - ++start; - if (start == len) break; - unsigned end = start + 1; - while(end < len && (ss[end] != '=' && ss[end] != ' ' && ss[end] != ';')) - ++end; - if (end == len || ss[end] == ' ' || ss[end] == ';') { - //cerr << "PROC: '" << ss.substr(start, end - start) << "'\n"; - // non-named features - if (end != len) { ss[end] = 0; } - string fname = "PhraseModel_X"; - if (fv > 9) { cerr << "Too many phrasetable scores - used named format\n"; abort(); } - fname[12]='0' + fv; - ++fv; - // if the feature set is frozen, this may return zero, indicating an - // undefined feature - const int fid = FD::Convert(fname); - if (fid) - scores_.set_value(fid, atof(&ss[start])); - //cerr << "F: " << fname << " VAL=" << scores_.value(FD::Convert(fname)) << endl; - } else { - const int fid = FD::Convert(ss.substr(start, end - start)); - start = end + 1; - end = start + 1; - while(end < len && (ss[end] != ' ' && ss[end] != ';')) - ++end; - if (end < len) { ss[end] = 0; } - assert(start < len); - if (fid) - scores_.set_value(fid, atof(&ss[start])); - //cerr << "F: " << FD::Convert(fid) << " VAL=" << scores_.value(fid) << endl; - } - start = end + 1; - } - } - } else if (format == 1) { - while(is>>w && w!="|||") { lhs_ = ConvertLHS(w); } - while(is>>w && w!="|||") { e_.push_back(ConvertTrgString(w)); } - f_ = e_; - int x = ConvertLHS("[X]"); - for (unsigned i = 0; i < f_.size(); ++i) - if (f_[i] <= 0) { f_[i] = x; } - } else { - cerr << "F: " << format << endl; - cerr << "[ERROR] Don't know how to read:\n" << line << endl; - } - if (mono) { - e_ = f_; - int ci = 0; - for (unsigned i = 0; i < e_.size(); ++i) - if (e_[i] < 0) - e_[i] = ci--; - } - ComputeArity(); - return SanityCheck(); } -bool TRule::SanityCheck() const { - vector<int> used(f_.size(), 0); - int ac = 0; - for (unsigned i = 0; i < e_.size(); ++i) { - int ind = e_[i]; - if (ind > 0) continue; - ind = -ind; - if ((++used[ind]) != 1) { - cerr << "[ERROR] e-side variable index " << (ind+1) << " used more than once!\n"; - return false; - } - ac++; - } - if (ac != Arity()) { - cerr << "[ERROR] e-side arity mismatches f-side\n"; - return false; - } - return true; +bool TRule::ReadFromString(const string& line, bool mono) { + n_assigned = 0; + //cerr << "LINE: " << line << " -- mono=" << mono << endl; + RuleLexer::ReadRule(line + '\n', assign_trule, mono, this); + if (n_assigned > 1) + cerr<<"\nWARNING: more than one rule parsed from multi-line string; kept last: "<<line<<".\n"; + return n_assigned; } void TRule::ComputeArity() { @@ -245,7 +77,7 @@ string TRule::AsString(bool verbose) const { if (w < 0) { int wi = w * -1; ++idx; - os << " [" << TD::Convert(wi) << ',' << idx << ']'; + os << " [" << TD::Convert(wi) << ']'; } else { os << ' ' << TD::Convert(w); } diff --git a/decoder/trule.h b/decoder/trule.h index e9a10bea..7dced5a1 100644 --- a/decoder/trule.h +++ b/decoder/trule.h @@ -51,23 +51,18 @@ class TRule { TRule(const TRule& other) : e_(other.e_), f_(other.f_), lhs_(other.lhs_), scores_(other.scores_), arity_(other.arity_), prev_i(-1), prev_j(-1), a_(other.a_) {} - // if mono or strict is true, then lexer won't be used, and //FIXME: > 9 variables won't work - explicit TRule(const std::string& text, bool strict = false, bool mono = false) : prev_i(-1), prev_j(-1) { - ReadFromString(text, strict, mono); + explicit TRule(const std::string& text, bool mono = false) : prev_i(-1), prev_j(-1) { + ReadFromString(text, mono); } - // deprecated, use lexer // make a rule from a hiero-like rule table, e.g. // [X] ||| [X,1] DE [X,2] ||| [X,2] of the [X,1] - // if misformatted, returns NULL static TRule* CreateRuleSynchronous(const std::string& rule); - // deprecated, use lexer // make a rule from a phrasetable entry (i.e., one that has no LHS type), e.g: // el gato ||| the cat ||| Feature_2=0.34 static TRule* CreateRulePhrasetable(const std::string& rule); - // deprecated, use lexer // make a rule from a non-synchrnous CFG representation, e.g.: // [LHS] ||| term1 [NT] term2 [OTHER_NT] [YET_ANOTHER_NT] static TRule* CreateRuleMonolingual(const std::string& rule); @@ -80,11 +75,10 @@ class TRule { std::vector<WordID>* result) const { unsigned vc = 0; result->clear(); - for (std::vector<WordID>::const_iterator i = e_.begin(); i != e_.end(); ++i) { - const WordID& c = *i; + for (const auto& c : e_) { if (c < 1) { ++vc; - const std::vector<WordID>& var_value = *var_values[-c]; + const auto& var_value = *var_values[-c]; std::copy(var_value.begin(), var_value.end(), std::back_inserter(*result)); @@ -99,10 +93,9 @@ class TRule { std::vector<WordID>* result) const { unsigned vc = 0; result->clear(); - for (std::vector<WordID>::const_iterator i = f_.begin(); i != f_.end(); ++i) { - const WordID& c = *i; + for (const auto& c : f_) { if (c < 1) { - const std::vector<WordID>& var_value = *var_values[vc++]; + const auto& var_value = *var_values[vc++]; std::copy(var_value.begin(), var_value.end(), std::back_inserter(*result)); @@ -113,7 +106,7 @@ class TRule { assert(vc == var_values.size()); } - bool ReadFromString(const std::string& line, bool strict = false, bool monolingual = false); + bool ReadFromString(const std::string& line, bool monolingual = false); bool Initialized() const { return e_.size(); } @@ -166,7 +159,6 @@ class TRule { private: TRule(const WordID& src, const WordID& trg) : e_(1, trg), f_(1, src), lhs_(), arity_(), prev_i(), prev_j() {} - bool SanityCheck() const; }; inline size_t hash_value(const TRule& r) { diff --git a/training/dpmert/lo_test.cc b/training/dpmert/lo_test.cc index d89bcd99..b8776169 100644 --- a/training/dpmert/lo_test.cc +++ b/training/dpmert/lo_test.cc @@ -56,7 +56,7 @@ BOOST_AUTO_TEST_CASE(TestConvexHull) { } BOOST_AUTO_TEST_CASE(TestConvexHullInside) { - const string json = "{\"rules\":[1,\"[X] ||| a\",2,\"[X] ||| A [1]\",3,\"[X] ||| c\",4,\"[X] ||| C [1]\",5,\"[X] ||| [1] B [2]\",6,\"[X] ||| [1] b [2]\",7,\"[X] ||| X [1]\",8,\"[X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.8,1,-0.1],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[1,-1],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[2],\"feats\":[0,-0.2,1,-0.1],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[1,3],\"feats\":[0,-1.2,1,-0.2],\"rule\":5},{\"tail\":[1,3],\"feats\":[0,-0.5,1,-1.3],\"rule\":6}],\"node\":{\"in_edges\":[4,5]},\"edges\":[{\"tail\":[4],\"feats\":[0,-0.5,1,-0.8],\"rule\":7},{\"tail\":[4],\"feats\":[0,-0.7,1,-0.9],\"rule\":8}],\"node\":{\"in_edges\":[6,7]}}"; + const string json = "{\"rules\":[1,\"[X] ||| a ||| a\",2,\"[X] ||| A [X] ||| A [1]\",3,\"[X] ||| c ||| c\",4,\"[X] ||| C [X] ||| C [1]\",5,\"[X] ||| [X] B [X] ||| [1] B [2]\",6,\"[X] ||| [X] b [X] ||| [1] b [2]\",7,\"[X] ||| X [X] ||| X [1]\",8,\"[X] ||| Z [X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.8,1,-0.1],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[1,-1],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[2],\"feats\":[0,-0.2,1,-0.1],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[1,3],\"feats\":[0,-1.2,1,-0.2],\"rule\":5},{\"tail\":[1,3],\"feats\":[0,-0.5,1,-1.3],\"rule\":6}],\"node\":{\"in_edges\":[4,5]},\"edges\":[{\"tail\":[4],\"feats\":[0,-0.5,1,-0.8],\"rule\":7},{\"tail\":[4],\"feats\":[0,-0.7,1,-0.9],\"rule\":8}],\"node\":{\"in_edges\":[6,7]}}"; Hypergraph hg; istringstream instr(json); HypergraphIO::ReadFromJSON(&instr, &hg); |