From 02f4420f4d63708ca912e0e095dcfa528bf848dc Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Wed, 14 Jan 2015 19:14:06 +0100 Subject: small changes --- src/fast_weaver.cc | 2 +- src/grammar.hh | 10 +++---- src/hypergraph.cc | 2 +- src/hypergraph.hh | 1 + src/parse.hh | 76 +++++++++++++++++------------------------------------- src/test_parse.cc | 7 +++-- 6 files changed, 36 insertions(+), 62 deletions(-) diff --git a/src/fast_weaver.cc b/src/fast_weaver.cc index 4854476..81740e5 100644 --- a/src/fast_weaver.cc +++ b/src/fast_weaver.cc @@ -9,10 +9,10 @@ main(int argc, char** argv) G::Grammar g; Hg::io::read(hg, g.rules, y, argv[1]); //Hg::io::manual(hg, g.rules); - clock_t begin = clock(); Hg::Path p; Hg::viterbi_path(hg, p); vector s; + clock_t begin = clock(); Hg::derive(p, p.back()->head, s); for (auto it: s) cout << it << " "; diff --git a/src/grammar.hh b/src/grammar.hh index c489ec5..51a72fa 100644 --- a/src/grammar.hh +++ b/src/grammar.hh @@ -52,7 +52,6 @@ struct NT : public Item { istringstream ss(t); if (ss >> index_) { // [i] symbol_ = ""; - index_ = stoi(s); return; } else { ss.clear(); @@ -111,7 +110,7 @@ struct T : public Item { virtual ostream& escaped(ostream& os) const { - os << util::json_escape(symbol_); + return os << util::json_escape(symbol_); } }; @@ -156,6 +155,7 @@ struct Rule { Sv::SparseVector* f; map order; string as_str_; + bool is_glue; Rule() {} @@ -201,6 +201,7 @@ Sv::SparseVector* f; } if (j == 4) break; } + r->is_glue = false; } ostream& @@ -303,14 +304,13 @@ struct Grammar { for (auto nt: nts) { ostringstream oss_1; oss_1 << "[S] ||| [" << nt << ",1] ||| [" << nt << ",1] ||| "; - cout << oss_1.str() << endl; Rule* r1 = new Rule(oss_1.str(), vocab); + r1->is_glue = true; rules.push_back(r1); start_non_terminal.push_back(r1); ostringstream oss_2; oss_2 << "[S] ||| [S,1] [" << nt << ",2] ||| [S,1] [" << nt << ",2] ||| "; - cout << oss_2.str() << endl; Rule* r2 = new Rule(oss_2.str(), vocab); - cout << *r2 << endl; + r2->is_glue = true; rules.push_back(r2); start_non_terminal.push_back(r2); } } diff --git a/src/hypergraph.cc b/src/hypergraph.cc index 40bcc64..0a965d0 100644 --- a/src/hypergraph.cc +++ b/src/hypergraph.cc @@ -168,7 +168,7 @@ read(Hypergraph& hg, vector& rules, G::Vocabulary& vocab, const string } e->rule = rules[e->rule_id_]; } else { - // ERROR + assert(false); // ERROR } i++; } diff --git a/src/hypergraph.hh b/src/hypergraph.hh index 8e05e9f..d1217a5 100644 --- a/src/hypergraph.hh +++ b/src/hypergraph.hh @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/parse.hh b/src/parse.hh index 0dd2fc0..6e7883a 100644 --- a/src/parse.hh +++ b/src/parse.hh @@ -72,6 +72,8 @@ struct ChartItem rule->repr(os); os << ">"; os << endl; + + return os; } friend ostream& @@ -128,8 +130,11 @@ struct Chart for (map >::const_iterator it = chart.m_.cbegin(); it != chart.m_.cend(); it++) { os << "(" << it->first.first << "," << it->first.second << ")" << endl; - for (auto jt: it->second) - jt->repr(os); os << endl; + size_t j = 0; + for (auto jt: it->second) { + os << j << " "; jt->repr(os); + j++; + } } return os; @@ -139,28 +144,20 @@ struct Chart bool scan(ChartItem* item, vector in, size_t limit, Chart& passive) { - //cout << "S1" << endl; while (item->dot < item->rule->rhs.size() && item->rule->rhs[item->dot]->type() == G::TERMINAL) { - //cout << "S2" << endl; if (item->span.second == limit) return false; - //cout << "S3" << endl; if (item->rule->rhs[item->dot]->symbol() == in[item->span.second]) { - //cout << "S4" << endl; item->dot++; - //cout << "S5" << endl; item->span.second++; - //cout << "S6" << endl; } else { - //cout << "S7" << endl; return false; } } - //cout << "S8" << endl; + return true; } - void init(vector const& in, size_t n, Chart& active, Chart& passive, G::Grammar const& g) { @@ -168,10 +165,8 @@ init(vector const& in, size_t n, Chart& active, Chart& passive, G::Gr size_t j = 0; for (auto it: in) { if (it == rule->rhs.front()->symbol()) { - cout << it << " " << j << j+rule->rhs.size() << endl; Span span(j, j+rule->rhs.size()); passive.add(new ChartItem(rule, span, rule->rhs.size()), span); - cout << "new passive item [1] " << *passive.at(span).back() << endl; } j++; } @@ -179,12 +174,14 @@ init(vector const& in, size_t n, Chart& active, Chart& passive, G::Gr } void -parse(vector const& in, size_t n, Chart& active, Chart& passive, G::Grammar const& g) +parse(vector const& in, size_t n, Chart& active, Chart& passive, G::Grammar const& g, size_t max_span_size) { vector spans; Parse::visit(spans, 1, 0, n); for (auto span: spans) { + size_t span_size = span.second-span.first; + cout << "Span (" << span.first << "," << span.second << ")" << endl; for (auto it: g.start_terminal) { @@ -192,109 +189,82 @@ parse(vector const& in, size_t n, Chart& active, Chart& passive, G::Gr if (scan(item, in, span.second, passive) && span.first + item->rule->rhs.size() <= span.second) { active.add(item, span); - cout << "new active item [1] " << *active.at(span).back(); } } for (auto it: g.start_non_terminal) { - if (it->rhs.size() > span.second-span.first) continue; + if (it->rhs.size() > span.second-span.first + || (span_size>max_span_size)) continue; active.add(new ChartItem(it, Span(span.first,span.first), 0), span); - cout << "new active item [2] " << *active.at(span).back(); } set new_symbols; vector remaining_items; while (true) { - cout << "active size at (" << span.first << "," << span.second << ") " << active.at(span).size() << endl; - cout << "passive size at (" << span.first << "," << span.second << ") " << passive.at(span).size() << endl; if (active.at(span).empty()) break; ChartItem* item = active.at(span).back(); - cout << "current item " << *item; + active.at(span).pop_back(); + while (item->rule->rhs[item->dot]->type() == G::NON_TERMINAL) { + symbol_t cur_sym = item->rule->rhs[item->dot]->symbol(); + } + } + + /*while (true) { + if (active.at(span).empty()) break; + ChartItem* item = active.at(span).back(); active.at(span).pop_back(); bool advanced = false; vector spans2; Parse::visit(spans2, 1, span.first, span.second, 1); for (auto span2: spans2) { - cout << "A" << endl; if (item->rule->rhs[item->dot]->type() == G::NON_TERMINAL) { - cout << "B" << endl; if (passive.has_at(item->rule->rhs[item->dot]->symbol(), span2)) { - cout << "C" << endl; if (span2.first == item->span.second) { - cout << "D" << endl; ChartItem* new_item = new ChartItem(*item); - cout << "D1" << endl; new_item->span.second = span2.second; - cout << "D2" << endl; new_item->dot++; - cout << "D3" << endl; new_item->tails_spans.push_back(span2); - cout << "D4" << endl; if (scan(new_item, in, span.second, passive)) { - cout << "E" << endl; if (new_item->dot == new_item->rule->rhs.size()) { - cout << "F" << endl; if (new_item->span.first == span.first && new_item->span.second == span.second) { - cout << "G" << endl; - cout << "H" << endl; new_symbols.insert(new_item->rule->lhs->symbol()); passive.add(new_item, span); - cout << "new passive item [2] " << *new_item; advanced = true; } } else { if (new_item->span.second+(new_item->rule->rhs.size()-new_item->dot) <= span.second) { active.add(new_item, span); - cout << "new active item [3] " << *new_item; } } } - cout << "I" << endl; } } } } - cout << "J" << endl; if (!advanced) { - cout << "K" << endl; remaining_items.push_back(item); } } for (auto new_sym: new_symbols) { - cout << "new sym " << new_sym << endl; for (auto rem_item: remaining_items) { if (rem_item->dot != 0 || rem_item->rule->rhs[rem_item->dot]->type() != G::NON_TERMINAL) { continue; - cout << "K1" << endl; } - cout << "K2" << endl; if (rem_item->rule->rhs[rem_item->dot]->symbol() == new_sym) { - cout << "K3" << endl; ChartItem* new_item = new ChartItem(*rem_item); - cout << "K31" << endl; - //new_item->tails_spans[new_item->dot-1] = span; new_item->tails_spans.push_back(span); new_item->dot++; - cout << "K32" << endl; if (new_item->dot == new_item->rule->rhs.size()) { - cout << "K4" << endl; new_symbols.insert(new_item->rule->lhs->symbol()); passive.add(new_item, span); } } } + }*/ } - - cout << "L" << endl; - cout << "-------------------" << endl; - cout << endl; - } - - //cout << "ACTIVE" << endl << active << endl; - cout << "PASSIVE" << endl << passive << endl; } } // diff --git a/src/test_parse.cc b/src/test_parse.cc index 2d51d44..01dc15e 100644 --- a/src/test_parse.cc +++ b/src/test_parse.cc @@ -5,7 +5,8 @@ int main(int argc, char** argv) //string in("ich sah ein kleines haus"); //string in("europa bildet den ersten oder zweiten markt für die zehn am häufigsten von indien exportierten produkte , erklärte der europäische kommissar weiter . die asiatischen und europäischen giganten tauschen jährlich güter im wert von 47 milliarden euro und dienstleistungen im wert von 10 milliarden euro aus , hatte diese woche daniéle smadja , vorsitzende der abordnung der europäischen kommission in neu delhi , erklärt , und bedauert , dass der gegenseitige handel sein potential noch nicht ausgeschöpft hat . die eu und indien treffen sich am freitag zu ihrem achten diplomatischen in neu delhi , bei dem premierminister manmohan singh und der präsident der europäischen kommission josé manuel durao barrosso anwesend sein werden ."); //string in("aber schon bald nach seinem eintritt kam der erste große erfolg ."); - string in("lebensmittel schuld an europäischer inflation"); + string in("offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg ."); + //string in("lebensmittel schuld an europäischer inflation"); vector tok = util::tokenize(in); size_t n = tok.size(); G::Vocabulary v; @@ -13,7 +14,9 @@ int main(int argc, char** argv) g.add_glue(v); Parse::Chart active(n); Parse::Chart passive(n); + size_t max_span_size = 15; init(tok, n, active, passive, g); - parse(tok, n, active, passive, g); + parse(tok, n, active, passive, g, max_span_size); + cout << "PASSIVE" << endl << passive << endl; } -- cgit v1.2.3