summary refs log tree commit diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2015-01-14 19:14:06 +0100
committer Patrick Simianer <p@simianer.de> 2015-01-14 19:14:06 +0100
commit 02f4420f4d63708ca912e0e095dcfa528bf848dc (patch)
tree e72b923b910594f3e0071b2603c6aa8b013f7202
parent a1eea9655deea9a563a2ee13c82cd4545e182c7d (diff)
small changes
-rw-r--r-- src/fast_weaver.cc 2
-rw-r--r-- src/grammar.hh 10
-rw-r--r-- src/hypergraph.cc 2
-rw-r--r-- src/hypergraph.hh 1
-rw-r--r-- src/parse.hh 76
-rw-r--r-- src/test_parse.cc 7
6 files changed, 36 insertions, 62 deletions
diff --git a/src/fast_weaver.cc b/src/fast_weaver.cc
index 4854476..81740e5 100644
--- a/src/fast_weaver.cc
+++ b/src/fast_weaver.cc
@@ -9,10 +9,10 @@ main(int argc, char** argv)
G::Grammar g;
Hg::io::read(hg, g.rules, y, argv[1]);
//Hg::io::manual(hg, g.rules);
- clock_t begin = clock();
Hg::Path p;
Hg::viterbi_path(hg, p);
vector<string> s;
+ clock_t begin = clock();
Hg::derive(p, p.back()->head, s);
for (auto it: s)
cout << it << " ";
diff --git a/src/grammar.hh b/src/grammar.hh
index c489ec5..51a72fa 100644
--- a/src/grammar.hh
+++ b/src/grammar.hh
@@ -52,7 +52,6 @@ struct NT : public Item {
istringstream ss(t);
if (ss >> index_) { // [i]
symbol_ = "";
- index_ = stoi(s);
return;
} else {
ss.clear();
@@ -111,7 +110,7 @@ struct T : public Item {
virtual ostream&
escaped(ostream& os) const
{
- os << util::json_escape(symbol_);
+ return os << util::json_escape(symbol_);
}
};
@@ -156,6 +155,7 @@ struct Rule {
Sv::SparseVector<string, score_t>* f;
map<size_t, size_t> order;
string as_str_;
+ bool is_glue;
Rule() {}
@@ -201,6 +201,7 @@ Sv::SparseVector<string, score_t>* f;
}
if (j == 4) break;
}
+ r->is_glue = false;
}
ostream&
@@ -303,14 +304,13 @@ struct Grammar {
for (auto nt: nts) {
ostringstream oss_1;
oss_1 << "[S] ||| [" << nt << ",1] ||| [" << nt << ",1] ||| ";
- cout << oss_1.str() << endl;
Rule* r1 = new Rule(oss_1.str(), vocab);
+ r1->is_glue = true;
rules.push_back(r1); start_non_terminal.push_back(r1);
ostringstream oss_2;
oss_2 << "[S] ||| [S,1] [" << nt << ",2] ||| [S,1] [" << nt << ",2] ||| ";
- cout << oss_2.str() << endl;
Rule* r2 = new Rule(oss_2.str(), vocab);
- cout << *r2 << endl;
+ r2->is_glue = true;
rules.push_back(r2); start_non_terminal.push_back(r2);
}
}
diff --git a/src/hypergraph.cc b/src/hypergraph.cc
index 40bcc64..0a965d0 100644
--- a/src/hypergraph.cc
+++ b/src/hypergraph.cc
@@ -168,7 +168,7 @@ read(Hypergraph& hg, vector<G::Rule*>& rules, G::Vocabulary& vocab, const string
}
e->rule = rules[e->rule_id_];
} else {
- // ERROR
+ assert(false); // ERROR
}
i++;
}
diff --git a/src/hypergraph.hh b/src/hypergraph.hh
index 8e05e9f..d1217a5 100644
--- a/src/hypergraph.hh
+++ b/src/hypergraph.hh
@@ -1,6 +1,7 @@
#pragma once
#include <algorithm>
+#include <cassert>
#include <fstream>
#include <functional>
#include <iostream>
diff --git a/src/parse.hh b/src/parse.hh
index 0dd2fc0..6e7883a 100644
--- a/src/parse.hh
+++ b/src/parse.hh
@@ -72,6 +72,8 @@ struct ChartItem
rule->repr(os);
os << ">";
os << endl;
+
+ return os;
}
friend ostream&
@@ -128,8 +130,11 @@ struct Chart
for (map<Span, vector<ChartItem*> >::const_iterator it = chart.m_.cbegin();
it != chart.m_.cend(); it++) {
os << "(" << it->first.first << "," << it->first.second << ")" << endl;
- for (auto jt: it->second)
- jt->repr(os); os << endl;
+ size_t j = 0;
+ for (auto jt: it->second) {
+ os << j << " "; jt->repr(os);
+ j++;
+ }
}
return os;
@@ -139,28 +144,20 @@ struct Chart
bool
scan(ChartItem* item, vector<symbol_t> in, size_t limit, Chart& passive)
{
- //cout << "S1" << endl;
while (item->dot < item->rule->rhs.size() &&
item->rule->rhs[item->dot]->type() == G::TERMINAL) {
- //cout << "S2" << endl;
if (item->span.second == limit) return false;
- //cout << "S3" << endl;
if (item->rule->rhs[item->dot]->symbol() == in[item->span.second]) {
- //cout << "S4" << endl;
item->dot++;
- //cout << "S5" << endl;
item->span.second++;
- //cout << "S6" << endl;
} else {
- //cout << "S7" << endl;
return false;
}
}
- //cout << "S8" << endl;
+
return true;
}
-
void
init(vector<symbol_t> const& in, size_t n, Chart& active, Chart& passive, G::Grammar const& g)
{
@@ -168,10 +165,8 @@ init(vector<symbol_t> const& in, size_t n, Chart& active, Chart& passive, G::Gr
size_t j = 0;
for (auto it: in) {
if (it == rule->rhs.front()->symbol()) {
- cout << it << " " << j << j+rule->rhs.size() << endl;
Span span(j, j+rule->rhs.size());
passive.add(new ChartItem(rule, span, rule->rhs.size()), span);
- cout << "new passive item [1] " << *passive.at(span).back() << endl;
}
j++;
}
@@ -179,12 +174,14 @@ init(vector<symbol_t> const& in, size_t n, Chart& active, Chart& passive, G::Gr
}
void
-parse(vector<symbol_t> const& in, size_t n, Chart& active, Chart& passive, G::Grammar const& g)
+parse(vector<symbol_t> const& in, size_t n, Chart& active, Chart& passive, G::Grammar const& g, size_t max_span_size)
{
vector<Span> spans;
Parse::visit(spans, 1, 0, n);
for (auto span: spans) {
+ size_t span_size = span.second-span.first;
+
cout << "Span (" << span.first << "," << span.second << ")" << endl;
for (auto it: g.start_terminal) {
@@ -192,109 +189,82 @@ parse(vector<symbol_t> const& in, size_t n, Chart& active, Chart& passive, G::Gr
if (scan(item, in, span.second, passive)
&& span.first + item->rule->rhs.size() <= span.second) {
active.add(item, span);
- cout << "new active item [1] " << *active.at(span).back();
}
}
for (auto it: g.start_non_terminal) {
- if (it->rhs.size() > span.second-span.first) continue;
+ if (it->rhs.size() > span.second-span.first
+ || (span_size>max_span_size)) continue;
active.add(new ChartItem(it, Span(span.first,span.first), 0), span);
- cout << "new active item [2] " << *active.at(span).back();
}
set<symbol_t> new_symbols;
vector<ChartItem*> remaining_items;
while (true) {
- cout << "active size at (" << span.first << "," << span.second << ") " << active.at(span).size() << endl;
- cout << "passive size at (" << span.first << "," << span.second << ") " << passive.at(span).size() << endl;
if (active.at(span).empty()) break;
ChartItem* item = active.at(span).back();
- cout << "current item " << *item;
+ active.at(span).pop_back();
+ while (item->rule->rhs[item->dot]->type() == G::NON_TERMINAL) {
+ symbol_t cur_sym = item->rule->rhs[item->dot]->symbol();
+ }
+ }
+
+ /*while (true) {
+ if (active.at(span).empty()) break;
+ ChartItem* item = active.at(span).back();
active.at(span).pop_back();
bool advanced = false;
vector<Span> spans2;
Parse::visit(spans2, 1, span.first, span.second, 1);
for (auto span2: spans2) {
- cout << "A" << endl;
if (item->rule->rhs[item->dot]->type() == G::NON_TERMINAL) {
- cout << "B" << endl;
if (passive.has_at(item->rule->rhs[item->dot]->symbol(), span2)) {
- cout << "C" << endl;
if (span2.first == item->span.second) {
- cout << "D" << endl;
ChartItem* new_item = new ChartItem(*item);
- cout << "D1" << endl;
new_item->span.second = span2.second;
- cout << "D2" << endl;
new_item->dot++;
- cout << "D3" << endl;
new_item->tails_spans.push_back(span2);
- cout << "D4" << endl;
if (scan(new_item, in, span.second, passive)) {
- cout << "E" << endl;
if (new_item->dot == new_item->rule->rhs.size()) {
- cout << "F" << endl;
if (new_item->span.first == span.first && new_item->span.second == span.second) {
- cout << "G" << endl;
- cout << "H" << endl;
new_symbols.insert(new_item->rule->lhs->symbol());
passive.add(new_item, span);
- cout << "new passive item [2] " << *new_item;
advanced = true;
}
} else {
if (new_item->span.second+(new_item->rule->rhs.size()-new_item->dot) <= span.second) {
active.add(new_item, span);
- cout << "new active item [3] " << *new_item;
}
}
}
- cout << "I" << endl;
}
}
}
}
- cout << "J" << endl;
if (!advanced) {
- cout << "K" << endl;
remaining_items.push_back(item);
}
}
for (auto new_sym: new_symbols) {
- cout << "new sym " << new_sym << endl;
for (auto rem_item: remaining_items) {
if (rem_item->dot != 0 ||
rem_item->rule->rhs[rem_item->dot]->type() != G::NON_TERMINAL) {
continue;
- cout << "K1" << endl;
}
- cout << "K2" << endl;
if (rem_item->rule->rhs[rem_item->dot]->symbol() == new_sym) {
- cout << "K3" << endl;
ChartItem* new_item = new ChartItem(*rem_item);
- cout << "K31" << endl;
- //new_item->tails_spans[new_item->dot-1] = span;
new_item->tails_spans.push_back(span);
new_item->dot++;
- cout << "K32" << endl;
if (new_item->dot == new_item->rule->rhs.size()) {
- cout << "K4" << endl;
new_symbols.insert(new_item->rule->lhs->symbol());
passive.add(new_item, span);
}
}
}
+ }*/
}
-
- cout << "L" << endl;
- cout << "-------------------" << endl;
- cout << endl;
- }
-
- //cout << "ACTIVE" << endl << active << endl;
- cout << "PASSIVE" << endl << passive << endl;
}
} //
diff --git a/src/test_parse.cc b/src/test_parse.cc
index 2d51d44..01dc15e 100644
--- a/src/test_parse.cc
+++ b/src/test_parse.cc
@@ -5,7 +5,8 @@ int main(int argc, char** argv)
//string in("ich sah ein kleines haus");
//string in("europa bildet den ersten oder zweiten markt für die zehn am häufigsten von indien exportierten produkte , erklärte der europäische kommissar weiter . die asiatischen und europäischen giganten tauschen jährlich güter im wert von 47 milliarden euro und dienstleistungen im wert von 10 milliarden euro aus , hatte diese woche daniéle smadja , vorsitzende der abordnung der europäischen kommission in neu delhi , erklärt , und bedauert , dass der gegenseitige handel sein potential noch nicht ausgeschöpft hat . die eu und indien treffen sich am freitag zu ihrem achten diplomatischen in neu delhi , bei dem premierminister manmohan singh und der präsident der europäischen kommission josé manuel durao barrosso anwesend sein werden .");
//string in("aber schon bald nach seinem eintritt kam der erste große erfolg .");
- string in("lebensmittel schuld an europäischer inflation");
+ string in("offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .");
+ //string in("lebensmittel schuld an europäischer inflation");
vector<symbol_t> tok = util::tokenize(in);
size_t n = tok.size();
G::Vocabulary v;
@@ -13,7 +14,9 @@ int main(int argc, char** argv)
g.add_glue(v);
Parse::Chart active(n);
Parse::Chart passive(n);
+ size_t max_span_size = 15;
init(tok, n, active, passive, g);
- parse(tok, n, active, passive, g);
+ parse(tok, n, active, passive, g, max_span_size);
+ cout << "PASSIVE" << endl << passive << endl;
}