summaryrefslogtreecommitdiff
path: root/decoder/hg_remove_eps.cc
blob: 050c487618a98b30bffbda512977eb853be5b805 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#include "hg_remove_eps.h"

#include <cassert>

#include "trule.h"
#include "hg.h"

using namespace std;

namespace {
  TRulePtr kEPSRule;
  TRulePtr kUnaryRule;

  TRulePtr CreateUnaryRule(int lhs, int rhs) {
    if (!kUnaryRule) kUnaryRule.reset(new TRule("[X] ||| [X,1] ||| [X,1]"));
    TRule* r = new TRule(*kUnaryRule);
    assert(lhs < 0);
    assert(rhs < 0);
    r->lhs_ = lhs;
    r->f_[0] = rhs;
    return TRulePtr(r);
  }

  TRulePtr CreateEpsilonRule(int lhs, WordID eps) {
    if (!kEPSRule) kEPSRule.reset(new TRule("[X] ||| <eps> ||| <eps>"));
    TRule* r = new TRule(*kEPSRule);
    r->lhs_ = lhs;
    assert(lhs < 0);
    assert(eps > 0);
    r->e_[0] = eps;
    r->f_[0] = eps;
    return TRulePtr(r);
  }
}

void RemoveEpsilons(Hypergraph* g, WordID eps) {
  vector<bool> kill(g->edges_.size(), false);
  for (unsigned i = 0; i < g->edges_.size(); ++i) {
    const Hypergraph::Edge& edge = g->edges_[i];
    if (edge.tail_nodes_.empty() &&
        edge.rule_->f_.size() == 1 &&
        edge.rule_->f_[0] == eps) {
      kill[i] = true;
      if (!edge.feature_values_.empty()) {
        Hypergraph::Node& node = g->nodes_[edge.head_node_];
        if (node.in_edges_.size() != 1) {
          cerr << "[WARNING] <eps> edge with features going into non-empty node - can't promote\n";
          // this *probably* means that there are multiple derivations of the
          // same sequence via different paths through the input forest
          // this needs to be investigated and fixed
        } else {
          for (unsigned j = 0; j < node.out_edges_.size(); ++j)
            g->edges_[node.out_edges_[j]].feature_values_ += edge.feature_values_;
          // cerr << "PROMOTED " << edge.feature_values_ << endl;
	}
      }
    }
  }
  bool created_eps = false;
  g->PruneEdges(kill);
  for (unsigned i = 0; i < g->nodes_.size(); ++i) {
    const Hypergraph::Node& node = g->nodes_[i];
    if (node.in_edges_.empty()) {
      for (unsigned j = 0; j < node.out_edges_.size(); ++j) {
        Hypergraph::Edge& edge = g->edges_[node.out_edges_[j]];
        const int lhs = edge.rule_->lhs_;
        if (edge.rule_->Arity() == 2) {
          assert(edge.rule_->f_.size() == 2);
          assert(edge.rule_->e_.size() == 2);
          unsigned cur = node.id_;
          int t = -1;
          assert(edge.tail_nodes_.size() == 2);
          int rhs = 0;
          for (unsigned i = 0; i < 2u; ++i) if (edge.tail_nodes_[i] != cur) { t = edge.tail_nodes_[i]; rhs = edge.rule_->f_[i]; }
          assert(t != -1);
          edge.tail_nodes_.resize(1);
          edge.tail_nodes_[0] = t;
          edge.rule_ = CreateUnaryRule(lhs, rhs);
        } else {
          edge.rule_ = CreateEpsilonRule(lhs, eps);
          edge.tail_nodes_.clear();
          created_eps = true;
        }
      }
    }
  }
  vector<bool> k2(g->edges_.size(), false);
  g->PruneEdges(k2);
  if (created_eps) RemoveEpsilons(g, eps);
}