From f65e428784cfd2264f3fdfddd574c37acd38f54f Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sat, 27 Sep 2014 15:17:43 +0100 Subject: cosmetic changes, bump to 1.1 --- lib/zipf.rb | 3 +- lib/zipf/SparseVector.rb | 1 - lib/zipf/bleu.rb | 2 - lib/zipf/dag.rb | 2 - lib/zipf/fileutil.rb | 1 - lib/zipf/grammar.rb | 123 --------------------------------- lib/zipf/hg.rb | 173 ----------------------------------------------- lib/zipf/hypergraph.rb | 170 ++++++++++++++++++++++++++++++++++++++++++++++ lib/zipf/misc.rb | 1 - lib/zipf/tfidf.rb | 1 - 10 files changed, 171 insertions(+), 306 deletions(-) delete mode 100644 lib/zipf/grammar.rb delete mode 100644 lib/zipf/hg.rb create mode 100644 lib/zipf/hypergraph.rb (limited to 'lib') diff --git a/lib/zipf.rb b/lib/zipf.rb index 2f59ccc..681e2cd 100755 --- a/lib/zipf.rb +++ b/lib/zipf.rb @@ -9,8 +9,7 @@ require 'zipf/dag' require 'zipf/semirings' require 'zipf/bleu' require 'zipf/misc' -require 'zipf/hg' -require 'zipf/grammar' +require 'zipf/hypergraph' STDIN.set_encoding 'utf-8' STDOUT.set_encoding 'utf-8' diff --git a/lib/zipf/SparseVector.rb b/lib/zipf/SparseVector.rb index 3f950c4..e9af799 100644 --- a/lib/zipf/SparseVector.rb +++ b/lib/zipf/SparseVector.rb @@ -1,6 +1,5 @@ require 'json' - class SparseVector < Hash def initialize arg=nil diff --git a/lib/zipf/bleu.rb b/lib/zipf/bleu.rb index 69de00b..c07315e 100644 --- a/lib/zipf/bleu.rb +++ b/lib/zipf/bleu.rb @@ -1,6 +1,5 @@ module BLEU - class BLEU::NgramCounts attr_accessor :sum, :clipped, :ref_len, :hyp_len, :n @@ -125,6 +124,5 @@ def BLEU::per_sentence_bleu hypothesis, reference, n=4, smooth=0.0 return Math.exp logbleu end - end #module diff --git a/lib/zipf/dag.rb b/lib/zipf/dag.rb index 45ede20..a0edde7 100644 --- a/lib/zipf/dag.rb +++ b/lib/zipf/dag.rb @@ -2,7 +2,6 @@ module DAG require 'json' - class DAG::Node attr_accessor :label, :outgoing, :incoming, :score, :mark @@ -200,6 +199,5 @@ def DAG::read_graph_from_json fn, semiring=RealSemiring.new return graph, nodes_by_label end - end #module diff --git a/lib/zipf/fileutil.rb b/lib/zipf/fileutil.rb index eb69136..ac701da 100644 --- a/lib/zipf/fileutil.rb +++ b/lib/zipf/fileutil.rb @@ -1,6 +1,5 @@ require 'zlib' - class ReadFile def initialize fn, encoding='utf-8' diff --git a/lib/zipf/grammar.rb b/lib/zipf/grammar.rb deleted file mode 100644 index 568b9fc..0000000 --- a/lib/zipf/grammar.rb +++ /dev/null @@ -1,123 +0,0 @@ -module Grammar - - -class T - attr_accessor :word - - def initialize word - @word = word - end - - def to_s - "T<#{@word}>" - end -end - -class NT - attr_accessor :symbol, :index, :span - - def initialize symbol, index=0 - @symbol = symbol - @index = index - @span = Span.new - end - - def to_s - "NT(#{@span.left},#{@span.right})<#{@symbol},#{@index}>" - end -end - -class Rule - attr_accessor :lhs, :rhs, :e - - def initialize lhs=nil, rhs=[], e='' - @lhs = lhs - @rhs = rhs - @e = e - end - - def to_s - "#{lhs} -> #{rhs.map{ |i| i.to_s }.join ' '} [arity=#{arity}] ||| #{@e}" - end - - def arity - rhs.select { |i| i.class == NT }.size - end - - def from_s s - _ = splitpipe s, 3 - @lhs = NT.new _[0].strip.gsub!(/(\[|\])/, "") - _[1].split.each { |x| - x.strip! - if x[0]=='[' && x[x.size-1] == ']' - @rhs << NT.new(x.gsub!(/(\[|\])/, "").split(',')[0]) - else - @rhs << T.new(x) - end - } - @e = _[2] - end - - def self.from_s s - r = self.new - r.from_s s - return r - end -end - -class Span - attr_accessor :left, :right - - def initialize left=nil, right=nil - @left = left - @right = right - end -end - -class Grammar - attr_accessor :rules, :startn, :startt, :flat - - def initialize fn - @rules = []; @startn = []; @startt = [] ;@flat = [] - ReadFile.readlines_strip(fn).each_with_index { |s,i| - STDERR.write '.'; STDERR.write " #{i+1}\n" if (i+1)%80==0 - @rules << Rule.from_s(s) - if @rules.last.rhs.first.class == NT - @startn << @rules.last - else - if rules.last.arity == 0 - @flat << @rules.last - else - @startt << @rules.last - end - end - } - STDERR.write "\n" - end - - def to_s - s = '' - @rules.each { |r| s += r.to_s+"\n" } - return s - end - - def add_glue_rules - @rules.map { |r| r.lhs.symbol }.select { |s| s != 'S' }.uniq.each { |symbol| - @rules << Rule.new(NT.new('S'), [NT.new(symbol)]) - @startn << @rules.last - @rules << Rule.new(NT.new('S'), [NT.new('S'), NT.new('X')]) - @startn << @rules.last - } - end - - def add_pass_through_rules s - s.each { |word| - @rules << Rule.new(NT.new('X'), [T.new(word)]) - @flat << @rules.last - } - end -end - - -end #module - diff --git a/lib/zipf/hg.rb b/lib/zipf/hg.rb deleted file mode 100644 index f86bf60..0000000 --- a/lib/zipf/hg.rb +++ /dev/null @@ -1,173 +0,0 @@ -#!/usr/bin/env ruby - -require_relative 'semirings' -require 'json' - - -module HG - - -class HG::Node - attr_accessor :label, :cat, :outgoing, :incoming, :score - - def initialize label=nil, cat=nil, outgoing=[], incoming=[], score=nil - @label = label - @cat = cat - @outgoing = outgoing - @incoming = incoming - @score = nil - end - - def to_s - "Node" - end -end - -class HG::Hypergraph - attr_accessor :nodes, :edges - - def initialize nodes=[], edges=[] - @nodes = nodes - @edges = edges - end - - def arity - @edges.map { |e| e.arity }.max - end - - def to_s - "Hypergraph" - end -end - -class HG::Hyperedge - attr_accessor :head, :tails, :weight, :f, :mark, :rule, :left, :right - - def initialize head=nil, tails=[], weight=0.0, f={} - @head = head - @tails = tails - @weight = weight - @f = f - @mark = 0 - end - - def arity - return @tails.size - end - - def marked? - arity == @mark - end - - def to_s - "Hyperedge" - end -end - -def HG::topological_sort nodes - sorted = [] - s = nodes.reject { |n| !n.incoming.empty? } - while !s.empty? - sorted << s.shift - sorted.last.outgoing.each { |e| - next if e.marked? - e.mark += 1 - s << e.head if e.head.incoming.reject{ |f| f.mark==f.arity }.empty? - } - end - return sorted -end - -def HG::init nodes, semiring, root - nodes.each { |n| n.score=semiring.null } - root.score = semiring.one -end - -def HG::viterbi hypergraph, root, semiring=ViterbiSemiring.new - toposorted = topological_sort hypergraph.nodes - init toposorted, semiring, root - toposorted.each { |n| - n.incoming.each { |e| - s = semiring.one - e.tails.each { |m| - s = semiring.multiply.call(s, m.score) - } - n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight)) - } - } -end - -def HG::viterbi_path hypergraph, root, semiring=ViterbiSemiring.new - toposorted = topological_sort hypergraph.nodes - init toposorted, semiring, root - best_path = [] - toposorted.each { |n| - best_edge = nil - n.incoming.each { |e| - s = semiring.one - e.tails.each { |m| - s = semiring.multiply.call(s, m.score) - } - if n.score < semiring.multiply.call(s, e.weight) # ViterbiSemiring add - best_edge = e - end - n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight)) - } - best_path << best_edge - } - return best_path, toposorted.last.score -end - -def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=false - nodes = [] - edges = [] - nodes_by_label = {} - nodes_by_index = [] - h = JSON.parse File.new(fn).read - w = SparseVector.from_h h['weights'] - h['nodes'].each { |i| - n = Node.new i['label'], i['cat'] - nodes << n - nodes_by_label[n.label] = n - nodes_by_index << n - } - h['edges'].each { |i| - e = Hyperedge.new(nodes_by_label[i['head']], \ - i['tails'].map{|j| nodes_by_label[j]}.to_a, \ - semiring.convert.call(i['weight'].to_f), \ - {}) - e.f = SparseVector.from_h i['f'] - if log_weights - e.weight = Math.exp(w.dot(e.f)) - else - e.weight = w.dot(e.f) - end - e.tails.each { |m| - m.outgoing << e - } - e.head.incoming << e - edges << e - } - return Hypergraph.new(nodes, edges), nodes_by_label, nodes_by_index -end - -def HG::all_paths hypergraph, root, semiring=ViterbiSemiring.new - toposorted = topological_sort hypergraph.nodes - paths = [[]] - toposorted.each { |n| - next if n.incoming.empty? - new_paths = [] - while !paths.empty? - p = paths.pop - n.incoming.each { |e| - new_paths << p+[e] - } - end - paths = new_paths - } - return paths -end - - -end #module - diff --git a/lib/zipf/hypergraph.rb b/lib/zipf/hypergraph.rb new file mode 100644 index 0000000..6666062 --- /dev/null +++ b/lib/zipf/hypergraph.rb @@ -0,0 +1,170 @@ +#!/usr/bin/env ruby + +require_relative 'semirings' +require 'json' + +module HG + +class HG::Node + attr_accessor :label, :cat, :outgoing, :incoming, :score + + def initialize label=nil, cat=nil, outgoing=[], incoming=[], score=nil + @label = label + @cat = cat + @outgoing = outgoing + @incoming = incoming + @score = nil + end + + def to_s + "Node" + end +end + +class HG::Hypergraph + attr_accessor :nodes, :edges + + def initialize nodes=[], edges=[] + @nodes = nodes + @edges = edges + end + + def arity + @edges.map { |e| e.arity }.max + end + + def to_s + "Hypergraph" + end +end + +class HG::Hyperedge + attr_accessor :head, :tails, :weight, :f, :mark, :rule, :left, :right + + def initialize head=nil, tails=[], weight=0.0, f={} + @head = head + @tails = tails + @weight = weight + @f = f + @mark = 0 + end + + def arity + return @tails.size + end + + def marked? + arity == @mark + end + + def to_s + "Hyperedge" + end +end + +def HG::topological_sort nodes + sorted = [] + s = nodes.reject { |n| !n.incoming.empty? } + while !s.empty? + sorted << s.shift + sorted.last.outgoing.each { |e| + next if e.marked? + e.mark += 1 + s << e.head if e.head.incoming.reject{ |f| f.mark==f.arity }.empty? + } + end + return sorted +end + +def HG::init nodes, semiring, root + nodes.each { |n| n.score=semiring.null } + root.score = semiring.one +end + +def HG::viterbi hypergraph, root, semiring=ViterbiSemiring.new + toposorted = topological_sort hypergraph.nodes + init toposorted, semiring, root + toposorted.each { |n| + n.incoming.each { |e| + s = semiring.one + e.tails.each { |m| + s = semiring.multiply.call(s, m.score) + } + n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight)) + } + } +end + +def HG::viterbi_path hypergraph, root, semiring=ViterbiSemiring.new + toposorted = topological_sort hypergraph.nodes + init toposorted, semiring, root + best_path = [] + toposorted.each { |n| + best_edge = nil + n.incoming.each { |e| + s = semiring.one + e.tails.each { |m| + s = semiring.multiply.call(s, m.score) + } + if n.score < semiring.multiply.call(s, e.weight) # ViterbiSemiring add + best_edge = e + end + n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight)) + } + best_path << best_edge + } + return best_path, toposorted.last.score +end + +def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=false + nodes = [] + edges = [] + nodes_by_label = {} + nodes_by_index = [] + h = JSON.parse File.new(fn).read + w = SparseVector.from_h h['weights'] + h['nodes'].each { |i| + n = Node.new i['label'], i['cat'] + nodes << n + nodes_by_label[n.label] = n + nodes_by_index << n + } + h['edges'].each { |i| + e = Hyperedge.new(nodes_by_label[i['head']], \ + i['tails'].map{|j| nodes_by_label[j]}.to_a, \ + semiring.convert.call(i['weight'].to_f), \ + {}) + e.f = SparseVector.from_h i['f'] + if log_weights + e.weight = Math.exp(w.dot(e.f)) + else + e.weight = w.dot(e.f) + end + e.tails.each { |m| + m.outgoing << e + } + e.head.incoming << e + edges << e + } + return Hypergraph.new(nodes, edges), nodes_by_label, nodes_by_index +end + +def HG::all_paths hypergraph, root, semiring=ViterbiSemiring.new + toposorted = topological_sort hypergraph.nodes + paths = [[]] + toposorted.each { |n| + next if n.incoming.empty? + new_paths = [] + while !paths.empty? + p = paths.pop + n.incoming.each { |e| + new_paths << p+[e] + } + end + paths = new_paths + } + return paths +end + +end #module + diff --git a/lib/zipf/misc.rb b/lib/zipf/misc.rb index 0319a5f..4d29a06 100644 --- a/lib/zipf/misc.rb +++ b/lib/zipf/misc.rb @@ -1,6 +1,5 @@ require 'timeout' - class Array def max_index self.index(self.max) diff --git a/lib/zipf/tfidf.rb b/lib/zipf/tfidf.rb index 13a40a3..7fb92c9 100644 --- a/lib/zipf/tfidf.rb +++ b/lib/zipf/tfidf.rb @@ -1,6 +1,5 @@ module TFIDF - # returns key='raw frequency' for an # array-like object def TFIDF::tf array, stopwords=[] -- cgit v1.2.3