From f65e428784cfd2264f3fdfddd574c37acd38f54f Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Sat, 27 Sep 2014 15:17:43 +0100
Subject: cosmetic changes, bump to 1.1
---
lib/zipf.rb | 3 +-
lib/zipf/SparseVector.rb | 1 -
lib/zipf/bleu.rb | 2 -
lib/zipf/dag.rb | 2 -
lib/zipf/fileutil.rb | 1 -
lib/zipf/grammar.rb | 123 ---------------------------------
lib/zipf/hg.rb | 173 -----------------------------------------------
lib/zipf/hypergraph.rb | 170 ++++++++++++++++++++++++++++++++++++++++++++++
lib/zipf/misc.rb | 1 -
lib/zipf/tfidf.rb | 1 -
10 files changed, 171 insertions(+), 306 deletions(-)
delete mode 100644 lib/zipf/grammar.rb
delete mode 100644 lib/zipf/hg.rb
create mode 100644 lib/zipf/hypergraph.rb
(limited to 'lib')
diff --git a/lib/zipf.rb b/lib/zipf.rb
index 2f59ccc..681e2cd 100755
--- a/lib/zipf.rb
+++ b/lib/zipf.rb
@@ -9,8 +9,7 @@ require 'zipf/dag'
require 'zipf/semirings'
require 'zipf/bleu'
require 'zipf/misc'
-require 'zipf/hg'
-require 'zipf/grammar'
+require 'zipf/hypergraph'
STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'
diff --git a/lib/zipf/SparseVector.rb b/lib/zipf/SparseVector.rb
index 3f950c4..e9af799 100644
--- a/lib/zipf/SparseVector.rb
+++ b/lib/zipf/SparseVector.rb
@@ -1,6 +1,5 @@
require 'json'
-
class SparseVector < Hash
def initialize arg=nil
diff --git a/lib/zipf/bleu.rb b/lib/zipf/bleu.rb
index 69de00b..c07315e 100644
--- a/lib/zipf/bleu.rb
+++ b/lib/zipf/bleu.rb
@@ -1,6 +1,5 @@
module BLEU
-
class BLEU::NgramCounts
attr_accessor :sum, :clipped, :ref_len, :hyp_len, :n
@@ -125,6 +124,5 @@ def BLEU::per_sentence_bleu hypothesis, reference, n=4, smooth=0.0
return Math.exp logbleu
end
-
end #module
diff --git a/lib/zipf/dag.rb b/lib/zipf/dag.rb
index 45ede20..a0edde7 100644
--- a/lib/zipf/dag.rb
+++ b/lib/zipf/dag.rb
@@ -2,7 +2,6 @@ module DAG
require 'json'
-
class DAG::Node
attr_accessor :label, :outgoing, :incoming, :score, :mark
@@ -200,6 +199,5 @@ def DAG::read_graph_from_json fn, semiring=RealSemiring.new
return graph, nodes_by_label
end
-
end #module
diff --git a/lib/zipf/fileutil.rb b/lib/zipf/fileutil.rb
index eb69136..ac701da 100644
--- a/lib/zipf/fileutil.rb
+++ b/lib/zipf/fileutil.rb
@@ -1,6 +1,5 @@
require 'zlib'
-
class ReadFile
def initialize fn, encoding='utf-8'
diff --git a/lib/zipf/grammar.rb b/lib/zipf/grammar.rb
deleted file mode 100644
index 568b9fc..0000000
--- a/lib/zipf/grammar.rb
+++ /dev/null
@@ -1,123 +0,0 @@
-module Grammar
-
-
-class T
- attr_accessor :word
-
- def initialize word
- @word = word
- end
-
- def to_s
- "T<#{@word}>"
- end
-end
-
-class NT
- attr_accessor :symbol, :index, :span
-
- def initialize symbol, index=0
- @symbol = symbol
- @index = index
- @span = Span.new
- end
-
- def to_s
- "NT(#{@span.left},#{@span.right})<#{@symbol},#{@index}>"
- end
-end
-
-class Rule
- attr_accessor :lhs, :rhs, :e
-
- def initialize lhs=nil, rhs=[], e=''
- @lhs = lhs
- @rhs = rhs
- @e = e
- end
-
- def to_s
- "#{lhs} -> #{rhs.map{ |i| i.to_s }.join ' '} [arity=#{arity}] ||| #{@e}"
- end
-
- def arity
- rhs.select { |i| i.class == NT }.size
- end
-
- def from_s s
- _ = splitpipe s, 3
- @lhs = NT.new _[0].strip.gsub!(/(\[|\])/, "")
- _[1].split.each { |x|
- x.strip!
- if x[0]=='[' && x[x.size-1] == ']'
- @rhs << NT.new(x.gsub!(/(\[|\])/, "").split(',')[0])
- else
- @rhs << T.new(x)
- end
- }
- @e = _[2]
- end
-
- def self.from_s s
- r = self.new
- r.from_s s
- return r
- end
-end
-
-class Span
- attr_accessor :left, :right
-
- def initialize left=nil, right=nil
- @left = left
- @right = right
- end
-end
-
-class Grammar
- attr_accessor :rules, :startn, :startt, :flat
-
- def initialize fn
- @rules = []; @startn = []; @startt = [] ;@flat = []
- ReadFile.readlines_strip(fn).each_with_index { |s,i|
- STDERR.write '.'; STDERR.write " #{i+1}\n" if (i+1)%80==0
- @rules << Rule.from_s(s)
- if @rules.last.rhs.first.class == NT
- @startn << @rules.last
- else
- if rules.last.arity == 0
- @flat << @rules.last
- else
- @startt << @rules.last
- end
- end
- }
- STDERR.write "\n"
- end
-
- def to_s
- s = ''
- @rules.each { |r| s += r.to_s+"\n" }
- return s
- end
-
- def add_glue_rules
- @rules.map { |r| r.lhs.symbol }.select { |s| s != 'S' }.uniq.each { |symbol|
- @rules << Rule.new(NT.new('S'), [NT.new(symbol)])
- @startn << @rules.last
- @rules << Rule.new(NT.new('S'), [NT.new('S'), NT.new('X')])
- @startn << @rules.last
- }
- end
-
- def add_pass_through_rules s
- s.each { |word|
- @rules << Rule.new(NT.new('X'), [T.new(word)])
- @flat << @rules.last
- }
- end
-end
-
-
-end #module
-
diff --git a/lib/zipf/hg.rb b/lib/zipf/hg.rb
deleted file mode 100644
index f86bf60..0000000
--- a/lib/zipf/hg.rb
+++ /dev/null
@@ -1,173 +0,0 @@
-#!/usr/bin/env ruby
-
-require_relative 'semirings'
-require 'json'
-
-
-module HG
-
-
-class HG::Node
- attr_accessor :label, :cat, :outgoing, :incoming, :score
-
- def initialize label=nil, cat=nil, outgoing=[], incoming=[], score=nil
- @label = label
- @cat = cat
- @outgoing = outgoing
- @incoming = incoming
- @score = nil
- end
-
- def to_s
- "Node"
- end
-end
-
-class HG::Hypergraph
- attr_accessor :nodes, :edges
-
- def initialize nodes=[], edges=[]
- @nodes = nodes
- @edges = edges
- end
-
- def arity
- @edges.map { |e| e.arity }.max
- end
-
- def to_s
- "Hypergraph"
- end
-end
-
-class HG::Hyperedge
- attr_accessor :head, :tails, :weight, :f, :mark, :rule, :left, :right
-
- def initialize head=nil, tails=[], weight=0.0, f={}
- @head = head
- @tails = tails
- @weight = weight
- @f = f
- @mark = 0
- end
-
- def arity
- return @tails.size
- end
-
- def marked?
- arity == @mark
- end
-
- def to_s
- "Hyperedge"
- end
-end
-
-def HG::topological_sort nodes
- sorted = []
- s = nodes.reject { |n| !n.incoming.empty? }
- while !s.empty?
- sorted << s.shift
- sorted.last.outgoing.each { |e|
- next if e.marked?
- e.mark += 1
- s << e.head if e.head.incoming.reject{ |f| f.mark==f.arity }.empty?
- }
- end
- return sorted
-end
-
-def HG::init nodes, semiring, root
- nodes.each { |n| n.score=semiring.null }
- root.score = semiring.one
-end
-
-def HG::viterbi hypergraph, root, semiring=ViterbiSemiring.new
- toposorted = topological_sort hypergraph.nodes
- init toposorted, semiring, root
- toposorted.each { |n|
- n.incoming.each { |e|
- s = semiring.one
- e.tails.each { |m|
- s = semiring.multiply.call(s, m.score)
- }
- n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight))
- }
- }
-end
-
-def HG::viterbi_path hypergraph, root, semiring=ViterbiSemiring.new
- toposorted = topological_sort hypergraph.nodes
- init toposorted, semiring, root
- best_path = []
- toposorted.each { |n|
- best_edge = nil
- n.incoming.each { |e|
- s = semiring.one
- e.tails.each { |m|
- s = semiring.multiply.call(s, m.score)
- }
- if n.score < semiring.multiply.call(s, e.weight) # ViterbiSemiring add
- best_edge = e
- end
- n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight))
- }
- best_path << best_edge
- }
- return best_path, toposorted.last.score
-end
-
-def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=false
- nodes = []
- edges = []
- nodes_by_label = {}
- nodes_by_index = []
- h = JSON.parse File.new(fn).read
- w = SparseVector.from_h h['weights']
- h['nodes'].each { |i|
- n = Node.new i['label'], i['cat']
- nodes << n
- nodes_by_label[n.label] = n
- nodes_by_index << n
- }
- h['edges'].each { |i|
- e = Hyperedge.new(nodes_by_label[i['head']], \
- i['tails'].map{|j| nodes_by_label[j]}.to_a, \
- semiring.convert.call(i['weight'].to_f), \
- {})
- e.f = SparseVector.from_h i['f']
- if log_weights
- e.weight = Math.exp(w.dot(e.f))
- else
- e.weight = w.dot(e.f)
- end
- e.tails.each { |m|
- m.outgoing << e
- }
- e.head.incoming << e
- edges << e
- }
- return Hypergraph.new(nodes, edges), nodes_by_label, nodes_by_index
-end
-
-def HG::all_paths hypergraph, root, semiring=ViterbiSemiring.new
- toposorted = topological_sort hypergraph.nodes
- paths = [[]]
- toposorted.each { |n|
- next if n.incoming.empty?
- new_paths = []
- while !paths.empty?
- p = paths.pop
- n.incoming.each { |e|
- new_paths << p+[e]
- }
- end
- paths = new_paths
- }
- return paths
-end
-
-
-end #module
-
diff --git a/lib/zipf/hypergraph.rb b/lib/zipf/hypergraph.rb
new file mode 100644
index 0000000..6666062
--- /dev/null
+++ b/lib/zipf/hypergraph.rb
@@ -0,0 +1,170 @@
+#!/usr/bin/env ruby
+
+require_relative 'semirings'
+require 'json'
+
+module HG
+
+class HG::Node
+ attr_accessor :label, :cat, :outgoing, :incoming, :score
+
+ def initialize label=nil, cat=nil, outgoing=[], incoming=[], score=nil
+ @label = label
+ @cat = cat
+ @outgoing = outgoing
+ @incoming = incoming
+ @score = nil
+ end
+
+ def to_s
+ "Node"
+ end
+end
+
+class HG::Hypergraph
+ attr_accessor :nodes, :edges
+
+ def initialize nodes=[], edges=[]
+ @nodes = nodes
+ @edges = edges
+ end
+
+ def arity
+ @edges.map { |e| e.arity }.max
+ end
+
+ def to_s
+ "Hypergraph"
+ end
+end
+
+class HG::Hyperedge
+ attr_accessor :head, :tails, :weight, :f, :mark, :rule, :left, :right
+
+ def initialize head=nil, tails=[], weight=0.0, f={}
+ @head = head
+ @tails = tails
+ @weight = weight
+ @f = f
+ @mark = 0
+ end
+
+ def arity
+ return @tails.size
+ end
+
+ def marked?
+ arity == @mark
+ end
+
+ def to_s
+ "Hyperedge"
+ end
+end
+
+def HG::topological_sort nodes
+ sorted = []
+ s = nodes.reject { |n| !n.incoming.empty? }
+ while !s.empty?
+ sorted << s.shift
+ sorted.last.outgoing.each { |e|
+ next if e.marked?
+ e.mark += 1
+ s << e.head if e.head.incoming.reject{ |f| f.mark==f.arity }.empty?
+ }
+ end
+ return sorted
+end
+
+def HG::init nodes, semiring, root
+ nodes.each { |n| n.score=semiring.null }
+ root.score = semiring.one
+end
+
+def HG::viterbi hypergraph, root, semiring=ViterbiSemiring.new
+ toposorted = topological_sort hypergraph.nodes
+ init toposorted, semiring, root
+ toposorted.each { |n|
+ n.incoming.each { |e|
+ s = semiring.one
+ e.tails.each { |m|
+ s = semiring.multiply.call(s, m.score)
+ }
+ n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight))
+ }
+ }
+end
+
+def HG::viterbi_path hypergraph, root, semiring=ViterbiSemiring.new
+ toposorted = topological_sort hypergraph.nodes
+ init toposorted, semiring, root
+ best_path = []
+ toposorted.each { |n|
+ best_edge = nil
+ n.incoming.each { |e|
+ s = semiring.one
+ e.tails.each { |m|
+ s = semiring.multiply.call(s, m.score)
+ }
+ if n.score < semiring.multiply.call(s, e.weight) # ViterbiSemiring add
+ best_edge = e
+ end
+ n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight))
+ }
+ best_path << best_edge
+ }
+ return best_path, toposorted.last.score
+end
+
+def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=false
+ nodes = []
+ edges = []
+ nodes_by_label = {}
+ nodes_by_index = []
+ h = JSON.parse File.new(fn).read
+ w = SparseVector.from_h h['weights']
+ h['nodes'].each { |i|
+ n = Node.new i['label'], i['cat']
+ nodes << n
+ nodes_by_label[n.label] = n
+ nodes_by_index << n
+ }
+ h['edges'].each { |i|
+ e = Hyperedge.new(nodes_by_label[i['head']], \
+ i['tails'].map{|j| nodes_by_label[j]}.to_a, \
+ semiring.convert.call(i['weight'].to_f), \
+ {})
+ e.f = SparseVector.from_h i['f']
+ if log_weights
+ e.weight = Math.exp(w.dot(e.f))
+ else
+ e.weight = w.dot(e.f)
+ end
+ e.tails.each { |m|
+ m.outgoing << e
+ }
+ e.head.incoming << e
+ edges << e
+ }
+ return Hypergraph.new(nodes, edges), nodes_by_label, nodes_by_index
+end
+
+def HG::all_paths hypergraph, root, semiring=ViterbiSemiring.new
+ toposorted = topological_sort hypergraph.nodes
+ paths = [[]]
+ toposorted.each { |n|
+ next if n.incoming.empty?
+ new_paths = []
+ while !paths.empty?
+ p = paths.pop
+ n.incoming.each { |e|
+ new_paths << p+[e]
+ }
+ end
+ paths = new_paths
+ }
+ return paths
+end
+
+end #module
+
diff --git a/lib/zipf/misc.rb b/lib/zipf/misc.rb
index 0319a5f..4d29a06 100644
--- a/lib/zipf/misc.rb
+++ b/lib/zipf/misc.rb
@@ -1,6 +1,5 @@
require 'timeout'
-
class Array
def max_index
self.index(self.max)
diff --git a/lib/zipf/tfidf.rb b/lib/zipf/tfidf.rb
index 13a40a3..7fb92c9 100644
--- a/lib/zipf/tfidf.rb
+++ b/lib/zipf/tfidf.rb
@@ -1,6 +1,5 @@
module TFIDF
-
# returns key='raw frequency' for an
# array-like object
def TFIDF::tf array, stopwords=[]
--
cgit v1.2.3