" and not in_p
+ in_p = true
+ collect = []
+ next
+ elsif line.downcase == "
" and in_p
+ if collect.size > 0
+ puts collect.join(" ").strip
+ end
+ in_p = false
+ next
+ elsif in_p
+ collect.push line
+ next
+ else
+ puts line
+ end
+end
+
diff --git a/gigaword_collapse_tags b/gigaword_collapse_tags
deleted file mode 100755
index cbaf7d7..0000000
--- a/gigaword_collapse_tags
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/usr/bin/env ruby
-
-# works with gigaword en v5
-
-STDIN.set_encoding 'utf-8'
-STDOUT.set_encoding 'utf-8'
-
-in_p = false
-in_dateline = false
-collect = []
-
-while line = STDIN.gets
- line.strip!
- if line.downcase == ""
- in_dateline = true
- next
- elsif line.downcase == ""
- in_dateline = false
- next
- elsif in_dateline
- next
- elsif line.downcase == "
" and not in_p
- in_p = true
- collect = []
- next
- elsif line.downcase == "
" and in_p
- if collect.size > 0
- puts collect.join(" ").strip
- end
- in_p = false
- next
- elsif in_p
- collect.push line
- next
- else
- puts line
- end
-end
-
diff --git a/hadoop-uniq b/hadoop-uniq
new file mode 100755
index 0000000..5052419
--- /dev/null
+++ b/hadoop-uniq
@@ -0,0 +1,11 @@
+#!/bin/zsh
+
+HADOOP_HOME=/usr/lib/hadoop
+
+$HADOOP_HOME/bin/hadoop jar /usr/lib/hadoop-mapreduce/hadoop-streaming.jar \
+ -D mapred.reduce.tasks=98 \
+ -input d \
+ -output d.uniq \
+ -mapper 'cut -d " " -f 1' \
+ -reducer /usr/bin/uniq
+
diff --git a/hadoop_uniq b/hadoop_uniq
deleted file mode 100755
index 5052419..0000000
--- a/hadoop_uniq
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/zsh
-
-HADOOP_HOME=/usr/lib/hadoop
-
-$HADOOP_HOME/bin/hadoop jar /usr/lib/hadoop-mapreduce/hadoop-streaming.jar \
- -D mapred.reduce.tasks=98 \
- -input d \
- -output d.uniq \
- -mapper 'cut -d " " -f 1' \
- -reducer /usr/bin/uniq
-
diff --git a/kbest-bleu-oracles b/kbest-bleu-oracles
new file mode 100755
index 0000000..a36c345
--- /dev/null
+++ b/kbest-bleu-oracles
@@ -0,0 +1,51 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+require 'trollop'
+
+# Collects, per k-best list, BLEU::get_counts (weight 1) for the list's first
+# entry against the reference at the same index; returns them in list order.
+def get_context kbest_lists, references, n
+  kbest_lists.each_index.map { |i|
+    BLEU::get_counts(kbest_lists[i][0].s, references[i], n, 1)
+  }
+end
+
+def main # prints, per k-best list: the reference, the list's first entry and its top-scoring entry
+ conf = Trollop::options do
+ opt :kbest_lists, "kbest lists", :type => :string, :required => true
+ opt :references, "reference", :type => :string, :required => true
+ opt :n, "N for BLEU", :type => :int, :default => 4
+ opt :weight, "how much to weigh single translations", :type => :int, :default => 1
+ opt :debug, "debug mode", :type => :bool, :default => false
+ end
+ debug = conf[:debug]
+ n = conf[:n]
+ kbest_lists = read_kbest_lists conf[:kbest_lists]
+ references = ReadFile.readlines_strip conf[:references]
+ context = get_context kbest_lists, references, n # one counts entry per sentence, taken from each list's first entry
+ kbest_lists.each_with_index { |kbest,j|
+ scores = []
+ max_score = -1.0/0 # -Infinity, so any finite score replaces it
+ max_idx = -1
+ kbest.each_index { |i|
+ context_cp = context.dup # shallow copy: only slot j is swapped below
+ context_cp[j] = BLEU::get_counts kbest[i].s, references[j], n, conf[:weight] # candidate i stands in for sentence j
+ score = BLEU::hbleu_(context_cp, n, debug)
+ scores << score
+ if score > max_score # strict comparison: the earliest of equally scored entries wins
+ max_score = score
+ max_idx = i
+ end
+ STDERR.write "#{i} #{kbest[i]}\t#{score}\n---\n" if debug
+ }
+ puts "#{references[j]}"
+ puts "BLEU=#{scores[0]} ||| #{kbest[0]}" # score obtained with the list's first entry
+ puts "BLEU=#{max_score} ||| #{kbest[max_idx]}" # best score found in this list
+ puts
+ STDERR.write "\n" if debug
+ }
+end
+
+main
+
diff --git a/kbest_bleu_oracles b/kbest_bleu_oracles
deleted file mode 100755
index a36c345..0000000
--- a/kbest_bleu_oracles
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-require 'trollop'
-
-def get_context kbest_lists, references, n
- a = []
- kbest_lists.each_index { |i|
- a << BLEU::get_counts(kbest_lists[i][0].s, references[i], n, 1)
- }
- return a
-end
-
-def main
- conf = Trollop::options do
- opt :kbest_lists, "kbest lists", :type => :string, :required => true
- opt :references, "reference", :type => :string, :required => true
- opt :n, "N for BLEU", :type => :int, :default => 4
- opt :weight, "how much to weigh single translations", :type => :int, :default => 1
- opt :debug, "debug mode", :type => :bool, :default => false
- end
- debug = conf[:debug]
- n = conf[:n]
- kbest_lists = read_kbest_lists conf[:kbest_lists]
- references = ReadFile.readlines_strip conf[:references]
- context = get_context kbest_lists, references, n
- kbest_lists.each_with_index { |kbest,j|
- scores = []
- max_score = -1.0/0
- max_idx = -1
- kbest.each_index { |i|
- context_cp = context.dup
- context_cp[j] = BLEU::get_counts kbest[i].s, references[j], n, conf[:weight]
- score = BLEU::hbleu_(context_cp, n, debug)
- scores << score
- if score > max_score
- max_score = score
- max_idx = i
- end
- STDERR.write "#{i} #{kbest[i]}\t#{score}\n---\n" if debug
- }
- puts "#{references[j]}"
- puts "BLEU=#{scores[0]} ||| #{kbest[0]}"
- puts "BLEU=#{max_score} ||| #{kbest[max_idx]}"
- puts
- STDERR.write "\n" if debug
- }
-end
-
-main
-
diff --git a/kendalls-tau b/kendalls-tau
new file mode 100755
index 0000000..c0c20be
--- /dev/null
+++ b/kendalls-tau
@@ -0,0 +1,75 @@
+#!/usr/bin/env ruby
+
+#################################################
+# reads space delimted pairs of scores as input,
+# outputs Kendall's τ
+#################################################
+
+# Kendall's tau-b over [a, b] pairs. Pairs tied in both columns are skipped;
+# the old else-branch counted them as discordant and dragged tau down.
+def kendall_with_ties l
+  concordant = disconcordant = tie_a = tie_b = 0
+  l.each_with_index { |k,i|
+    l[i+1,l.size].each { |m|
+      da = k.first <=> m.first  # -1/0/1 ordering of the pair in column a
+      db = k[1] <=> m[1]        # same for column b
+      next if da == 0 && db == 0
+      if da == 0
+        tie_a += 1
+      elsif db == 0
+        tie_b += 1
+      elsif da == db
+        concordant += 1
+      else
+        disconcordant += 1
+      end
+    }
+  }
+  return (concordant-disconcordant)/(Math.sqrt((concordant+disconcordant+tie_a)*(concordant+disconcordant+tie_b)))
+end
+
+# Kendall's tau without tie correction for a list l of [a, b] pairs:
+# (concordant - discordant) divided by the number of pairs n(n-1)/2.
+def kendall l
+  concordant = 0
+  disconcordant = 0
+  l.each_with_index { |k,i|
+    l.drop(i+1).each { |m|
+      same_order = (k.first <= m.first && k[1] <= m[1]) ||
+                   (k.first >= m.first && k[1] >= m[1])
+      concordant += 1 if same_order
+      disconcordant += 1 unless same_order
+    }
+  }
+  pairs = 0.5 * l.size * (l.size-1)  # Float, so the division below is not truncated
+  return (concordant-disconcordant)/pairs
+end
+
+# True when either score column of l (a list of [a, b] pairs) repeats a
+# value. The columns live at indices 0 and 1; the old p[1]/p[2] lookup
+# read a missing third column (all nil) and therefore reported ties for
+# every list with two or more pairs.
+def has_ties? l
+  l.map{ |p| p[0] }.uniq.size != l.size ||
+    l.map{ |p| p[1] }.uniq.size != l.size
+end
+
+# Reads one whitespace-separated score pair per STDIN line, then prints
+# Kendall's tau: the tie-corrected variant if has_ties? holds, else the
+# plain one.
+def main
+  l = []
+  STDIN.each_line { |line|
+    a,b = line.split
+    l << [a.to_f, b.to_f]
+  }
+  v = if has_ties? l
+        kendall_with_ties l
+      else
+        kendall l
+      end
+  puts v
+end
+
+main
+
diff --git a/kendalls_tau b/kendalls_tau
deleted file mode 100755
index c0c20be..0000000
--- a/kendalls_tau
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env ruby
-
-#################################################
-# reads space delimted pairs of scores as input,
-# outputs Kendall's τ
-#################################################
-
-def kendall_with_ties l
- concordant = 0
- disconcordant = 0
- tie_a = 0
- tie_b = 0
- l.each_with_index { |k,i|
- l[i+1,l.size].each_with_index { |m,j|
- if (k.first < m.first && k[1] < m[1]) ||
- (k.first > m.first && k[1] > m[1])
- concordant += 1
- elsif (k.first == m.first && k[1] != m[1])
- tie_a += 1
- elsif (k.first != m.first && k[1] == m[1])
- tie_b += 1
- else
- disconcordant += 1
- end
- }
- }
-
- return (concordant-disconcordant)/(Math.sqrt((concordant+disconcordant+tie_a)*(concordant+disconcordant+tie_b)))
-end
-
-def kendall l
- concordant = 0
- disconcordant = 0
- l.each_with_index { |k,i|
- l[i+1,l.size].each_with_index { |m,j|
- if (k.first <= m.first && k[1] <= m[1]) ||
- (k.first >= m.first && k[1] >= m[1])
- concordant += 1
- else
- disconcordant += 1
- end
- }
- }
-
- return (concordant-disconcordant)/(0.5 * l.size * (l.size-1))
-end
-
-def has_ties? l
- if l.map{ |p| p[1] }.uniq.size != l.size ||
- l.map{ |p| p[2] }.uniq.size != l.size
- return true
- end
-
- return false
-end
-
-def main
- l = []
- while line = STDIN.gets
- a,b = line.split
- l << [a.to_f, b.to_f]
- end
-
- v = -1
- if has_ties? l
- v = kendall_with_ties l
- else
- v = kendall l
- end
-
- puts v
-end
-
-main
-
diff --git a/key-count b/key-count
new file mode 100755
index 0000000..deaa522
--- /dev/null
+++ b/key-count
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+
+STDIN.set_encoding 'utf-8'
+STDOUT.set_encoding 'utf-8'
+
+# Count how often each whitespace-stripped STDIN line occurs, then print
+# one "line count" row per distinct line, in the order first seen.
+counts = Hash.new(0)
+STDIN.each_line { |line|
+  counts[line.strip] += 1
+}
+
+counts.each { |key,n| puts "#{key} #{n}" }
+
diff --git a/key_count b/key_count
deleted file mode 100755
index deaa522..0000000
--- a/key_count
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env ruby
-
-STDIN.set_encoding 'utf-8'
-STDOUT.set_encoding 'utf-8'
-
-h = {}
-h.default = 0
-while line = STDIN.gets
- line.strip!
- h[line] += 1
-end
-
-h.each_pair { |k,v| puts "#{k} #{v}" }
-
diff --git a/lin-reg b/lin-reg
new file mode 100755
index 0000000..7a8e614
--- /dev/null
+++ b/lin-reg
@@ -0,0 +1,70 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+require 'trollop'
+
+def read_data fn, scale
+ f = ReadFile.new fn
+ data = []
+ while line = f.gets
+ line.strip!
+ a = []
+ a << 1.0
+ tokenize(line).each { |i| a << i.to_f }
+ v = SparseVector.from_a a
+ data << v
+ end
+ if scale
+ data.map { |i| i.keys }.flatten.uniq.each { |k|
+ max = data.map { |i| i[k] }.max
+ data.each { |i| i[k] /= max }
+ }
+ end
+ return data
+end
+
+def main
+ conf = Trollop::options do
+ opt :input, "input data", :type => :string, :required => true
+ opt :output, "output data", :type => :string, :required => true
+ opt :learning_rate, "learning rate", :type => :float, :default => 0.07
+ opt :stop, "stopping criterion", :type => :int, :default => 100
+ opt :scale_features,"scale features", :type => :bool, :default => false, :short => '-t'
+ opt :show_loss, "show loss per iter", :type => :bool, :default => false
+ end
+ data = read_data conf[:input], conf[:scale_features]
+ zeros = [0.0]*data[0].size
+ t = ReadFile.readlines(conf[:output]).map{ |i| i.to_f }
+ model = SparseVector.new zeros
+ stop = 0
+ prev_model = nil
+ i = 0
+ while true
+ i += 1
+ u = SparseVector.new zeros
+ overall_loss = 0.0
+ data.each_with_index { |x,j|
+ loss = model.dot(x) - t[j]
+ overall_loss += loss**2
+ u += x * loss
+ }
+ STDERR.write "#{i} #{overall_loss/data.size}\n" if conf[:show_loss]
+ u *= conf[:learning_rate]*(1.0/t.size)
+ model -= u
+ if model.approx_eql? prev_model
+ stop += 1
+ else
+ stop = 0
+ end
+ break if stop==conf[:stop]
+ prev_model = model
+ end
+ tss = t.map{ |y| (y-t.mean)**2 }.sum
+ j = -1
+ rss = t.map{ |y| j+=1; (y-model.dot(data[j]))**2 }.sum
+ STDERR.write "ran for #{i} iterations\n R^2=#{1-(rss/tss)}\n"
+ puts model.to_s
+end
+
+main
+
diff --git a/lin_reg b/lin_reg
deleted file mode 100755
index 7a8e614..0000000
--- a/lin_reg
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-require 'trollop'
-
-def read_data fn, scale
- f = ReadFile.new fn
- data = []
- while line = f.gets
- line.strip!
- a = []
- a << 1.0
- tokenize(line).each { |i| a << i.to_f }
- v = SparseVector.from_a a
- data << v
- end
- if scale
- data.map { |i| i.keys }.flatten.uniq.each { |k|
- max = data.map { |i| i[k] }.max
- data.each { |i| i[k] /= max }
- }
- end
- return data
-end
-
-def main
- conf = Trollop::options do
- opt :input, "input data", :type => :string, :required => true
- opt :output, "output data", :type => :string, :required => true
- opt :learning_rate, "learning rate", :type => :float, :default => 0.07
- opt :stop, "stopping criterion", :type => :int, :default => 100
- opt :scale_features,"scale features", :type => :bool, :default => false, :short => '-t'
- opt :show_loss, "show loss per iter", :type => :bool, :default => false
- end
- data = read_data conf[:input], conf[:scale_features]
- zeros = [0.0]*data[0].size
- t = ReadFile.readlines(conf[:output]).map{ |i| i.to_f }
- model = SparseVector.new zeros
- stop = 0
- prev_model = nil
- i = 0
- while true
- i += 1
- u = SparseVector.new zeros
- overall_loss = 0.0
- data.each_with_index { |x,j|
- loss = model.dot(x) - t[j]
- overall_loss += loss**2
- u += x * loss
- }
- STDERR.write "#{i} #{overall_loss/data.size}\n" if conf[:show_loss]
- u *= conf[:learning_rate]*(1.0/t.size)
- model -= u
- if model.approx_eql? prev_model
- stop += 1
- else
- stop = 0
- end
- break if stop==conf[:stop]
- prev_model = model
- end
- tss = t.map{ |y| (y-t.mean)**2 }.sum
- j = -1
- rss = t.map{ |y| j+=1; (y-model.dot(data[j]))**2 }.sum
- STDERR.write "ran for #{i} iterations\n R^2=#{1-(rss/tss)}\n"
- puts model.to_s
-end
-
-main
-
diff --git a/log-reg b/log-reg
new file mode 100755
index 0000000..82dc353
--- /dev/null
+++ b/log-reg
@@ -0,0 +1,71 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+require 'matrix'
+require 'trollop'
+
+def read_data fn
+ f = ReadFile.new fn
+ data = []
+ while line = f.gets
+ line.strip!
+ a = []
+ a << 1.0
+ tokenize(line).each { |i| a << i.to_f }
+ v = Vector.elements a
+ data << v
+ end
+ return data
+end
+
+# Inner product of x and y, walking the positions of x and accumulating
+# from 0.0 so the result is a Float even for integer inputs.
+def dot x, y
+  x.each_with_index.inject(0.0) { |acc,(_,j)|
+    acc + x[j] * y[j]
+  }
+end
+
+# True when x and y are both non-nil, have the same size, and no position
+# differs by more than eps; false otherwise (including when either
+# argument is nil).
+def approx_eql x, y, eps=10**-10
+  return false unless x && y
+  return false unless x.size == y.size
+  (0...x.size).none? { |i| (x[i]-y[i]).abs > eps }
+end
+
+# Newton's method for logistic regression: reads feature rows (--input) and
+# 1/0 targets (--output), iterates to convergence, prints the weight vector.
+def main
+  conf = Trollop::options do
+    opt :input, "input data", :type => :string, :required => true
+    opt :output, "1/0 output data", :type => :string, :required => true
+  end
+  data = read_data conf[:input]
+  dim = data[0].size
+  zeros = [0.0]*dim
+  t = ReadFile.readlines(conf[:output]).map{ |i| i.to_f }
+  model = Vector.elements zeros
+  prev_model = nil
+  i = 0
+  while true
+    i += 1
+    # rebuild both sums every step; the old code kept adding to last step's averages
+    gradient = Vector.elements zeros
+    hessian = Matrix.build(dim,dim) { 0.0 }
+    data.each_with_index { |x,j|
+      m = 1.0/(1+Math.exp(-dot(model, x)))  # sigmoid of the current score for row j
+      gradient += (m-t[j]) * x
+      hessian += m*(1.0-m) * (Matrix.column_vector(x) * Matrix.row_vector(x))
+    }
+    model -= (hessian/data.size).inverse * (gradient/data.size)
+    break if approx_eql model, prev_model
+    prev_model = model
+  end
+  STDERR.write "ran for #{i} iterations\n"
+  puts model.to_s
+end
+
+main
+
diff --git a/log_reg b/log_reg
deleted file mode 100755
index 82dc353..0000000
--- a/log_reg
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-require 'matrix'
-require 'trollop'
-
-def read_data fn
- f = ReadFile.new fn
- data = []
- while line = f.gets
- line.strip!
- a = []
- a << 1.0
- tokenize(line).each { |i| a << i.to_f }
- v = Vector.elements a
- data << v
- end
- return data
-end
-
-def dot x, y
- r = 0.0
- x.each_with_index { |_,j|
- r += x[j] * y[j]
- }
- return r
-end
-
-def approx_eql x, y, eps=10**-10
- return false if !x||!y
- return false if x.size!=y.size
- x.each_with_index { |_,i|
- return false if (x[i]-y[i]).abs>eps
- }
- return true
-end
-
-def main
- conf = Trollop::options do
- opt :input, "input data", :type => :string, :required => true
- opt :output, "1/0 output data", :type => :string, :required => true
- end
- data = read_data conf[:input]
- dim = data[0].size
- zeros = [0.0]*dim
- t = ReadFile.readlines(conf[:output]).map{ |i| i.to_f }
- model = Vector.elements zeros
- prev_model = nil
- gradient = Vector.elements zeros
- hessian = Matrix.build(dim,dim) { |i,j| 0.0 }
- i = 0
- while true
- i += 1
- data.each_with_index { |x,j|
- m = 1.0/(1+Math.exp(-dot(model, x)))
- gradient += (m-t[j]) * x
- hup = Matrix.column_vector(x) * Matrix.row_vector(x)
- hessian += m*(1.0-m) * hup
- }
- gradient /= data.size
- hessian /= data.size
- model -= hessian.inverse * gradient
- break if approx_eql model, prev_model
- prev_model = model
- end
- STDERR.write "ran for #{i} iterations\n"
- puts model.to_s
-end
-
-main
-
diff --git a/make-rule-features b/make-rule-features
new file mode 100755
index 0000000..7adb6e9
--- /dev/null
+++ b/make-rule-features
@@ -0,0 +1,44 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
+def mkrf src, tgt
+ s = src.gsub /\[X,[1-9]\]/, "NX"
+ t = tgt.gsub /\[X,([1-9])\]/,'N\1'
+ return "R:X:#{s.gsub(" ","_")}:#{t.gsub(" ","_")}"
+end
+
+def mkrbf s, t
+ s = String.new s
+ if t == "S"
+ s.gsub! /\[X,[1-9]\]/, "X"
+ else
+ s.gsub! /\[X,([1-9])\]/, 'X\1'
+ end
+ s.reverse!
+ s += " >r<"
+ s.reverse!
+ s += " "
+ a = []
+ ngrams(s, 2, true) { |ng|
+ a << "RB#{t}:#{ng.join "_"}"
+ }
+ return a
+end
+
+h = {}
+while line = STDIN.gets
+ _,src,tgt,_,_ = splitpipe line.strip
+ src.strip!
+ tgt.strip!
+ mkrbf(src, "S").each { |f|
+ h[f] = true
+ }
+ mkrbf(tgt, "T").each { |f|
+ h[f] = true
+ }
+ h [mkrf(src, tgt)] = true
+end
+
+h.keys.each { |f| puts f }
+
diff --git a/make_rule_features b/make_rule_features
deleted file mode 100755
index 7adb6e9..0000000
--- a/make_rule_features
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-
-def mkrf src, tgt
- s = src.gsub /\[X,[1-9]\]/, "NX"
- t = tgt.gsub /\[X,([1-9])\]/,'N\1'
- return "R:X:#{s.gsub(" ","_")}:#{t.gsub(" ","_")}"
-end
-
-def mkrbf s, t
- s = String.new s
- if t == "S"
- s.gsub! /\[X,[1-9]\]/, "X"
- else
- s.gsub! /\[X,([1-9])\]/, 'X\1'
- end
- s.reverse!
- s += " >r<"
- s.reverse!
- s += " "
- a = []
- ngrams(s, 2, true) { |ng|
- a << "RB#{t}:#{ng.join "_"}"
- }
- return a
-end
-
-h = {}
-while line = STDIN.gets
- _,src,tgt,_,_ = splitpipe line.strip
- src.strip!
- tgt.strip!
- mkrbf(src, "S").each { |f|
- h[f] = true
- }
- mkrbf(tgt, "T").each { |f|
- h[f] = true
- }
- h [mkrf(src, tgt)] = true
-end
-
-h.keys.each { |f| puts f }
-
diff --git a/max-len b/max-len
new file mode 100755
index 0000000..69013b5
--- /dev/null
+++ b/max-len
@@ -0,0 +1,16 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
+# Print the 0-based index of every STDIN line with at most ARGV[0] tokens
+# (as counted by tokenize); longer lines are echoed to STDERR instead.
+max = ARGV[0].to_i
+
+STDIN.each_line.with_index { |line,i|
+  if tokenize(line).size > max
+    STDERR.write line
+  else
+    puts i
+  end
+}
+
diff --git a/max_len b/max_len
deleted file mode 100755
index 69013b5..0000000
--- a/max_len
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-
-max = ARGV[0].to_i
-
-i = 0
-while line = STDIN.gets
- if tokenize(line).size <= max
- puts i
- else
- STDERR.write line
- end
- i += 1
-end
-
diff --git a/mem-usage b/mem-usage
new file mode 100755
index 0000000..5c2104f
--- /dev/null
+++ b/mem-usage
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+"$@" &
+pid=$! peak=0
+while true; do
+ sleep 1
+ sample="$(ps -o rss= $pid 2> /dev/null)" || break
+ let peak='sample > peak ? sample : peak'
+done
+echo "$(( ${peak%% *} / 1024)) m"
+
diff --git a/mem_usage b/mem_usage
deleted file mode 100755
index 5c2104f..0000000
--- a/mem_usage
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-
-"$@" &
-pid=$! peak=0
-while true; do
- sleep 1
- sample="$(ps -o rss= $pid 2> /dev/null)" || break
- let peak='sample > peak ? sample : peak'
-done
-echo "$(( ${peak%% *} / 1024)) m"
-
diff --git a/merge-files b/merge-files
new file mode 100755
index 0000000..714b57d
--- /dev/null
+++ b/merge-files
@@ -0,0 +1,31 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
+def usage
+ STDERR.write "merge_files +\n"
+ exit 1
+end
+usage if ARGV.size==0
+
+files = ARGV
+hashes = []
+
+files.each { |i|
+ hashes.push Hash.new
+ hashes.last.default = 0
+ f = ReadFile.new i
+ while line = f.gets
+ hashes.last[line.strip] += 1
+ end
+ f.close
+}
+
+hashes.each { |h|
+ h.each { |k,v|
+ counts = []
+ hashes.each { |j| counts.push j[k]; j.delete k }
+ counts.max.times { puts k }
+ }
+}
+
diff --git a/merge-ttable b/merge-ttable
new file mode 100755
index 0000000..ac10903
--- /dev/null
+++ b/merge-ttable
@@ -0,0 +1,34 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+require 'trollop'
+
+def main
+ conf = Trollop::options do
+ opt :f, "f files", :type => :string, :required => true
+ opt :e, "e files", :type => :string, :required => true
+ end
+
+ f_files = conf[:f].split
+ e_files = conf[:e].split
+
+ h = {}
+ f_files.each_with_index { |fn,i|
+ fa = ReadFile.readlines_strip fn
+ ea = ReadFile.readlines_strip e_files[i]
+ fa.each_with_index { |fw,j|
+ if h.has_key? fw
+ h[fw] << ea[j]
+ else
+ h[fw] = [ea[j]]
+ end
+ }
+ }
+
+ h.each_pair { |f,ea|
+ puts "#{f}\t#{ea.first}"
+ }
+end
+
+main
+
diff --git a/merge_files b/merge_files
deleted file mode 100755
index 714b57d..0000000
--- a/merge_files
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-
-def usage
- STDERR.write "merge_files +\n"
- exit 1
-end
-usage if ARGV.size==0
-
-files = ARGV
-hashes = []
-
-files.each { |i|
- hashes.push Hash.new
- hashes.last.default = 0
- f = ReadFile.new i
- while line = f.gets
- hashes.last[line.strip] += 1
- end
- f.close
-}
-
-hashes.each { |h|
- h.each { |k,v|
- counts = []
- hashes.each { |j| counts.push j[k]; j.delete k }
- counts.max.times { puts k }
- }
-}
-
diff --git a/merge_ttable b/merge_ttable
deleted file mode 100755
index ac10903..0000000
--- a/merge_ttable
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-require 'trollop'
-
-def main
- conf = Trollop::options do
- opt :f, "f files", :type => :string, :required => true
- opt :e, "e files", :type => :string, :required => true
- end
-
- f_files = conf[:f].split
- e_files = conf[:e].split
-
- h = {}
- f_files.each_with_index { |fn,i|
- fa = ReadFile.readlines_strip fn
- ea = ReadFile.readlines_strip e_files[i]
- fa.each_with_index { |fw,j|
- if h.has_key? fw
- h[fw] << ea[j]
- else
- h[fw] = [ea[j]]
- end
- }
- }
-
- h.each_pair { |f,ea|
- puts "#{f}\t#{ea.first}"
- }
-end
-
-main
-
diff --git a/min-max b/min-max
new file mode 100755
index 0000000..1dbfd40
--- /dev/null
+++ b/min-max
@@ -0,0 +1,40 @@
+#!/usr/bin/ruby
+
+require 'zipf'
+require 'trollop'
+
+conf = Trollop::options do
+ opt :min, "minimum #tokens", :type => :int, :default => 1
+ opt :max, "maximum #tokens", :type => :int, :default => 80, :short => '-n'
+ opt :in_f, "input 'French' file", :type => :string, :required => true
+ opt :in_e, "input 'English' file", :type => :string, :required => true
+ opt :out_f, "output 'French' file", :type => :string, :required => true
+ opt :out_e, "output 'English' file", :type => :string, :required => true
+ opt :out_id, "output line Nos", :type => :string, :required => true
+end
+
+
+files = {}
+files[:f_file] = ReadFile.new conf[:in_f]
+files[:e_file] = ReadFile.new conf[:in_e]
+files[:f_out_file] = WriteFile.new conf[:out_f]
+files[:e_out_file] = WriteFile.new conf[:out_e]
+files[:id_out_file] = WriteFile.new conf[:out_id]
+i = 0
+while f_line = files[:f_file].gets
+ e_line = files[:e_file].gets
+ f_line.strip!
+ e_line.strip!
+ a = f_line.split
+ b = e_line.split
+ if a.size >= conf[:min] and a.size <= conf[:max] and \
+ b.size >= conf[:min] and b.size <= conf[:max]
+ files[:f_out_file].write "#{f_line}\n"
+ files[:e_out_file].write "#{e_line}\n"
+ files[:id_out_file].write "#{i}\n"
+ end
+ i+=1
+end
+
+files.values.each{ |f| f.close }
+
diff --git a/min_max b/min_max
deleted file mode 100755
index 1dbfd40..0000000
--- a/min_max
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/ruby
-
-require 'zipf'
-require 'trollop'
-
-conf = Trollop::options do
- opt :min, "minimum #tokens", :type => :int, :default => 1
- opt :max, "maximum #tokens", :type => :int, :default => 80, :short => '-n'
- opt :in_f, "input 'French' file", :type => :string, :required => true
- opt :in_e, "input 'English' file", :type => :string, :required => true
- opt :out_f, "output 'French' file", :type => :string, :required => true
- opt :out_e, "output 'English' file", :type => :string, :required => true
- opt :out_id, "output line Nos", :type => :string, :required => true
-end
-
-
-files = {}
-files[:f_file] = ReadFile.new conf[:in_f]
-files[:e_file] = ReadFile.new conf[:in_e]
-files[:f_out_file] = WriteFile.new conf[:out_f]
-files[:e_out_file] = WriteFile.new conf[:out_e]
-files[:id_out_file] = WriteFile.new conf[:out_id]
-i = 0
-while f_line = files[:f_file].gets
- e_line = files[:e_file].gets
- f_line.strip!
- e_line.strip!
- a = f_line.split
- b = e_line.split
- if a.size >= conf[:min] and a.size <= conf[:max] and \
- b.size >= conf[:min] and b.size <= conf[:max]
- files[:f_out_file].write "#{f_line}\n"
- files[:e_out_file].write "#{e_line}\n"
- files[:id_out_file].write "#{i}\n"
- end
- i+=1
-end
-
-files.values.each{ |f| f.close }
-
diff --git a/moses-1best b/moses-1best
new file mode 100755
index 0000000..fd35cf8
--- /dev/null
+++ b/moses-1best
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
+# Print each stripped line whose first splitpipe field (as an integer) differs from the previous line's.
+last_idx = nil
+STDIN.each_line { |raw|
+  entry = raw.strip
+  idx = splitpipe(entry)[0].to_i
+  next if idx == last_idx
+  puts entry
+  last_idx = idx
+}
+
diff --git a/moses_1best b/moses_1best
deleted file mode 100755
index fd35cf8..0000000
--- a/moses_1best
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-
-prev_idx = nil
-while line = STDIN.gets
- line.strip!
- idx = splitpipe(line)[0].to_i
- if idx != prev_idx
- puts line
- prev_idx = idx
- end
-end
-
diff --git a/no-empty b/no-empty
new file mode 100755
index 0000000..da57e23
--- /dev/null
+++ b/no-empty
@@ -0,0 +1,18 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
+files = []
+(0..1).each { |i| files << ReadFile.new(ARGV[i]) }
+(2..3).each { |i| files << WriteFile.new(ARGV[i]) }
+
+while line_f = files[0].gets
+ line_e = files[1].gets
+ line_f.strip!; line_e.strip!
+ next if line_f=='' || line_e==''
+ files[2].write line_f+"\n"
+ files[3].write line_e+"\n"
+end
+
+files.each { |f| f.close }
+
diff --git a/no-non-printables b/no-non-printables
new file mode 100755
index 0000000..9f9e3f9
--- /dev/null
+++ b/no-non-printables
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+sed 's/\xEF\xBB\xBF//g' | sed 's/\xEF\xB7\x93//g' | sed 's/[[:cntrl:]]//g'
+
diff --git a/no_empty b/no_empty
deleted file mode 100755
index da57e23..0000000
--- a/no_empty
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-
-files = []
-(0..1).each { |i| files << ReadFile.new(ARGV[i]) }
-(2..3).each { |i| files << WriteFile.new(ARGV[i]) }
-
-while line_f = files[0].gets
- line_e = files[1].gets
- line_f.strip!; line_e.strip!
- next if line_f=='' || line_e==''
- files[2].write line_f+"\n"
- files[3].write line_e+"\n"
-end
-
-files.each { |f| f.close }
-
diff --git a/no_non_printables b/no_non_printables
deleted file mode 100755
index 9f9e3f9..0000000
--- a/no_non_printables
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-
-sed 's/\xEF\xBB\xBF//g' | sed 's/\xEF\xB7\x93//g' | sed 's/[[:cntrl:]]//g'
-
diff --git a/norm-german b/norm-german
new file mode 100755
index 0000000..cf9c060
--- /dev/null
+++ b/norm-german
@@ -0,0 +1,87 @@
+#!/usr/bin/env ruby
+
+require 'thread'
+require 'trollop'
+
+STDIN.set_encoding 'utf-8'
+STDOUT.set_encoding 'utf-8'
+
+
+conf = Trollop::options do
+ banner "norm_german < "
+ opt :upper, "uppercase", :type => :bool, :default => false
+ opt :threads, "#threads", :type => :int, :default => 1, :short => '-h'
+ opt :shard_size, "shard size", :type => :int, :default => 1000
+ opt :train, "train", :type => :bool
+ opt :apply, "apply", :type => :bool
+end
+
+pairs_lower = [ ['ß','ss'], ['ue', 'ü'], ['ae','ä'], ['oe', 'ö'] ]
+pairs_upper = [ ['Ä', 'Ae'], ['Ö', 'Oe'], ['Ü', 'Ue'] ]
+if conf[:upper]
+ PAIRS = pairs_lower
+else
+ PAIRS = pairs_lower+pairs_upper
+end
+
+# Returns old if rewriting new with some PAIRS entry (either direction) yields old, else nil.
+def get_key(old, new)
+  matched = PAIRS.any? { |from,to|
+    new.gsub(from, to) == old || new.gsub(to, from) == old
+  }
+  matched ? old : nil
+end
+
+# Groups tokens into a hash of representative => [variants]: a token joins
+# the first existing key that get_key accepts for it, otherwise it starts
+# its own group keyed by itself.
+def build_partial(tokens)
+  groups = {}
+  tokens.each { |tok|
+    key = groups.keys.find { |cand| get_key cand, tok }
+    if key
+      groups[key] << tok
+    else
+      groups[tok] = [tok]
+    end
+  }
+  groups
+end
+
+# Shard STDIN tokens (one per line) into chunks of --shard-size, group each
+# chunk with build_partial on its own thread, and merge the results into h.
+h = {}
+threads = []
+thread_n = 0
+counter = 0
+token_stock = []
+mutex = Mutex.new
+while tok = STDIN.gets
+  token_stock << [] if !token_stock[thread_n]
+  # strip, not strip!: strip! returns nil when nothing is removed (e.g. a
+  # final line without a newline), which would push nil as a token.
+  token_stock[thread_n] << tok.strip
+  counter += 1
+  next if token_stock[thread_n].size%conf[:shard_size]!=0
+  STDERR.write "Starting thread ##{thread_n}\n"
+  threads << Thread.new(token_stock[thread_n]) { |tokens|
+    th = build_partial tokens
+    mutex.synchronize { h.merge! th }
+  }
+  threads.last.abort_on_exception = true
+  thread_n += 1
+  if thread_n==conf[:threads]
+    threads.each { |i| i.join }
+    token_stock.each { |i| i.clear }
+    thread_n = 0
+  end
+  STDERR.write "#keys #{h.keys.size}\n"
+end
+
+# group whatever is left in partially filled shards
+token_stock.each { |i|
+  if i.size!=0
+    h.merge! build_partial i
+  end
+}
+
diff --git a/norm-hyphens b/norm-hyphens
new file mode 100755
index 0000000..4a152a1
--- /dev/null
+++ b/norm-hyphens
@@ -0,0 +1,4 @@
+#!/bin/zsh -x
+
+sed "s|[ \t]\+\xc2\xad[ \t]\+||g"
+
diff --git a/norm_german b/norm_german
deleted file mode 100755
index cf9c060..0000000
--- a/norm_german
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'thread'
-require 'trollop'
-
-STDIN.set_encoding 'utf-8'
-STDOUT.set_encoding 'utf-8'
-
-
-conf = Trollop::options do
- banner "norm_german < "
- opt :upper, "uppercase", :type => :bool, :default => false
- opt :threads, "#threads", :type => :int, :default => 1, :short => '-h'
- opt :shard_size, "shard size", :type => :int, :default => 1000
- opt :train, "train", :type => :bool
- opt :apply, "apply", :type => :bool
-end
-
-pairs_lower = [ ['ß','ss'], ['ue', 'ü'], ['ae','ä'], ['oe', 'ö'] ]
-pairs_upper = [ ['Ä', 'Ae'], ['Ö', 'Oe'], ['Ü', 'Ue'] ]
-if conf[:upper]
- PAIRS = pairs_lower
-else
- PAIRS = pairs_lower+pairs_upper
-end
-
-def get_key(old, new)
- PAIRS.each { |i|
- return old if new.gsub(i[0], i[1])==old
- return old if new.gsub(i[1], i[0])==old
- }
- return nil
-end
-
-def build_partial(tokens)
- h = {}
- tokens.each { |tok|
- found = false
- h.keys.each { |i|
- if get_key i, tok
- h[i] << tok
- found = true
- break
- end
- }
- h[tok] = [tok] if !found
- }
- return h
-end
-
-h = {}
-threads = []
-thread_n = 0
-counter = 0
-token_stock = []
-mutex = Mutex.new
-while tok = STDIN.gets
- token_stock << [] if !token_stock[thread_n]
- token_stock[thread_n] << tok.strip!
- counter += 1
- if token_stock[thread_n].size%conf[:shard_size]==0
- STDERR.write "Starting thread ##{thread_n}\n"
- threads << Thread.new(token_stock[thread_n]) { |tokens|
- th = build_partial tokens
- mutex.synchronize do
- h.merge! th
- end
- }
- threads.last.abort_on_exception = true
- thread_n += 1
- else
- next
- end
- if thread_n==conf[:threads]
- threads.each { |i| i.join }
- token_stock.each { |i| i.clear }
- thread_n = 0
- end
- STDERR.write "#keys #{h.keys.size}\n"
-end
-
-token_stock.each { |i|
- if i.size!=0
- h.merge! build_partial i
- end
-}
-
diff --git a/norm_hyphens b/norm_hyphens
deleted file mode 100755
index 4a152a1..0000000
--- a/norm_hyphens
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/zsh -x
-
-sed "s|[ \t]\+\xc2\xad[ \t]\+||g"
-
diff --git a/normalize-punctuation b/normalize-punctuation
new file mode 100755
index 0000000..108de44
--- /dev/null
+++ b/normalize-punctuation
@@ -0,0 +1,46 @@
+#!/usr/bin/perl -w
+# adapted from the moses scripts
+
+use strict;
+
+my ($language) = @ARGV;
+
+while() {
+ s/\r//g;
+ # normalize unicode punctuation
+ s/„/\"/g;
+ s/“/\"/g;
+ s/”/\"/g;
+ s/–/-/g;
+ s/—/ - /g; s/ +/ /g;
+ s/´/\'/g;
+ s/([a-z])‘([a-z])/$1\'$2/gi;
+ s/([a-z])’([a-z])/$1\'$2/gi;
+ s/‘/\"/g;
+ s/‚/\"/g;
+ s/’/\"/g;
+ s/''/\"/g;
+ s/´´/\"/g;
+ s/…/.../g;
+ # French quotes
+ s/ « / \"/g;
+ s/« /\"/g;
+ s/«/\"/g;
+ s/ » /\" /g;
+ s/ »/\"/g;
+ s/»/\"/g;
+ # handle pseudo-spaces
+ s/ \%/\%/g;
+ s/nº /nº /g;
+ s/ :/:/g;
+ s/ ºC/ ºC/g;
+ s/ cm/ cm/g;
+ s/ \?/\?/g;
+ s/ \!/\!/g;
+ s/ ;/;/g;
+ s/, /, /g; s/ +/ /g;
+
+ print STDERR $_ if //;
+
+ print $_;
+}
diff --git a/normalize_punctuation b/normalize_punctuation
deleted file mode 100755
index 108de44..0000000
--- a/normalize_punctuation
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/usr/bin/perl -w
-# adapted from the moses scripts
-
-use strict;
-
-my ($language) = @ARGV;
-
-while() {
- s/\r//g;
- # normalize unicode punctuation
- s/„/\"/g;
- s/“/\"/g;
- s/”/\"/g;
- s/–/-/g;
- s/—/ - /g; s/ +/ /g;
- s/´/\'/g;
- s/([a-z])‘([a-z])/$1\'$2/gi;
- s/([a-z])’([a-z])/$1\'$2/gi;
- s/‘/\"/g;
- s/‚/\"/g;
- s/’/\"/g;
- s/''/\"/g;
- s/´´/\"/g;
- s/…/.../g;
- # French quotes
- s/ « / \"/g;
- s/« /\"/g;
- s/«/\"/g;
- s/ » /\" /g;
- s/ »/\"/g;
- s/»/\"/g;
- # handle pseudo-spaces
- s/ \%/\%/g;
- s/nº /nº /g;
- s/ :/:/g;
- s/ ºC/ ºC/g;
- s/ cm/ cm/g;
- s/ \?/\?/g;
- s/ \!/\!/g;
- s/ ;/;/g;
- s/, /, /g; s/ +/ /g;
-
- print STDERR $_ if //;
-
- print $_;
-}
diff --git a/num-tok b/num-tok
new file mode 100755
index 0000000..56cbae9
--- /dev/null
+++ b/num-tok
@@ -0,0 +1,9 @@
+#!/usr/bin/env ruby
+
+STDIN.set_encoding 'utf-8'
+STDOUT.set_encoding 'utf-8'
+
+while line = STDIN.gets
+ puts line.strip.split.length
+end
+
diff --git a/num_tok b/num_tok
deleted file mode 100755
index 56cbae9..0000000
--- a/num_tok
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/usr/bin/env ruby
-
-STDIN.set_encoding 'utf-8'
-STDOUT.set_encoding 'utf-8'
-
-while line = STDIN.gets
- puts line.strip.split.length
-end
-
diff --git a/paste-pairs b/paste-pairs
new file mode 100755
index 0000000..f6b8b31
--- /dev/null
+++ b/paste-pairs
@@ -0,0 +1,10 @@
+#!/usr/bin/python
+
+import sys
+from itertools import izip
+
+for linenr, (src_line, tgt_line) in enumerate(izip(open(sys.argv[1]), open(sys.argv[2]))):
+ print linenr, (src_line.strip())
+ print linenr, (tgt_line.strip())
+ print
+
diff --git a/paste_pairs b/paste_pairs
deleted file mode 100755
index f6b8b31..0000000
--- a/paste_pairs
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/usr/bin/python
-
-import sys
-from itertools import izip
-
-for linenr, (src_line, tgt_line) in enumerate(izip(open(sys.argv[1]), open(sys.argv[2]))):
- print linenr, (src_line.strip())
- print linenr, (tgt_line.strip())
- print
-
diff --git a/per-sentence-bleu b/per-sentence-bleu
new file mode 100755
index 0000000..402f364
--- /dev/null
+++ b/per-sentence-bleu
@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+require 'trollop'
+
+def main
+ conf = Trollop::options do
+ opt :input, "input", :type => :string, :default => '-'
+ opt :references, "references", :type => :string, :required => true
+ opt :len_hack, "hack of Nakov et al", :type => :int, :default => 0
+ opt :n, "N", :default => 4
+ end
+
+ refs = ReadFile.readlines_strip conf[:references]
+ i = -1
+ input = ReadFile.new conf[:input]
+ while line = input.gets
+ i += 1
+ if line.strip == ''
+ puts 0.0
+ next
+ end
+ puts BLEU::per_sentence_bleu line.strip, refs[i], conf[:n], conf[:len_hack]
+ end
+ input.close
+end
+
+main
+
diff --git a/per-sentence-bleu-kbest b/per-sentence-bleu-kbest
new file mode 100755
index 0000000..f8bd860
--- /dev/null
+++ b/per-sentence-bleu-kbest
@@ -0,0 +1,34 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+require 'trollop'
+
+# Scores every entry of each k-best list against the reference with the same
+# index and prints "rank ||| score ||| entry" lines. The first best-scoring
+# entry of a list is followed by a "^^^ rank score" line; lists end with an empty line.
+def main
+ conf = Trollop::options do
+ opt :kbests, "kbests", :type => :string, :default => '-'
+ opt :references, "references", :type => :string, :required => true
+ end
+ # read the references as an array of stripped lines, so refs[i] is the i-th reference
+ refs = ReadFile.readlines_strip conf[:references]
+ kbest_lists = read_kbest_lists conf[:kbests]
+ kbest_lists.each_with_index { |list, i|
+ # module-qualified call; n and the length hack are left at the library defaults
+ scores = list.map { |e| BLEU::per_sentence_bleu(e, refs[i]) }
+ max = scores.max
+ marked = false
+ scores.each_with_index { |score, j|
+ puts "#{j+1} ||| #{score} ||| #{list[j]}"
+ if score==max && !marked
+ puts "^^^ #{j+1} #{max}"
+ marked = true
+ end
+ }
+ puts
+ }
+end
+
+main
+
diff --git a/per-sentence-ter b/per-sentence-ter
new file mode 100755
index 0000000..fa283ef
--- /dev/null
+++ b/per-sentence-ter
@@ -0,0 +1,36 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+require 'trollop'
+require 'tempfile'
+
+# Prints, for each input line, the output of the scorer binary run with "-m ter"
+# on that line and the reference with the same line number.
+def main
+ conf = Trollop::options do
+ opt :input, "input", :type => :string, :default => '-'
+ opt :references, "references", :type => :string, :required => true
+ opt :mteval_bin, "cdec's mteval/fast_score", :type => :string, :default => '/toolbox/cdec-dtrain/mteval/fast_score'
+ end
+
+ refs = ReadFile.readlines_strip conf[:references]
+ input = ReadFile.new conf[:input]
+ i = -1
+ while line = input.gets
+ line.strip!
+ i += 1
+ a = Tempfile.new 'pster'
+ b = Tempfile.new 'pster'
+ a.write line+"\n"
+ b.write refs[i]+"\n"
+ a.close; b.close
+ # honour --mteval-bin; its default is the path that used to be hard-coded here
+ score = `#{conf[:mteval_bin]} -i #{a.path} -r #{b.path} -m ter 2>/dev/null`
+ puts score
+ a.unlink; b.unlink
+ end
+ input.close
+end
+
+main
+
diff --git a/per_sentence_bleu b/per_sentence_bleu
deleted file mode 100755
index 402f364..0000000
--- a/per_sentence_bleu
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-require 'trollop'
-
-def main
- conf = Trollop::options do
- opt :input, "input", :type => :string, :default => '-'
- opt :references, "references", :type => :string, :required => true
- opt :len_hack, "hack of Nakov et al", :type => :int, :default => 0
- opt :n, "N", :default => 4
- end
-
- refs = ReadFile.readlines_strip conf[:references]
- i = -1
- input = ReadFile.new conf[:input]
- while line = input.gets
- i += 1
- if line.strip == ''
- puts 0.0
- next
- end
- puts BLEU::per_sentence_bleu line.strip, refs[i], conf[:n], conf[:len_hack]
- end
- input.close
-end
-
-main
-
diff --git a/per_sentence_bleu_kbest b/per_sentence_bleu_kbest
deleted file mode 100755
index f8bd860..0000000
--- a/per_sentence_bleu_kbest
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-require 'trollop'
-
-def main
- conf = Trollop::options do
- opt :kbests, "kbests", :type => :string, :default => '-'
- opt :references, "references", :type => :string, :required => true
- end
- refs = ReadFile.new conf[:references]
- kbest_lists = read_kbest_lists conf[:kbests]
- i = 0
- kbest_lists.each { |list|
- scores = []
- o = false
- list.each { |e| scores << per_sentence_bleu(e, refs[i]) }
- max = scores.max
- scores.each_with_index { |x,j|
- puts "#{j+1} ||| #{scores[j]} ||| #{list[j]}"
- if scores[j]==max && !o
- puts "^^^ #{j+1} #{max}"
- o = true
- end
- }
- puts
- i += 1
- }
-end
-
-main
-
diff --git a/per_sentence_ter b/per_sentence_ter
deleted file mode 100755
index fa283ef..0000000
--- a/per_sentence_ter
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-require 'trollop'
-require 'tempfile'
-
-def main
- conf = Trollop::options do
- opt :input, "input", :type => :string, :default => '-'
- opt :references, "references", :type => :string, :required => true
- opt :mteval_bin, "cdec's mteval/fast_score", :type => :string, :default => '`/toolbox/cdec-dtrain/mteval/fast_score'
- end
-
- refs = ReadFile.readlines_strip conf[:references]
- input = ReadFile.new conf[:input]
- i = -1
- while line = input.gets
- line.strip!
- i += 1
- a = Tempfile.new 'pster'
- b = Tempfile.new 'pster'
- a.write line+"\n"
- b.write refs[i]+"\n"
- a.close; b.close
- score = `/toolbox/cdec-dtrain/mteval/fast_score -i #{a.path} -r #{b.path} -m ter 2>/dev/null`
- puts score
- a.unlink; b.unlink
- end
- input.close
-end
-
-main
-
diff --git a/preprocess b/preprocess
index 6531bf1..a46b0a8 100755
--- a/preprocess
+++ b/preprocess
@@ -5,5 +5,5 @@ P=`pwd -P`
popd > /dev/null
LANG=$1
-$P/no_non_printables | sed "s|[-,\.]\{4,\}|...|g" | $P/htmlentities 2>htmlentities.$LANG.err | $P/normalize_punctuation 2>normalize-punctuation.$LANG.err | $P/tokenizer-no-escape.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err | $P/lowercase.perl 2>lowercase.$LANG.err
+$P/no-non-printables | sed "s|[-,\.]\{4,\}|...|g" | $P/htmlentities 2>htmlentities.$LANG.err | $P/normalize-punctuation 2>normalize-punctuation.$LANG.err | $P/tokenizer-no-escape.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err | $P/lowercase.perl 2>lowercase.$LANG.err
diff --git a/preprocess-no-lower b/preprocess-no-lower
new file mode 100755
index 0000000..afd87e9
--- /dev/null
+++ b/preprocess-no-lower
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+pushd `dirname $0` > /dev/null
+P=`pwd -P`
+popd > /dev/null
+
+LANG=$1
+$P/no-non-printables | sed "s|[-,\.]\{4,\}|...|g" | $P/htmlentities 2>htmlentities.$LANG.err | $P/normalize-punctuation 2>normalize-punctuation.$LANG.err | $P/tokenizer-no-escape.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err
+
diff --git a/preprocess_no_lower b/preprocess_no_lower
deleted file mode 100755
index 3a4d358..0000000
--- a/preprocess_no_lower
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-pushd `dirname $0` > /dev/null
-P=`pwd -P`
-popd > /dev/null
-
-LANG=$1
-$P/no_non_printables | sed "s|[-,\.]\{4,\}|...|g" | $P/htmlentities 2>htmlentities.$LANG.err | $P/normalize_punctuation 2>normalize-punctuation.$LANG.err | $P/tokenizer-no-escape.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err
-
diff --git a/pt-bloom b/pt-bloom
new file mode 100755
index 0000000..5c2cf01
--- /dev/null
+++ b/pt-bloom
@@ -0,0 +1,24 @@
+#!/usr/bin/env ruby
+
+require 'bloom-filter'
+require 'trollop'
+
+STDIN.set_encoding 'utf-8'
+STDOUT.set_encoding 'utf-8'
+
+conf = Trollop::options do
+ opt :size, "number of entries in the filter", :type => :int, :required => true
+ opt :error_rate, "error rate", :type => :float, :default => 0.01
+end
+
+f = BloomFilter.new conf[:size], conf[:error_rate]
+while line = STDIN.gets
+ src, tgt = splitpipe(line)[0..1]
+ src.strip!
+ tgt.strip!
+ f.insert(src+" ||| "+tgt)
+end
+
+f.dump('pt.bloom')
+f.close
+
diff --git a/pt_bloom b/pt_bloom
deleted file mode 100755
index 5c2cf01..0000000
--- a/pt_bloom
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'bloom-filter'
-require 'trollop'
-
-STDIN.set_encoding 'utf-8'
-STDOUT.set_encoding 'utf-8'
-
-conf = Trollop::options do
- opt :size, "number of entries in the filter", :type => :int, :required => true
- opt :error_rate, "error rate", :type => :float, :default => 0.01
-end
-
-f = BloomFilter.new conf[:size], conf[:error_rate]
-while line = STDIN.gets
- src, tgt = splitpipe(line)[0..1]
- src.strip!
- tgt.strip!
- f.insert(src+" ||| "+tgt)
-end
-
-f.dump('pt.bloom')
-f.close
-
diff --git a/push-rules b/push-rules
new file mode 100755
index 0000000..c97ab80
--- /dev/null
+++ b/push-rules
@@ -0,0 +1,24 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
+a = ReadFile.readlines_strip ARGV[0]
+h = {}
+a.each { |i|
+ h[i] = true
+}
+
+f = ARGV[1].to_f
+while line = STDIN.gets
+ line.strip!
+ s,weight = line.split
+ weight = weight.to_f
+ a,_,target = s.rpartition ":"
+ _,_,source = a.split(":",3)
+ if (h[source])
+ puts "#{s}\t#{weight*f}"
+ else
+ puts line
+ end
+end
+
diff --git a/push_rules b/push_rules
deleted file mode 100755
index c97ab80..0000000
--- a/push_rules
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-
-a = ReadFile.readlines_strip ARGV[0]
-h = {}
-a.each { |i|
- h[i] = true
-}
-
-f = ARGV[1].to_f
-while line = STDIN.gets
- line.strip!
- s,weight = line.split
- weight = weight.to_f
- a,_,target = s.rpartition ":"
- _,_,source = a.split(":",3)
- if (h[source])
- puts "#{s}\t#{weight*f}"
- else
- puts line
- end
-end
-
diff --git a/ruby-eval b/ruby-eval
new file mode 100755
index 0000000..fe0d181
--- /dev/null
+++ b/ruby-eval
@@ -0,0 +1,6 @@
+#!/usr/bin/env ruby
+
+while line = STDIN.gets
+ puts "#{eval line}"
+end
+
diff --git a/ruby_eval b/ruby_eval
deleted file mode 100755
index fe0d181..0000000
--- a/ruby_eval
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env ruby
-
-while line = STDIN.gets
- puts "#{eval line}"
-end
-
diff --git a/rule-shapes b/rule-shapes
new file mode 100755
index 0000000..589a670
--- /dev/null
+++ b/rule-shapes
@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+
+STDIN.set_encoding 'utf-8'
+STDOUT.set_encoding 'utf-8'
+
+def shape s
+ res = []
+ in_t = false
+ s.split.each { |i|
+ if i.match(/\A\[X,\d\]\z/)
+ if in_t
+ in_t = false
+ end
+ res << "NT"
+ next
+ else
+ res << "T" if not in_t
+ in_t = true
+ end
+ }
+ return res
+end
+
+while line = STDIN.gets
+ f, e = line.split(/\t/)
+ f.strip!; e.strip!
+ puts shape(f).join('_')+"-"+shape(e).join('_')
+end
+
diff --git a/rule_shapes b/rule_shapes
deleted file mode 100755
index 589a670..0000000
--- a/rule_shapes
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/env ruby
-
-STDIN.set_encoding 'utf-8'
-STDOUT.set_encoding 'utf-8'
-
-def shape s
- res = []
- in_t = false
- s.split.each { |i|
- if i.match(/\A\[X,\d\]\z/)
- if in_t
- in_t = false
- end
- res << "NT"
- next
- else
- res << "T" if not in_t
- in_t = true
- end
- }
- return res
-end
-
-while line = STDIN.gets
- f, e = line.split(/\t/)
- f.strip!; e.strip!
- puts shape(f).join('_')+"-"+shape(e).join('_')
-end
-
diff --git a/select-from b/select-from
new file mode 100755
index 0000000..7ab40e7
--- /dev/null
+++ b/select-from
@@ -0,0 +1,28 @@
+#!/usr/bin/env ruby
+
+require 'trollop'
+require 'zipf'
+
+opts = Trollop::options do
+ banner "select_from [--invert] -i < "
+ opt :index, "Line numbers to output.", :required => true
+ opt :invert, "Invert selection.", :type => :bool, :short => '-j', :default => false
+end
+
+accept = {}
+
+f = ReadFile.new ARGV[0]
+f.readlines_strip.each { |line|
+ accept[line.strip.to_i] = true
+}
+
+i = 0
+while line = STDIN.gets
+ if accept[i] && !opts[:invert]
+ STDOUT.write line
+ elsif !accept[i] && opts[:invert]
+ STDOUT.write line
+ end
+ i += 1
+end
+
diff --git a/select_from b/select_from
deleted file mode 100755
index 7ab40e7..0000000
--- a/select_from
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'trollop'
-require 'zipf'
-
-opts = Trollop::options do
- banner "select_from [--invert] -i < "
- opt :index, "Line numbers to output.", :required => true
- opt :invert, "Invert selection.", :type => :bool, :short => '-j', :default => false
-end
-
-accept = {}
-
-f = ReadFile.new ARGV[0]
-f.readlines_strip.each { |line|
- accept[line.strip.to_i] = true
-}
-
-i = 0
-while line = STDIN.gets
- if accept[i] && !opts[:invert]
- STDOUT.write line
- elsif !accept[i] && opts[:invert]
- STDOUT.write line
- end
- i += 1
-end
-
diff --git a/sort-features b/sort-features
new file mode 100755
index 0000000..88bd779
--- /dev/null
+++ b/sort-features
@@ -0,0 +1,10 @@
+#!/usr/bin/env ruby
+
+h = {}
+while line = STDIN.gets
+ name, value = line.strip.split
+ h[name] = value.to_f
+end
+
+h.sort_by { |name, value| -value }.each { |name, value| puts "#{name}\t#{value}" }
+
diff --git a/sort_features b/sort_features
deleted file mode 100755
index 88bd779..0000000
--- a/sort_features
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/usr/bin/env ruby
-
-h = {}
-while line = STDIN.gets
- name, value = line.strip.split
- h[name] = value.to_f
-end
-
-h.sort_by { |name, value| -value }.each { |name, value| puts "#{name}\t#{value}" }
-
diff --git a/source-sides b/source-sides
new file mode 100755
index 0000000..b4490c6
--- /dev/null
+++ b/source-sides
@@ -0,0 +1,6 @@
+#!/bin/zsh -x
+
+# Prints the sorted, unique values of field 2 of ' ||| '-separated input,
+# with spaces replaced by '_' and [X,1] / [X,2] replaced by NX.
+split-pipes -f 2 | sort | uniq | sed "s| |_|g" | sed "s|\[X,[12]\]|NX|g"
+
diff --git a/source_sides b/source_sides
deleted file mode 100755
index b4490c6..0000000
--- a/source_sides
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/zsh -x
-
-split_pipes -f 2 | sort | uniq | sed "s| |_|g" | sed "s|\[X,[12]\]|NX|g"
-
diff --git a/split-kbest b/split-kbest
new file mode 100755
index 0000000..ab425b0
--- /dev/null
+++ b/split-kbest
@@ -0,0 +1,24 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
+def write_kbest l, fn
+ f = WriteFile.new fn
+ f.write l.join("")
+ f.close
+end
+
+dir = ARGV[0]
+i = 0
+l = []
+while line = STDIN.gets
+ j = line.split.first.to_i
+ if j == 0 && l.size > 0
+ write_kbest l, "#{dir}/#{i}.gz"
+ l = []
+ i += 1
+ end
+ l << line
+end
+write_kbest l, "#{dir}/#{i}.gz" # last one
+
diff --git a/split-lines b/split-lines
new file mode 100755
index 0000000..14b3a0f
--- /dev/null
+++ b/split-lines
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
+dir = ARGV[0]
+i = 0
+while line = STDIN.gets
+ src, tgt = line.split " ||| "
+ f = WriteFile.new "#{dir}/#{i}.src"
+ f.write line
+ f.close
+ i += 1
+end
+
diff --git a/split-pipes b/split-pipes
new file mode 100755
index 0000000..ce8f018
--- /dev/null
+++ b/split-pipes
@@ -0,0 +1,54 @@
+#!/usr/bin/env ruby
+
+require 'trollop'
+
+STDIN.set_encoding 'utf-8'
+STDOUT.set_encoding 'utf-8'
+
+# Prints one field (--field) or an inclusive range of fields (--field .. --to)
+# of every ' ||| '-separated line read from STDIN; fields are counted from 1.
+conf = Trollop::options do
+ banner "splitpipes -f < "
+ opt :field, "field", :type => :int, :required => true
+ opt :to, "to", :type => :int, :default => nil
+end
+
+
+a = []
+range = false
+if conf[:to]
+ range = true
+end
+
+# invalid arguments are reported on STDERR and end with a failure exit status
+if range
+ if conf[:field] >= conf[:to]
+ STDERR.write "field >= to, exiting\n"
+ exit 1
+ end
+end
+
+if conf[:field]<=0 || (range && conf[:to]<=0)
+ STDERR.write "field or to <= 0, exiting\n"
+ exit 1
+end
+
+while line = STDIN.gets
+ j = 1
+ line.strip.split(' ||| ').each { |i|
+ if range && (conf[:field]..conf[:to]).include?(j)
+ a << i.strip
+ elsif j == conf[:field]
+ puts i.strip
+ break
+ end
+ j += 1
+ }
+ if range
+ puts "#{a.join " ||| "}\n"
+ end
+ a.clear
+end
+
+
+
diff --git a/split_kbest b/split_kbest
deleted file mode 100755
index ab425b0..0000000
--- a/split_kbest
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-
-def write_kbest l, fn
- f = WriteFile.new fn
- f.write l.join("")
- f.close
-end
-
-dir = ARGV[0]
-i = 0
-l = []
-while line = STDIN.gets
- j = line.split.first.to_i
- if j == 0 && l.size > 0
- write_kbest l, "#{dir}/#{i}.gz"
- l = []
- i += 1
- end
- l << line
-end
-write_kbest l, "#{dir}/#{i}.gz" # last one
-
diff --git a/split_lines b/split_lines
deleted file mode 100755
index 14b3a0f..0000000
--- a/split_lines
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-
-dir = ARGV[0]
-i = 0
-while line = STDIN.gets
- src, tgt = line.split " ||| "
- f = WriteFile.new "#{dir}/#{i}.src"
- f.write line
- f.close
- i += 1
-end
-
diff --git a/split_pipes b/split_pipes
deleted file mode 100755
index ce8f018..0000000
--- a/split_pipes
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'trollop'
-
-STDIN.set_encoding 'utf-8'
-STDOUT.set_encoding 'utf-8'
-
-conf = Trollop::options do
- banner "splitpipes -f < "
- opt :field, "field", :type => :int, :required => true
- opt :to, "to", :type => :int, :default => nil
-end
-
-
-a = []
-range = false
-if conf[:to]
- range = true
-end
-
-if range
- if conf[:field] >= conf[:to]
- STDERR.write "field >= to, exiting\n"
- exit
- end
-end
-
-if conf[:field]<=0 || (range && conf[:to]<=0)
- STDERR.write "field or to <= 0, exiting"
- exit
-end
-
-while line = STDIN.gets
- j = 1
- line.strip.split(' ||| ').each { |i|
- if range && (conf[:field]..conf[:to]).include?(j)
- a << i.strip
- elsif j == conf[:field]
- puts i.strip
- break
- end
- j += 1
- }
- if range
- puts "#{a.join " ||| "}\n"
- end
- a.clear
-end
-
-
-
diff --git a/stanford-parser-run b/stanford-parser-run
new file mode 100755
index 0000000..f8d4210
--- /dev/null
+++ b/stanford-parser-run
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# Runs the Stanford lexicalized parser on the newline-separated, tokenized text file
+# given as $1 and writes the joined basic typed dependencies to "$1.stp".
+if [ $# != 1 ]; then
+ echo "$0 text-file"
+ exit 1
+fi
+
+export CLASSPATH=:/toolbox/stanfordparser_3_2_0/*
+
+IN="$1"
+
+cat "$IN" | java -server -mx25000m edu.stanford.nlp.parser.lexparser.LexicalizedParser -nthreads 8 -sentences newline -encoding utf-8 -tokenized -outputFormat "typedDependencies" -outputFormatOptions "basicDependencies" edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz - | tr '\n' '\t' | sed 's/\t\t/\n/g' | sed 's/\t/ /g' | sed 's/ *$//' | sed 's/, /,/g' > "$IN.stp"
+
diff --git a/stanford_parser_run b/stanford_parser_run
deleted file mode 100755
index f8d4210..0000000
--- a/stanford_parser_run
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-if [ $# != 1 ]; then
- echo "$0 text-file"
- exit 1
-fi
-
-export CLASSPATH=:/toolbox/stanfordparser_3_2_0/*
-
-IN=$1
-
-cat $IN | java -server -mx25000m edu.stanford.nlp.parser.lexparser.LexicalizedParser -nthreads 8 -sentences newline -encoding utf-8 -tokenized -outputFormat "typedDependencies" -outputFormatOptions "basicDependencies" edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz - | tr '\n' '\t' | sed 's/\t\t/\n/g' | sed 's/\t/ /g' | sed 's/ *$//' | sed 's/, /,/g' > $IN.stp
-
diff --git a/test/cdec-hg-to-json/cdec.ini b/test/cdec-hg-to-json/cdec.ini
new file mode 100644
index 0000000..1ad25b5
--- /dev/null
+++ b/test/cdec-hg-to-json/cdec.ini
@@ -0,0 +1,5 @@
+formalism=scfg
+grammar=test/cdec-hg-to-json/grammar.gz
+add_pass_through_rules=true
+feature_function=WordPenalty
+intersection_strategy=full
diff --git a/test/cdec-hg-to-json/grammar.gz b/test/cdec-hg-to-json/grammar.gz
new file mode 100644
index 0000000..78dda98
Binary files /dev/null and b/test/cdec-hg-to-json/grammar.gz differ
diff --git a/test/cdec-hg-to-json/hg.json.gz b/test/cdec-hg-to-json/hg.json.gz
new file mode 100644
index 0000000..ed178c6
Binary files /dev/null and b/test/cdec-hg-to-json/hg.json.gz differ
diff --git a/test/cdec-hg-to-json/hg.meta b/test/cdec-hg-to-json/hg.meta
new file mode 100644
index 0000000..d33a54c
--- /dev/null
+++ b/test/cdec-hg-to-json/hg.meta
@@ -0,0 +1,7 @@
+input:
+ 'in dem verzweifelten versuch , an der macht festzuhalten , hat pervez musharraf den rahmen der pakistanischen verfassung verlassen und den notstand ausgerufen .'
+viterbi translation:
+ 'which labor market desperate transformed into attempting gathered by failed to show any the non - is making festzuhalten gathered by pervez musharraf meant to its borders with within than the non - have pakistan 's intelligence relied constitutional for security as a its borders with declared a state of emergency - range missiles .'
+# nodes = 220
+# edges = 16640
+viterbi score = 228.95
diff --git a/test/cdec-hg-to-json/in b/test/cdec-hg-to-json/in
new file mode 100644
index 0000000..7dc411d
--- /dev/null
+++ b/test/cdec-hg-to-json/in
@@ -0,0 +1 @@
+in dem verzweifelten versuch , an der macht festzuhalten , hat pervez musharraf den rahmen der pakistanischen verfassung verlassen und den notstand ausgerufen .
diff --git a/test/cdec-hg-to-json/toy.cdec.ini b/test/cdec-hg-to-json/toy.cdec.ini
new file mode 100644
index 0000000..d4a2896
--- /dev/null
+++ b/test/cdec-hg-to-json/toy.cdec.ini
@@ -0,0 +1,2 @@
+formalism=scfg
+grammar=test/cdec-hg-to-json/toy.grammar
diff --git a/test/cdec-hg-to-json/toy.grammar b/test/cdec-hg-to-json/toy.grammar
new file mode 100644
index 0000000..382c94f
--- /dev/null
+++ b/test/cdec-hg-to-json/toy.grammar
@@ -0,0 +1,12 @@
+[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0
+[NP] ||| ich ||| i ||| logp=-0.5 use_i=1.0
+[NP] ||| ein [NN,1] ||| a [1] ||| logp=0 use_a=1.0
+[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1
+[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1
+[JJ] ||| kleines ||| small ||| logp=0 use_small=1.0
+[JJ] ||| kleines ||| little ||| logp=0 use_little=1.0
+[JJ] ||| grosses ||| big ||| logp=0
+[JJ] ||| grosses ||| large ||| logp=0
+[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0
+[V] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0
+[V] ||| fand ||| found ||| logp=0
diff --git a/test/cdec-hg-to-json/toy.in b/test/cdec-hg-to-json/toy.in
new file mode 100644
index 0000000..e6df927
--- /dev/null
+++ b/test/cdec-hg-to-json/toy.in
@@ -0,0 +1 @@
+ich sah ein kleines haus
diff --git a/test/cdec-hg-to-json/toy.weights b/test/cdec-hg-to-json/toy.weights
new file mode 100644
index 0000000..70075b7
--- /dev/null
+++ b/test/cdec-hg-to-json/toy.weights
@@ -0,0 +1,3 @@
+logp 2
+use_house 0
+use_shell 1
diff --git a/test/cdec-hg-to-json/weights b/test/cdec-hg-to-json/weights
new file mode 100644
index 0000000..7f96f1d
--- /dev/null
+++ b/test/cdec-hg-to-json/weights
@@ -0,0 +1,17 @@
+PhraseModel_0 1.0
+PhraseModel_1 1.0
+PhraseModel_2 1.0
+PhraseModel_3 1.0
+PhraseModel_4 1.0
+PhraseModel_5 1.0
+PhraseModel_6 1.0
+PassThrough -1.0
+PassThrough_1 -1.0
+PassThrough_2 -1.0
+PassThrough_3 -1.0
+PassThrough_4 -1.0
+PassThrough_5 -1.0
+PassThrough_6 -1.0
+Glue 0.1
+LanguageModel 10.0
+LanguageModel_OOV -10
diff --git a/test/cdec_hg_to_json/cdec.ini b/test/cdec_hg_to_json/cdec.ini
deleted file mode 100644
index 1ad25b5..0000000
--- a/test/cdec_hg_to_json/cdec.ini
+++ /dev/null
@@ -1,5 +0,0 @@
-formalism=scfg
-grammar=test/hg2json/grammar.gz
-add_pass_through_rules=true
-feature_function=WordPenalty
-intersection_strategy=full
diff --git a/test/cdec_hg_to_json/grammar.gz b/test/cdec_hg_to_json/grammar.gz
deleted file mode 100644
index 78dda98..0000000
Binary files a/test/cdec_hg_to_json/grammar.gz and /dev/null differ
diff --git a/test/cdec_hg_to_json/hg.json.gz b/test/cdec_hg_to_json/hg.json.gz
deleted file mode 100644
index ed178c6..0000000
Binary files a/test/cdec_hg_to_json/hg.json.gz and /dev/null differ
diff --git a/test/cdec_hg_to_json/hg.meta b/test/cdec_hg_to_json/hg.meta
deleted file mode 100644
index d33a54c..0000000
--- a/test/cdec_hg_to_json/hg.meta
+++ /dev/null
@@ -1,7 +0,0 @@
-input:
- 'in dem verzweifelten versuch , an der macht festzuhalten , hat pervez musharraf den rahmen der pakistanischen verfassung verlassen und den notstand ausgerufen .'
-viterbi translation:
- 'which labor market desperate transformed into attempting gathered by failed to show any the non - is making festzuhalten gathered by pervez musharraf meant to its borders with within than the non - have pakistan 's intelligence relied constitutional for security as a its borders with declared a state of emergency - range missiles .'
-# nodes = 220
-# edges = 16640
-viterbi score = 228.95
diff --git a/test/cdec_hg_to_json/in b/test/cdec_hg_to_json/in
deleted file mode 100644
index 7dc411d..0000000
--- a/test/cdec_hg_to_json/in
+++ /dev/null
@@ -1 +0,0 @@
-in dem verzweifelten versuch , an der macht festzuhalten , hat pervez musharraf den rahmen der pakistanischen verfassung verlassen und den notstand ausgerufen .
diff --git a/test/cdec_hg_to_json/toy.cdec.ini b/test/cdec_hg_to_json/toy.cdec.ini
deleted file mode 100644
index d4a2896..0000000
--- a/test/cdec_hg_to_json/toy.cdec.ini
+++ /dev/null
@@ -1,2 +0,0 @@
-formalism=scfg
-grammar=test/hg2json/toy.grammar
diff --git a/test/cdec_hg_to_json/toy.grammar b/test/cdec_hg_to_json/toy.grammar
deleted file mode 100644
index 382c94f..0000000
--- a/test/cdec_hg_to_json/toy.grammar
+++ /dev/null
@@ -1,12 +0,0 @@
-[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0
-[NP] ||| ich ||| i ||| logp=-0.5 use_i=1.0
-[NP] ||| ein [NN,1] ||| a [1] ||| logp=0 use_a=1.0
-[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1
-[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1
-[JJ] ||| kleines ||| small ||| logp=0 use_small=1.0
-[JJ] ||| kleines ||| little ||| logp=0 use_little=1.0
-[JJ] ||| grosses ||| big ||| logp=0
-[JJ] ||| grosses ||| large ||| logp=0
-[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0
-[V] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0
-[V] ||| fand ||| found ||| logp=0
diff --git a/test/cdec_hg_to_json/toy.in b/test/cdec_hg_to_json/toy.in
deleted file mode 100644
index e6df927..0000000
--- a/test/cdec_hg_to_json/toy.in
+++ /dev/null
@@ -1 +0,0 @@
-ich sah ein kleines haus
diff --git a/test/cdec_hg_to_json/toy.weights b/test/cdec_hg_to_json/toy.weights
deleted file mode 100644
index 70075b7..0000000
--- a/test/cdec_hg_to_json/toy.weights
+++ /dev/null
@@ -1,3 +0,0 @@
-logp 2
-use_house 0
-use_shell 1
diff --git a/test/cdec_hg_to_json/weights b/test/cdec_hg_to_json/weights
deleted file mode 100644
index 7f96f1d..0000000
--- a/test/cdec_hg_to_json/weights
+++ /dev/null
@@ -1,17 +0,0 @@
-PhraseModel_0 1.0
-PhraseModel_1 1.0
-PhraseModel_2 1.0
-PhraseModel_3 1.0
-PhraseModel_4 1.0
-PhraseModel_5 1.0
-PhraseModel_6 1.0
-PassThrough -1.0
-PassThrough_1 -1.0
-PassThrough_2 -1.0
-PassThrough_3 -1.0
-PassThrough_4 -1.0
-PassThrough_5 -1.0
-PassThrough_6 -1.0
-Glue 0.1
-LanguageModel 10.0
-LanguageModel_OOV -10
diff --git a/test/kbest-bleu-oracles/debug.kbests b/test/kbest-bleu-oracles/debug.kbests
new file mode 100644
index 0000000..1e9c894
--- /dev/null
+++ b/test/kbest-bleu-oracles/debug.kbests
@@ -0,0 +1,4 @@
+0 ||| a b c d ||| x=1 ||| 10
+0 ||| a b d c ||| x=1 ||| 9
+0 ||| a d b c ||| x=1 ||| 8
+0 ||| d a b c ||| x=1 ||| 7
diff --git a/test/kbest-bleu-oracles/debug.refs b/test/kbest-bleu-oracles/debug.refs
new file mode 100644
index 0000000..8e13e46
--- /dev/null
+++ b/test/kbest-bleu-oracles/debug.refs
@@ -0,0 +1 @@
+a b c d
diff --git a/test/kbest-bleu-oracles/example.kbests b/test/kbest-bleu-oracles/example.kbests
new file mode 100644
index 0000000..1126f1f
--- /dev/null
+++ b/test/kbest-bleu-oracles/example.kbests
@@ -0,0 +1,100 @@
+0 ||| europe races house divided ||| WordPenalty=-1.73718 LanguageModel=-18.15 PhraseModel_0=2.2467 PhraseModel_1=4.27323 PhraseModel_2=2.20952 PhraseModel_3=6.01559 PhraseModel_4=1.19831 PhraseModel_5=1 PhraseModel_6=1 ||| -61.4791
+0 ||| europe races divided house ||| WordPenalty=-1.73718 Glue=1 LanguageModel=-18.7337 PhraseModel_0=2.75576 PhraseModel_1=8.10398 PhraseModel_2=5.5382 PhraseModel_3=6.01559 PhraseModel_4=1.19831 PhraseModel_5=0 PhraseModel_6=0 ||| -61.5856
+0 ||| europe after racial house divided ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-21.3699 PhraseModel_0=1.68395 PhraseModel_1=4.27323 PhraseModel_2=2.67025 PhraseModel_3=4.44249 PhraseModel_4=1.87098 PhraseModel_5=1 PhraseModel_6=1 ||| -63.2049
+0 ||| europe after race divided house ||| WordPenalty=-2.17147 Glue=2 LanguageModel=-21.1973 PhraseModel_0=2.47176 PhraseModel_1=8.10398 PhraseModel_2=5.73009 PhraseModel_3=5.07197 PhraseModel_4=2.11131 PhraseModel_5=0 PhraseModel_6=0 ||| -63.4497
+0 ||| europe after races house divided ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-22.0216 PhraseModel_0=1.84876 PhraseModel_1=4.27323 PhraseModel_2=2.51055 PhraseModel_3=3.81707 PhraseModel_4=2.04167 PhraseModel_5=1 PhraseModel_6=1 ||| -63.7649
+0 ||| europe after races divided house ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-22.6053 PhraseModel_0=2.35782 PhraseModel_1=8.10398 PhraseModel_2=5.83923 PhraseModel_3=3.81707 PhraseModel_4=2.04167 PhraseModel_5=0 PhraseModel_6=0 ||| -63.8715
+0 ||| europe after racial divided house ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-22.2498 PhraseModel_0=2.19301 PhraseModel_1=8.10398 PhraseModel_2=5.99893 PhraseModel_3=4.44249 PhraseModel_4=1.87098 PhraseModel_5=0 PhraseModel_6=0 ||| -63.9867
+0 ||| europe following racial house divided ||| WordPenalty=-2.17147 Glue=2 LanguageModel=-21.941 PhraseModel_0=1.60477 PhraseModel_1=4.27323 PhraseModel_2=2.73719 PhraseModel_3=4.67218 PhraseModel_4=2.38101 PhraseModel_5=1 PhraseModel_6=1 ||| -64.7057
+0 ||| divided europe after racial house ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-21.6711 PhraseModel_0=3.23398 PhraseModel_1=8.10398 PhraseModel_2=5.11818 PhraseModel_3=4.44249 PhraseModel_4=1.87098 PhraseModel_5=0 PhraseModel_6=0 ||| -65.0513
+0 ||| europe race divided house ||| WordPenalty=-1.73718 LanguageModel=-19.0747 PhraseModel_0=2.95643 PhraseModel_1=8.10398 PhraseModel_2=5.34994 PhraseModel_3=7.27048 PhraseModel_4=1.26795 PhraseModel_5=0 PhraseModel_6=0 ||| -65.348
+1 ||| a common feature of europe 's extreme right is its racism and the fact that they use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=4 LanguageModel=-36.6093 PhraseModel_0=6.68111 PhraseModel_1=22.5747 PhraseModel_2=16.1531 PhraseModel_3=22.3782 PhraseModel_4=9.65239 PhraseModel_5=1 PhraseModel_6=1 ||| -136.567
+1 ||| a common feature of europe 's extreme right is its racism and the fact that they use the immigration as a political lever . ||| WordPenalty=-10.4231 Glue=5 LanguageModel=-39.0118 PhraseModel_0=5.94865 PhraseModel_1=18.9704 PhraseModel_2=13.3916 PhraseModel_3=22.3782 PhraseModel_4=10.0435 PhraseModel_5=1 PhraseModel_6=1 ||| -137.254
+1 ||| a common feature of europe 's extreme right is its racism and the fact that you use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=5 LanguageModel=-38.0283 PhraseModel_0=6.71292 PhraseModel_1=22.8797 PhraseModel_2=16.4585 PhraseModel_3=22.3803 PhraseModel_4=9.62847 PhraseModel_5=1 PhraseModel_6=1 ||| -140.071
+1 ||| a common feature of europe 's extreme right is its racism , and the fact that they use the immigration as a political lever . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-39.7992 PhraseModel_0=7.42421 PhraseModel_1=21.4489 PhraseModel_2=14.4345 PhraseModel_3=22.3782 PhraseModel_4=11.152 PhraseModel_5=1 PhraseModel_6=1 ||| -142.605
+1 ||| a common feature of europe 's extreme right is its racism , and the fact that you use immigration as a political lever . ||| WordPenalty=-10.4231 Glue=4 LanguageModel=-38.8156 PhraseModel_0=7.75494 PhraseModel_1=22.8797 PhraseModel_2=15.4378 PhraseModel_3=22.3803 PhraseModel_4=10.7369 PhraseModel_5=1 PhraseModel_6=1 ||| -142.999
+1 ||| a common feature of europe 's extreme right is its racism and the fact that you use the immigration as a political lever . ||| WordPenalty=-10.4231 Glue=5 LanguageModel=-40.3141 PhraseModel_0=6.39864 PhraseModel_1=23.0373 PhraseModel_2=16.9021 PhraseModel_3=22.3976 PhraseModel_4=10.0196 PhraseModel_5=1 PhraseModel_6=1 ||| -143.611
+1 ||| one common feature of europe 's extreme right is its racism and the fact that they use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=1 LanguageModel=-37.7197 PhraseModel_0=8.27536 PhraseModel_1=21.7089 PhraseModel_2=13.9878 PhraseModel_3=22.5681 PhraseModel_4=10.7747 PhraseModel_5=1 PhraseModel_6=2 ||| -144.987
+1 ||| one common feature of europe 's extreme right is its racism and the fact that they use the immigration as a political lever . ||| WordPenalty=-10.4231 Glue=1 LanguageModel=-40.1222 PhraseModel_0=7.73842 PhraseModel_1=18.7826 PhraseModel_2=11.6924 PhraseModel_3=22.5681 PhraseModel_4=11.1658 PhraseModel_5=1 PhraseModel_6=2 ||| -146.502
+1 ||| a common feature of europe 's extreme right is its racism , and the fact that you use the immigration as a political lever . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-41.1014 PhraseModel_0=7.44067 PhraseModel_1=23.0373 PhraseModel_2=15.8814 PhraseModel_3=22.3976 PhraseModel_4=11.1281 PhraseModel_5=1 PhraseModel_6=1 ||| -146.539
+1 ||| a shared feature of europe 's extreme right is its racism and the fact that they use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=1 LanguageModel=-40.0778 PhraseModel_0=7.91847 PhraseModel_1=22.5747 PhraseModel_2=14.9464 PhraseModel_3=22.3052 PhraseModel_4=10.5431 PhraseModel_5=1 PhraseModel_6=1 ||| -146.956
+2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.0176 Glue=6 LanguageModel=-100.983 PassThrough=3 PhraseModel_0=15.0383 PhraseModel_1=33.3621 PhraseModel_2=19.8383 PhraseModel_3=32.881 PhraseModel_4=23.2559 PhraseModel_5=0 PhraseModel_6=1 ||| -300.653
+2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of political parties or movements , which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=7 LanguageModel=-100.556 PassThrough=3 PhraseModel_0=16.5071 PhraseModel_1=32.7586 PhraseModel_2=17.7282 PhraseModel_3=33.296 PhraseModel_4=25.589 PhraseModel_5=0 PhraseModel_6=1 ||| -302.029
+2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed a common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.0176 Glue=6 LanguageModel=-99.7968 PassThrough=3 PhraseModel_0=15.033 PhraseModel_1=35.4231 PhraseModel_2=21.6674 PhraseModel_3=33.4947 PhraseModel_4=24.5697 PhraseModel_5=1 PhraseModel_6=1 ||| -302.155
+2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : the rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=6 LanguageModel=-100.68 PassThrough=4 PhraseModel_0=16.6403 PhraseModel_1=35.0625 PhraseModel_2=19.9175 PhraseModel_3=32.7793 PhraseModel_4=24.2261 PhraseModel_5=0 PhraseModel_6=0 ||| -302.466
+2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-22.5833 Glue=6 LanguageModel=-101.623 PassThrough=3 PhraseModel_0=14.5035 PhraseModel_1=33.3891 PhraseModel_2=20.3355 PhraseModel_3=33.2525 PhraseModel_4=22.8878 PhraseModel_5=0 PhraseModel_6=1 ||| -302.743
+2 ||| the lega nord in italy , the vlaams block , the followers of le pen 's national front in france , the netherlands are examples of parties or movements , which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-22.5833 Glue=6 LanguageModel=-97.1998 PassThrough=2 PhraseModel_0=17.8853 PhraseModel_1=39.5922 PhraseModel_2=23.2216 PhraseModel_3=33.6892 PhraseModel_4=23.0537 PhraseModel_5=0 PhraseModel_6=1 ||| -302.874
+2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of the immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=6 LanguageModel=-102.542 PassThrough=3 PhraseModel_0=15.8186 PhraseModel_1=35.0751 PhraseModel_2=20.6755 PhraseModel_3=32.881 PhraseModel_4=23.6471 PhraseModel_5=0 PhraseModel_6=0 ||| -303.305
+2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of immigration and to call for a simplified policy to regulate them . ||| WordPenalty=-22.5833 Glue=11 LanguageModel=-102.736 PassThrough=3 PhraseModel_0=13.1928 PhraseModel_1=35.2776 PhraseModel_2=23.3398 PhraseModel_3=33.1527 PhraseModel_4=21.9207 PhraseModel_5=0 PhraseModel_6=1 ||| -303.344
+2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of immigration and to call for a simplified policy in order to regulate it . ||| WordPenalty=-23.4519 Glue=10 LanguageModel=-104.547 PassThrough=3 PhraseModel_0=13.2351 PhraseModel_1=35.2776 PhraseModel_2=23.3336 PhraseModel_3=32.2759 PhraseModel_4=23.8622 PhraseModel_5=0 PhraseModel_6=1 ||| -303.438
+2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of political parties or movements , which have formed a common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=7 LanguageModel=-99.3692 PassThrough=3 PhraseModel_0=16.5018 PhraseModel_1=34.8196 PhraseModel_2=19.5572 PhraseModel_3=33.9097 PhraseModel_4=26.9028 PhraseModel_5=1 PhraseModel_6=1 ||| -303.531
+3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not to go too soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-15.6346 Glue=4 LanguageModel=-83.9883 PhraseModel_0=10.8504 PhraseModel_1=36.0092 PhraseModel_2=25.6962 PhraseModel_3=18.8196 PhraseModel_4=12.4793 PhraseModel_5=0 PhraseModel_6=0 ||| -236.305
+3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not go too soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-83.8116 PhraseModel_0=10.6743 PhraseModel_1=36.0092 PhraseModel_2=25.8212 PhraseModel_3=18.8196 PhraseModel_4=12.1849 PhraseModel_5=0 PhraseModel_6=0 ||| -236.56
+3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not to go too soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-82.9542 PhraseModel_0=11.3166 PhraseModel_1=35.9808 PhraseModel_2=25.3584 PhraseModel_3=19.1145 PhraseModel_4=12.9314 PhraseModel_5=0 PhraseModel_6=0 ||| -236.57
+3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( sadly not to go too soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-15.6346 Glue=4 LanguageModel=-82.211 PhraseModel_0=11.9448 PhraseModel_1=39.5089 PhraseModel_2=28.0752 PhraseModel_3=18.9139 PhraseModel_4=13.4713 PhraseModel_5=0 PhraseModel_6=0 ||| -236.761
+3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not go too soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-14.766 Glue=4 LanguageModel=-82.7775 PhraseModel_0=11.1405 PhraseModel_1=35.9808 PhraseModel_2=25.4834 PhraseModel_3=19.1145 PhraseModel_4=12.6371 PhraseModel_5=0 PhraseModel_6=0 ||| -236.825
+3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( sadly not to go too soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-81.1769 PhraseModel_0=12.411 PhraseModel_1=39.4805 PhraseModel_2=27.7374 PhraseModel_3=19.2089 PhraseModel_4=13.9234 PhraseModel_5=0 PhraseModel_6=0 ||| -237.026
+3 ||| while individuals like jörg haidar and jean @-@ marie le pen does not , unfortunately , and ( soon ) go , the race will come from the european policy to disappear anytime soon . ||| WordPenalty=-15.2003 Glue=5 LanguageModel=-74.4647 PhraseModel_0=8.03162 PhraseModel_1=32.0087 PhraseModel_2=24.9113 PhraseModel_3=27.2046 PhraseModel_4=19.1921 PhraseModel_5=2 PhraseModel_6=4 ||| -237.241
+3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and go ( unfortunately not soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-14.766 Glue=4 LanguageModel=-80.1276 PhraseModel_0=12.2413 PhraseModel_1=38.9132 PhraseModel_2=27.227 PhraseModel_3=20.0911 PhraseModel_4=10.4871 PhraseModel_5=0 PhraseModel_6=0 ||| -237.267
+3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and go ( unfortunately not soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-14.3317 Glue=4 LanguageModel=-79.0935 PhraseModel_0=12.7075 PhraseModel_1=38.8848 PhraseModel_2=26.8892 PhraseModel_3=20.3861 PhraseModel_4=10.9392 PhraseModel_5=0 PhraseModel_6=0 ||| -237.532
+3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not go too soon ) , will not disappear as soon the race from the european policy . ||| WordPenalty=-14.766 Glue=4 LanguageModel=-79.8077 PhraseModel_0=11.0526 PhraseModel_1=33.8577 PhraseModel_2=23.7301 PhraseModel_3=20.8921 PhraseModel_4=10.9702 PhraseModel_5=0 PhraseModel_6=2 ||| -237.654
+4 ||| an aging population and ever more open borders the racist fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=4 LanguageModel=-34.4131 PhraseModel_0=7.06252 PhraseModel_1=19.7388 PhraseModel_2=13.2138 PhraseModel_3=17.7775 PhraseModel_4=5.47301 PhraseModel_5=0 PhraseModel_6=1 ||| -127.294
+4 ||| an aging population and ever more open borders the racist fragmentation in european countries . ||| WordPenalty=-6.51442 Glue=4 LanguageModel=-33.7446 PhraseModel_0=7.05156 PhraseModel_1=19.8444 PhraseModel_2=13.3428 PhraseModel_3=18.5456 PhraseModel_4=5.26208 PhraseModel_5=0 PhraseModel_6=1 ||| -128.424
+4 ||| an aging population and ever more open borders increase racial fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=3 LanguageModel=-35.0385 PhraseModel_0=8.57304 PhraseModel_1=21.0335 PhraseModel_2=12.975 PhraseModel_3=15.9006 PhraseModel_4=8.12696 PhraseModel_5=0 PhraseModel_6=1 ||| -128.599
+4 ||| an aging population and ever more open borders multiply the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=5 LanguageModel=-39.5074 PhraseModel_0=7.53377 PhraseModel_1=20.0813 PhraseModel_2=13.1161 PhraseModel_3=13.3764 PhraseModel_4=7.95875 PhraseModel_5=0 PhraseModel_6=1 ||| -129.817
+4 ||| an aging population and ever more open borders increase the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=6 LanguageModel=-38.8411 PhraseModel_0=6.66847 PhraseModel_1=20.0813 PhraseModel_2=13.8817 PhraseModel_3=15.1212 PhraseModel_4=7.50646 PhraseModel_5=0 PhraseModel_6=1 ||| -129.94
+4 ||| an aging population and ever more open borders reproduce the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=5 LanguageModel=-39.5442 PhraseModel_0=7.70986 PhraseModel_1=20.0813 PhraseModel_2=12.97 PhraseModel_3=13.3002 PhraseModel_4=8.03794 PhraseModel_5=0 PhraseModel_6=1 ||| -130.137
+4 ||| an aging population and ever more open borders multiplying the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=5 LanguageModel=-39.82 PhraseModel_0=7.53377 PhraseModel_1=20.0813 PhraseModel_2=13.1161 PhraseModel_3=13.2318 PhraseModel_4=7.83382 PhraseModel_5=0 PhraseModel_6=1 ||| -130.257
+4 ||| an aging population and ever more open borders the racial fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=4 LanguageModel=-33.155 PhraseModel_0=8.15791 PhraseModel_1=23.1696 PhraseModel_2=15.5542 PhraseModel_3=18.5569 PhraseModel_4=6.35432 PhraseModel_5=0 PhraseModel_6=1 ||| -130.313
+4 ||| an aging population and ever more open borders grows the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=6 LanguageModel=-39.0359 PhraseModel_0=7.57156 PhraseModel_1=20.0813 PhraseModel_2=13.1283 PhraseModel_3=14.2568 PhraseModel_4=8.43588 PhraseModel_5=0 PhraseModel_6=1 ||| -130.549
+4 ||| an aging population and ever more open borders multiply racist fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=3 LanguageModel=-38.6378 PhraseModel_0=8.05048 PhraseModel_1=21.0335 PhraseModel_2=13.5162 PhraseModel_3=13.3764 PhraseModel_4=7.69795 PhraseModel_5=0 PhraseModel_6=1 ||| -130.598
+5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.766 Glue=5 LanguageModel=-59.9487 PassThrough=1 PhraseModel_0=11.4712 PhraseModel_1=29.458 PhraseModel_2=19.0438 PhraseModel_3=37.8219 PhraseModel_4=21.1861 PhraseModel_5=0 PhraseModel_6=2 ||| -225.247
+5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.766 Glue=7 LanguageModel=-60.8539 PassThrough=2 PhraseModel_0=11.5126 PhraseModel_1=29.759 PhraseModel_2=19.2199 PhraseModel_3=37.2073 PhraseModel_4=20.0207 PhraseModel_5=0 PhraseModel_6=1 ||| -225.823
+5 ||| the big parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.766 Glue=8 LanguageModel=-61.5768 PassThrough=1 PhraseModel_0=12.0156 PhraseModel_1=33.4151 PhraseModel_2=22.0984 PhraseModel_3=37.22 PhraseModel_4=20.669 PhraseModel_5=0 PhraseModel_6=0 ||| -226.173
+5 ||| the big parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.766 Glue=5 LanguageModel=-60.6716 PassThrough=1 PhraseModel_0=11.3742 PhraseModel_1=29.458 PhraseModel_2=19.123 PhraseModel_3=37.5989 PhraseModel_4=21.4664 PhraseModel_5=0 PhraseModel_6=2 ||| -226.174
+5 ||| the major parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.3317 Glue=5 LanguageModel=-58.5133 PassThrough=1 PhraseModel_0=11.8665 PhraseModel_1=28.9017 PhraseModel_2=18.1987 PhraseModel_3=39.1001 PhraseModel_4=20.9503 PhraseModel_5=0 PhraseModel_6=2 ||| -226.221
+5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and counter all prospects have hoped that it will soon disappear . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-63.5417 PassThrough=1 PhraseModel_0=12.3574 PhraseModel_1=29.4845 PhraseModel_2=18.2384 PhraseModel_3=34.0032 PhraseModel_4=21.7025 PhraseModel_5=0 PhraseModel_6=2 ||| -226.609
+5 ||| the major parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.3317 Glue=7 LanguageModel=-59.4184 PassThrough=2 PhraseModel_0=11.9079 PhraseModel_1=29.2027 PhraseModel_2=18.3748 PhraseModel_3=38.4854 PhraseModel_4=19.7848 PhraseModel_5=0 PhraseModel_6=1 ||| -226.796
+5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and allen prospects have hoped that it will soon disappear . ||| WordPenalty=-14.766 Glue=5 LanguageModel=-61.4797 PassThrough=2 PhraseModel_0=11.3037 PhraseModel_1=26.9794 PhraseModel_2=16.7321 PhraseModel_3=36.9453 PhraseModel_4=21.0569 PhraseModel_5=0 PhraseModel_6=2 ||| -227.012
+5 ||| the big parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.3317 Glue=8 LanguageModel=-60.1414 PassThrough=1 PhraseModel_0=12.4109 PhraseModel_1=32.8588 PhraseModel_2=21.2533 PhraseModel_3=38.4981 PhraseModel_4=20.4332 PhraseModel_5=0 PhraseModel_6=0 ||| -227.147
+5 ||| the big parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.3317 Glue=5 LanguageModel=-59.2362 PassThrough=1 PhraseModel_0=11.7696 PhraseModel_1=28.9017 PhraseModel_2=18.2779 PhraseModel_3=38.877 PhraseModel_4=21.2305 PhraseModel_5=0 PhraseModel_6=2 ||| -227.147
+6 ||| but it will not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=1 LanguageModel=-22.9084 PhraseModel_0=4.65824 PhraseModel_1=11.8066 PhraseModel_2=7.53935 PhraseModel_3=10.1842 PhraseModel_4=10.0823 PhraseModel_5=0 PhraseModel_6=1 ||| -74.8416
+6 ||| but that it will not , as is evident from the history of racism in america . ||| WordPenalty=-7.38301 Glue=1 LanguageModel=-25.5326 PhraseModel_0=4.09397 PhraseModel_1=11.8066 PhraseModel_2=8.04227 PhraseModel_3=8.52096 PhraseModel_4=9.91846 PhraseModel_5=0 PhraseModel_6=1 ||| -75.1773
+6 ||| but this will not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=1 LanguageModel=-23.6293 PhraseModel_0=4.59129 PhraseModel_1=11.8066 PhraseModel_2=7.59734 PhraseModel_3=9.74767 PhraseModel_4=9.60367 PhraseModel_5=0 PhraseModel_6=1 ||| -75.5277
+6 ||| but that will not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=1 LanguageModel=-23.682 PhraseModel_0=4.35721 PhraseModel_1=11.8066 PhraseModel_2=7.80819 PhraseModel_3=9.95635 PhraseModel_4=9.51798 PhraseModel_5=0 PhraseModel_6=1 ||| -75.5534
+6 ||| but it is not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=2 LanguageModel=-21.7223 PhraseModel_0=5.33564 PhraseModel_1=14.6359 PhraseModel_2=9.75115 PhraseModel_3=10.8638 PhraseModel_4=10.5328 PhraseModel_5=0 PhraseModel_6=1 ||| -76.6276
+6 ||| but this is not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=2 LanguageModel=-22.3075 PhraseModel_0=5.26869 PhraseModel_1=14.6359 PhraseModel_2=9.80914 PhraseModel_3=10.4272 PhraseModel_4=10.0542 PhraseModel_5=0 PhraseModel_6=1 ||| -77.004
+6 ||| but that is not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=2 LanguageModel=-22.4246 PhraseModel_0=5.03461 PhraseModel_1=14.6359 PhraseModel_2=10.02 PhraseModel_3=10.6359 PhraseModel_4=9.9685 PhraseModel_5=0 PhraseModel_6=1 ||| -77.1766
+6 ||| this but it will not , as is evident from the history of racism in america . ||| WordPenalty=-7.38301 Glue=1 LanguageModel=-27.463 PhraseModel_0=4.16583 PhraseModel_1=14.2852 PhraseModel_2=10.4324 PhraseModel_3=8.31228 PhraseModel_4=10.0042 PhraseModel_5=0 PhraseModel_6=1 ||| -80.7433
+6 ||| but that it will not , as the history of racism in america clearly shows . ||| WordPenalty=-6.94871 Glue=3 LanguageModel=-29.6579 PhraseModel_0=3.45482 PhraseModel_1=14.6359 PhraseModel_2=11.3575 PhraseModel_3=7.1068 PhraseModel_4=5.12435 PhraseModel_5=0 PhraseModel_6=0 ||| -81.1791
+6 ||| but that there will not , as is evident from the history of racism in america . ||| WordPenalty=-7.38301 Glue=1 LanguageModel=-27.2814 PhraseModel_0=4.61416 PhraseModel_1=13.9975 PhraseModel_2=9.83682 PhraseModel_3=8.54324 PhraseModel_4=10.4476 PhraseModel_5=0 PhraseModel_6=1 ||| -81.6067
+7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-192.36 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=16.9337 PhraseModel_1=46.7282 PhraseModel_2=30.7137 PhraseModel_3=41.8218 PhraseModel_4=23.2319 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.5833 Glue=7 ||| -538.223
+7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-190.966 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.3513 PhraseModel_1=46.7282 PhraseModel_2=30.3066 PhraseModel_3=42.4355 PhraseModel_4=22.8408 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.311
+7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was as important as income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-190.031 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.2348 PhraseModel_1=46.7282 PhraseModel_2=30.4157 PhraseModel_3=43.1826 PhraseModel_4=22.1641 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-21.7147 Glue=7 ||| -538.563
+7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as the income - if not even more important - to define political zuneigungen and attitudes . ||| LanguageModel=-191.666 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.2348 PhraseModel_1=46.7282 PhraseModel_2=30.4919 PhraseModel_3=42.4907 PhraseModel_4=23.8489 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.5833 Glue=7 ||| -538.592
+7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-191.425 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=16.8923 PhraseModel_1=46.7282 PhraseModel_2=30.7515 PhraseModel_3=42.5689 PhraseModel_4=22.5552 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.643
+7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as income - if not even more important - to define political zuneigungen and attitudes . ||| LanguageModel=-190.272 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.6523 PhraseModel_1=46.7282 PhraseModel_2=30.0847 PhraseModel_3=43.1044 PhraseModel_4=23.4578 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.681
+7 ||| the relations between the races in the united states for decades - and still do today - at the centre of political debate . the went so far that segregation was just as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-195.148 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=16.8352 PhraseModel_1=42.9613 PhraseModel_2=27.1493 PhraseModel_3=39.4459 PhraseModel_4=20.4386 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.866
+7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was as important as income - if not even more important - to define political zuneigungen and attitudes . ||| LanguageModel=-189.336 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.5358 PhraseModel_1=46.7282 PhraseModel_2=30.1939 PhraseModel_3=43.8515 PhraseModel_4=22.7811 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-21.7147 Glue=7 ||| -538.932
+7 ||| the relations between the races in the united states for decades - and still do today - at the centre of political debate . the went so far that segregation was just as important as income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-193.753 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.2528 PhraseModel_1=42.9613 PhraseModel_2=26.7421 PhraseModel_3=40.0596 PhraseModel_4=20.0474 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-21.7147 Glue=7 ||| -538.954
+7 ||| the relations between the races in the united states for decades - and still do today - at the centre of political debate . it went so far as to say that segregation was just as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-193.555 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=18.5925 PhraseModel_1=48.5668 PhraseModel_2=31.0822 PhraseModel_3=39.825 PhraseModel_4=27.6051 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-23.4519 Glue=8 ||| -538.976
+8 ||| the first step is to deal with the race to understand cause and consequence of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-48.8088 PhraseModel_0=10.2462 PhraseModel_1=27.2091 PhraseModel_2=17.5849 PhraseModel_3=29.1579 PhraseModel_4=12.7191 PhraseModel_5=0 PhraseModel_6=1 ||| -184.302
+8 ||| the first step is to deal with the race to understand cause and consequence of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-47.4794 PhraseModel_0=11.4183 PhraseModel_1=27.831 PhraseModel_2=16.98 PhraseModel_3=29.6069 PhraseModel_4=12.2741 PhraseModel_5=0 PhraseModel_6=1 ||| -185.222
+8 ||| the first step is to deal with the race to understand cause and effects of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-49.7357 PhraseModel_0=10.2462 PhraseModel_1=27.2091 PhraseModel_2=17.5849 PhraseModel_3=28.6034 PhraseModel_4=11.5619 PhraseModel_5=0 PhraseModel_6=1 ||| -185.485
+8 ||| the first step is to deal with the race to understand cause and consequences of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-50.2562 PhraseModel_0=10.5379 PhraseModel_1=30.9739 PhraseModel_2=20.9386 PhraseModel_3=28.0205 PhraseModel_4=10.9116 PhraseModel_5=0 PhraseModel_6=0 ||| -185.879
+8 ||| the first step is to deal with the race to understand cause and consequence of racist hostility , even if this means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-47.9671 PhraseModel_0=11.4465 PhraseModel_1=26.85 PhraseModel_2=16.0501 PhraseModel_3=29.6623 PhraseModel_4=12.3598 PhraseModel_5=0 PhraseModel_6=1 ||| -185.949
+8 ||| the first step is to deal with the race to understand cause and effects of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-48.4063 PhraseModel_0=11.4183 PhraseModel_1=27.831 PhraseModel_2=16.98 PhraseModel_3=29.0524 PhraseModel_4=11.1169 PhraseModel_5=0 PhraseModel_6=1 ||| -186.405
+8 ||| the first step is to deal with the race to understand cause and consequences of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-48.9268 PhraseModel_0=11.7099 PhraseModel_1=31.5959 PhraseModel_2=20.3337 PhraseModel_3=28.4695 PhraseModel_4=10.4666 PhraseModel_5=0 PhraseModel_6=0 ||| -186.799
+8 ||| the first step is to deal with the race to understand cause and effects of racist hostility , even if this means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-48.8939 PhraseModel_0=11.4465 PhraseModel_1=26.85 PhraseModel_2=16.0501 PhraseModel_3=29.1078 PhraseModel_4=11.2026 PhraseModel_5=0 PhraseModel_6=1 ||| -187.132
+8 ||| is the first step to deal with the race to understand cause and consequence of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=3 LanguageModel=-50.3885 PhraseModel_0=10.6908 PhraseModel_1=24.5899 PhraseModel_2=14.5662 PhraseModel_3=29.0596 PhraseModel_4=12.2741 PhraseModel_5=0 PhraseModel_6=1 ||| -187.259
+8 ||| the first step is to deal with the race , to understand cause and consequence of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-11.2917 Glue=5 LanguageModel=-51.3999 PhraseModel_0=9.91899 PhraseModel_1=27.2091 PhraseModel_2=17.8675 PhraseModel_3=28.6105 PhraseModel_4=13.0872 PhraseModel_5=0 PhraseModel_6=1 ||| -187.271
+9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-15.2003 Glue=3 LanguageModel=-66.3972 PhraseModel_0=12.7978 PhraseModel_1=27.4385 PhraseModel_2=16.0726 PhraseModel_3=29.7781 PhraseModel_4=19.6318 PhraseModel_5=1 PhraseModel_6=3 ||| -227.555
+9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-15.2003 Glue=3 LanguageModel=-65.7197 PhraseModel_0=13.5479 PhraseModel_1=27.4385 PhraseModel_2=15.3334 PhraseModel_3=29.9236 PhraseModel_4=20.1665 PhraseModel_5=1 PhraseModel_6=3 ||| -227.898
+9 ||| exactly as in the united states a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-67.8363 PhraseModel_0=12.3302 PhraseModel_1=28.2004 PhraseModel_2=17.0331 PhraseModel_3=29.7781 PhraseModel_4=18.5233 PhraseModel_5=1 PhraseModel_6=2 ||| -228.818
+9 ||| exactly as in the united states a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-67.1588 PhraseModel_0=13.0803 PhraseModel_1=28.2004 PhraseModel_2=16.2939 PhraseModel_3=29.9236 PhraseModel_4=19.058 PhraseModel_5=1 PhraseModel_6=2 ||| -229.161
+9 ||| exactly that in the united states have a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-15.2003 Glue=6 LanguageModel=-69.7395 PassThrough=1 PhraseModel_0=12.3803 PhraseModel_1=28.2004 PhraseModel_2=17.073 PhraseModel_3=27.2846 PhraseModel_4=17.4283 PhraseModel_5=1 PhraseModel_6=2 ||| -229.261
+9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally . this research showed that people of different racial trust each other much less . ||| WordPenalty=-15.2003 Glue=5 LanguageModel=-65.4196 PhraseModel_0=12.7913 PhraseModel_1=24.778 PhraseModel_2=13.4249 PhraseModel_3=32.5845 PhraseModel_4=19.6771 PhraseModel_5=1 PhraseModel_6=3 ||| -229.441
+9 ||| exactly that in the united states have a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-15.2003 Glue=6 LanguageModel=-69.062 PassThrough=1 PhraseModel_0=13.1304 PhraseModel_1=28.2004 PhraseModel_2=16.3338 PhraseModel_3=27.4302 PhraseModel_4=17.963 PhraseModel_5=1 PhraseModel_6=2 ||| -229.604
+9 ||| exactly as in the us , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-67.2845 PhraseModel_0=13.0626 PhraseModel_1=28.2004 PhraseModel_2=16.3641 PhraseModel_3=30.0645 PhraseModel_4=17.7441 PhraseModel_5=1 PhraseModel_6=2 ||| -229.932
+9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally . this research showed that people of different race trust each other much less . ||| WordPenalty=-15.2003 Glue=5 LanguageModel=-66.0971 PhraseModel_0=11.8609 PhraseModel_1=27.4385 PhraseModel_2=16.9754 PhraseModel_3=32.4389 PhraseModel_4=19.1424 PhraseModel_5=1 PhraseModel_6=3 ||| -230.238
+9 ||| exactly as in the us , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-66.607 PhraseModel_0=13.8127 PhraseModel_1=28.2004 PhraseModel_2=15.6249 PhraseModel_3=30.2101 PhraseModel_4=18.2788 PhraseModel_5=1 PhraseModel_6=2 ||| -230.274
diff --git a/test/kbest-bleu-oracles/example.refs b/test/kbest-bleu-oracles/example.refs
new file mode 100644
index 0000000..632e27b
--- /dev/null
+++ b/test/kbest-bleu-oracles/example.refs
@@ -0,0 +1,10 @@
+europe 's divided racial house
+a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge .
+the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them .
+while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon .
+an aging population at home and ever more open borders imply increasing racial fragmentation in european countries .
+mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear .
+it will not , as america 's racial history clearly shows .
+race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes .
+the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths .
+this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us .
diff --git a/test/kbest-bleu-oracles/example.src b/test/kbest-bleu-oracles/example.src
new file mode 100644
index 0000000..f5b910d
--- /dev/null
+++ b/test/kbest-bleu-oracles/example.src
@@ -0,0 +1,10 @@
+europas nach rassen geteiltes haus
+ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .
+der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .
+während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .
+eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .
+die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .
+das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .
+die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .
+der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .
+genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .
diff --git a/test/kbest_bleu_oracles/debug.kbests b/test/kbest_bleu_oracles/debug.kbests
deleted file mode 100644
index 1e9c894..0000000
--- a/test/kbest_bleu_oracles/debug.kbests
+++ /dev/null
@@ -1,4 +0,0 @@
-0 ||| a b c d ||| x=1 ||| 10
-0 ||| a b d c ||| x=1 ||| 9
-0 ||| a d b c ||| x=1 ||| 8
-0 ||| d a b c ||| x=1 ||| 7
diff --git a/test/kbest_bleu_oracles/debug.refs b/test/kbest_bleu_oracles/debug.refs
deleted file mode 100644
index 8e13e46..0000000
--- a/test/kbest_bleu_oracles/debug.refs
+++ /dev/null
@@ -1 +0,0 @@
-a b c d
diff --git a/test/kbest_bleu_oracles/example.kbests b/test/kbest_bleu_oracles/example.kbests
deleted file mode 100644
index 1126f1f..0000000
--- a/test/kbest_bleu_oracles/example.kbests
+++ /dev/null
@@ -1,100 +0,0 @@
-0 ||| europe races house divided ||| WordPenalty=-1.73718 LanguageModel=-18.15 PhraseModel_0=2.2467 PhraseModel_1=4.27323 PhraseModel_2=2.20952 PhraseModel_3=6.01559 PhraseModel_4=1.19831 PhraseModel_5=1 PhraseModel_6=1 ||| -61.4791
-0 ||| europe races divided house ||| WordPenalty=-1.73718 Glue=1 LanguageModel=-18.7337 PhraseModel_0=2.75576 PhraseModel_1=8.10398 PhraseModel_2=5.5382 PhraseModel_3=6.01559 PhraseModel_4=1.19831 PhraseModel_5=0 PhraseModel_6=0 ||| -61.5856
-0 ||| europe after racial house divided ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-21.3699 PhraseModel_0=1.68395 PhraseModel_1=4.27323 PhraseModel_2=2.67025 PhraseModel_3=4.44249 PhraseModel_4=1.87098 PhraseModel_5=1 PhraseModel_6=1 ||| -63.2049
-0 ||| europe after race divided house ||| WordPenalty=-2.17147 Glue=2 LanguageModel=-21.1973 PhraseModel_0=2.47176 PhraseModel_1=8.10398 PhraseModel_2=5.73009 PhraseModel_3=5.07197 PhraseModel_4=2.11131 PhraseModel_5=0 PhraseModel_6=0 ||| -63.4497
-0 ||| europe after races house divided ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-22.0216 PhraseModel_0=1.84876 PhraseModel_1=4.27323 PhraseModel_2=2.51055 PhraseModel_3=3.81707 PhraseModel_4=2.04167 PhraseModel_5=1 PhraseModel_6=1 ||| -63.7649
-0 ||| europe after races divided house ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-22.6053 PhraseModel_0=2.35782 PhraseModel_1=8.10398 PhraseModel_2=5.83923 PhraseModel_3=3.81707 PhraseModel_4=2.04167 PhraseModel_5=0 PhraseModel_6=0 ||| -63.8715
-0 ||| europe after racial divided house ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-22.2498 PhraseModel_0=2.19301 PhraseModel_1=8.10398 PhraseModel_2=5.99893 PhraseModel_3=4.44249 PhraseModel_4=1.87098 PhraseModel_5=0 PhraseModel_6=0 ||| -63.9867
-0 ||| europe following racial house divided ||| WordPenalty=-2.17147 Glue=2 LanguageModel=-21.941 PhraseModel_0=1.60477 PhraseModel_1=4.27323 PhraseModel_2=2.73719 PhraseModel_3=4.67218 PhraseModel_4=2.38101 PhraseModel_5=1 PhraseModel_6=1 ||| -64.7057
-0 ||| divided europe after racial house ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-21.6711 PhraseModel_0=3.23398 PhraseModel_1=8.10398 PhraseModel_2=5.11818 PhraseModel_3=4.44249 PhraseModel_4=1.87098 PhraseModel_5=0 PhraseModel_6=0 ||| -65.0513
-0 ||| europe race divided house ||| WordPenalty=-1.73718 LanguageModel=-19.0747 PhraseModel_0=2.95643 PhraseModel_1=8.10398 PhraseModel_2=5.34994 PhraseModel_3=7.27048 PhraseModel_4=1.26795 PhraseModel_5=0 PhraseModel_6=0 ||| -65.348
-1 ||| a common feature of europe 's extreme right is its racism and the fact that they use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=4 LanguageModel=-36.6093 PhraseModel_0=6.68111 PhraseModel_1=22.5747 PhraseModel_2=16.1531 PhraseModel_3=22.3782 PhraseModel_4=9.65239 PhraseModel_5=1 PhraseModel_6=1 ||| -136.567
-1 ||| a common feature of europe 's extreme right is its racism and the fact that they use the immigration as a political lever . ||| WordPenalty=-10.4231 Glue=5 LanguageModel=-39.0118 PhraseModel_0=5.94865 PhraseModel_1=18.9704 PhraseModel_2=13.3916 PhraseModel_3=22.3782 PhraseModel_4=10.0435 PhraseModel_5=1 PhraseModel_6=1 ||| -137.254
-1 ||| a common feature of europe 's extreme right is its racism and the fact that you use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=5 LanguageModel=-38.0283 PhraseModel_0=6.71292 PhraseModel_1=22.8797 PhraseModel_2=16.4585 PhraseModel_3=22.3803 PhraseModel_4=9.62847 PhraseModel_5=1 PhraseModel_6=1 ||| -140.071
-1 ||| a common feature of europe 's extreme right is its racism , and the fact that they use the immigration as a political lever . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-39.7992 PhraseModel_0=7.42421 PhraseModel_1=21.4489 PhraseModel_2=14.4345 PhraseModel_3=22.3782 PhraseModel_4=11.152 PhraseModel_5=1 PhraseModel_6=1 ||| -142.605
-1 ||| a common feature of europe 's extreme right is its racism , and the fact that you use immigration as a political lever . ||| WordPenalty=-10.4231 Glue=4 LanguageModel=-38.8156 PhraseModel_0=7.75494 PhraseModel_1=22.8797 PhraseModel_2=15.4378 PhraseModel_3=22.3803 PhraseModel_4=10.7369 PhraseModel_5=1 PhraseModel_6=1 ||| -142.999
-1 ||| a common feature of europe 's extreme right is its racism and the fact that you use the immigration as a political lever . ||| WordPenalty=-10.4231 Glue=5 LanguageModel=-40.3141 PhraseModel_0=6.39864 PhraseModel_1=23.0373 PhraseModel_2=16.9021 PhraseModel_3=22.3976 PhraseModel_4=10.0196 PhraseModel_5=1 PhraseModel_6=1 ||| -143.611
-1 ||| one common feature of europe 's extreme right is its racism and the fact that they use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=1 LanguageModel=-37.7197 PhraseModel_0=8.27536 PhraseModel_1=21.7089 PhraseModel_2=13.9878 PhraseModel_3=22.5681 PhraseModel_4=10.7747 PhraseModel_5=1 PhraseModel_6=2 ||| -144.987
-1 ||| one common feature of europe 's extreme right is its racism and the fact that they use the immigration as a political lever . ||| WordPenalty=-10.4231 Glue=1 LanguageModel=-40.1222 PhraseModel_0=7.73842 PhraseModel_1=18.7826 PhraseModel_2=11.6924 PhraseModel_3=22.5681 PhraseModel_4=11.1658 PhraseModel_5=1 PhraseModel_6=2 ||| -146.502
-1 ||| a common feature of europe 's extreme right is its racism , and the fact that you use the immigration as a political lever . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-41.1014 PhraseModel_0=7.44067 PhraseModel_1=23.0373 PhraseModel_2=15.8814 PhraseModel_3=22.3976 PhraseModel_4=11.1281 PhraseModel_5=1 PhraseModel_6=1 ||| -146.539
-1 ||| a shared feature of europe 's extreme right is its racism and the fact that they use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=1 LanguageModel=-40.0778 PhraseModel_0=7.91847 PhraseModel_1=22.5747 PhraseModel_2=14.9464 PhraseModel_3=22.3052 PhraseModel_4=10.5431 PhraseModel_5=1 PhraseModel_6=1 ||| -146.956
-2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.0176 Glue=6 LanguageModel=-100.983 PassThrough=3 PhraseModel_0=15.0383 PhraseModel_1=33.3621 PhraseModel_2=19.8383 PhraseModel_3=32.881 PhraseModel_4=23.2559 PhraseModel_5=0 PhraseModel_6=1 ||| -300.653
-2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of political parties or movements , which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=7 LanguageModel=-100.556 PassThrough=3 PhraseModel_0=16.5071 PhraseModel_1=32.7586 PhraseModel_2=17.7282 PhraseModel_3=33.296 PhraseModel_4=25.589 PhraseModel_5=0 PhraseModel_6=1 ||| -302.029
-2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed a common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.0176 Glue=6 LanguageModel=-99.7968 PassThrough=3 PhraseModel_0=15.033 PhraseModel_1=35.4231 PhraseModel_2=21.6674 PhraseModel_3=33.4947 PhraseModel_4=24.5697 PhraseModel_5=1 PhraseModel_6=1 ||| -302.155
-2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : the rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=6 LanguageModel=-100.68 PassThrough=4 PhraseModel_0=16.6403 PhraseModel_1=35.0625 PhraseModel_2=19.9175 PhraseModel_3=32.7793 PhraseModel_4=24.2261 PhraseModel_5=0 PhraseModel_6=0 ||| -302.466
-2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-22.5833 Glue=6 LanguageModel=-101.623 PassThrough=3 PhraseModel_0=14.5035 PhraseModel_1=33.3891 PhraseModel_2=20.3355 PhraseModel_3=33.2525 PhraseModel_4=22.8878 PhraseModel_5=0 PhraseModel_6=1 ||| -302.743
-2 ||| the lega nord in italy , the vlaams block , the followers of le pen 's national front in france , the netherlands are examples of parties or movements , which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-22.5833 Glue=6 LanguageModel=-97.1998 PassThrough=2 PhraseModel_0=17.8853 PhraseModel_1=39.5922 PhraseModel_2=23.2216 PhraseModel_3=33.6892 PhraseModel_4=23.0537 PhraseModel_5=0 PhraseModel_6=1 ||| -302.874
-2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of the immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=6 LanguageModel=-102.542 PassThrough=3 PhraseModel_0=15.8186 PhraseModel_1=35.0751 PhraseModel_2=20.6755 PhraseModel_3=32.881 PhraseModel_4=23.6471 PhraseModel_5=0 PhraseModel_6=0 ||| -303.305
-2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of immigration and to call for a simplified policy to regulate them . ||| WordPenalty=-22.5833 Glue=11 LanguageModel=-102.736 PassThrough=3 PhraseModel_0=13.1928 PhraseModel_1=35.2776 PhraseModel_2=23.3398 PhraseModel_3=33.1527 PhraseModel_4=21.9207 PhraseModel_5=0 PhraseModel_6=1 ||| -303.344
-2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of immigration and to call for a simplified policy in order to regulate it . ||| WordPenalty=-23.4519 Glue=10 LanguageModel=-104.547 PassThrough=3 PhraseModel_0=13.2351 PhraseModel_1=35.2776 PhraseModel_2=23.3336 PhraseModel_3=32.2759 PhraseModel_4=23.8622 PhraseModel_5=0 PhraseModel_6=1 ||| -303.438
-2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of political parties or movements , which have formed a common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=7 LanguageModel=-99.3692 PassThrough=3 PhraseModel_0=16.5018 PhraseModel_1=34.8196 PhraseModel_2=19.5572 PhraseModel_3=33.9097 PhraseModel_4=26.9028 PhraseModel_5=1 PhraseModel_6=1 ||| -303.531
-3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not to go too soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-15.6346 Glue=4 LanguageModel=-83.9883 PhraseModel_0=10.8504 PhraseModel_1=36.0092 PhraseModel_2=25.6962 PhraseModel_3=18.8196 PhraseModel_4=12.4793 PhraseModel_5=0 PhraseModel_6=0 ||| -236.305
-3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not go too soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-83.8116 PhraseModel_0=10.6743 PhraseModel_1=36.0092 PhraseModel_2=25.8212 PhraseModel_3=18.8196 PhraseModel_4=12.1849 PhraseModel_5=0 PhraseModel_6=0 ||| -236.56
-3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not to go too soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-82.9542 PhraseModel_0=11.3166 PhraseModel_1=35.9808 PhraseModel_2=25.3584 PhraseModel_3=19.1145 PhraseModel_4=12.9314 PhraseModel_5=0 PhraseModel_6=0 ||| -236.57
-3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( sadly not to go too soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-15.6346 Glue=4 LanguageModel=-82.211 PhraseModel_0=11.9448 PhraseModel_1=39.5089 PhraseModel_2=28.0752 PhraseModel_3=18.9139 PhraseModel_4=13.4713 PhraseModel_5=0 PhraseModel_6=0 ||| -236.761
-3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not go too soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-14.766 Glue=4 LanguageModel=-82.7775 PhraseModel_0=11.1405 PhraseModel_1=35.9808 PhraseModel_2=25.4834 PhraseModel_3=19.1145 PhraseModel_4=12.6371 PhraseModel_5=0 PhraseModel_6=0 ||| -236.825
-3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( sadly not to go too soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-81.1769 PhraseModel_0=12.411 PhraseModel_1=39.4805 PhraseModel_2=27.7374 PhraseModel_3=19.2089 PhraseModel_4=13.9234 PhraseModel_5=0 PhraseModel_6=0 ||| -237.026
-3 ||| while individuals like jörg haidar and jean @-@ marie le pen does not , unfortunately , and ( soon ) go , the race will come from the european policy to disappear anytime soon . ||| WordPenalty=-15.2003 Glue=5 LanguageModel=-74.4647 PhraseModel_0=8.03162 PhraseModel_1=32.0087 PhraseModel_2=24.9113 PhraseModel_3=27.2046 PhraseModel_4=19.1921 PhraseModel_5=2 PhraseModel_6=4 ||| -237.241
-3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and go ( unfortunately not soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-14.766 Glue=4 LanguageModel=-80.1276 PhraseModel_0=12.2413 PhraseModel_1=38.9132 PhraseModel_2=27.227 PhraseModel_3=20.0911 PhraseModel_4=10.4871 PhraseModel_5=0 PhraseModel_6=0 ||| -237.267
-3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and go ( unfortunately not soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-14.3317 Glue=4 LanguageModel=-79.0935 PhraseModel_0=12.7075 PhraseModel_1=38.8848 PhraseModel_2=26.8892 PhraseModel_3=20.3861 PhraseModel_4=10.9392 PhraseModel_5=0 PhraseModel_6=0 ||| -237.532
-3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not go too soon ) , will not disappear as soon the race from the european policy . ||| WordPenalty=-14.766 Glue=4 LanguageModel=-79.8077 PhraseModel_0=11.0526 PhraseModel_1=33.8577 PhraseModel_2=23.7301 PhraseModel_3=20.8921 PhraseModel_4=10.9702 PhraseModel_5=0 PhraseModel_6=2 ||| -237.654
-4 ||| an aging population and ever more open borders the racist fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=4 LanguageModel=-34.4131 PhraseModel_0=7.06252 PhraseModel_1=19.7388 PhraseModel_2=13.2138 PhraseModel_3=17.7775 PhraseModel_4=5.47301 PhraseModel_5=0 PhraseModel_6=1 ||| -127.294
-4 ||| an aging population and ever more open borders the racist fragmentation in european countries . ||| WordPenalty=-6.51442 Glue=4 LanguageModel=-33.7446 PhraseModel_0=7.05156 PhraseModel_1=19.8444 PhraseModel_2=13.3428 PhraseModel_3=18.5456 PhraseModel_4=5.26208 PhraseModel_5=0 PhraseModel_6=1 ||| -128.424
-4 ||| an aging population and ever more open borders increase racial fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=3 LanguageModel=-35.0385 PhraseModel_0=8.57304 PhraseModel_1=21.0335 PhraseModel_2=12.975 PhraseModel_3=15.9006 PhraseModel_4=8.12696 PhraseModel_5=0 PhraseModel_6=1 ||| -128.599
-4 ||| an aging population and ever more open borders multiply the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=5 LanguageModel=-39.5074 PhraseModel_0=7.53377 PhraseModel_1=20.0813 PhraseModel_2=13.1161 PhraseModel_3=13.3764 PhraseModel_4=7.95875 PhraseModel_5=0 PhraseModel_6=1 ||| -129.817
-4 ||| an aging population and ever more open borders increase the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=6 LanguageModel=-38.8411 PhraseModel_0=6.66847 PhraseModel_1=20.0813 PhraseModel_2=13.8817 PhraseModel_3=15.1212 PhraseModel_4=7.50646 PhraseModel_5=0 PhraseModel_6=1 ||| -129.94
-4 ||| an aging population and ever more open borders reproduce the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=5 LanguageModel=-39.5442 PhraseModel_0=7.70986 PhraseModel_1=20.0813 PhraseModel_2=12.97 PhraseModel_3=13.3002 PhraseModel_4=8.03794 PhraseModel_5=0 PhraseModel_6=1 ||| -130.137
-4 ||| an aging population and ever more open borders multiplying the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=5 LanguageModel=-39.82 PhraseModel_0=7.53377 PhraseModel_1=20.0813 PhraseModel_2=13.1161 PhraseModel_3=13.2318 PhraseModel_4=7.83382 PhraseModel_5=0 PhraseModel_6=1 ||| -130.257
-4 ||| an aging population and ever more open borders the racial fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=4 LanguageModel=-33.155 PhraseModel_0=8.15791 PhraseModel_1=23.1696 PhraseModel_2=15.5542 PhraseModel_3=18.5569 PhraseModel_4=6.35432 PhraseModel_5=0 PhraseModel_6=1 ||| -130.313
-4 ||| an aging population and ever more open borders grows the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=6 LanguageModel=-39.0359 PhraseModel_0=7.57156 PhraseModel_1=20.0813 PhraseModel_2=13.1283 PhraseModel_3=14.2568 PhraseModel_4=8.43588 PhraseModel_5=0 PhraseModel_6=1 ||| -130.549
-4 ||| an aging population and ever more open borders multiply racist fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=3 LanguageModel=-38.6378 PhraseModel_0=8.05048 PhraseModel_1=21.0335 PhraseModel_2=13.5162 PhraseModel_3=13.3764 PhraseModel_4=7.69795 PhraseModel_5=0 PhraseModel_6=1 ||| -130.598
-5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.766 Glue=5 LanguageModel=-59.9487 PassThrough=1 PhraseModel_0=11.4712 PhraseModel_1=29.458 PhraseModel_2=19.0438 PhraseModel_3=37.8219 PhraseModel_4=21.1861 PhraseModel_5=0 PhraseModel_6=2 ||| -225.247
-5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.766 Glue=7 LanguageModel=-60.8539 PassThrough=2 PhraseModel_0=11.5126 PhraseModel_1=29.759 PhraseModel_2=19.2199 PhraseModel_3=37.2073 PhraseModel_4=20.0207 PhraseModel_5=0 PhraseModel_6=1 ||| -225.823
-5 ||| the big parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.766 Glue=8 LanguageModel=-61.5768 PassThrough=1 PhraseModel_0=12.0156 PhraseModel_1=33.4151 PhraseModel_2=22.0984 PhraseModel_3=37.22 PhraseModel_4=20.669 PhraseModel_5=0 PhraseModel_6=0 ||| -226.173
-5 ||| the big parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.766 Glue=5 LanguageModel=-60.6716 PassThrough=1 PhraseModel_0=11.3742 PhraseModel_1=29.458 PhraseModel_2=19.123 PhraseModel_3=37.5989 PhraseModel_4=21.4664 PhraseModel_5=0 PhraseModel_6=2 ||| -226.174
-5 ||| the major parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.3317 Glue=5 LanguageModel=-58.5133 PassThrough=1 PhraseModel_0=11.8665 PhraseModel_1=28.9017 PhraseModel_2=18.1987 PhraseModel_3=39.1001 PhraseModel_4=20.9503 PhraseModel_5=0 PhraseModel_6=2 ||| -226.221
-5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and counter all prospects have hoped that it will soon disappear . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-63.5417 PassThrough=1 PhraseModel_0=12.3574 PhraseModel_1=29.4845 PhraseModel_2=18.2384 PhraseModel_3=34.0032 PhraseModel_4=21.7025 PhraseModel_5=0 PhraseModel_6=2 ||| -226.609
-5 ||| the major parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.3317 Glue=7 LanguageModel=-59.4184 PassThrough=2 PhraseModel_0=11.9079 PhraseModel_1=29.2027 PhraseModel_2=18.3748 PhraseModel_3=38.4854 PhraseModel_4=19.7848 PhraseModel_5=0 PhraseModel_6=1 ||| -226.796
-5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and allen prospects have hoped that it will soon disappear . ||| WordPenalty=-14.766 Glue=5 LanguageModel=-61.4797 PassThrough=2 PhraseModel_0=11.3037 PhraseModel_1=26.9794 PhraseModel_2=16.7321 PhraseModel_3=36.9453 PhraseModel_4=21.0569 PhraseModel_5=0 PhraseModel_6=2 ||| -227.012
-5 ||| the big parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.3317 Glue=8 LanguageModel=-60.1414 PassThrough=1 PhraseModel_0=12.4109 PhraseModel_1=32.8588 PhraseModel_2=21.2533 PhraseModel_3=38.4981 PhraseModel_4=20.4332 PhraseModel_5=0 PhraseModel_6=0 ||| -227.147
-5 ||| the big parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.3317 Glue=5 LanguageModel=-59.2362 PassThrough=1 PhraseModel_0=11.7696 PhraseModel_1=28.9017 PhraseModel_2=18.2779 PhraseModel_3=38.877 PhraseModel_4=21.2305 PhraseModel_5=0 PhraseModel_6=2 ||| -227.147
-6 ||| but it will not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=1 LanguageModel=-22.9084 PhraseModel_0=4.65824 PhraseModel_1=11.8066 PhraseModel_2=7.53935 PhraseModel_3=10.1842 PhraseModel_4=10.0823 PhraseModel_5=0 PhraseModel_6=1 ||| -74.8416
-6 ||| but that it will not , as is evident from the history of racism in america . ||| WordPenalty=-7.38301 Glue=1 LanguageModel=-25.5326 PhraseModel_0=4.09397 PhraseModel_1=11.8066 PhraseModel_2=8.04227 PhraseModel_3=8.52096 PhraseModel_4=9.91846 PhraseModel_5=0 PhraseModel_6=1 ||| -75.1773
-6 ||| but this will not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=1 LanguageModel=-23.6293 PhraseModel_0=4.59129 PhraseModel_1=11.8066 PhraseModel_2=7.59734 PhraseModel_3=9.74767 PhraseModel_4=9.60367 PhraseModel_5=0 PhraseModel_6=1 ||| -75.5277
-6 ||| but that will not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=1 LanguageModel=-23.682 PhraseModel_0=4.35721 PhraseModel_1=11.8066 PhraseModel_2=7.80819 PhraseModel_3=9.95635 PhraseModel_4=9.51798 PhraseModel_5=0 PhraseModel_6=1 ||| -75.5534
-6 ||| but it is not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=2 LanguageModel=-21.7223 PhraseModel_0=5.33564 PhraseModel_1=14.6359 PhraseModel_2=9.75115 PhraseModel_3=10.8638 PhraseModel_4=10.5328 PhraseModel_5=0 PhraseModel_6=1 ||| -76.6276
-6 ||| but this is not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=2 LanguageModel=-22.3075 PhraseModel_0=5.26869 PhraseModel_1=14.6359 PhraseModel_2=9.80914 PhraseModel_3=10.4272 PhraseModel_4=10.0542 PhraseModel_5=0 PhraseModel_6=1 ||| -77.004
-6 ||| but that is not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=2 LanguageModel=-22.4246 PhraseModel_0=5.03461 PhraseModel_1=14.6359 PhraseModel_2=10.02 PhraseModel_3=10.6359 PhraseModel_4=9.9685 PhraseModel_5=0 PhraseModel_6=1 ||| -77.1766
-6 ||| this but it will not , as is evident from the history of racism in america . ||| WordPenalty=-7.38301 Glue=1 LanguageModel=-27.463 PhraseModel_0=4.16583 PhraseModel_1=14.2852 PhraseModel_2=10.4324 PhraseModel_3=8.31228 PhraseModel_4=10.0042 PhraseModel_5=0 PhraseModel_6=1 ||| -80.7433
-6 ||| but that it will not , as the history of racism in america clearly shows . ||| WordPenalty=-6.94871 Glue=3 LanguageModel=-29.6579 PhraseModel_0=3.45482 PhraseModel_1=14.6359 PhraseModel_2=11.3575 PhraseModel_3=7.1068 PhraseModel_4=5.12435 PhraseModel_5=0 PhraseModel_6=0 ||| -81.1791
-6 ||| but that there will not , as is evident from the history of racism in america . ||| WordPenalty=-7.38301 Glue=1 LanguageModel=-27.2814 PhraseModel_0=4.61416 PhraseModel_1=13.9975 PhraseModel_2=9.83682 PhraseModel_3=8.54324 PhraseModel_4=10.4476 PhraseModel_5=0 PhraseModel_6=1 ||| -81.6067
-7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-192.36 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=16.9337 PhraseModel_1=46.7282 PhraseModel_2=30.7137 PhraseModel_3=41.8218 PhraseModel_4=23.2319 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.5833 Glue=7 ||| -538.223
-7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-190.966 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.3513 PhraseModel_1=46.7282 PhraseModel_2=30.3066 PhraseModel_3=42.4355 PhraseModel_4=22.8408 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.311
-7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was as important as income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-190.031 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.2348 PhraseModel_1=46.7282 PhraseModel_2=30.4157 PhraseModel_3=43.1826 PhraseModel_4=22.1641 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-21.7147 Glue=7 ||| -538.563
-7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as the income - if not even more important - to define political zuneigungen and attitudes . ||| LanguageModel=-191.666 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.2348 PhraseModel_1=46.7282 PhraseModel_2=30.4919 PhraseModel_3=42.4907 PhraseModel_4=23.8489 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.5833 Glue=7 ||| -538.592
-7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-191.425 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=16.8923 PhraseModel_1=46.7282 PhraseModel_2=30.7515 PhraseModel_3=42.5689 PhraseModel_4=22.5552 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.643
-7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as income - if not even more important - to define political zuneigungen and attitudes . ||| LanguageModel=-190.272 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.6523 PhraseModel_1=46.7282 PhraseModel_2=30.0847 PhraseModel_3=43.1044 PhraseModel_4=23.4578 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.681
-7 ||| the relations between the races in the united states for decades - and still do today - at the centre of political debate . the went so far that segregation was just as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-195.148 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=16.8352 PhraseModel_1=42.9613 PhraseModel_2=27.1493 PhraseModel_3=39.4459 PhraseModel_4=20.4386 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.866
-7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was as important as income - if not even more important - to define political zuneigungen and attitudes . ||| LanguageModel=-189.336 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.5358 PhraseModel_1=46.7282 PhraseModel_2=30.1939 PhraseModel_3=43.8515 PhraseModel_4=22.7811 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-21.7147 Glue=7 ||| -538.932
-7 ||| the relations between the races in the united states for decades - and still do today - at the centre of political debate . the went so far that segregation was just as important as income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-193.753 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.2528 PhraseModel_1=42.9613 PhraseModel_2=26.7421 PhraseModel_3=40.0596 PhraseModel_4=20.0474 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-21.7147 Glue=7 ||| -538.954
-7 ||| the relations between the races in the united states for decades - and still do today - at the centre of political debate . it went so far as to say that segregation was just as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-193.555 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=18.5925 PhraseModel_1=48.5668 PhraseModel_2=31.0822 PhraseModel_3=39.825 PhraseModel_4=27.6051 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-23.4519 Glue=8 ||| -538.976
-8 ||| the first step is to deal with the race to understand cause and consequence of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-48.8088 PhraseModel_0=10.2462 PhraseModel_1=27.2091 PhraseModel_2=17.5849 PhraseModel_3=29.1579 PhraseModel_4=12.7191 PhraseModel_5=0 PhraseModel_6=1 ||| -184.302
-8 ||| the first step is to deal with the race to understand cause and consequence of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-47.4794 PhraseModel_0=11.4183 PhraseModel_1=27.831 PhraseModel_2=16.98 PhraseModel_3=29.6069 PhraseModel_4=12.2741 PhraseModel_5=0 PhraseModel_6=1 ||| -185.222
-8 ||| the first step is to deal with the race to understand cause and effects of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-49.7357 PhraseModel_0=10.2462 PhraseModel_1=27.2091 PhraseModel_2=17.5849 PhraseModel_3=28.6034 PhraseModel_4=11.5619 PhraseModel_5=0 PhraseModel_6=1 ||| -185.485
-8 ||| the first step is to deal with the race to understand cause and consequences of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-50.2562 PhraseModel_0=10.5379 PhraseModel_1=30.9739 PhraseModel_2=20.9386 PhraseModel_3=28.0205 PhraseModel_4=10.9116 PhraseModel_5=0 PhraseModel_6=0 ||| -185.879
-8 ||| the first step is to deal with the race to understand cause and consequence of racist hostility , even if this means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-47.9671 PhraseModel_0=11.4465 PhraseModel_1=26.85 PhraseModel_2=16.0501 PhraseModel_3=29.6623 PhraseModel_4=12.3598 PhraseModel_5=0 PhraseModel_6=1 ||| -185.949
-8 ||| the first step is to deal with the race to understand cause and effects of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-48.4063 PhraseModel_0=11.4183 PhraseModel_1=27.831 PhraseModel_2=16.98 PhraseModel_3=29.0524 PhraseModel_4=11.1169 PhraseModel_5=0 PhraseModel_6=1 ||| -186.405
-8 ||| the first step is to deal with the race to understand cause and consequences of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-48.9268 PhraseModel_0=11.7099 PhraseModel_1=31.5959 PhraseModel_2=20.3337 PhraseModel_3=28.4695 PhraseModel_4=10.4666 PhraseModel_5=0 PhraseModel_6=0 ||| -186.799
-8 ||| the first step is to deal with the race to understand cause and effects of racist hostility , even if this means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-48.8939 PhraseModel_0=11.4465 PhraseModel_1=26.85 PhraseModel_2=16.0501 PhraseModel_3=29.1078 PhraseModel_4=11.2026 PhraseModel_5=0 PhraseModel_6=1 ||| -187.132
-8 ||| is the first step to deal with the race to understand cause and consequence of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=3 LanguageModel=-50.3885 PhraseModel_0=10.6908 PhraseModel_1=24.5899 PhraseModel_2=14.5662 PhraseModel_3=29.0596 PhraseModel_4=12.2741 PhraseModel_5=0 PhraseModel_6=1 ||| -187.259
-8 ||| the first step is to deal with the race , to understand cause and consequence of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-11.2917 Glue=5 LanguageModel=-51.3999 PhraseModel_0=9.91899 PhraseModel_1=27.2091 PhraseModel_2=17.8675 PhraseModel_3=28.6105 PhraseModel_4=13.0872 PhraseModel_5=0 PhraseModel_6=1 ||| -187.271
-9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-15.2003 Glue=3 LanguageModel=-66.3972 PhraseModel_0=12.7978 PhraseModel_1=27.4385 PhraseModel_2=16.0726 PhraseModel_3=29.7781 PhraseModel_4=19.6318 PhraseModel_5=1 PhraseModel_6=3 ||| -227.555
-9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-15.2003 Glue=3 LanguageModel=-65.7197 PhraseModel_0=13.5479 PhraseModel_1=27.4385 PhraseModel_2=15.3334 PhraseModel_3=29.9236 PhraseModel_4=20.1665 PhraseModel_5=1 PhraseModel_6=3 ||| -227.898
-9 ||| exactly as in the united states a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-67.8363 PhraseModel_0=12.3302 PhraseModel_1=28.2004 PhraseModel_2=17.0331 PhraseModel_3=29.7781 PhraseModel_4=18.5233 PhraseModel_5=1 PhraseModel_6=2 ||| -228.818
-9 ||| exactly as in the united states a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-67.1588 PhraseModel_0=13.0803 PhraseModel_1=28.2004 PhraseModel_2=16.2939 PhraseModel_3=29.9236 PhraseModel_4=19.058 PhraseModel_5=1 PhraseModel_6=2 ||| -229.161
-9 ||| exactly that in the united states have a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-15.2003 Glue=6 LanguageModel=-69.7395 PassThrough=1 PhraseModel_0=12.3803 PhraseModel_1=28.2004 PhraseModel_2=17.073 PhraseModel_3=27.2846 PhraseModel_4=17.4283 PhraseModel_5=1 PhraseModel_6=2 ||| -229.261
-9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally . this research showed that people of different racial trust each other much less . ||| WordPenalty=-15.2003 Glue=5 LanguageModel=-65.4196 PhraseModel_0=12.7913 PhraseModel_1=24.778 PhraseModel_2=13.4249 PhraseModel_3=32.5845 PhraseModel_4=19.6771 PhraseModel_5=1 PhraseModel_6=3 ||| -229.441
-9 ||| exactly that in the united states have a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-15.2003 Glue=6 LanguageModel=-69.062 PassThrough=1 PhraseModel_0=13.1304 PhraseModel_1=28.2004 PhraseModel_2=16.3338 PhraseModel_3=27.4302 PhraseModel_4=17.963 PhraseModel_5=1 PhraseModel_6=2 ||| -229.604
-9 ||| exactly as in the us , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-67.2845 PhraseModel_0=13.0626 PhraseModel_1=28.2004 PhraseModel_2=16.3641 PhraseModel_3=30.0645 PhraseModel_4=17.7441 PhraseModel_5=1 PhraseModel_6=2 ||| -229.932
-9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally . this research showed that people of different race trust each other much less . ||| WordPenalty=-15.2003 Glue=5 LanguageModel=-66.0971 PhraseModel_0=11.8609 PhraseModel_1=27.4385 PhraseModel_2=16.9754 PhraseModel_3=32.4389 PhraseModel_4=19.1424 PhraseModel_5=1 PhraseModel_6=3 ||| -230.238
-9 ||| exactly as in the us , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-66.607 PhraseModel_0=13.8127 PhraseModel_1=28.2004 PhraseModel_2=15.6249 PhraseModel_3=30.2101 PhraseModel_4=18.2788 PhraseModel_5=1 PhraseModel_6=2 ||| -230.274
diff --git a/test/kbest_bleu_oracles/example.refs b/test/kbest_bleu_oracles/example.refs
deleted file mode 100644
index 632e27b..0000000
--- a/test/kbest_bleu_oracles/example.refs
+++ /dev/null
@@ -1,10 +0,0 @@
-europe 's divided racial house
-a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge .
-the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them .
-while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon .
-an aging population at home and ever more open borders imply increasing racial fragmentation in european countries .
-mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear .
-it will not , as america 's racial history clearly shows .
-race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes .
-the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths .
-this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us .
diff --git a/test/kbest_bleu_oracles/example.src b/test/kbest_bleu_oracles/example.src
deleted file mode 100644
index f5b910d..0000000
--- a/test/kbest_bleu_oracles/example.src
+++ /dev/null
@@ -1,10 +0,0 @@
-europas nach rassen geteiltes haus
-ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .
-der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .
-während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .
-eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .
-die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .
-das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .
-die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .
-der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .
-genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .
diff --git a/test/lin-reg/exptected.txt b/test/lin-reg/exptected.txt
new file mode 100644
index 0000000..13de1fc
--- /dev/null
+++ b/test/lin-reg/exptected.txt
@@ -0,0 +1,3 @@
+ran for 2527 iterations
+ R^2=0.858063223720823
+{0=>0.7501625304145768, 1=>0.06388116702419537}
diff --git a/test/lin-reg/input.dat b/test/lin-reg/input.dat
new file mode 100644
index 0000000..3d93394
--- /dev/null
+++ b/test/lin-reg/input.dat
@@ -0,0 +1,50 @@
+ 2.0658746e+00
+ 2.3684087e+00
+ 2.5399929e+00
+ 2.5420804e+00
+ 2.5490790e+00
+ 2.7866882e+00
+ 2.9116825e+00
+ 3.0356270e+00
+ 3.1146696e+00
+ 3.1582389e+00
+ 3.3275944e+00
+ 3.3793165e+00
+ 3.4122006e+00
+ 3.4215823e+00
+ 3.5315732e+00
+ 3.6393002e+00
+ 3.6732537e+00
+ 3.9256462e+00
+ 4.0498646e+00
+ 4.2483348e+00
+ 4.3440052e+00
+ 4.3826531e+00
+ 4.4230602e+00
+ 4.6102443e+00
+ 4.6881183e+00
+ 4.9777333e+00
+ 5.0359967e+00
+ 5.0684536e+00
+ 5.4161491e+00
+ 5.4395623e+00
+ 5.4563207e+00
+ 5.5698458e+00
+ 5.6015729e+00
+ 5.6877617e+00
+ 5.7215602e+00
+ 5.8538914e+00
+ 6.1978026e+00
+ 6.3510941e+00
+ 6.4797033e+00
+ 6.7383791e+00
+ 6.8637686e+00
+ 7.0223387e+00
+ 7.0782373e+00
+ 7.1514232e+00
+ 7.4664023e+00
+ 7.5973874e+00
+ 7.7440717e+00
+ 7.7729662e+00
+ 7.8264514e+00
+ 7.9306356e+00
diff --git a/test/lin-reg/output.dat b/test/lin-reg/output.dat
new file mode 100644
index 0000000..1f4f963
--- /dev/null
+++ b/test/lin-reg/output.dat
@@ -0,0 +1,50 @@
+ 7.7918926e-01
+ 9.1596757e-01
+ 9.0538354e-01
+ 9.0566138e-01
+ 9.3898890e-01
+ 9.6684740e-01
+ 9.6436824e-01
+ 9.1445939e-01
+ 9.3933944e-01
+ 9.6074971e-01
+ 8.9837094e-01
+ 9.1209739e-01
+ 9.4238499e-01
+ 9.6624578e-01
+ 1.0526500e+00
+ 1.0143791e+00
+ 9.5969426e-01
+ 9.6853716e-01
+ 1.0766065e+00
+ 1.1454978e+00
+ 1.0340625e+00
+ 1.0070009e+00
+ 9.6683648e-01
+ 1.0895919e+00
+ 1.0634462e+00
+ 1.1237239e+00
+ 1.0323374e+00
+ 1.0874452e+00
+ 1.0702988e+00
+ 1.1606493e+00
+ 1.0778037e+00
+ 1.1069758e+00
+ 1.0971875e+00
+ 1.1648603e+00
+ 1.1411796e+00
+ 1.0844156e+00
+ 1.1252493e+00
+ 1.1168341e+00
+ 1.1970789e+00
+ 1.2069462e+00
+ 1.1251046e+00
+ 1.1235672e+00
+ 1.2132829e+00
+ 1.2522652e+00
+ 1.2497065e+00
+ 1.1799706e+00
+ 1.1897299e+00
+ 1.3029934e+00
+ 1.2601134e+00
+ 1.2562267e+00
diff --git a/test/lin_reg/exptected.txt b/test/lin_reg/exptected.txt
deleted file mode 100644
index 13de1fc..0000000
--- a/test/lin_reg/exptected.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-ran for 2527 iterations
- R^2=0.858063223720823
-{0=>0.7501625304145768, 1=>0.06388116702419537}
diff --git a/test/lin_reg/input.dat b/test/lin_reg/input.dat
deleted file mode 100644
index 3d93394..0000000
--- a/test/lin_reg/input.dat
+++ /dev/null
@@ -1,50 +0,0 @@
- 2.0658746e+00
- 2.3684087e+00
- 2.5399929e+00
- 2.5420804e+00
- 2.5490790e+00
- 2.7866882e+00
- 2.9116825e+00
- 3.0356270e+00
- 3.1146696e+00
- 3.1582389e+00
- 3.3275944e+00
- 3.3793165e+00
- 3.4122006e+00
- 3.4215823e+00
- 3.5315732e+00
- 3.6393002e+00
- 3.6732537e+00
- 3.9256462e+00
- 4.0498646e+00
- 4.2483348e+00
- 4.3440052e+00
- 4.3826531e+00
- 4.4230602e+00
- 4.6102443e+00
- 4.6881183e+00
- 4.9777333e+00
- 5.0359967e+00
- 5.0684536e+00
- 5.4161491e+00
- 5.4395623e+00
- 5.4563207e+00
- 5.5698458e+00
- 5.6015729e+00
- 5.6877617e+00
- 5.7215602e+00
- 5.8538914e+00
- 6.1978026e+00
- 6.3510941e+00
- 6.4797033e+00
- 6.7383791e+00
- 6.8637686e+00
- 7.0223387e+00
- 7.0782373e+00
- 7.1514232e+00
- 7.4664023e+00
- 7.5973874e+00
- 7.7440717e+00
- 7.7729662e+00
- 7.8264514e+00
- 7.9306356e+00
diff --git a/test/lin_reg/output.dat b/test/lin_reg/output.dat
deleted file mode 100644
index 1f4f963..0000000
--- a/test/lin_reg/output.dat
+++ /dev/null
@@ -1,50 +0,0 @@
- 7.7918926e-01
- 9.1596757e-01
- 9.0538354e-01
- 9.0566138e-01
- 9.3898890e-01
- 9.6684740e-01
- 9.6436824e-01
- 9.1445939e-01
- 9.3933944e-01
- 9.6074971e-01
- 8.9837094e-01
- 9.1209739e-01
- 9.4238499e-01
- 9.6624578e-01
- 1.0526500e+00
- 1.0143791e+00
- 9.5969426e-01
- 9.6853716e-01
- 1.0766065e+00
- 1.1454978e+00
- 1.0340625e+00
- 1.0070009e+00
- 9.6683648e-01
- 1.0895919e+00
- 1.0634462e+00
- 1.1237239e+00
- 1.0323374e+00
- 1.0874452e+00
- 1.0702988e+00
- 1.1606493e+00
- 1.0778037e+00
- 1.1069758e+00
- 1.0971875e+00
- 1.1648603e+00
- 1.1411796e+00
- 1.0844156e+00
- 1.1252493e+00
- 1.1168341e+00
- 1.1970789e+00
- 1.2069462e+00
- 1.1251046e+00
- 1.1235672e+00
- 1.2132829e+00
- 1.2522652e+00
- 1.2497065e+00
- 1.1799706e+00
- 1.1897299e+00
- 1.3029934e+00
- 1.2601134e+00
- 1.2562267e+00
diff --git a/test/log-reg/expected.txt b/test/log-reg/expected.txt
new file mode 100644
index 0000000..46a03ef
--- /dev/null
+++ b/test/log-reg/expected.txt
@@ -0,0 +1,2 @@
+ran for 15 iterations
+Vector[-16.378743410287445, 0.1483407737248737, 0.1589084517934473]
diff --git a/test/log-reg/input.dat b/test/log-reg/input.dat
new file mode 100644
index 0000000..eed0ab1
--- /dev/null
+++ b/test/log-reg/input.dat
@@ -0,0 +1,80 @@
+ 5.5500000e+01 6.9500000e+01
+ 4.1000000e+01 8.1500000e+01
+ 5.3500000e+01 8.6000000e+01
+ 4.6000000e+01 8.4000000e+01
+ 4.1000000e+01 7.3500000e+01
+ 5.1500000e+01 6.9000000e+01
+ 5.1000000e+01 6.2500000e+01
+ 4.2000000e+01 7.5000000e+01
+ 5.3500000e+01 8.3000000e+01
+ 5.7500000e+01 7.1000000e+01
+ 4.2500000e+01 7.2500000e+01
+ 4.1000000e+01 8.0000000e+01
+ 4.6000000e+01 8.2000000e+01
+ 4.6000000e+01 6.0500000e+01
+ 4.9500000e+01 7.6000000e+01
+ 4.1000000e+01 7.6000000e+01
+ 4.8500000e+01 7.2500000e+01
+ 5.1500000e+01 8.2500000e+01
+ 4.4500000e+01 7.0500000e+01
+ 4.4000000e+01 6.6000000e+01
+ 3.3000000e+01 7.6500000e+01
+ 3.3500000e+01 7.8500000e+01
+ 3.1500000e+01 7.2000000e+01
+ 3.3000000e+01 8.1500000e+01
+ 4.2000000e+01 5.9500000e+01
+ 3.0000000e+01 6.4000000e+01
+ 6.1000000e+01 4.5000000e+01
+ 4.9000000e+01 7.9000000e+01
+ 2.6500000e+01 6.4500000e+01
+ 3.4000000e+01 7.1500000e+01
+ 4.2000000e+01 8.3500000e+01
+ 2.9500000e+01 7.4500000e+01
+ 3.9500000e+01 7.0000000e+01
+ 5.1500000e+01 6.6000000e+01
+ 4.1500000e+01 7.1500000e+01
+ 4.2500000e+01 7.9500000e+01
+ 3.5000000e+01 5.9500000e+01
+ 3.8500000e+01 7.3500000e+01
+ 3.2000000e+01 8.1500000e+01
+ 4.6000000e+01 6.0500000e+01
+ 3.6500000e+01 5.3000000e+01
+ 3.6500000e+01 5.3500000e+01
+ 2.4000000e+01 6.0500000e+01
+ 1.9000000e+01 5.7500000e+01
+ 3.4500000e+01 6.0000000e+01
+ 3.7500000e+01 6.4500000e+01
+ 3.5500000e+01 5.1000000e+01
+ 3.7000000e+01 5.0500000e+01
+ 2.1500000e+01 4.2000000e+01
+ 3.5500000e+01 5.8500000e+01
+ 2.6500000e+01 6.8500000e+01
+ 2.6500000e+01 5.5500000e+01
+ 1.8500000e+01 6.7000000e+01
+ 4.0000000e+01 6.7000000e+01
+ 3.2500000e+01 7.1500000e+01
+ 3.9000000e+01 7.1500000e+01
+ 4.3000000e+01 5.5500000e+01
+ 2.2000000e+01 5.4000000e+01
+ 3.6000000e+01 6.2500000e+01
+ 3.1000000e+01 5.5500000e+01
+ 3.8500000e+01 7.6000000e+01
+ 4.0000000e+01 7.5000000e+01
+ 3.7500000e+01 6.3000000e+01
+ 2.4500000e+01 5.8000000e+01
+ 3.0000000e+01 6.7000000e+01
+ 3.3000000e+01 5.6000000e+01
+ 5.6500000e+01 6.1000000e+01
+ 4.1000000e+01 5.7000000e+01
+ 4.9500000e+01 6.3000000e+01
+ 3.4500000e+01 7.2500000e+01
+ 3.2500000e+01 6.9000000e+01
+ 3.6000000e+01 7.3000000e+01
+ 2.7000000e+01 5.3500000e+01
+ 4.1000000e+01 6.3500000e+01
+ 2.9500000e+01 5.2500000e+01
+ 2.0000000e+01 6.5500000e+01
+ 3.8000000e+01 6.5000000e+01
+ 1.8500000e+01 7.4500000e+01
+ 1.6000000e+01 7.2500000e+01
+ 3.3500000e+01 6.8000000e+01
diff --git a/test/log-reg/output.dat b/test/log-reg/output.dat
new file mode 100644
index 0000000..51283c0
--- /dev/null
+++ b/test/log-reg/output.dat
@@ -0,0 +1,80 @@
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
diff --git a/test/log_reg/expected.txt b/test/log_reg/expected.txt
deleted file mode 100644
index 46a03ef..0000000
--- a/test/log_reg/expected.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-ran for 15 iterations
-Vector[-16.378743410287445, 0.1483407737248737, 0.1589084517934473]
diff --git a/test/log_reg/input.dat b/test/log_reg/input.dat
deleted file mode 100644
index eed0ab1..0000000
--- a/test/log_reg/input.dat
+++ /dev/null
@@ -1,80 +0,0 @@
- 5.5500000e+01 6.9500000e+01
- 4.1000000e+01 8.1500000e+01
- 5.3500000e+01 8.6000000e+01
- 4.6000000e+01 8.4000000e+01
- 4.1000000e+01 7.3500000e+01
- 5.1500000e+01 6.9000000e+01
- 5.1000000e+01 6.2500000e+01
- 4.2000000e+01 7.5000000e+01
- 5.3500000e+01 8.3000000e+01
- 5.7500000e+01 7.1000000e+01
- 4.2500000e+01 7.2500000e+01
- 4.1000000e+01 8.0000000e+01
- 4.6000000e+01 8.2000000e+01
- 4.6000000e+01 6.0500000e+01
- 4.9500000e+01 7.6000000e+01
- 4.1000000e+01 7.6000000e+01
- 4.8500000e+01 7.2500000e+01
- 5.1500000e+01 8.2500000e+01
- 4.4500000e+01 7.0500000e+01
- 4.4000000e+01 6.6000000e+01
- 3.3000000e+01 7.6500000e+01
- 3.3500000e+01 7.8500000e+01
- 3.1500000e+01 7.2000000e+01
- 3.3000000e+01 8.1500000e+01
- 4.2000000e+01 5.9500000e+01
- 3.0000000e+01 6.4000000e+01
- 6.1000000e+01 4.5000000e+01
- 4.9000000e+01 7.9000000e+01
- 2.6500000e+01 6.4500000e+01
- 3.4000000e+01 7.1500000e+01
- 4.2000000e+01 8.3500000e+01
- 2.9500000e+01 7.4500000e+01
- 3.9500000e+01 7.0000000e+01
- 5.1500000e+01 6.6000000e+01
- 4.1500000e+01 7.1500000e+01
- 4.2500000e+01 7.9500000e+01
- 3.5000000e+01 5.9500000e+01
- 3.8500000e+01 7.3500000e+01
- 3.2000000e+01 8.1500000e+01
- 4.6000000e+01 6.0500000e+01
- 3.6500000e+01 5.3000000e+01
- 3.6500000e+01 5.3500000e+01
- 2.4000000e+01 6.0500000e+01
- 1.9000000e+01 5.7500000e+01
- 3.4500000e+01 6.0000000e+01
- 3.7500000e+01 6.4500000e+01
- 3.5500000e+01 5.1000000e+01
- 3.7000000e+01 5.0500000e+01
- 2.1500000e+01 4.2000000e+01
- 3.5500000e+01 5.8500000e+01
- 2.6500000e+01 6.8500000e+01
- 2.6500000e+01 5.5500000e+01
- 1.8500000e+01 6.7000000e+01
- 4.0000000e+01 6.7000000e+01
- 3.2500000e+01 7.1500000e+01
- 3.9000000e+01 7.1500000e+01
- 4.3000000e+01 5.5500000e+01
- 2.2000000e+01 5.4000000e+01
- 3.6000000e+01 6.2500000e+01
- 3.1000000e+01 5.5500000e+01
- 3.8500000e+01 7.6000000e+01
- 4.0000000e+01 7.5000000e+01
- 3.7500000e+01 6.3000000e+01
- 2.4500000e+01 5.8000000e+01
- 3.0000000e+01 6.7000000e+01
- 3.3000000e+01 5.6000000e+01
- 5.6500000e+01 6.1000000e+01
- 4.1000000e+01 5.7000000e+01
- 4.9500000e+01 6.3000000e+01
- 3.4500000e+01 7.2500000e+01
- 3.2500000e+01 6.9000000e+01
- 3.6000000e+01 7.3000000e+01
- 2.7000000e+01 5.3500000e+01
- 4.1000000e+01 6.3500000e+01
- 2.9500000e+01 5.2500000e+01
- 2.0000000e+01 6.5500000e+01
- 3.8000000e+01 6.5000000e+01
- 1.8500000e+01 7.4500000e+01
- 1.6000000e+01 7.2500000e+01
- 3.3500000e+01 6.8000000e+01
diff --git a/test/log_reg/output.dat b/test/log_reg/output.dat
deleted file mode 100644
index 51283c0..0000000
--- a/test/log_reg/output.dat
+++ /dev/null
@@ -1,80 +0,0 @@
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 1.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
- 0.0000000e+00
diff --git a/to-ascii b/to-ascii
new file mode 100755
index 0000000..10fd1c2
--- /dev/null
+++ b/to-ascii
@@ -0,0 +1,12 @@
+#!/usr/bin/env ruby
+
+while line = STDIN.gets
+ encoding_options = {
+ :invalid => :replace,
+ :undef => :replace,
+ :replace => '?',
+ :universal_newline => true
+ }
+ puts line.encode 'ASCII', encoding_options
+end
+
diff --git a/to_ascii b/to_ascii
deleted file mode 100755
index 10fd1c2..0000000
--- a/to_ascii
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env ruby
-
-while line = STDIN.gets
- encoding_options = {
- :invalid => :replace,
- :undef => :replace,
- :replace => '?',
- :universal_newline => true
- }
- puts line.encode 'ASCII', encoding_options
-end
-
diff --git a/toks-per-line b/toks-per-line
new file mode 100755
index 0000000..012caac
--- /dev/null
+++ b/toks-per-line
@@ -0,0 +1,12 @@
+#!/usr/bin/env ruby
+
+uniq = false
+uniq = true if ARGV[0]
+
+while line = STDIN.gets
+ a = line.strip.split
+ a.uniq! if uniq
+ a.sort!
+ puts a.join " "
+end
+
diff --git a/toks_per_line b/toks_per_line
deleted file mode 100755
index 012caac..0000000
--- a/toks_per_line
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env ruby
-
-uniq = false
-uniq = true if ARGV[0]
-
-while line = STDIN.gets
- a = line.strip.split
- a.uniq! if uniq
- a.sort!
- puts a.join " "
-end
-
diff --git a/train-test-split b/train-test-split
new file mode 100755
index 0000000..4d8153a
--- /dev/null
+++ b/train-test-split
@@ -0,0 +1,50 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+require 'trollop'
+
+conf = Trollop::options do
+ opt :foreign, "foreign file", :type => :string, :required => true
+ opt :english, "english file", :type => :string, :required => true
+ opt :size, "one size", :type => :int, :required => true
+ opt :repeat, "number of repetitions", :type => :int, :default => 1
+ opt :prefix, "prefix for output files", :type => :string
+end
+fn = conf[:foreign]
+fn_ext = fn.split('.').last
+f = ReadFile.readlines fn
+en = conf[:english]
+en_ext = en.split('.').last
+e = ReadFile.readlines en
+size = conf[:size]
+nlines_f = `wc -l #{fn}`.split()[0].to_i
+nlines_e = `wc -l #{en}`.split()[0].to_i
+if nlines_f != nlines_e
+ STDERR.write "Unbalanced files (#{nlines_f} vs. #{nlines_e}), exiting!\n"
+ exit 1
+end
+
+prefix = conf[:prefix]
+a = (0..nlines_e-1).to_a
+i = 0
+conf[:repeat].times {
+ b = a.sample(size)
+ ax = a.reject{|j| b.include? j}
+ `mkdir split_#{i}`
+ new_f = WriteFile.new "split_#{i}/#{prefix}.train.#{i}.#{fn_ext}"
+ new_e = WriteFile.new "split_#{i}/#{prefix}.train.#{i}.#{en_ext}"
+ ax.each { |j|
+ new_f.write f[j]
+ new_e.write e[j]
+ }
+ new_f.close; new_e.close
+ new_f = WriteFile.new "split_#{i}/#{prefix}.test.#{i}.#{fn_ext}"
+ new_e = WriteFile.new "split_#{i}/#{prefix}.test.#{i}.#{en_ext}"
+ b.each { |j|
+ new_f.write f[j]
+ new_e.write e[j]
+ }
+ new_f.close; new_e.close
+ i += 1
+}
+
diff --git a/train_test_split b/train_test_split
deleted file mode 100755
index 4d8153a..0000000
--- a/train_test_split
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-require 'trollop'
-
-conf = Trollop::options do
- opt :foreign, "foreign file", :type => :string, :required => true
- opt :english, "english file", :type => :string, :required => true
- opt :size, "one size", :type => :int, :required => true
- opt :repeat, "number of repetitions", :type => :int, :default => 1
- opt :prefix, "prefix for output files", :type => :string
-end
-fn = conf[:foreign]
-fn_ext = fn.split('.').last
-f = ReadFile.readlines fn
-en = conf[:english]
-en_ext = en.split('.').last
-e = ReadFile.readlines en
-size = conf[:size]
-nlines_f = `wc -l #{fn}`.split()[0].to_i
-nlines_e = `wc -l #{en}`.split()[0].to_i
-if nlines_f != nlines_e
- STDERR.write "Unbalanced files (#{nlines_f} vs. #{nlines_e}), exiting!\n"
- exit 1
-end
-
-prefix = conf[:prefix]
-a = (0..nlines_e-1).to_a
-i = 0
-conf[:repeat].times {
- b = a.sample(size)
- ax = a.reject{|j| b.include? j}
- `mkdir split_#{i}`
- new_f = WriteFile.new "split_#{i}/#{prefix}.train.#{i}.#{fn_ext}"
- new_e = WriteFile.new "split_#{i}/#{prefix}.train.#{i}.#{en_ext}"
- ax.each { |j|
- new_f.write f[j]
- new_e.write e[j]
- }
- new_f.close; new_e.close
- new_f = WriteFile.new "split_#{i}/#{prefix}.test.#{i}.#{fn_ext}"
- new_e = WriteFile.new "split_#{i}/#{prefix}.test.#{i}.#{en_ext}"
- b.each { |j|
- new_f.write f[j]
- new_e.write e[j]
- }
- new_f.close; new_e.close
- i += 1
-}
-
--
cgit v1.2.3