summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xavg6
-rwxr-xr-xavg_weights4
-rwxr-xr-xkbest_bleu_oracles12
-rwxr-xr-xkmeans18
-rwxr-xr-xlin_reg12
-rwxr-xr-xlog_reg6
-rwxr-xr-xmerge_ttable6
-rwxr-xr-xmin_max16
-rwxr-xr-xng6
-rwxr-xr-xnorm_german8
-rwxr-xr-xper_sentence_bleu8
-rwxr-xr-xper_sentence_bleu_kbest6
-rwxr-xr-xper_sentence_ter6
-rwxr-xr-xpt_bloom4
-rwxr-xr-xsplit_pipes12
-rwxr-xr-xstddev6
-rwxr-xr-xtf-idf18
-rwxr-xr-xtrain_test_split12
-rwxr-xr-xvar6
19 files changed, 86 insertions, 86 deletions
diff --git a/avg b/avg
index 07e3de9..db5035a 100755
--- a/avg
+++ b/avg
@@ -2,7 +2,7 @@
require 'trollop'
-cfg = Trollop::options do
+conf = Trollop::options do
banner "avg < <one number per line>"
opt :round, "Number of digits after decimal point.", :type => :int, :default => -1
end
@@ -16,8 +16,8 @@ end
avg = sum/i.to_f
-if cfg[:round] >= 0
- puts avg.round cfg[:round]
+if conf[:round] >= 0
+ puts avg.round conf[:round]
else
puts avg
end
diff --git a/avg_weights b/avg_weights
index 2e23440..36b051a 100755
--- a/avg_weights
+++ b/avg_weights
@@ -4,7 +4,7 @@ require 'zipf'
require 'trollop'
require 'zlib'
-cfg = Trollop::options do
+conf = Trollop::options do
opt :weights_files, "a number of weights files: name value", :required => true
opt :filter, "Filter if key does not appear in every file.", :type => :bool, :default => false
end
@@ -27,7 +27,7 @@ ARGV.each { |fn|
n = ARGV.size.to_f
h.each_pair { |k,w|
- next if cfg[:filter] and w.size < n
+ next if conf[:filter] and w.size < n
puts "#{k} #{w.inject(:+)/n}"
}
diff --git a/kbest_bleu_oracles b/kbest_bleu_oracles
index 2b11137..a36c345 100755
--- a/kbest_bleu_oracles
+++ b/kbest_bleu_oracles
@@ -12,17 +12,17 @@ def get_context kbest_lists, references, n
end
def main
- cfg = Trollop::options do
+ conf = Trollop::options do
opt :kbest_lists, "kbest lists", :type => :string, :required => true
opt :references, "reference", :type => :string, :required => true
opt :n, "N for BLEU", :type => :int, :default => 4
opt :weight, "how much to weigh single translations", :type => :int, :default => 1
opt :debug, "debug mode", :type => :bool, :default => false
end
- debug = cfg[:debug]
- n = cfg[:n]
- kbest_lists = read_kbest_lists cfg[:kbest_lists]
- references = ReadFile.readlines_strip cfg[:references]
+ debug = conf[:debug]
+ n = conf[:n]
+ kbest_lists = read_kbest_lists conf[:kbest_lists]
+ references = ReadFile.readlines_strip conf[:references]
context = get_context kbest_lists, references, n
kbest_lists.each_with_index { |kbest,j|
scores = []
@@ -30,7 +30,7 @@ def main
max_idx = -1
kbest.each_index { |i|
context_cp = context.dup
- context_cp[j] = BLEU::get_counts kbest[i].s, references[j], n, cfg[:weight]
+ context_cp[j] = BLEU::get_counts kbest[i].s, references[j], n, conf[:weight]
score = BLEU::hbleu_(context_cp, n, debug)
scores << score
if score > max_score
diff --git a/kmeans b/kmeans
index 201864b..c1684ee 100755
--- a/kmeans
+++ b/kmeans
@@ -57,7 +57,7 @@ def update assignment, data
end
def main
- cfg = Trollop::options do
+ conf = Trollop::options do
opt :k, "k", :type => :int, :required => true
opt :input, "input: one feature vector per line", :type => :string, :required => true
opt :max_iterations, "max. number of iterations", :type => :int, :default => 100
@@ -65,26 +65,26 @@ def main
opt :init, "centroid initialization (1: sample k features vectors, 2: k-times do sample k feature and build mean)", :type => :int, :short => '-j', :default => 2
end
# data is 'ID f1=v1 f2=v2'
- data = read_data cfg[:input]
- k = cfg[:k]
+ data = read_data conf[:input]
+ k = conf[:k]
centroids = nil
- if cfg[:init] == 1
+ if conf[:init] == 1
centroids = rand_init(data, k)
else
centroids = rand_means_init(data, k)
end
STDERR.write "\n k #{k}\n"
- STDERR.write " input #{cfg[:input]}\n"
- STDERR.write "iterations #{cfg[:max_iterations]}\n"
- STDERR.write "max no ch. #{cfg[:max_no_change]}\n"
- STDERR.write " init #{cfg[:init]}\n\n"
+ STDERR.write " input #{conf[:input]}\n"
+ STDERR.write "iterations #{conf[:max_iterations]}\n"
+ STDERR.write "max no ch. #{conf[:max_no_change]}\n"
+ STDERR.write " init #{conf[:init]}\n\n"
assignment = nil
prev_stats = []
stats = []
no_change = 0
max_no_change = 5
STDERR.write "expected cluster sz=#{data.size/k.to_f}\n\n"
- 0.upto(cfg[:max_iterations]) do |i|
+ 0.upto(conf[:max_iterations]) do |i|
s = "iteration #{i}"
STDERR.write "#{s}\n#{'-'*s.size}\n"
assignment = assign centroids, data
diff --git a/lin_reg b/lin_reg
index 4a7c3b2..7a8e614 100755
--- a/lin_reg
+++ b/lin_reg
@@ -24,7 +24,7 @@ def read_data fn, scale
end
def main
- cfg = Trollop::options do
+ conf = Trollop::options do
opt :input, "input data", :type => :string, :required => true
opt :output, "output data", :type => :string, :required => true
opt :learning_rate, "learning rate", :type => :float, :default => 0.07
@@ -32,9 +32,9 @@ def main
opt :scale_features,"scale features", :type => :bool, :default => false, :short => '-t'
opt :show_loss, "show loss per iter", :type => :bool, :default => false
end
- data = read_data cfg[:input], cfg[:scale_features]
+ data = read_data conf[:input], conf[:scale_features]
zeros = [0.0]*data[0].size
- t = ReadFile.readlines(cfg[:output]).map{ |i| i.to_f }
+ t = ReadFile.readlines(conf[:output]).map{ |i| i.to_f }
model = SparseVector.new zeros
stop = 0
prev_model = nil
@@ -48,15 +48,15 @@ def main
overall_loss += loss**2
u += x * loss
}
- STDERR.write "#{i} #{overall_loss/data.size}\n" if cfg[:show_loss]
- u *= cfg[:learning_rate]*(1.0/t.size)
+ STDERR.write "#{i} #{overall_loss/data.size}\n" if conf[:show_loss]
+ u *= conf[:learning_rate]*(1.0/t.size)
model -= u
if model.approx_eql? prev_model
stop += 1
else
stop = 0
end
- break if stop==cfg[:stop]
+ break if stop==conf[:stop]
prev_model = model
end
tss = t.map{ |y| (y-t.mean)**2 }.sum
diff --git a/log_reg b/log_reg
index 3916d0c..82dc353 100755
--- a/log_reg
+++ b/log_reg
@@ -36,14 +36,14 @@ def approx_eql x, y, eps=10**-10
end
def main
- cfg = Trollop::options do
+ conf = Trollop::options do
opt :input, "input data", :type => :string, :required => true
opt :output, "1/0 output data", :type => :string, :required => true
end
- data = read_data cfg[:input]
+ data = read_data conf[:input]
dim = data[0].size
zeros = [0.0]*dim
- t = ReadFile.readlines(cfg[:output]).map{ |i| i.to_f }
+ t = ReadFile.readlines(conf[:output]).map{ |i| i.to_f }
model = Vector.elements zeros
prev_model = nil
gradient = Vector.elements zeros
diff --git a/merge_ttable b/merge_ttable
index e4621f5..ac10903 100755
--- a/merge_ttable
+++ b/merge_ttable
@@ -4,13 +4,13 @@ require 'zipf'
require 'trollop'
def main
- cfg = Trollop::options do
+ conf = Trollop::options do
opt :f, "f files", :type => :string, :required => true
opt :e, "e files", :type => :string, :required => true
end
- f_files = cfg[:f].split
- e_files = cfg[:e].split
+ f_files = conf[:f].split
+ e_files = conf[:e].split
h = {}
f_files.each_with_index { |fn,i|
diff --git a/min_max b/min_max
index b79a743..1dbfd40 100755
--- a/min_max
+++ b/min_max
@@ -3,7 +3,7 @@
require 'zipf'
require 'trollop'
-cfg = Trollop::options do
+conf = Trollop::options do
opt :min, "minimum #tokens", :type => :int, :default => 1
opt :max, "maximum #tokens", :type => :int, :default => 80, :short => '-n'
opt :in_f, "input 'French' file", :type => :string, :required => true
@@ -15,11 +15,11 @@ end
files = {}
-files[:f_file] = ReadFile.new cfg[:in_f]
-files[:e_file] = ReadFile.new cfg[:in_e]
-files[:f_out_file] = WriteFile.new cfg[:out_f]
-files[:e_out_file] = WriteFile.new cfg[:out_e]
-files[:id_out_file] = WriteFile.new cfg[:out_id]
+files[:f_file] = ReadFile.new conf[:in_f]
+files[:e_file] = ReadFile.new conf[:in_e]
+files[:f_out_file] = WriteFile.new conf[:out_f]
+files[:e_out_file] = WriteFile.new conf[:out_e]
+files[:id_out_file] = WriteFile.new conf[:out_id]
i = 0
while f_line = files[:f_file].gets
e_line = files[:e_file].gets
@@ -27,8 +27,8 @@ while f_line = files[:f_file].gets
e_line.strip!
a = f_line.split
b = e_line.split
- if a.size >= cfg[:min] and a.size <= cfg[:max] and \
- b.size >= cfg[:min] and b.size <= cfg[:max]
+ if a.size >= conf[:min] and a.size <= conf[:max] and \
+ b.size >= conf[:min] and b.size <= conf[:max]
files[:f_out_file].write "#{f_line}\n"
files[:e_out_file].write "#{e_line}\n"
files[:id_out_file].write "#{i}\n"
diff --git a/ng b/ng
index 0a29898..7a3552f 100755
--- a/ng
+++ b/ng
@@ -3,7 +3,7 @@
require 'zipf'
require 'trollop'
-cfg = Trollop::options do
+conf = Trollop::options do
banner "ng < <input>"
opt :n, "n for Ngrams", :type => :int, :default => 4
opt :fix, "Don't output lower order Ngrams.", :type => :bool, :default => false
@@ -12,8 +12,8 @@ end
while line = STDIN.gets
a = []
- ngrams(line, cfg[:n], cfg[:fix]) { |ng| a << ng.join(' ') }
+ ngrams(line, conf[:n], conf[:fix]) { |ng| a << ng.join(' ') }
a.reject! { |i| i.strip.size==0 }
- puts a.join cfg[:separator] if a.size>0
+ puts a.join conf[:separator] if a.size>0
end
diff --git a/norm_german b/norm_german
index ef0408e..cf9c060 100755
--- a/norm_german
+++ b/norm_german
@@ -7,7 +7,7 @@ STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'
-cfg = Trollop::options do
+conf = Trollop::options do
banner "norm_german < <file w/ lowercased tokens>"
opt :upper, "uppercase", :type => :bool, :default => false
opt :threads, "#threads", :type => :int, :default => 1, :short => '-h'
@@ -18,7 +18,7 @@ end
pairs_lower = [ ['ß','ss'], ['ue', 'ü'], ['ae','ä'], ['oe', 'ö'] ]
pairs_upper = [ ['Ä', 'Ae'], ['Ö', 'Oe'], ['Ü', 'Ue'] ]
-if cfg[:upper]
+if conf[:upper]
PAIRS = pairs_lower
else
PAIRS = pairs_lower+pairs_upper
@@ -58,7 +58,7 @@ while tok = STDIN.gets
token_stock << [] if !token_stock[thread_n]
token_stock[thread_n] << tok.strip!
counter += 1
- if token_stock[thread_n].size%cfg[:shard_size]==0
+ if token_stock[thread_n].size%conf[:shard_size]==0
STDERR.write "Starting thread ##{thread_n}\n"
threads << Thread.new(token_stock[thread_n]) { |tokens|
th = build_partial tokens
@@ -71,7 +71,7 @@ while tok = STDIN.gets
else
next
end
- if thread_n==cfg[:threads]
+ if thread_n==conf[:threads]
threads.each { |i| i.join }
token_stock.each { |i| i.clear }
thread_n = 0
diff --git a/per_sentence_bleu b/per_sentence_bleu
index 5bacd1a..402f364 100755
--- a/per_sentence_bleu
+++ b/per_sentence_bleu
@@ -4,23 +4,23 @@ require 'zipf'
require 'trollop'
def main
- cfg = Trollop::options do
+ conf = Trollop::options do
opt :input, "input", :type => :string, :default => '-'
opt :references, "references", :type => :string, :required => true
opt :len_hack, "hack of Nakov et al", :type => :int, :default => 0
opt :n, "N", :default => 4
end
- refs = ReadFile.readlines_strip cfg[:references]
+ refs = ReadFile.readlines_strip conf[:references]
i = -1
- input = ReadFile.new cfg[:input]
+ input = ReadFile.new conf[:input]
while line = input.gets
i += 1
if line.strip == ''
puts 0.0
next
end
- puts BLEU::per_sentence_bleu line.strip, refs[i], cfg[:n], cfg[:len_hack]
+ puts BLEU::per_sentence_bleu line.strip, refs[i], conf[:n], conf[:len_hack]
end
input.close
end
diff --git a/per_sentence_bleu_kbest b/per_sentence_bleu_kbest
index e6a31cb..f8bd860 100755
--- a/per_sentence_bleu_kbest
+++ b/per_sentence_bleu_kbest
@@ -4,12 +4,12 @@ require 'zipf'
require 'trollop'
def main
- cfg = Trollop::options do
+ conf = Trollop::options do
opt :kbests, "kbests", :type => :string, :default => '-'
opt :references, "references", :type => :string, :required => true
end
- refs = ReadFile.new cfg[:references]
- kbest_lists = read_kbest_lists cfg[:kbests]
+ refs = ReadFile.new conf[:references]
+ kbest_lists = read_kbest_lists conf[:kbests]
i = 0
kbest_lists.each { |list|
scores = []
diff --git a/per_sentence_ter b/per_sentence_ter
index 343708e..fa283ef 100755
--- a/per_sentence_ter
+++ b/per_sentence_ter
@@ -5,14 +5,14 @@ require 'trollop'
require 'tempfile'
def main
- cfg = Trollop::options do
+ conf = Trollop::options do
opt :input, "input", :type => :string, :default => '-'
opt :references, "references", :type => :string, :required => true
opt :mteval_bin, "cdec's mteval/fast_score", :type => :string, :default => '`/toolbox/cdec-dtrain/mteval/fast_score'
end
- refs = ReadFile.readlines_strip cfg[:references]
- input = ReadFile.new cfg[:input]
+ refs = ReadFile.readlines_strip conf[:references]
+ input = ReadFile.new conf[:input]
i = -1
while line = input.gets
line.strip!
diff --git a/pt_bloom b/pt_bloom
index 2c3928f..5c2cf01 100755
--- a/pt_bloom
+++ b/pt_bloom
@@ -6,12 +6,12 @@ require 'trollop'
STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'
-cfg = Trollop::options do
+conf = Trollop::options do
opt :size, "number of entries in the filter", :type => :int, :required => true
opt :error_rate, "error rate", :type => :float, :default => 0.01
end
-f = BloomFilter.new cfg[:size], cfg[:error_rate]
+f = BloomFilter.new conf[:size], conf[:error_rate]
while line = STDIN.gets
src, tgt = splitpipe(line)[0..1]
src.strip!
diff --git a/split_pipes b/split_pipes
index a1a0128..ce8f018 100755
--- a/split_pipes
+++ b/split_pipes
@@ -5,7 +5,7 @@ require 'trollop'
STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'
-cfg = Trollop::options do
+conf = Trollop::options do
banner "splitpipes -f <n> < <input>"
opt :field, "field", :type => :int, :required => true
opt :to, "to", :type => :int, :default => nil
@@ -14,18 +14,18 @@ end
a = []
range = false
-if cfg[:to]
+if conf[:to]
range = true
end
if range
- if cfg[:field] >= cfg[:to]
+ if conf[:field] >= conf[:to]
STDERR.write "field >= to, exiting\n"
exit
end
end
-if cfg[:field]<=0 || (range && cfg[:to]<=0)
+if conf[:field]<=0 || (range && conf[:to]<=0)
STDERR.write "field or to <= 0, exiting"
exit
end
@@ -33,9 +33,9 @@ end
while line = STDIN.gets
j = 1
line.strip.split(' ||| ').each { |i|
- if range && (cfg[:field]..cfg[:to]).include?(j)
+ if range && (conf[:field]..conf[:to]).include?(j)
a << i.strip
- elsif j == cfg[:field]
+ elsif j == conf[:field]
puts i.strip
break
end
diff --git a/stddev b/stddev
index a7397b2..2634f63 100755
--- a/stddev
+++ b/stddev
@@ -2,7 +2,7 @@
require 'trollop'
-cfg = Trollop::options do
+conf = Trollop::options do
banner "stddev [-r <d>] < <one number per line>"
opt :round, "Number of digits after decimal point.", :type => :int, :default => -1
end
@@ -26,8 +26,8 @@ cached.each { |v|
stddev = Math.sqrt(var/i.to_f)
-if cfg[:round] >= 0
- puts stddev.round cfg[:round]
+if conf[:round] >= 0
+ puts stddev.round conf[:round]
else
puts stddev
end
diff --git a/tf-idf b/tf-idf
index dde2fd5..066548b 100755
--- a/tf-idf
+++ b/tf-idf
@@ -4,7 +4,7 @@ require 'zipf'
require 'trollop'
def main
- cfg = Trollop::options do
+ conf = Trollop::options do
opt :documents, "input files (documents)", :type => :string, :required => true
opt :filter_stopwords, "filter stopwords (give file)", :type => :string, :default => nil
opt :one_item_per_line, "one item per line (allow multi-word items)", :type => :bool, :default => false
@@ -13,21 +13,21 @@ def main
end
stopwords = []
- if cfg[:filter_stopwords]
- stopwords = ReadFile.readlines(cfg[:filter_stopwords]).map{ |i|
+ if conf[:filter_stopwords]
+ stopwords = ReadFile.readlines(conf[:filter_stopwords]).map{ |i|
i.split('|').first.strip
}.reject{ |i| i=='' }
end
docs = {}
a = []
- if cfg[:documents].strip[0] == "*"
- ad = Dir.glob(cfg[:documents])
+ if conf[:documents].strip[0] == "*"
+ ad = Dir.glob(conf[:documents])
else
- ad = cfg[:documents].split
+ ad = conf[:documents].split
end
ad.each { |i|
- if cfg[:one_item_per_line]
+ if conf[:one_item_per_line]
docs[i] = ReadFile.readlines_strip i
else
docs[i] = ReadFile.read(i).split(/\s/).map{ |i| i.strip }
@@ -38,9 +38,9 @@ def main
docs.each_pair { |name, words|
just_tf = TFIDF::tf words, stopwords
- just_tf = TFIDF::ntf(just_tf) if cfg[:ntf]
+ just_tf = TFIDF::ntf(just_tf) if conf[:ntf]
tf_idf = {}; tf_idf.default = 0.0
- if cfg[:idf]
+ if conf[:idf]
just_tf.each_pair { |word,f|
tf_idf[word] = idf_values[word] * f
}
diff --git a/train_test_split b/train_test_split
index db56de9..4d8153a 100755
--- a/train_test_split
+++ b/train_test_split
@@ -3,20 +3,20 @@
require 'zipf'
require 'trollop'
-cfg = Trollop::options do
+conf = Trollop::options do
opt :foreign, "foreign file", :type => :string, :required => true
opt :english, "english file", :type => :string, :required => true
opt :size, "one size", :type => :int, :required => true
opt :repeat, "number of repetitions", :type => :int, :default => 1
opt :prefix, "prefix for output files", :type => :string
end
-fn = cfg[:foreign]
+fn = conf[:foreign]
fn_ext = fn.split('.').last
f = ReadFile.readlines fn
-en = cfg[:english]
+en = conf[:english]
en_ext = en.split('.').last
e = ReadFile.readlines en
-size = cfg[:size]
+size = conf[:size]
nlines_f = `wc -l #{fn}`.split()[0].to_i
nlines_e = `wc -l #{en}`.split()[0].to_i
if nlines_f != nlines_e
@@ -24,10 +24,10 @@ if nlines_f != nlines_e
exit 1
end
-prefix = cfg[:prefix]
+prefix = conf[:prefix]
a = (0..nlines_e-1).to_a
i = 0
-cfg[:repeat].times {
+conf[:repeat].times {
b = a.sample(size)
ax = a.reject{|j| b.include? j}
`mkdir split_#{i}`
diff --git a/var b/var
index faccefa..60f3b9b 100755
--- a/var
+++ b/var
@@ -2,7 +2,7 @@
require 'trollop'
-cfg = Trollop::options do
+conf = Trollop::options do
banner "stddev [-r <d>] < <one number per line>"
opt :round, "Number of digits after decimal point.", :type => :int, :default => -1
end
@@ -27,8 +27,8 @@ cached.each { |v|
var /= i.to_f
-if cfg[:round] >= 0
- puts var.round cfg[:round]
+if conf[:round] >= 0
+ puts var.round conf[:round]
else
puts var
end