From 8151031373c08ccd714a99f50783eafcb54d2010 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Wed, 10 Jun 2015 14:48:34 +0200 Subject: undo unfortunate variable naming: cfg -> conf! --- avg | 6 +++--- avg_weights | 4 ++-- kbest_bleu_oracles | 12 ++++++------ kmeans | 18 +++++++++--------- lin_reg | 12 ++++++------ log_reg | 6 +++--- merge_ttable | 6 +++--- min_max | 16 ++++++++-------- ng | 6 +++--- norm_german | 8 ++++---- per_sentence_bleu | 8 ++++---- per_sentence_bleu_kbest | 6 +++--- per_sentence_ter | 6 +++--- pt_bloom | 4 ++-- split_pipes | 12 ++++++------ stddev | 6 +++--- tf-idf | 18 +++++++++--------- train_test_split | 12 ++++++------ var | 6 +++--- 19 files changed, 86 insertions(+), 86 deletions(-) diff --git a/avg b/avg index 07e3de9..db5035a 100755 --- a/avg +++ b/avg @@ -2,7 +2,7 @@ require 'trollop' -cfg = Trollop::options do +conf = Trollop::options do banner "avg < " opt :round, "Number of digits after decimal point.", :type => :int, :default => -1 end @@ -16,8 +16,8 @@ end avg = sum/i.to_f -if cfg[:round] >= 0 - puts avg.round cfg[:round] +if conf[:round] >= 0 + puts avg.round conf[:round] else puts avg end diff --git a/avg_weights b/avg_weights index 2e23440..36b051a 100755 --- a/avg_weights +++ b/avg_weights @@ -4,7 +4,7 @@ require 'zipf' require 'trollop' require 'zlib' -cfg = Trollop::options do +conf = Trollop::options do opt :weights_files, "a number of weights files: name value", :required => true opt :filter, "Filter if key does not appear in every file.", :type => :bool, :default => false end @@ -27,7 +27,7 @@ ARGV.each { |fn| n = ARGV.size.to_f h.each_pair { |k,w| - next if cfg[:filter] and w.size < n + next if conf[:filter] and w.size < n puts "#{k} #{w.inject(:+)/n}" } diff --git a/kbest_bleu_oracles b/kbest_bleu_oracles index 2b11137..a36c345 100755 --- a/kbest_bleu_oracles +++ b/kbest_bleu_oracles @@ -12,17 +12,17 @@ def get_context kbest_lists, references, n end def main - cfg = Trollop::options do + conf = Trollop::options do opt :kbest_lists, "kbest lists", :type => :string, :required => true opt :references, "reference", :type => :string, :required => true opt :n, "N for BLEU", :type => :int, :default => 4 opt :weight, "how much to weigh single translations", :type => :int, :default => 1 opt :debug, "debug mode", :type => :bool, :default => false end - debug = cfg[:debug] - n = cfg[:n] - kbest_lists = read_kbest_lists cfg[:kbest_lists] - references = ReadFile.readlines_strip cfg[:references] + debug = conf[:debug] + n = conf[:n] + kbest_lists = read_kbest_lists conf[:kbest_lists] + references = ReadFile.readlines_strip conf[:references] context = get_context kbest_lists, references, n kbest_lists.each_with_index { |kbest,j| scores = [] @@ -30,7 +30,7 @@ def main max_idx = -1 kbest.each_index { |i| context_cp = context.dup - context_cp[j] = BLEU::get_counts kbest[i].s, references[j], n, cfg[:weight] + context_cp[j] = BLEU::get_counts kbest[i].s, references[j], n, conf[:weight] score = BLEU::hbleu_(context_cp, n, debug) scores << score if score > max_score diff --git a/kmeans b/kmeans index 201864b..c1684ee 100755 --- a/kmeans +++ b/kmeans @@ -57,7 +57,7 @@ def update assignment, data end def main - cfg = Trollop::options do + conf = Trollop::options do opt :k, "k", :type => :int, :required => true opt :input, "input: one feature vector per line", :type => :string, :required => true opt :max_iterations, "max. number of iterations", :type => :int, :default => 100 @@ -65,26 +65,26 @@ def main opt :init, "centroid initialization (1: sample k features vectors, 2: k-times do sample k feature and build mean)", :type => :int, :short => '-j', :default => 2 end # data is 'ID f1=v1 f2=v2' - data = read_data cfg[:input] - k = cfg[:k] + data = read_data conf[:input] + k = conf[:k] centroids = nil - if cfg[:init] == 1 + if conf[:init] == 1 centroids = rand_init(data, k) else centroids = rand_means_init(data, k) end STDERR.write "\n k #{k}\n" - STDERR.write " input #{cfg[:input]}\n" - STDERR.write "iterations #{cfg[:max_iterations]}\n" - STDERR.write "max no ch. #{cfg[:max_no_change]}\n" - STDERR.write " init #{cfg[:init]}\n\n" + STDERR.write " input #{conf[:input]}\n" + STDERR.write "iterations #{conf[:max_iterations]}\n" + STDERR.write "max no ch. #{conf[:max_no_change]}\n" + STDERR.write " init #{conf[:init]}\n\n" assignment = nil prev_stats = [] stats = [] no_change = 0 max_no_change = 5 STDERR.write "expected cluster sz=#{data.size/k.to_f}\n\n" - 0.upto(cfg[:max_iterations]) do |i| + 0.upto(conf[:max_iterations]) do |i| s = "iteration #{i}" STDERR.write "#{s}\n#{'-'*s.size}\n" assignment = assign centroids, data diff --git a/lin_reg b/lin_reg index 4a7c3b2..7a8e614 100755 --- a/lin_reg +++ b/lin_reg @@ -24,7 +24,7 @@ def read_data fn, scale end def main - cfg = Trollop::options do + conf = Trollop::options do opt :input, "input data", :type => :string, :required => true opt :output, "output data", :type => :string, :required => true opt :learning_rate, "learning rate", :type => :float, :default => 0.07 @@ -32,9 +32,9 @@ def main opt :scale_features,"scale features", :type => :bool, :default => false, :short => '-t' opt :show_loss, "show loss per iter", :type => :bool, :default => false end - data = read_data cfg[:input], cfg[:scale_features] + data = read_data conf[:input], conf[:scale_features] zeros = [0.0]*data[0].size - t = ReadFile.readlines(cfg[:output]).map{ |i| i.to_f } + t = ReadFile.readlines(conf[:output]).map{ |i| i.to_f } model = SparseVector.new zeros stop = 0 prev_model = nil @@ -48,15 +48,15 @@ def main overall_loss += loss**2 u += x * loss } - STDERR.write "#{i} #{overall_loss/data.size}\n" if cfg[:show_loss] - u *= cfg[:learning_rate]*(1.0/t.size) + STDERR.write "#{i} #{overall_loss/data.size}\n" if conf[:show_loss] + u *= conf[:learning_rate]*(1.0/t.size) model -= u if model.approx_eql? prev_model stop += 1 else stop = 0 end - break if stop==cfg[:stop] + break if stop==conf[:stop] prev_model = model end tss = t.map{ |y| (y-t.mean)**2 }.sum diff --git a/log_reg b/log_reg index 3916d0c..82dc353 100755 --- a/log_reg +++ b/log_reg @@ -36,14 +36,14 @@ def approx_eql x, y, eps=10**-10 end def main - cfg = Trollop::options do + conf = Trollop::options do opt :input, "input data", :type => :string, :required => true opt :output, "1/0 output data", :type => :string, :required => true end - data = read_data cfg[:input] + data = read_data conf[:input] dim = data[0].size zeros = [0.0]*dim - t = ReadFile.readlines(cfg[:output]).map{ |i| i.to_f } + t = ReadFile.readlines(conf[:output]).map{ |i| i.to_f } model = Vector.elements zeros prev_model = nil gradient = Vector.elements zeros diff --git a/merge_ttable b/merge_ttable index e4621f5..ac10903 100755 --- a/merge_ttable +++ b/merge_ttable @@ -4,13 +4,13 @@ require 'zipf' require 'trollop' def main - cfg = Trollop::options do + conf = Trollop::options do opt :f, "f files", :type => :string, :required => true opt :e, "e files", :type => :string, :required => true end - f_files = cfg[:f].split - e_files = cfg[:e].split + f_files = conf[:f].split + e_files = conf[:e].split h = {} f_files.each_with_index { |fn,i| diff --git a/min_max b/min_max index b79a743..1dbfd40 100755 --- a/min_max +++ b/min_max @@ -3,7 +3,7 @@ require 'zipf' require 'trollop' -cfg = Trollop::options do +conf = Trollop::options do opt :min, "minimum #tokens", :type => :int, :default => 1 opt :max, "maximum #tokens", :type => :int, :default => 80, :short => '-n' opt :in_f, "input 'French' file", :type => :string, :required => true @@ -15,11 +15,11 @@ end files = {} -files[:f_file] = ReadFile.new cfg[:in_f] -files[:e_file] = ReadFile.new cfg[:in_e] -files[:f_out_file] = WriteFile.new cfg[:out_f] -files[:e_out_file] = WriteFile.new cfg[:out_e] -files[:id_out_file] = WriteFile.new cfg[:out_id] +files[:f_file] = ReadFile.new conf[:in_f] +files[:e_file] = ReadFile.new conf[:in_e] +files[:f_out_file] = WriteFile.new conf[:out_f] +files[:e_out_file] = WriteFile.new conf[:out_e] +files[:id_out_file] = WriteFile.new conf[:out_id] i = 0 while f_line = files[:f_file].gets e_line = files[:e_file].gets @@ -27,8 +27,8 @@ while f_line = files[:f_file].gets e_line.strip! a = f_line.split b = e_line.split - if a.size >= cfg[:min] and a.size <= cfg[:max] and \ - b.size >= cfg[:min] and b.size <= cfg[:max] + if a.size >= conf[:min] and a.size <= conf[:max] and \ + b.size >= conf[:min] and b.size <= conf[:max] files[:f_out_file].write "#{f_line}\n" files[:e_out_file].write "#{e_line}\n" files[:id_out_file].write "#{i}\n" diff --git a/ng b/ng index 0a29898..7a3552f 100755 --- a/ng +++ b/ng @@ -3,7 +3,7 @@ require 'zipf' require 'trollop' -cfg = Trollop::options do +conf = Trollop::options do banner "ng < " opt :n, "n for Ngrams", :type => :int, :default => 4 opt :fix, "Don't output lower order Ngrams.", :type => :bool, :default => false @@ -12,8 +12,8 @@ end while line = STDIN.gets a = [] - ngrams(line, cfg[:n], cfg[:fix]) { |ng| a << ng.join(' ') } + ngrams(line, conf[:n], conf[:fix]) { |ng| a << ng.join(' ') } a.reject! { |i| i.strip.size==0 } - puts a.join cfg[:separator] if a.size>0 + puts a.join conf[:separator] if a.size>0 end diff --git a/norm_german b/norm_german index ef0408e..cf9c060 100755 --- a/norm_german +++ b/norm_german @@ -7,7 +7,7 @@ STDIN.set_encoding 'utf-8' STDOUT.set_encoding 'utf-8' -cfg = Trollop::options do +conf = Trollop::options do banner "norm_german < " opt :upper, "uppercase", :type => :bool, :default => false opt :threads, "#threads", :type => :int, :default => 1, :short => '-h' @@ -18,7 +18,7 @@ end pairs_lower = [ ['ß','ss'], ['ue', 'ü'], ['ae','ä'], ['oe', 'ö'] ] pairs_upper = [ ['Ä', 'Ae'], ['Ö', 'Oe'], ['Ü', 'Ue'] ] -if cfg[:upper] +if conf[:upper] PAIRS = pairs_lower else PAIRS = pairs_lower+pairs_upper @@ -58,7 +58,7 @@ while tok = STDIN.gets token_stock << [] if !token_stock[thread_n] token_stock[thread_n] << tok.strip! counter += 1 - if token_stock[thread_n].size%cfg[:shard_size]==0 + if token_stock[thread_n].size%conf[:shard_size]==0 STDERR.write "Starting thread ##{thread_n}\n" threads << Thread.new(token_stock[thread_n]) { |tokens| th = build_partial tokens @@ -71,7 +71,7 @@ while tok = STDIN.gets else next end - if thread_n==cfg[:threads] + if thread_n==conf[:threads] threads.each { |i| i.join } token_stock.each { |i| i.clear } thread_n = 0 diff --git a/per_sentence_bleu b/per_sentence_bleu index 5bacd1a..402f364 100755 --- a/per_sentence_bleu +++ b/per_sentence_bleu @@ -4,23 +4,23 @@ require 'zipf' require 'trollop' def main - cfg = Trollop::options do + conf = Trollop::options do opt :input, "input", :type => :string, :default => '-' opt :references, "references", :type => :string, :required => true opt :len_hack, "hack of Nakov et al", :type => :int, :default => 0 opt :n, "N", :default => 4 end - refs = ReadFile.readlines_strip cfg[:references] + refs = ReadFile.readlines_strip conf[:references] i = -1 - input = ReadFile.new cfg[:input] + input = ReadFile.new conf[:input] while line = input.gets i += 1 if line.strip == '' puts 0.0 next end - puts BLEU::per_sentence_bleu line.strip, refs[i], cfg[:n], cfg[:len_hack] + puts BLEU::per_sentence_bleu line.strip, refs[i], conf[:n], conf[:len_hack] end input.close end diff --git a/per_sentence_bleu_kbest b/per_sentence_bleu_kbest index e6a31cb..f8bd860 100755 --- a/per_sentence_bleu_kbest +++ b/per_sentence_bleu_kbest @@ -4,12 +4,12 @@ require 'zipf' require 'trollop' def main - cfg = Trollop::options do + conf = Trollop::options do opt :kbests, "kbests", :type => :string, :default => '-' opt :references, "references", :type => :string, :required => true end - refs = ReadFile.new cfg[:references] - kbest_lists = read_kbest_lists cfg[:kbests] + refs = ReadFile.new conf[:references] + kbest_lists = read_kbest_lists conf[:kbests] i = 0 kbest_lists.each { |list| scores = [] diff --git a/per_sentence_ter b/per_sentence_ter index 343708e..fa283ef 100755 --- a/per_sentence_ter +++ b/per_sentence_ter @@ -5,14 +5,14 @@ require 'trollop' require 'tempfile' def main - cfg = Trollop::options do + conf = Trollop::options do opt :input, "input", :type => :string, :default => '-' opt :references, "references", :type => :string, :required => true opt :mteval_bin, "cdec's mteval/fast_score", :type => :string, :default => '`/toolbox/cdec-dtrain/mteval/fast_score' end - refs = ReadFile.readlines_strip cfg[:references] - input = ReadFile.new cfg[:input] + refs = ReadFile.readlines_strip conf[:references] + input = ReadFile.new conf[:input] i = -1 while line = input.gets line.strip! diff --git a/pt_bloom b/pt_bloom index 2c3928f..5c2cf01 100755 --- a/pt_bloom +++ b/pt_bloom @@ -6,12 +6,12 @@ require 'trollop' STDIN.set_encoding 'utf-8' STDOUT.set_encoding 'utf-8' -cfg = Trollop::options do +conf = Trollop::options do opt :size, "number of entries in the filter", :type => :int, :required => true opt :error_rate, "error rate", :type => :float, :default => 0.01 end -f = BloomFilter.new cfg[:size], cfg[:error_rate] +f = BloomFilter.new conf[:size], conf[:error_rate] while line = STDIN.gets src, tgt = splitpipe(line)[0..1] src.strip! diff --git a/split_pipes b/split_pipes index a1a0128..ce8f018 100755 --- a/split_pipes +++ b/split_pipes @@ -5,7 +5,7 @@ require 'trollop' STDIN.set_encoding 'utf-8' STDOUT.set_encoding 'utf-8' -cfg = Trollop::options do +conf = Trollop::options do banner "splitpipes -f < " opt :field, "field", :type => :int, :required => true opt :to, "to", :type => :int, :default => nil @@ -14,18 +14,18 @@ end a = [] range = false -if cfg[:to] +if conf[:to] range = true end if range - if cfg[:field] >= cfg[:to] + if conf[:field] >= conf[:to] STDERR.write "field >= to, exiting\n" exit end end -if cfg[:field]<=0 || (range && cfg[:to]<=0) +if conf[:field]<=0 || (range && conf[:to]<=0) STDERR.write "field or to <= 0, exiting" exit end @@ -33,9 +33,9 @@ end while line = STDIN.gets j = 1 line.strip.split(' ||| ').each { |i| - if range && (cfg[:field]..cfg[:to]).include?(j) + if range && (conf[:field]..conf[:to]).include?(j) a << i.strip - elsif j == cfg[:field] + elsif j == conf[:field] puts i.strip break end diff --git a/stddev b/stddev index a7397b2..2634f63 100755 --- a/stddev +++ b/stddev @@ -2,7 +2,7 @@ require 'trollop' -cfg = Trollop::options do +conf = Trollop::options do banner "stddev [-r ] < " opt :round, "Number of digits after decimal point.", :type => :int, :default => -1 end @@ -26,8 +26,8 @@ cached.each { |v| stddev = Math.sqrt(var/i.to_f) -if cfg[:round] >= 0 - puts stddev.round cfg[:round] +if conf[:round] >= 0 + puts stddev.round conf[:round] else puts stddev end diff --git a/tf-idf b/tf-idf index dde2fd5..066548b 100755 --- a/tf-idf +++ b/tf-idf @@ -4,7 +4,7 @@ require 'zipf' require 'trollop' def main - cfg = Trollop::options do + conf = Trollop::options do opt :documents, "input files (documents)", :type => :string, :required => true opt :filter_stopwords, "filter stopwords (give file)", :type => :string, :default => nil opt :one_item_per_line, "one item per line (allow multi-word items)", :type => :bool, :default => false @@ -13,21 +13,21 @@ def main end stopwords = [] - if cfg[:filter_stopwords] - stopwords = ReadFile.readlines(cfg[:filter_stopwords]).map{ |i| + if conf[:filter_stopwords] + stopwords = ReadFile.readlines(conf[:filter_stopwords]).map{ |i| i.split('|').first.strip }.reject{ |i| i=='' } end docs = {} a = [] - if cfg[:documents].strip[0] == "*" - ad = Dir.glob(cfg[:documents]) + if conf[:documents].strip[0] == "*" + ad = Dir.glob(conf[:documents]) else - ad = cfg[:documents].split + ad = conf[:documents].split end ad.each { |i| - if cfg[:one_item_per_line] + if conf[:one_item_per_line] docs[i] = ReadFile.readlines_strip i else docs[i] = ReadFile.read(i).split(/\s/).map{ |i| i.strip } @@ -38,9 +38,9 @@ def main docs.each_pair { |name, words| just_tf = TFIDF::tf words, stopwords - just_tf = TFIDF::ntf(just_tf) if cfg[:ntf] + just_tf = TFIDF::ntf(just_tf) if conf[:ntf] tf_idf = {}; tf_idf.default = 0.0 - if cfg[:idf] + if conf[:idf] just_tf.each_pair { |word,f| tf_idf[word] = idf_values[word] * f } diff --git a/train_test_split b/train_test_split index db56de9..4d8153a 100755 --- a/train_test_split +++ b/train_test_split @@ -3,20 +3,20 @@ require 'zipf' require 'trollop' -cfg = Trollop::options do +conf = Trollop::options do opt :foreign, "foreign file", :type => :string, :required => true opt :english, "english file", :type => :string, :required => true opt :size, "one size", :type => :int, :required => true opt :repeat, "number of repetitions", :type => :int, :default => 1 opt :prefix, "prefix for output files", :type => :string end -fn = cfg[:foreign] +fn = conf[:foreign] fn_ext = fn.split('.').last f = ReadFile.readlines fn -en = cfg[:english] +en = conf[:english] en_ext = en.split('.').last e = ReadFile.readlines en -size = cfg[:size] +size = conf[:size] nlines_f = `wc -l #{fn}`.split()[0].to_i nlines_e = `wc -l #{en}`.split()[0].to_i if nlines_f != nlines_e @@ -24,10 +24,10 @@ if nlines_f != nlines_e exit 1 end -prefix = cfg[:prefix] +prefix = conf[:prefix] a = (0..nlines_e-1).to_a i = 0 -cfg[:repeat].times { +conf[:repeat].times { b = a.sample(size) ax = a.reject{|j| b.include? j} `mkdir split_#{i}` diff --git a/var b/var index faccefa..60f3b9b 100755 --- a/var +++ b/var @@ -2,7 +2,7 @@ require 'trollop' -cfg = Trollop::options do +conf = Trollop::options do banner "stddev [-r ] < " opt :round, "Number of digits after decimal point.", :type => :int, :default => -1 end @@ -27,8 +27,8 @@ cached.each { |v| var /= i.to_f -if cfg[:round] >= 0 - puts var.round cfg[:round] +if conf[:round] >= 0 + puts var.round conf[:round] else puts var end -- cgit v1.2.3