From 2b1d7f881c19c4d4b5afae194e02d3300c7675d0 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 5 Jul 2016 11:01:46 +0200 Subject: mv --- add-ln | 8 +++ add-seg | 36 ++++++++++++ add-start-end | 10 ++++ add_ln | 8 --- add_seg | 36 ------------ add_start_end | 10 ---- avg-weights | 33 +++++++++++ avg_weights | 33 ----------- cdec-hg-to-json | 80 ++++++++++++++++++++++++++ cdec_hg_to_json | 80 -------------------------- convert-to-svm-light-format | 22 ++++++++ convert_to_svmlight_format | 22 -------- dense-features.txt | 12 ++++ dense_features.txt | 12 ---- fake-svm-light | 14 +++++ fake_svm_light | 14 ----- feature-dict | 24 ++++++++ feature_dict | 24 -------- filter-by-rule-shape | 32 +++++++++++ filter-features | 16 ++++++ filter_by_rule_shape | 32 ----------- filter_features | 16 ------ first-lower | 11 ++++ first_lower | 11 ---- gigaword-collapse-tags | 39 +++++++++++++ gigaword_collapse_tags | 39 ------------- hadoop-uniq | 11 ++++ hadoop_uniq | 11 ---- kbest-bleu-oracles | 51 +++++++++++++++++ kbest_bleu_oracles | 51 ----------------- kendalls-tau | 75 +++++++++++++++++++++++++ kendalls_tau | 75 ------------------------- key-count | 14 +++++ key_count | 14 ----- lin-reg | 70 +++++++++++++++++++++++ lin_reg | 70 ----------------------- log-reg | 71 +++++++++++++++++++++++ log_reg | 71 ----------------------- make-rule-features | 44 +++++++++++++++ make_rule_features | 44 --------------- max-len | 16 ++++++ max_len | 16 ------ mem-usage | 11 ++++ mem_usage | 11 ---- merge-files | 31 ++++++++++ merge-ttable | 34 +++++++++++ merge_files | 31 ---------- merge_ttable | 34 ----------- min-max | 40 +++++++++++++ min_max | 40 ------------- moses-1best | 14 +++++ moses_1best | 14 ----- no-empty | 18 ++++++ no-non-printables | 4 ++ no_empty | 18 ------ no_non_printables | 4 -- norm-german | 87 ++++++++++++++++++++++++++++ norm-hyphens | 4 ++ norm_german | 87 ---------------------------- norm_hyphens | 4 -- normalize-punctuation | 46 +++++++++++++++ normalize_punctuation | 46 --------------- num-tok | 9 +++ num_tok | 9 --- paste-pairs | 10 ++++ paste_pairs | 10 ---- per-sentence-bleu | 29 ++++++++++ per-sentence-bleu-kbest | 32 +++++++++++ per-sentence-ter | 33 +++++++++++ per_sentence_bleu | 29 ---------- per_sentence_bleu_kbest | 32 ----------- per_sentence_ter | 33 ----------- preprocess | 2 +- preprocess-no-lower | 9 +++ preprocess_no_lower | 9 --- pt-bloom | 24 ++++++++ pt_bloom | 24 -------- push-rules | 24 ++++++++ push_rules | 24 -------- ruby-eval | 6 ++ ruby_eval | 6 -- rule-shapes | 29 ++++++++++ rule_shapes | 29 ---------- select-from | 28 +++++++++ select_from | 28 --------- sort-features | 10 ++++ sort_features | 10 ---- source-sides | 4 ++ source_sides | 4 -- split-kbest | 24 ++++++++ split-lines | 14 +++++ split-pipes | 51 +++++++++++++++++ split_kbest | 24 -------- split_lines | 14 ----- split_pipes | 51 ----------------- stanford-parser-run | 13 +++++ stanford_parser_run | 13 ----- test/cdec-hg-to-json/cdec.ini | 5 ++ test/cdec-hg-to-json/grammar.gz | Bin 0 -> 1399915 bytes test/cdec-hg-to-json/hg.json.gz | Bin 0 -> 318029 bytes test/cdec-hg-to-json/hg.meta | 7 +++ test/cdec-hg-to-json/in | 1 + test/cdec-hg-to-json/toy.cdec.ini | 2 + test/cdec-hg-to-json/toy.grammar | 12 ++++ test/cdec-hg-to-json/toy.in | 1 + test/cdec-hg-to-json/toy.weights | 3 + test/cdec-hg-to-json/weights | 17 ++++++ test/cdec_hg_to_json/cdec.ini | 5 -- test/cdec_hg_to_json/grammar.gz | Bin 1399915 -> 0 bytes test/cdec_hg_to_json/hg.json.gz | Bin 318029 -> 0 bytes test/cdec_hg_to_json/hg.meta | 7 --- test/cdec_hg_to_json/in | 1 - test/cdec_hg_to_json/toy.cdec.ini | 2 - test/cdec_hg_to_json/toy.grammar | 12 ---- test/cdec_hg_to_json/toy.in | 1 - test/cdec_hg_to_json/toy.weights | 3 - test/cdec_hg_to_json/weights | 17 ------ test/kbest-bleu-oracles/debug.kbests | 4 ++ test/kbest-bleu-oracles/debug.refs | 1 + test/kbest-bleu-oracles/example.kbests | 100 +++++++++++++++++++++++++++++++++ test/kbest-bleu-oracles/example.refs | 10 ++++ test/kbest-bleu-oracles/example.src | 10 ++++ test/kbest_bleu_oracles/debug.kbests | 4 -- test/kbest_bleu_oracles/debug.refs | 1 - test/kbest_bleu_oracles/example.kbests | 100 --------------------------------- test/kbest_bleu_oracles/example.refs | 10 ---- test/kbest_bleu_oracles/example.src | 10 ---- test/lin-reg/exptected.txt | 3 + test/lin-reg/input.dat | 50 +++++++++++++++++ test/lin-reg/output.dat | 50 +++++++++++++++++ test/lin_reg/exptected.txt | 3 - test/lin_reg/input.dat | 50 ----------------- test/lin_reg/output.dat | 50 ----------------- test/log-reg/expected.txt | 2 + test/log-reg/input.dat | 80 ++++++++++++++++++++++++++ test/log-reg/output.dat | 80 ++++++++++++++++++++++++++ test/log_reg/expected.txt | 2 - test/log_reg/input.dat | 80 -------------------------- test/log_reg/output.dat | 80 -------------------------- to-ascii | 12 ++++ to_ascii | 12 ---- toks-per-line | 12 ++++ toks_per_line | 12 ---- train-test-split | 50 +++++++++++++++++ train_test_split | 50 ----------------- 145 files changed, 1840 insertions(+), 1840 deletions(-) create mode 100755 add-ln create mode 100755 add-seg create mode 100755 add-start-end delete mode 100755 add_ln delete mode 100755 add_seg delete mode 100755 add_start_end create mode 100755 avg-weights delete mode 100755 avg_weights create mode 100755 cdec-hg-to-json delete mode 100755 cdec_hg_to_json create mode 100755 convert-to-svm-light-format delete mode 100755 convert_to_svmlight_format create mode 100644 dense-features.txt delete mode 100644 dense_features.txt create mode 100755 fake-svm-light delete mode 100755 fake_svm_light create mode 100755 feature-dict delete mode 100755 feature_dict create mode 100755 filter-by-rule-shape create mode 100755 filter-features delete mode 100755 filter_by_rule_shape delete mode 100755 filter_features create mode 100755 first-lower delete mode 100755 first_lower create mode 100755 gigaword-collapse-tags delete mode 100755 gigaword_collapse_tags create mode 100755 hadoop-uniq delete mode 100755 hadoop_uniq create mode 100755 kbest-bleu-oracles delete mode 100755 kbest_bleu_oracles create mode 100755 kendalls-tau delete mode 100755 kendalls_tau create mode 100755 key-count delete mode 100755 key_count create mode 100755 lin-reg delete mode 100755 lin_reg create mode 100755 log-reg delete mode 100755 log_reg create mode 100755 make-rule-features delete mode 100755 make_rule_features create mode 100755 max-len delete mode 100755 max_len create mode 100755 mem-usage delete mode 100755 mem_usage create mode 100755 merge-files create mode 100755 merge-ttable delete mode 100755 merge_files delete mode 100755 merge_ttable create mode 100755 min-max delete mode 100755 min_max create mode 100755 moses-1best delete mode 100755 moses_1best create mode 100755 no-empty create mode 100755 no-non-printables delete mode 100755 no_empty delete mode 100755 no_non_printables create mode 100755 norm-german create mode 100755 norm-hyphens delete mode 100755 norm_german delete mode 100755 norm_hyphens create mode 100755 normalize-punctuation delete mode 100755 normalize_punctuation create mode 100755 num-tok delete mode 100755 num_tok create mode 100755 paste-pairs delete mode 100755 paste_pairs create mode 100755 per-sentence-bleu create mode 100755 per-sentence-bleu-kbest create mode 100755 per-sentence-ter delete mode 100755 per_sentence_bleu delete mode 100755 per_sentence_bleu_kbest delete mode 100755 per_sentence_ter create mode 100755 preprocess-no-lower delete mode 100755 preprocess_no_lower create mode 100755 pt-bloom delete mode 100755 pt_bloom create mode 100755 push-rules delete mode 100755 push_rules create mode 100755 ruby-eval delete mode 100755 ruby_eval create mode 100755 rule-shapes delete mode 100755 rule_shapes create mode 100755 select-from delete mode 100755 select_from create mode 100755 sort-features delete mode 100755 sort_features create mode 100755 source-sides delete mode 100755 source_sides create mode 100755 split-kbest create mode 100755 split-lines create mode 100755 split-pipes delete mode 100755 split_kbest delete mode 100755 split_lines delete mode 100755 split_pipes create mode 100755 stanford-parser-run delete mode 100755 stanford_parser_run create mode 100644 test/cdec-hg-to-json/cdec.ini create mode 100644 test/cdec-hg-to-json/grammar.gz create mode 100644 test/cdec-hg-to-json/hg.json.gz create mode 100644 test/cdec-hg-to-json/hg.meta create mode 100644 test/cdec-hg-to-json/in create mode 100644 test/cdec-hg-to-json/toy.cdec.ini create mode 100644 test/cdec-hg-to-json/toy.grammar create mode 100644 test/cdec-hg-to-json/toy.in create mode 100644 test/cdec-hg-to-json/toy.weights create mode 100644 test/cdec-hg-to-json/weights delete mode 100644 test/cdec_hg_to_json/cdec.ini delete mode 100644 test/cdec_hg_to_json/grammar.gz delete mode 100644 test/cdec_hg_to_json/hg.json.gz delete mode 100644 test/cdec_hg_to_json/hg.meta delete mode 100644 test/cdec_hg_to_json/in delete mode 100644 test/cdec_hg_to_json/toy.cdec.ini delete mode 100644 test/cdec_hg_to_json/toy.grammar delete mode 100644 test/cdec_hg_to_json/toy.in delete mode 100644 test/cdec_hg_to_json/toy.weights delete mode 100644 test/cdec_hg_to_json/weights create mode 100644 test/kbest-bleu-oracles/debug.kbests create mode 100644 test/kbest-bleu-oracles/debug.refs create mode 100644 test/kbest-bleu-oracles/example.kbests create mode 100644 test/kbest-bleu-oracles/example.refs create mode 100644 test/kbest-bleu-oracles/example.src delete mode 100644 test/kbest_bleu_oracles/debug.kbests delete mode 100644 test/kbest_bleu_oracles/debug.refs delete mode 100644 test/kbest_bleu_oracles/example.kbests delete mode 100644 test/kbest_bleu_oracles/example.refs delete mode 100644 test/kbest_bleu_oracles/example.src create mode 100644 test/lin-reg/exptected.txt create mode 100644 test/lin-reg/input.dat create mode 100644 test/lin-reg/output.dat delete mode 100644 test/lin_reg/exptected.txt delete mode 100644 test/lin_reg/input.dat delete mode 100644 test/lin_reg/output.dat create mode 100644 test/log-reg/expected.txt create mode 100644 test/log-reg/input.dat create mode 100644 test/log-reg/output.dat delete mode 100644 test/log_reg/expected.txt delete mode 100644 test/log_reg/input.dat delete mode 100644 test/log_reg/output.dat create mode 100755 to-ascii delete mode 100755 to_ascii create mode 100755 toks-per-line delete mode 100755 toks_per_line create mode 100755 train-test-split delete mode 100755 train_test_split diff --git a/add-ln b/add-ln new file mode 100755 index 0000000..35bc44d --- /dev/null +++ b/add-ln @@ -0,0 +1,8 @@ +#!/usr/bin/env ruby + +i = 0 +while line = STDIN.gets + puts "#{i}\t#{line}" + i += 1 +end + diff --git a/add-seg b/add-seg new file mode 100755 index 0000000..e5db580 --- /dev/null +++ b/add-seg @@ -0,0 +1,36 @@ +#!/usr/bin/env ruby + +require 'trollop' +require 'zipf' + +o = Trollop::options do + opt :grammar, "(Absolute) path of folder containing grammars.", :type => :string, :short => '-g', :default => nil + opt :loo, "leave one out", :type => :bool, :default => false + opt :start_id, "start with this id", :type => :int, :default => 0, :short => '-i' + opt :nogz, "grammar files not gzipped", :type => :bool, :default => false + opt :index, "number according to index", :type => :string, :default => nil +end + +index = [] +if o[:index] + index = ReadFile.readlines_strip(o[:index]).map{ |i| i.to_i } +end + +i = o[:start_id] +j = 0 +while line = STDIN.gets + ext = '.gz' + ext = '' if o[:nogz] + s = " 0 + if o[:grammar] then s += " grammar=\"#{o[:grammar]}/grammar.#{index[j]}#{ext}\"" end + puts s + " id=\"#{index[j]}\"> #{line.strip} " + else + if o[:grammar] then s += " grammar=\"#{o[:grammar]}/grammar.#{i}#{ext}\"" end + puts s + " id=\"#{i}\"> #{line.strip} " + end + i += 1 + j += 1 +end + diff --git a/add-start-end b/add-start-end new file mode 100755 index 0000000..30deaec --- /dev/null +++ b/add-start-end @@ -0,0 +1,10 @@ +#!/usr/bin/env ruby + +STDIN.set_encoding 'utf-8' +STDOUT.set_encoding 'utf-8' + + +while line = STDIN.gets + puts " #{line.strip} " +end + diff --git a/add_ln b/add_ln deleted file mode 100755 index 35bc44d..0000000 --- a/add_ln +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env ruby - -i = 0 -while line = STDIN.gets - puts "#{i}\t#{line}" - i += 1 -end - diff --git a/add_seg b/add_seg deleted file mode 100755 index e5db580..0000000 --- a/add_seg +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env ruby - -require 'trollop' -require 'zipf' - -o = Trollop::options do - opt :grammar, "(Absolute) path of folder containing grammars.", :type => :string, :short => '-g', :default => nil - opt :loo, "leave one out", :type => :bool, :default => false - opt :start_id, "start with this id", :type => :int, :default => 0, :short => '-i' - opt :nogz, "grammar files not gzipped", :type => :bool, :default => false - opt :index, "number according to index", :type => :string, :default => nil -end - -index = [] -if o[:index] - index = ReadFile.readlines_strip(o[:index]).map{ |i| i.to_i } -end - -i = o[:start_id] -j = 0 -while line = STDIN.gets - ext = '.gz' - ext = '' if o[:nogz] - s = " 0 - if o[:grammar] then s += " grammar=\"#{o[:grammar]}/grammar.#{index[j]}#{ext}\"" end - puts s + " id=\"#{index[j]}\"> #{line.strip} " - else - if o[:grammar] then s += " grammar=\"#{o[:grammar]}/grammar.#{i}#{ext}\"" end - puts s + " id=\"#{i}\"> #{line.strip} " - end - i += 1 - j += 1 -end - diff --git a/add_start_end b/add_start_end deleted file mode 100755 index 30deaec..0000000 --- a/add_start_end +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env ruby - -STDIN.set_encoding 'utf-8' -STDOUT.set_encoding 'utf-8' - - -while line = STDIN.gets - puts " #{line.strip} " -end - diff --git a/avg-weights b/avg-weights new file mode 100755 index 0000000..36b051a --- /dev/null +++ b/avg-weights @@ -0,0 +1,33 @@ +#!/usr/bin/env ruby + +require 'zipf' +require 'trollop' +require 'zlib' + +conf = Trollop::options do + opt :weights_files, "a number of weights files: name value", :required => true + opt :filter, "Filter if key does not appear in every file.", :type => :bool, :default => false +end + +h = {} +ARGV.each { |fn| + f = ReadFile.new fn + while line = f.gets + k, v = line.split + v = v.to_f + if h.has_key? k + h[k] << v + else + h[k] = [v] + end + end + f.close +} + +n = ARGV.size.to_f + +h.each_pair { |k,w| + next if conf[:filter] and w.size < n + puts "#{k} #{w.inject(:+)/n}" +} + diff --git a/avg_weights b/avg_weights deleted file mode 100755 index 36b051a..0000000 --- a/avg_weights +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' -require 'trollop' -require 'zlib' - -conf = Trollop::options do - opt :weights_files, "a number of weights files: name value", :required => true - opt :filter, "Filter if key does not appear in every file.", :type => :bool, :default => false -end - -h = {} -ARGV.each { |fn| - f = ReadFile.new fn - while line = f.gets - k, v = line.split - v = v.to_f - if h.has_key? k - h[k] << v - else - h[k] = [v] - end - end - f.close -} - -n = ARGV.size.to_f - -h.each_pair { |k,w| - next if conf[:filter] and w.size < n - puts "#{k} #{w.inject(:+)/n}" -} - diff --git a/cdec-hg-to-json b/cdec-hg-to-json new file mode 100755 index 0000000..5a26cf7 --- /dev/null +++ b/cdec-hg-to-json @@ -0,0 +1,80 @@ +#!/usr/bin/env python2 + +import cdec +import sys, argparse + +def hg2json(hg, weights): + """ + output a JSON representation of a cdec hypegraph + (see http://aclweb.org/aclwiki/index.php?title=Hypergraph_Format ) + """ + res = '' + res += "{\n" + res += '"weights":{'+"\n" + a = [] + for i in weights: + a.append( '"%s":%s'%(i[0], i[1]) ) + res += ", ".join(a)+"\n" + res += "},\n" + res += '"nodes":'+"\n" + res += "[\n" + a = [] + a.append( '{ "label":"root", "cat":"root" }' ) + for i in hg.nodes: + a.append( '{ "label":"%s", "cat":"%s" }'%(i.id, i.cat) ) + res += ",\n".join(a)+"\n" + res += "],\n" + res += '"edges":'+"\n" + res += "[\n" + a = [] + for i in hg.edges: + s = "{" + s += '"head":"%s"'%(i.head_node.id) + xs = ' "f":{' + b = [] + for j in i.feature_values: + b.append( '"%s":%s'%(j[0], j[1]) ) + xs += ", ".join(b) + xs += "}," + c = [] + for j in i.tail_nodes: + c.append( '"'+str(j.id)+'"' ) + if len(c) > 0: + s += ', "tails":[ %s ],'%(",".join(c)) + else: + s += ', "tails":[ "root" ],' + s += xs + s += ' "weight":%s }'%(i.prob) + a.append(s) + res += ",\n".join(a)+"\n" + res += "]\n" + res += "}\n" + return res + +def main(): + parser = argparse.ArgumentParser(description='get a proper json representation of cdec hypergraphs') + parser.add_argument('-c', '--config', required=True, help='decoder configuration') + parser.add_argument('-w', '--weights', required=True, help='feature weights') + args = parser.parse_args() + with open(args.config) as config: + config = config.read() + decoder = cdec.Decoder(config) + decoder.read_weights(args.weights) + ins = sys.stdin.readline().strip() + hg = decoder.translate(ins) + + sys.stderr.write( "input:\n '%s'\n"%(ins) ) + sys.stderr.write( "viterbi translation:\n '%s'\n"%(hg.viterbi()) ) + num_nodes = 0 + for i in hg.nodes: num_nodes+=1 + sys.stderr.write( "# nodes = %s\n"%(num_nodes) ) + num_edges = 0 + for i in hg.edges: num_edges+=1 + sys.stderr.write( "# edges = %s\n"%(num_edges) ) + sys.stderr.write( "viterbi score = %s\n"%(round(hg.viterbi_features().dot(decoder.weights), 2)) ) + + print hg2json(hg, decoder.weights) + +if __name__=="__main__": + main() + diff --git a/cdec_hg_to_json b/cdec_hg_to_json deleted file mode 100755 index 5a26cf7..0000000 --- a/cdec_hg_to_json +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python2 - -import cdec -import sys, argparse - -def hg2json(hg, weights): - """ - output a JSON representation of a cdec hypegraph - (see http://aclweb.org/aclwiki/index.php?title=Hypergraph_Format ) - """ - res = '' - res += "{\n" - res += '"weights":{'+"\n" - a = [] - for i in weights: - a.append( '"%s":%s'%(i[0], i[1]) ) - res += ", ".join(a)+"\n" - res += "},\n" - res += '"nodes":'+"\n" - res += "[\n" - a = [] - a.append( '{ "label":"root", "cat":"root" }' ) - for i in hg.nodes: - a.append( '{ "label":"%s", "cat":"%s" }'%(i.id, i.cat) ) - res += ",\n".join(a)+"\n" - res += "],\n" - res += '"edges":'+"\n" - res += "[\n" - a = [] - for i in hg.edges: - s = "{" - s += '"head":"%s"'%(i.head_node.id) - xs = ' "f":{' - b = [] - for j in i.feature_values: - b.append( '"%s":%s'%(j[0], j[1]) ) - xs += ", ".join(b) - xs += "}," - c = [] - for j in i.tail_nodes: - c.append( '"'+str(j.id)+'"' ) - if len(c) > 0: - s += ', "tails":[ %s ],'%(",".join(c)) - else: - s += ', "tails":[ "root" ],' - s += xs - s += ' "weight":%s }'%(i.prob) - a.append(s) - res += ",\n".join(a)+"\n" - res += "]\n" - res += "}\n" - return res - -def main(): - parser = argparse.ArgumentParser(description='get a proper json representation of cdec hypergraphs') - parser.add_argument('-c', '--config', required=True, help='decoder configuration') - parser.add_argument('-w', '--weights', required=True, help='feature weights') - args = parser.parse_args() - with open(args.config) as config: - config = config.read() - decoder = cdec.Decoder(config) - decoder.read_weights(args.weights) - ins = sys.stdin.readline().strip() - hg = decoder.translate(ins) - - sys.stderr.write( "input:\n '%s'\n"%(ins) ) - sys.stderr.write( "viterbi translation:\n '%s'\n"%(hg.viterbi()) ) - num_nodes = 0 - for i in hg.nodes: num_nodes+=1 - sys.stderr.write( "# nodes = %s\n"%(num_nodes) ) - num_edges = 0 - for i in hg.edges: num_edges+=1 - sys.stderr.write( "# edges = %s\n"%(num_edges) ) - sys.stderr.write( "viterbi score = %s\n"%(round(hg.viterbi_features().dot(decoder.weights), 2)) ) - - print hg2json(hg, decoder.weights) - -if __name__=="__main__": - main() - diff --git a/convert-to-svm-light-format b/convert-to-svm-light-format new file mode 100755 index 0000000..a9ce98f --- /dev/null +++ b/convert-to-svm-light-format @@ -0,0 +1,22 @@ +#!/usr/bin/env ruby + +require 'zipf' + +fd = Marshal.load ReadFile.read ARGV[0] +d = fd.size + +not_quiet = ARGV[1] +train = [] +l_i = 1 +while line = STDIN.gets + STDERR.write "#{l_i}\n" if l_i%1000==0&¬_quiet + s = [] + line.split.each { |i| + k,w = i.split '=', 2 + s << [fd[k]+1, w.to_f] + } + s.sort_by! { |i| i.first } + puts "+1 #{s.map{|i| "#{i.first}:#{i[1]}" }.join(' ')}" + l_i+= 1 +end + diff --git a/convert_to_svmlight_format b/convert_to_svmlight_format deleted file mode 100755 index a9ce98f..0000000 --- a/convert_to_svmlight_format +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' - -fd = Marshal.load ReadFile.read ARGV[0] -d = fd.size - -not_quiet = ARGV[1] -train = [] -l_i = 1 -while line = STDIN.gets - STDERR.write "#{l_i}\n" if l_i%1000==0&¬_quiet - s = [] - line.split.each { |i| - k,w = i.split '=', 2 - s << [fd[k]+1, w.to_f] - } - s.sort_by! { |i| i.first } - puts "+1 #{s.map{|i| "#{i.first}:#{i[1]}" }.join(' ')}" - l_i+= 1 -end - diff --git a/dense-features.txt b/dense-features.txt new file mode 100644 index 0000000..daae8d1 --- /dev/null +++ b/dense-features.txt @@ -0,0 +1,12 @@ +CountEF +EgivenFCoherent +Glue +IsSingletonF +IsSingletonFE +LanguageModel +LanguageModel_OOV +MaxLexFgivenE +MaxLexEgivenF +PassThrough +SampleCountF +WordPenalty diff --git a/dense_features.txt b/dense_features.txt deleted file mode 100644 index daae8d1..0000000 --- a/dense_features.txt +++ /dev/null @@ -1,12 +0,0 @@ -CountEF -EgivenFCoherent -Glue -IsSingletonF -IsSingletonFE -LanguageModel -LanguageModel_OOV -MaxLexFgivenE -MaxLexEgivenF -PassThrough -SampleCountF -WordPenalty diff --git a/fake-svm-light b/fake-svm-light new file mode 100755 index 0000000..eb074c1 --- /dev/null +++ b/fake-svm-light @@ -0,0 +1,14 @@ +#!/usr/bin/env ruby + +while line = STDIN.gets + a = line.split + label = a.shift.to_f + label *= -1 + a.map! { |i| + k,v = i.split ":" + v = v.to_f*-1 + "#{k}:#{v}" + } + puts "#{label} #{a.join ' '}" +end + diff --git a/fake_svm_light b/fake_svm_light deleted file mode 100755 index eb074c1..0000000 --- a/fake_svm_light +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env ruby - -while line = STDIN.gets - a = line.split - label = a.shift.to_f - label *= -1 - a.map! { |i| - k,v = i.split ":" - v = v.to_f*-1 - "#{k}:#{v}" - } - puts "#{label} #{a.join ' '}" -end - diff --git a/feature-dict b/feature-dict new file mode 100755 index 0000000..6849769 --- /dev/null +++ b/feature-dict @@ -0,0 +1,24 @@ +#!/usr/bin/env ruby + +not_quiet = ARGV[1] +n = 0 +feature_dict = {} +l_i = 1 +while line = STDIN.gets + STDERR.write "#{l_i}\n" if l_i%1000==0&¬_quiet + line.split.each { |i| + f, v = i.split('=', 2) + if !feature_dict.has_key? f + feature_dict[f] = n + n += 1 + end + } + l_i += 1 +end + +f = File.new ARGV[0], 'w' +f.write Marshal.dump feature_dict +f.close + +STDERR.write "size = #{feature_dict.size}\n" + diff --git a/feature_dict b/feature_dict deleted file mode 100755 index 6849769..0000000 --- a/feature_dict +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env ruby - -not_quiet = ARGV[1] -n = 0 -feature_dict = {} -l_i = 1 -while line = STDIN.gets - STDERR.write "#{l_i}\n" if l_i%1000==0&¬_quiet - line.split.each { |i| - f, v = i.split('=', 2) - if !feature_dict.has_key? f - feature_dict[f] = n - n += 1 - end - } - l_i += 1 -end - -f = File.new ARGV[0], 'w' -f.write Marshal.dump feature_dict -f.close - -STDERR.write "size = #{feature_dict.size}\n" - diff --git a/filter-by-rule-shape b/filter-by-rule-shape new file mode 100755 index 0000000..695edec --- /dev/null +++ b/filter-by-rule-shape @@ -0,0 +1,32 @@ +#!/usr/bin/env ruby + +STDIN.set_encoding 'utf-8' +STDOUT.set_encoding 'utf-8' + +def shape s + res = [] + in_t = false + s.split.each { |i| + if i.match /\A\[X,\d\]\z/ + if in_t + in_t = false + end + res << "NT" + next + else + res << "T" if not in_t + in_t = true + end + } + return res +end + +while line = STDIN.gets + line.strip! + parts = line.split ' ||| ' + f_shape = shape(parts[1]) + e_shape = shape(parts[2]) + next if f_shape[0]=='NT'||f_shape[-1]=='NT'||e_shape[0]=='NT'||e_shape[-1]=='NT' + puts line +end + diff --git a/filter-features b/filter-features new file mode 100755 index 0000000..fc21f6c --- /dev/null +++ b/filter-features @@ -0,0 +1,16 @@ +#!/usr/bin/env ruby + +require 'zipf' + +dense_features = ReadFile.readlines_strip "#{File.dirname(__FILE__)}/dense_features.txt" + +sep = " " + +while line = STDIN.gets + a = line.strip.split + a.reject! { |i| + !dense_features.include?(i.split('=').first) + } + puts a.join sep +end + diff --git a/filter_by_rule_shape b/filter_by_rule_shape deleted file mode 100755 index 695edec..0000000 --- a/filter_by_rule_shape +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env ruby - -STDIN.set_encoding 'utf-8' -STDOUT.set_encoding 'utf-8' - -def shape s - res = [] - in_t = false - s.split.each { |i| - if i.match /\A\[X,\d\]\z/ - if in_t - in_t = false - end - res << "NT" - next - else - res << "T" if not in_t - in_t = true - end - } - return res -end - -while line = STDIN.gets - line.strip! - parts = line.split ' ||| ' - f_shape = shape(parts[1]) - e_shape = shape(parts[2]) - next if f_shape[0]=='NT'||f_shape[-1]=='NT'||e_shape[0]=='NT'||e_shape[-1]=='NT' - puts line -end - diff --git a/filter_features b/filter_features deleted file mode 100755 index fc21f6c..0000000 --- a/filter_features +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' - -dense_features = ReadFile.readlines_strip "#{File.dirname(__FILE__)}/dense_features.txt" - -sep = " " - -while line = STDIN.gets - a = line.strip.split - a.reject! { |i| - !dense_features.include?(i.split('=').first) - } - puts a.join sep -end - diff --git a/first-lower b/first-lower new file mode 100755 index 0000000..1cddb8e --- /dev/null +++ b/first-lower @@ -0,0 +1,11 @@ +#!/usr/bin/env ruby + +require 'zipf' + +while line = STDIN.gets + line.strip! + if line && line!='' && line[0].downcase? + puts line + end +end + diff --git a/first_lower b/first_lower deleted file mode 100755 index 1cddb8e..0000000 --- a/first_lower +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' - -while line = STDIN.gets - line.strip! - if line && line!='' && line[0].downcase? - puts line - end -end - diff --git a/gigaword-collapse-tags b/gigaword-collapse-tags new file mode 100755 index 0000000..cbaf7d7 --- /dev/null +++ b/gigaword-collapse-tags @@ -0,0 +1,39 @@ +#!/usr/bin/env ruby + +# works with gigaword en v5 + +STDIN.set_encoding 'utf-8' +STDOUT.set_encoding 'utf-8' + +in_p = false +in_dateline = false +collect = [] + +while line = STDIN.gets + line.strip! + if line.downcase == "" + in_dateline = true + next + elsif line.downcase == "" + in_dateline = false + next + elsif in_dateline + next + elsif line.downcase == "

" and not in_p + in_p = true + collect = [] + next + elsif line.downcase == "

" and in_p + if collect.size > 0 + puts collect.join(" ").strip + end + in_p = false + next + elsif in_p + collect.push line + next + else + puts line + end +end + diff --git a/gigaword_collapse_tags b/gigaword_collapse_tags deleted file mode 100755 index cbaf7d7..0000000 --- a/gigaword_collapse_tags +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env ruby - -# works with gigaword en v5 - -STDIN.set_encoding 'utf-8' -STDOUT.set_encoding 'utf-8' - -in_p = false -in_dateline = false -collect = [] - -while line = STDIN.gets - line.strip! - if line.downcase == "" - in_dateline = true - next - elsif line.downcase == "" - in_dateline = false - next - elsif in_dateline - next - elsif line.downcase == "

" and not in_p - in_p = true - collect = [] - next - elsif line.downcase == "

" and in_p - if collect.size > 0 - puts collect.join(" ").strip - end - in_p = false - next - elsif in_p - collect.push line - next - else - puts line - end -end - diff --git a/hadoop-uniq b/hadoop-uniq new file mode 100755 index 0000000..5052419 --- /dev/null +++ b/hadoop-uniq @@ -0,0 +1,11 @@ +#!/bin/zsh + +HADOOP_HOME=/usr/lib/hadoop + +$HADOOP_HOME/bin/hadoop jar /usr/lib/hadoop-mapreduce/hadoop-streaming.jar \ + -D mapred.reduce.tasks=98 \ + -input d \ + -output d.uniq \ + -mapper 'cut -d " " -f 1' \ + -reducer /usr/bin/uniq + diff --git a/hadoop_uniq b/hadoop_uniq deleted file mode 100755 index 5052419..0000000 --- a/hadoop_uniq +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/zsh - -HADOOP_HOME=/usr/lib/hadoop - -$HADOOP_HOME/bin/hadoop jar /usr/lib/hadoop-mapreduce/hadoop-streaming.jar \ - -D mapred.reduce.tasks=98 \ - -input d \ - -output d.uniq \ - -mapper 'cut -d " " -f 1' \ - -reducer /usr/bin/uniq - diff --git a/kbest-bleu-oracles b/kbest-bleu-oracles new file mode 100755 index 0000000..a36c345 --- /dev/null +++ b/kbest-bleu-oracles @@ -0,0 +1,51 @@ +#!/usr/bin/env ruby + +require 'zipf' +require 'trollop' + +def get_context kbest_lists, references, n + a = [] + kbest_lists.each_index { |i| + a << BLEU::get_counts(kbest_lists[i][0].s, references[i], n, 1) + } + return a +end + +def main + conf = Trollop::options do + opt :kbest_lists, "kbest lists", :type => :string, :required => true + opt :references, "reference", :type => :string, :required => true + opt :n, "N for BLEU", :type => :int, :default => 4 + opt :weight, "how much to weigh single translations", :type => :int, :default => 1 + opt :debug, "debug mode", :type => :bool, :default => false + end + debug = conf[:debug] + n = conf[:n] + kbest_lists = read_kbest_lists conf[:kbest_lists] + references = ReadFile.readlines_strip conf[:references] + context = get_context kbest_lists, references, n + kbest_lists.each_with_index { |kbest,j| + scores = [] + max_score = -1.0/0 + max_idx = -1 + kbest.each_index { |i| + context_cp = context.dup + context_cp[j] = BLEU::get_counts kbest[i].s, references[j], n, conf[:weight] + score = BLEU::hbleu_(context_cp, n, debug) + scores << score + if score > max_score + max_score = score + max_idx = i + end + STDERR.write "#{i} #{kbest[i]}\t#{score}\n---\n" if debug + } + puts "#{references[j]}" + puts "BLEU=#{scores[0]} ||| #{kbest[0]}" + puts "BLEU=#{max_score} ||| #{kbest[max_idx]}" + puts + STDERR.write "\n" if debug + } +end + +main + diff --git a/kbest_bleu_oracles b/kbest_bleu_oracles deleted file mode 100755 index a36c345..0000000 --- a/kbest_bleu_oracles +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' -require 'trollop' - -def get_context kbest_lists, references, n - a = [] - kbest_lists.each_index { |i| - a << BLEU::get_counts(kbest_lists[i][0].s, references[i], n, 1) - } - return a -end - -def main - conf = Trollop::options do - opt :kbest_lists, "kbest lists", :type => :string, :required => true - opt :references, "reference", :type => :string, :required => true - opt :n, "N for BLEU", :type => :int, :default => 4 - opt :weight, "how much to weigh single translations", :type => :int, :default => 1 - opt :debug, "debug mode", :type => :bool, :default => false - end - debug = conf[:debug] - n = conf[:n] - kbest_lists = read_kbest_lists conf[:kbest_lists] - references = ReadFile.readlines_strip conf[:references] - context = get_context kbest_lists, references, n - kbest_lists.each_with_index { |kbest,j| - scores = [] - max_score = -1.0/0 - max_idx = -1 - kbest.each_index { |i| - context_cp = context.dup - context_cp[j] = BLEU::get_counts kbest[i].s, references[j], n, conf[:weight] - score = BLEU::hbleu_(context_cp, n, debug) - scores << score - if score > max_score - max_score = score - max_idx = i - end - STDERR.write "#{i} #{kbest[i]}\t#{score}\n---\n" if debug - } - puts "#{references[j]}" - puts "BLEU=#{scores[0]} ||| #{kbest[0]}" - puts "BLEU=#{max_score} ||| #{kbest[max_idx]}" - puts - STDERR.write "\n" if debug - } -end - -main - diff --git a/kendalls-tau b/kendalls-tau new file mode 100755 index 0000000..c0c20be --- /dev/null +++ b/kendalls-tau @@ -0,0 +1,75 @@ +#!/usr/bin/env ruby + +################################################# +# reads space delimted pairs of scores as input, +# outputs Kendall's τ +################################################# + +def kendall_with_ties l + concordant = 0 + disconcordant = 0 + tie_a = 0 + tie_b = 0 + l.each_with_index { |k,i| + l[i+1,l.size].each_with_index { |m,j| + if (k.first < m.first && k[1] < m[1]) || + (k.first > m.first && k[1] > m[1]) + concordant += 1 + elsif (k.first == m.first && k[1] != m[1]) + tie_a += 1 + elsif (k.first != m.first && k[1] == m[1]) + tie_b += 1 + else + disconcordant += 1 + end + } + } + + return (concordant-disconcordant)/(Math.sqrt((concordant+disconcordant+tie_a)*(concordant+disconcordant+tie_b))) +end + +def kendall l + concordant = 0 + disconcordant = 0 + l.each_with_index { |k,i| + l[i+1,l.size].each_with_index { |m,j| + if (k.first <= m.first && k[1] <= m[1]) || + (k.first >= m.first && k[1] >= m[1]) + concordant += 1 + else + disconcordant += 1 + end + } + } + + return (concordant-disconcordant)/(0.5 * l.size * (l.size-1)) +end + +def has_ties? l + if l.map{ |p| p[1] }.uniq.size != l.size || + l.map{ |p| p[2] }.uniq.size != l.size + return true + end + + return false +end + +def main + l = [] + while line = STDIN.gets + a,b = line.split + l << [a.to_f, b.to_f] + end + + v = -1 + if has_ties? l + v = kendall_with_ties l + else + v = kendall l + end + + puts v +end + +main + diff --git a/kendalls_tau b/kendalls_tau deleted file mode 100755 index c0c20be..0000000 --- a/kendalls_tau +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env ruby - -################################################# -# reads space delimted pairs of scores as input, -# outputs Kendall's τ -################################################# - -def kendall_with_ties l - concordant = 0 - disconcordant = 0 - tie_a = 0 - tie_b = 0 - l.each_with_index { |k,i| - l[i+1,l.size].each_with_index { |m,j| - if (k.first < m.first && k[1] < m[1]) || - (k.first > m.first && k[1] > m[1]) - concordant += 1 - elsif (k.first == m.first && k[1] != m[1]) - tie_a += 1 - elsif (k.first != m.first && k[1] == m[1]) - tie_b += 1 - else - disconcordant += 1 - end - } - } - - return (concordant-disconcordant)/(Math.sqrt((concordant+disconcordant+tie_a)*(concordant+disconcordant+tie_b))) -end - -def kendall l - concordant = 0 - disconcordant = 0 - l.each_with_index { |k,i| - l[i+1,l.size].each_with_index { |m,j| - if (k.first <= m.first && k[1] <= m[1]) || - (k.first >= m.first && k[1] >= m[1]) - concordant += 1 - else - disconcordant += 1 - end - } - } - - return (concordant-disconcordant)/(0.5 * l.size * (l.size-1)) -end - -def has_ties? l - if l.map{ |p| p[1] }.uniq.size != l.size || - l.map{ |p| p[2] }.uniq.size != l.size - return true - end - - return false -end - -def main - l = [] - while line = STDIN.gets - a,b = line.split - l << [a.to_f, b.to_f] - end - - v = -1 - if has_ties? l - v = kendall_with_ties l - else - v = kendall l - end - - puts v -end - -main - diff --git a/key-count b/key-count new file mode 100755 index 0000000..deaa522 --- /dev/null +++ b/key-count @@ -0,0 +1,14 @@ +#!/usr/bin/env ruby + +STDIN.set_encoding 'utf-8' +STDOUT.set_encoding 'utf-8' + +h = {} +h.default = 0 +while line = STDIN.gets + line.strip! + h[line] += 1 +end + +h.each_pair { |k,v| puts "#{k} #{v}" } + diff --git a/key_count b/key_count deleted file mode 100755 index deaa522..0000000 --- a/key_count +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env ruby - -STDIN.set_encoding 'utf-8' -STDOUT.set_encoding 'utf-8' - -h = {} -h.default = 0 -while line = STDIN.gets - line.strip! - h[line] += 1 -end - -h.each_pair { |k,v| puts "#{k} #{v}" } - diff --git a/lin-reg b/lin-reg new file mode 100755 index 0000000..7a8e614 --- /dev/null +++ b/lin-reg @@ -0,0 +1,70 @@ +#!/usr/bin/env ruby + +require 'zipf' +require 'trollop' + +def read_data fn, scale + f = ReadFile.new fn + data = [] + while line = f.gets + line.strip! + a = [] + a << 1.0 + tokenize(line).each { |i| a << i.to_f } + v = SparseVector.from_a a + data << v + end + if scale + data.map { |i| i.keys }.flatten.uniq.each { |k| + max = data.map { |i| i[k] }.max + data.each { |i| i[k] /= max } + } + end + return data +end + +def main + conf = Trollop::options do + opt :input, "input data", :type => :string, :required => true + opt :output, "output data", :type => :string, :required => true + opt :learning_rate, "learning rate", :type => :float, :default => 0.07 + opt :stop, "stopping criterion", :type => :int, :default => 100 + opt :scale_features,"scale features", :type => :bool, :default => false, :short => '-t' + opt :show_loss, "show loss per iter", :type => :bool, :default => false + end + data = read_data conf[:input], conf[:scale_features] + zeros = [0.0]*data[0].size + t = ReadFile.readlines(conf[:output]).map{ |i| i.to_f } + model = SparseVector.new zeros + stop = 0 + prev_model = nil + i = 0 + while true + i += 1 + u = SparseVector.new zeros + overall_loss = 0.0 + data.each_with_index { |x,j| + loss = model.dot(x) - t[j] + overall_loss += loss**2 + u += x * loss + } + STDERR.write "#{i} #{overall_loss/data.size}\n" if conf[:show_loss] + u *= conf[:learning_rate]*(1.0/t.size) + model -= u + if model.approx_eql? prev_model + stop += 1 + else + stop = 0 + end + break if stop==conf[:stop] + prev_model = model + end + tss = t.map{ |y| (y-t.mean)**2 }.sum + j = -1 + rss = t.map{ |y| j+=1; (y-model.dot(data[j]))**2 }.sum + STDERR.write "ran for #{i} iterations\n R^2=#{1-(rss/tss)}\n" + puts model.to_s +end + +main + diff --git a/lin_reg b/lin_reg deleted file mode 100755 index 7a8e614..0000000 --- a/lin_reg +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' -require 'trollop' - -def read_data fn, scale - f = ReadFile.new fn - data = [] - while line = f.gets - line.strip! - a = [] - a << 1.0 - tokenize(line).each { |i| a << i.to_f } - v = SparseVector.from_a a - data << v - end - if scale - data.map { |i| i.keys }.flatten.uniq.each { |k| - max = data.map { |i| i[k] }.max - data.each { |i| i[k] /= max } - } - end - return data -end - -def main - conf = Trollop::options do - opt :input, "input data", :type => :string, :required => true - opt :output, "output data", :type => :string, :required => true - opt :learning_rate, "learning rate", :type => :float, :default => 0.07 - opt :stop, "stopping criterion", :type => :int, :default => 100 - opt :scale_features,"scale features", :type => :bool, :default => false, :short => '-t' - opt :show_loss, "show loss per iter", :type => :bool, :default => false - end - data = read_data conf[:input], conf[:scale_features] - zeros = [0.0]*data[0].size - t = ReadFile.readlines(conf[:output]).map{ |i| i.to_f } - model = SparseVector.new zeros - stop = 0 - prev_model = nil - i = 0 - while true - i += 1 - u = SparseVector.new zeros - overall_loss = 0.0 - data.each_with_index { |x,j| - loss = model.dot(x) - t[j] - overall_loss += loss**2 - u += x * loss - } - STDERR.write "#{i} #{overall_loss/data.size}\n" if conf[:show_loss] - u *= conf[:learning_rate]*(1.0/t.size) - model -= u - if model.approx_eql? prev_model - stop += 1 - else - stop = 0 - end - break if stop==conf[:stop] - prev_model = model - end - tss = t.map{ |y| (y-t.mean)**2 }.sum - j = -1 - rss = t.map{ |y| j+=1; (y-model.dot(data[j]))**2 }.sum - STDERR.write "ran for #{i} iterations\n R^2=#{1-(rss/tss)}\n" - puts model.to_s -end - -main - diff --git a/log-reg b/log-reg new file mode 100755 index 0000000..82dc353 --- /dev/null +++ b/log-reg @@ -0,0 +1,71 @@ +#!/usr/bin/env ruby + +require 'zipf' +require 'matrix' +require 'trollop' + +def read_data fn + f = ReadFile.new fn + data = [] + while line = f.gets + line.strip! + a = [] + a << 1.0 + tokenize(line).each { |i| a << i.to_f } + v = Vector.elements a + data << v + end + return data +end + +def dot x, y + r = 0.0 + x.each_with_index { |_,j| + r += x[j] * y[j] + } + return r +end + +def approx_eql x, y, eps=10**-10 + return false if !x||!y + return false if x.size!=y.size + x.each_with_index { |_,i| + return false if (x[i]-y[i]).abs>eps + } + return true +end + +def main + conf = Trollop::options do + opt :input, "input data", :type => :string, :required => true + opt :output, "1/0 output data", :type => :string, :required => true + end + data = read_data conf[:input] + dim = data[0].size + zeros = [0.0]*dim + t = ReadFile.readlines(conf[:output]).map{ |i| i.to_f } + model = Vector.elements zeros + prev_model = nil + gradient = Vector.elements zeros + hessian = Matrix.build(dim,dim) { |i,j| 0.0 } + i = 0 + while true + i += 1 + data.each_with_index { |x,j| + m = 1.0/(1+Math.exp(-dot(model, x))) + gradient += (m-t[j]) * x + hup = Matrix.column_vector(x) * Matrix.row_vector(x) + hessian += m*(1.0-m) * hup + } + gradient /= data.size + hessian /= data.size + model -= hessian.inverse * gradient + break if approx_eql model, prev_model + prev_model = model + end + STDERR.write "ran for #{i} iterations\n" + puts model.to_s +end + +main + diff --git a/log_reg b/log_reg deleted file mode 100755 index 82dc353..0000000 --- a/log_reg +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' -require 'matrix' -require 'trollop' - -def read_data fn - f = ReadFile.new fn - data = [] - while line = f.gets - line.strip! - a = [] - a << 1.0 - tokenize(line).each { |i| a << i.to_f } - v = Vector.elements a - data << v - end - return data -end - -def dot x, y - r = 0.0 - x.each_with_index { |_,j| - r += x[j] * y[j] - } - return r -end - -def approx_eql x, y, eps=10**-10 - return false if !x||!y - return false if x.size!=y.size - x.each_with_index { |_,i| - return false if (x[i]-y[i]).abs>eps - } - return true -end - -def main - conf = Trollop::options do - opt :input, "input data", :type => :string, :required => true - opt :output, "1/0 output data", :type => :string, :required => true - end - data = read_data conf[:input] - dim = data[0].size - zeros = [0.0]*dim - t = ReadFile.readlines(conf[:output]).map{ |i| i.to_f } - model = Vector.elements zeros - prev_model = nil - gradient = Vector.elements zeros - hessian = Matrix.build(dim,dim) { |i,j| 0.0 } - i = 0 - while true - i += 1 - data.each_with_index { |x,j| - m = 1.0/(1+Math.exp(-dot(model, x))) - gradient += (m-t[j]) * x - hup = Matrix.column_vector(x) * Matrix.row_vector(x) - hessian += m*(1.0-m) * hup - } - gradient /= data.size - hessian /= data.size - model -= hessian.inverse * gradient - break if approx_eql model, prev_model - prev_model = model - end - STDERR.write "ran for #{i} iterations\n" - puts model.to_s -end - -main - diff --git a/make-rule-features b/make-rule-features new file mode 100755 index 0000000..7adb6e9 --- /dev/null +++ b/make-rule-features @@ -0,0 +1,44 @@ +#!/usr/bin/env ruby + +require 'zipf' + +def mkrf src, tgt + s = src.gsub /\[X,[1-9]\]/, "NX" + t = tgt.gsub /\[X,([1-9])\]/,'N\1' + return "R:X:#{s.gsub(" ","_")}:#{t.gsub(" ","_")}" +end + +def mkrbf s, t + s = String.new s + if t == "S" + s.gsub! /\[X,[1-9]\]/, "X" + else + s.gsub! /\[X,([1-9])\]/, 'X\1' + end + s.reverse! + s += " >r<" + s.reverse! + s += " " + a = [] + ngrams(s, 2, true) { |ng| + a << "RB#{t}:#{ng.join "_"}" + } + return a +end + +h = {} +while line = STDIN.gets + _,src,tgt,_,_ = splitpipe line.strip + src.strip! + tgt.strip! + mkrbf(src, "S").each { |f| + h[f] = true + } + mkrbf(tgt, "T").each { |f| + h[f] = true + } + h [mkrf(src, tgt)] = true +end + +h.keys.each { |f| puts f } + diff --git a/make_rule_features b/make_rule_features deleted file mode 100755 index 7adb6e9..0000000 --- a/make_rule_features +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' - -def mkrf src, tgt - s = src.gsub /\[X,[1-9]\]/, "NX" - t = tgt.gsub /\[X,([1-9])\]/,'N\1' - return "R:X:#{s.gsub(" ","_")}:#{t.gsub(" ","_")}" -end - -def mkrbf s, t - s = String.new s - if t == "S" - s.gsub! /\[X,[1-9]\]/, "X" - else - s.gsub! /\[X,([1-9])\]/, 'X\1' - end - s.reverse! - s += " >r<" - s.reverse! - s += " " - a = [] - ngrams(s, 2, true) { |ng| - a << "RB#{t}:#{ng.join "_"}" - } - return a -end - -h = {} -while line = STDIN.gets - _,src,tgt,_,_ = splitpipe line.strip - src.strip! - tgt.strip! - mkrbf(src, "S").each { |f| - h[f] = true - } - mkrbf(tgt, "T").each { |f| - h[f] = true - } - h [mkrf(src, tgt)] = true -end - -h.keys.each { |f| puts f } - diff --git a/max-len b/max-len new file mode 100755 index 0000000..69013b5 --- /dev/null +++ b/max-len @@ -0,0 +1,16 @@ +#!/usr/bin/env ruby + +require 'zipf' + +max = ARGV[0].to_i + +i = 0 +while line = STDIN.gets + if tokenize(line).size <= max + puts i + else + STDERR.write line + end + i += 1 +end + diff --git a/max_len b/max_len deleted file mode 100755 index 69013b5..0000000 --- a/max_len +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' - -max = ARGV[0].to_i - -i = 0 -while line = STDIN.gets - if tokenize(line).size <= max - puts i - else - STDERR.write line - end - i += 1 -end - diff --git a/mem-usage b/mem-usage new file mode 100755 index 0000000..5c2104f --- /dev/null +++ b/mem-usage @@ -0,0 +1,11 @@ +#!/bin/bash + +"$@" & +pid=$! peak=0 +while true; do + sleep 1 + sample="$(ps -o rss= $pid 2> /dev/null)" || break + let peak='sample > peak ? sample : peak' +done +echo "$(( ${peak%% *} / 1024)) m" + diff --git a/mem_usage b/mem_usage deleted file mode 100755 index 5c2104f..0000000 --- a/mem_usage +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -"$@" & -pid=$! peak=0 -while true; do - sleep 1 - sample="$(ps -o rss= $pid 2> /dev/null)" || break - let peak='sample > peak ? sample : peak' -done -echo "$(( ${peak%% *} / 1024)) m" - diff --git a/merge-files b/merge-files new file mode 100755 index 0000000..714b57d --- /dev/null +++ b/merge-files @@ -0,0 +1,31 @@ +#!/usr/bin/env ruby + +require 'zipf' + +def usage + STDERR.write "merge_files +\n" + exit 1 +end +usage if ARGV.size==0 + +files = ARGV +hashes = [] + +files.each { |i| + hashes.push Hash.new + hashes.last.default = 0 + f = ReadFile.new i + while line = f.gets + hashes.last[line.strip] += 1 + end + f.close +} + +hashes.each { |h| + h.each { |k,v| + counts = [] + hashes.each { |j| counts.push j[k]; j.delete k } + counts.max.times { puts k } + } +} + diff --git a/merge-ttable b/merge-ttable new file mode 100755 index 0000000..ac10903 --- /dev/null +++ b/merge-ttable @@ -0,0 +1,34 @@ +#!/usr/bin/env ruby + +require 'zipf' +require 'trollop' + +def main + conf = Trollop::options do + opt :f, "f files", :type => :string, :required => true + opt :e, "e files", :type => :string, :required => true + end + + f_files = conf[:f].split + e_files = conf[:e].split + + h = {} + f_files.each_with_index { |fn,i| + fa = ReadFile.readlines_strip fn + ea = ReadFile.readlines_strip e_files[i] + fa.each_with_index { |fw,j| + if h.has_key? fw + h[fw] << ea[j] + else + h[fw] = [ea[j]] + end + } + } + + h.each_pair { |f,ea| + puts "#{f}\t#{ea.first}" + } +end + +main + diff --git a/merge_files b/merge_files deleted file mode 100755 index 714b57d..0000000 --- a/merge_files +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' - -def usage - STDERR.write "merge_files +\n" - exit 1 -end -usage if ARGV.size==0 - -files = ARGV -hashes = [] - -files.each { |i| - hashes.push Hash.new - hashes.last.default = 0 - f = ReadFile.new i - while line = f.gets - hashes.last[line.strip] += 1 - end - f.close -} - -hashes.each { |h| - h.each { |k,v| - counts = [] - hashes.each { |j| counts.push j[k]; j.delete k } - counts.max.times { puts k } - } -} - diff --git a/merge_ttable b/merge_ttable deleted file mode 100755 index ac10903..0000000 --- a/merge_ttable +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' -require 'trollop' - -def main - conf = Trollop::options do - opt :f, "f files", :type => :string, :required => true - opt :e, "e files", :type => :string, :required => true - end - - f_files = conf[:f].split - e_files = conf[:e].split - - h = {} - f_files.each_with_index { |fn,i| - fa = ReadFile.readlines_strip fn - ea = ReadFile.readlines_strip e_files[i] - fa.each_with_index { |fw,j| - if h.has_key? fw - h[fw] << ea[j] - else - h[fw] = [ea[j]] - end - } - } - - h.each_pair { |f,ea| - puts "#{f}\t#{ea.first}" - } -end - -main - diff --git a/min-max b/min-max new file mode 100755 index 0000000..1dbfd40 --- /dev/null +++ b/min-max @@ -0,0 +1,40 @@ +#!/usr/bin/ruby + +require 'zipf' +require 'trollop' + +conf = Trollop::options do + opt :min, "minimum #tokens", :type => :int, :default => 1 + opt :max, "maximum #tokens", :type => :int, :default => 80, :short => '-n' + opt :in_f, "input 'French' file", :type => :string, :required => true + opt :in_e, "input 'English' file", :type => :string, :required => true + opt :out_f, "output 'French' file", :type => :string, :required => true + opt :out_e, "output 'English' file", :type => :string, :required => true + opt :out_id, "output line Nos", :type => :string, :required => true +end + + +files = {} +files[:f_file] = ReadFile.new conf[:in_f] +files[:e_file] = ReadFile.new conf[:in_e] +files[:f_out_file] = WriteFile.new conf[:out_f] +files[:e_out_file] = WriteFile.new conf[:out_e] +files[:id_out_file] = WriteFile.new conf[:out_id] +i = 0 +while f_line = files[:f_file].gets + e_line = files[:e_file].gets + f_line.strip! + e_line.strip! + a = f_line.split + b = e_line.split + if a.size >= conf[:min] and a.size <= conf[:max] and \ + b.size >= conf[:min] and b.size <= conf[:max] + files[:f_out_file].write "#{f_line}\n" + files[:e_out_file].write "#{e_line}\n" + files[:id_out_file].write "#{i}\n" + end + i+=1 +end + +files.values.each{ |f| f.close } + diff --git a/min_max b/min_max deleted file mode 100755 index 1dbfd40..0000000 --- a/min_max +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/ruby - -require 'zipf' -require 'trollop' - -conf = Trollop::options do - opt :min, "minimum #tokens", :type => :int, :default => 1 - opt :max, "maximum #tokens", :type => :int, :default => 80, :short => '-n' - opt :in_f, "input 'French' file", :type => :string, :required => true - opt :in_e, "input 'English' file", :type => :string, :required => true - opt :out_f, "output 'French' file", :type => :string, :required => true - opt :out_e, "output 'English' file", :type => :string, :required => true - opt :out_id, "output line Nos", :type => :string, :required => true -end - - -files = {} -files[:f_file] = ReadFile.new conf[:in_f] -files[:e_file] = ReadFile.new conf[:in_e] -files[:f_out_file] = WriteFile.new conf[:out_f] -files[:e_out_file] = WriteFile.new conf[:out_e] -files[:id_out_file] = WriteFile.new conf[:out_id] -i = 0 -while f_line = files[:f_file].gets - e_line = files[:e_file].gets - f_line.strip! - e_line.strip! - a = f_line.split - b = e_line.split - if a.size >= conf[:min] and a.size <= conf[:max] and \ - b.size >= conf[:min] and b.size <= conf[:max] - files[:f_out_file].write "#{f_line}\n" - files[:e_out_file].write "#{e_line}\n" - files[:id_out_file].write "#{i}\n" - end - i+=1 -end - -files.values.each{ |f| f.close } - diff --git a/moses-1best b/moses-1best new file mode 100755 index 0000000..fd35cf8 --- /dev/null +++ b/moses-1best @@ -0,0 +1,14 @@ +#!/usr/bin/env ruby + +require 'zipf' + +prev_idx = nil +while line = STDIN.gets + line.strip! + idx = splitpipe(line)[0].to_i + if idx != prev_idx + puts line + prev_idx = idx + end +end + diff --git a/moses_1best b/moses_1best deleted file mode 100755 index fd35cf8..0000000 --- a/moses_1best +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' - -prev_idx = nil -while line = STDIN.gets - line.strip! - idx = splitpipe(line)[0].to_i - if idx != prev_idx - puts line - prev_idx = idx - end -end - diff --git a/no-empty b/no-empty new file mode 100755 index 0000000..da57e23 --- /dev/null +++ b/no-empty @@ -0,0 +1,18 @@ +#!/usr/bin/env ruby + +require 'zipf' + +files = [] +(0..1).each { |i| files << ReadFile.new(ARGV[i]) } +(2..3).each { |i| files << WriteFile.new(ARGV[i]) } + +while line_f = files[0].gets + line_e = files[1].gets + line_f.strip!; line_e.strip! + next if line_f=='' || line_e=='' + files[2].write line_f+"\n" + files[3].write line_e+"\n" +end + +files.each { |f| f.close } + diff --git a/no-non-printables b/no-non-printables new file mode 100755 index 0000000..9f9e3f9 --- /dev/null +++ b/no-non-printables @@ -0,0 +1,4 @@ +#!/bin/sh + +sed 's/\xEF\xBB\xBF//g' | sed 's/\xEF\xB7\x93//g' | sed 's/[[:cntrl:]]//g' + diff --git a/no_empty b/no_empty deleted file mode 100755 index da57e23..0000000 --- a/no_empty +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' - -files = [] -(0..1).each { |i| files << ReadFile.new(ARGV[i]) } -(2..3).each { |i| files << WriteFile.new(ARGV[i]) } - -while line_f = files[0].gets - line_e = files[1].gets - line_f.strip!; line_e.strip! - next if line_f=='' || line_e=='' - files[2].write line_f+"\n" - files[3].write line_e+"\n" -end - -files.each { |f| f.close } - diff --git a/no_non_printables b/no_non_printables deleted file mode 100755 index 9f9e3f9..0000000 --- a/no_non_printables +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh - -sed 's/\xEF\xBB\xBF//g' | sed 's/\xEF\xB7\x93//g' | sed 's/[[:cntrl:]]//g' - diff --git a/norm-german b/norm-german new file mode 100755 index 0000000..cf9c060 --- /dev/null +++ b/norm-german @@ -0,0 +1,87 @@ +#!/usr/bin/env ruby + +require 'thread' +require 'trollop' + +STDIN.set_encoding 'utf-8' +STDOUT.set_encoding 'utf-8' + + +conf = Trollop::options do + banner "norm_german < " + opt :upper, "uppercase", :type => :bool, :default => false + opt :threads, "#threads", :type => :int, :default => 1, :short => '-h' + opt :shard_size, "shard size", :type => :int, :default => 1000 + opt :train, "train", :type => :bool + opt :apply, "apply", :type => :bool +end + +pairs_lower = [ ['ß','ss'], ['ue', 'ü'], ['ae','ä'], ['oe', 'ö'] ] +pairs_upper = [ ['Ä', 'Ae'], ['Ö', 'Oe'], ['Ü', 'Ue'] ] +if conf[:upper] + PAIRS = pairs_lower +else + PAIRS = pairs_lower+pairs_upper +end + +def get_key(old, new) + PAIRS.each { |i| + return old if new.gsub(i[0], i[1])==old + return old if new.gsub(i[1], i[0])==old + } + return nil +end + +def build_partial(tokens) + h = {} + tokens.each { |tok| + found = false + h.keys.each { |i| + if get_key i, tok + h[i] << tok + found = true + break + end + } + h[tok] = [tok] if !found + } + return h +end + +h = {} +threads = [] +thread_n = 0 +counter = 0 +token_stock = [] +mutex = Mutex.new +while tok = STDIN.gets + token_stock << [] if !token_stock[thread_n] + token_stock[thread_n] << tok.strip! + counter += 1 + if token_stock[thread_n].size%conf[:shard_size]==0 + STDERR.write "Starting thread ##{thread_n}\n" + threads << Thread.new(token_stock[thread_n]) { |tokens| + th = build_partial tokens + mutex.synchronize do + h.merge! th + end + } + threads.last.abort_on_exception = true + thread_n += 1 + else + next + end + if thread_n==conf[:threads] + threads.each { |i| i.join } + token_stock.each { |i| i.clear } + thread_n = 0 + end + STDERR.write "#keys #{h.keys.size}\n" +end + +token_stock.each { |i| + if i.size!=0 + h.merge! build_partial i + end +} + diff --git a/norm-hyphens b/norm-hyphens new file mode 100755 index 0000000..4a152a1 --- /dev/null +++ b/norm-hyphens @@ -0,0 +1,4 @@ +#!/bin/zsh -x + +sed "s|[ \t]\+\xc2\xad[ \t]\+||g" + diff --git a/norm_german b/norm_german deleted file mode 100755 index cf9c060..0000000 --- a/norm_german +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env ruby - -require 'thread' -require 'trollop' - -STDIN.set_encoding 'utf-8' -STDOUT.set_encoding 'utf-8' - - -conf = Trollop::options do - banner "norm_german < " - opt :upper, "uppercase", :type => :bool, :default => false - opt :threads, "#threads", :type => :int, :default => 1, :short => '-h' - opt :shard_size, "shard size", :type => :int, :default => 1000 - opt :train, "train", :type => :bool - opt :apply, "apply", :type => :bool -end - -pairs_lower = [ ['ß','ss'], ['ue', 'ü'], ['ae','ä'], ['oe', 'ö'] ] -pairs_upper = [ ['Ä', 'Ae'], ['Ö', 'Oe'], ['Ü', 'Ue'] ] -if conf[:upper] - PAIRS = pairs_lower -else - PAIRS = pairs_lower+pairs_upper -end - -def get_key(old, new) - PAIRS.each { |i| - return old if new.gsub(i[0], i[1])==old - return old if new.gsub(i[1], i[0])==old - } - return nil -end - -def build_partial(tokens) - h = {} - tokens.each { |tok| - found = false - h.keys.each { |i| - if get_key i, tok - h[i] << tok - found = true - break - end - } - h[tok] = [tok] if !found - } - return h -end - -h = {} -threads = [] -thread_n = 0 -counter = 0 -token_stock = [] -mutex = Mutex.new -while tok = STDIN.gets - token_stock << [] if !token_stock[thread_n] - token_stock[thread_n] << tok.strip! - counter += 1 - if token_stock[thread_n].size%conf[:shard_size]==0 - STDERR.write "Starting thread ##{thread_n}\n" - threads << Thread.new(token_stock[thread_n]) { |tokens| - th = build_partial tokens - mutex.synchronize do - h.merge! th - end - } - threads.last.abort_on_exception = true - thread_n += 1 - else - next - end - if thread_n==conf[:threads] - threads.each { |i| i.join } - token_stock.each { |i| i.clear } - thread_n = 0 - end - STDERR.write "#keys #{h.keys.size}\n" -end - -token_stock.each { |i| - if i.size!=0 - h.merge! build_partial i - end -} - diff --git a/norm_hyphens b/norm_hyphens deleted file mode 100755 index 4a152a1..0000000 --- a/norm_hyphens +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/zsh -x - -sed "s|[ \t]\+\xc2\xad[ \t]\+||g" - diff --git a/normalize-punctuation b/normalize-punctuation new file mode 100755 index 0000000..108de44 --- /dev/null +++ b/normalize-punctuation @@ -0,0 +1,46 @@ +#!/usr/bin/perl -w +# adapted from the moses scripts + +use strict; + +my ($language) = @ARGV; + +while() { + s/\r//g; + # normalize unicode punctuation + s/„/\"/g; + s/“/\"/g; + s/”/\"/g; + s/–/-/g; + s/—/ - /g; s/ +/ /g; + s/´/\'/g; + s/([a-z])‘([a-z])/$1\'$2/gi; + s/([a-z])’([a-z])/$1\'$2/gi; + s/‘/\"/g; + s/‚/\"/g; + s/’/\"/g; + s/''/\"/g; + s/´´/\"/g; + s/…/.../g; + # French quotes + s/ « / \"/g; + s/« /\"/g; + s/«/\"/g; + s/ » /\" /g; + s/ »/\"/g; + s/»/\"/g; + # handle pseudo-spaces + s/ \%/\%/g; + s/nº /nº /g; + s/ :/:/g; + s/ ºC/ ºC/g; + s/ cm/ cm/g; + s/ \?/\?/g; + s/ \!/\!/g; + s/ ;/;/g; + s/, /, /g; s/ +/ /g; + + print STDERR $_ if //; + + print $_; +} diff --git a/normalize_punctuation b/normalize_punctuation deleted file mode 100755 index 108de44..0000000 --- a/normalize_punctuation +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/perl -w -# adapted from the moses scripts - -use strict; - -my ($language) = @ARGV; - -while() { - s/\r//g; - # normalize unicode punctuation - s/„/\"/g; - s/“/\"/g; - s/”/\"/g; - s/–/-/g; - s/—/ - /g; s/ +/ /g; - s/´/\'/g; - s/([a-z])‘([a-z])/$1\'$2/gi; - s/([a-z])’([a-z])/$1\'$2/gi; - s/‘/\"/g; - s/‚/\"/g; - s/’/\"/g; - s/''/\"/g; - s/´´/\"/g; - s/…/.../g; - # French quotes - s/ « / \"/g; - s/« /\"/g; - s/«/\"/g; - s/ » /\" /g; - s/ »/\"/g; - s/»/\"/g; - # handle pseudo-spaces - s/ \%/\%/g; - s/nº /nº /g; - s/ :/:/g; - s/ ºC/ ºC/g; - s/ cm/ cm/g; - s/ \?/\?/g; - s/ \!/\!/g; - s/ ;/;/g; - s/, /, /g; s/ +/ /g; - - print STDERR $_ if //; - - print $_; -} diff --git a/num-tok b/num-tok new file mode 100755 index 0000000..56cbae9 --- /dev/null +++ b/num-tok @@ -0,0 +1,9 @@ +#!/usr/bin/env ruby + +STDIN.set_encoding 'utf-8' +STDOUT.set_encoding 'utf-8' + +while line = STDIN.gets + puts line.strip.split.length +end + diff --git a/num_tok b/num_tok deleted file mode 100755 index 56cbae9..0000000 --- a/num_tok +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env ruby - -STDIN.set_encoding 'utf-8' -STDOUT.set_encoding 'utf-8' - -while line = STDIN.gets - puts line.strip.split.length -end - diff --git a/paste-pairs b/paste-pairs new file mode 100755 index 0000000..f6b8b31 --- /dev/null +++ b/paste-pairs @@ -0,0 +1,10 @@ +#!/usr/bin/python + +import sys +from itertools import izip + +for linenr, (src_line, tgt_line) in enumerate(izip(open(sys.argv[1]), open(sys.argv[2]))): + print linenr, (src_line.strip()) + print linenr, (tgt_line.strip()) + print + diff --git a/paste_pairs b/paste_pairs deleted file mode 100755 index f6b8b31..0000000 --- a/paste_pairs +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/python - -import sys -from itertools import izip - -for linenr, (src_line, tgt_line) in enumerate(izip(open(sys.argv[1]), open(sys.argv[2]))): - print linenr, (src_line.strip()) - print linenr, (tgt_line.strip()) - print - diff --git a/per-sentence-bleu b/per-sentence-bleu new file mode 100755 index 0000000..402f364 --- /dev/null +++ b/per-sentence-bleu @@ -0,0 +1,29 @@ +#!/usr/bin/env ruby + +require 'zipf' +require 'trollop' + +def main + conf = Trollop::options do + opt :input, "input", :type => :string, :default => '-' + opt :references, "references", :type => :string, :required => true + opt :len_hack, "hack of Nakov et al", :type => :int, :default => 0 + opt :n, "N", :default => 4 + end + + refs = ReadFile.readlines_strip conf[:references] + i = -1 + input = ReadFile.new conf[:input] + while line = input.gets + i += 1 + if line.strip == '' + puts 0.0 + next + end + puts BLEU::per_sentence_bleu line.strip, refs[i], conf[:n], conf[:len_hack] + end + input.close +end + +main + diff --git a/per-sentence-bleu-kbest b/per-sentence-bleu-kbest new file mode 100755 index 0000000..f8bd860 --- /dev/null +++ b/per-sentence-bleu-kbest @@ -0,0 +1,32 @@ +#!/usr/bin/env ruby + +require 'zipf' +require 'trollop' + +def main + conf = Trollop::options do + opt :kbests, "kbests", :type => :string, :default => '-' + opt :references, "references", :type => :string, :required => true + end + refs = ReadFile.new conf[:references] + kbest_lists = read_kbest_lists conf[:kbests] + i = 0 + kbest_lists.each { |list| + scores = [] + o = false + list.each { |e| scores << per_sentence_bleu(e, refs[i]) } + max = scores.max + scores.each_with_index { |x,j| + puts "#{j+1} ||| #{scores[j]} ||| #{list[j]}" + if scores[j]==max && !o + puts "^^^ #{j+1} #{max}" + o = true + end + } + puts + i += 1 + } +end + +main + diff --git a/per-sentence-ter b/per-sentence-ter new file mode 100755 index 0000000..fa283ef --- /dev/null +++ b/per-sentence-ter @@ -0,0 +1,33 @@ +#!/usr/bin/env ruby + +require 'zipf' +require 'trollop' +require 'tempfile' + +def main + conf = Trollop::options do + opt :input, "input", :type => :string, :default => '-' + opt :references, "references", :type => :string, :required => true + opt :mteval_bin, "cdec's mteval/fast_score", :type => :string, :default => '`/toolbox/cdec-dtrain/mteval/fast_score' + end + + refs = ReadFile.readlines_strip conf[:references] + input = ReadFile.new conf[:input] + i = -1 + while line = input.gets + line.strip! + i += 1 + a = Tempfile.new 'pster' + b = Tempfile.new 'pster' + a.write line+"\n" + b.write refs[i]+"\n" + a.close; b.close + score = `/toolbox/cdec-dtrain/mteval/fast_score -i #{a.path} -r #{b.path} -m ter 2>/dev/null` + puts score + a.unlink; b.unlink + end + input.close +end + +main + diff --git a/per_sentence_bleu b/per_sentence_bleu deleted file mode 100755 index 402f364..0000000 --- a/per_sentence_bleu +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' -require 'trollop' - -def main - conf = Trollop::options do - opt :input, "input", :type => :string, :default => '-' - opt :references, "references", :type => :string, :required => true - opt :len_hack, "hack of Nakov et al", :type => :int, :default => 0 - opt :n, "N", :default => 4 - end - - refs = ReadFile.readlines_strip conf[:references] - i = -1 - input = ReadFile.new conf[:input] - while line = input.gets - i += 1 - if line.strip == '' - puts 0.0 - next - end - puts BLEU::per_sentence_bleu line.strip, refs[i], conf[:n], conf[:len_hack] - end - input.close -end - -main - diff --git a/per_sentence_bleu_kbest b/per_sentence_bleu_kbest deleted file mode 100755 index f8bd860..0000000 --- a/per_sentence_bleu_kbest +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' -require 'trollop' - -def main - conf = Trollop::options do - opt :kbests, "kbests", :type => :string, :default => '-' - opt :references, "references", :type => :string, :required => true - end - refs = ReadFile.new conf[:references] - kbest_lists = read_kbest_lists conf[:kbests] - i = 0 - kbest_lists.each { |list| - scores = [] - o = false - list.each { |e| scores << per_sentence_bleu(e, refs[i]) } - max = scores.max - scores.each_with_index { |x,j| - puts "#{j+1} ||| #{scores[j]} ||| #{list[j]}" - if scores[j]==max && !o - puts "^^^ #{j+1} #{max}" - o = true - end - } - puts - i += 1 - } -end - -main - diff --git a/per_sentence_ter b/per_sentence_ter deleted file mode 100755 index fa283ef..0000000 --- a/per_sentence_ter +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' -require 'trollop' -require 'tempfile' - -def main - conf = Trollop::options do - opt :input, "input", :type => :string, :default => '-' - opt :references, "references", :type => :string, :required => true - opt :mteval_bin, "cdec's mteval/fast_score", :type => :string, :default => '`/toolbox/cdec-dtrain/mteval/fast_score' - end - - refs = ReadFile.readlines_strip conf[:references] - input = ReadFile.new conf[:input] - i = -1 - while line = input.gets - line.strip! - i += 1 - a = Tempfile.new 'pster' - b = Tempfile.new 'pster' - a.write line+"\n" - b.write refs[i]+"\n" - a.close; b.close - score = `/toolbox/cdec-dtrain/mteval/fast_score -i #{a.path} -r #{b.path} -m ter 2>/dev/null` - puts score - a.unlink; b.unlink - end - input.close -end - -main - diff --git a/preprocess b/preprocess index 6531bf1..a46b0a8 100755 --- a/preprocess +++ b/preprocess @@ -5,5 +5,5 @@ P=`pwd -P` popd > /dev/null LANG=$1 -$P/no_non_printables | sed "s|[-,\.]\{4,\}|...|g" | $P/htmlentities 2>htmlentities.$LANG.err | $P/normalize_punctuation 2>normalize-punctuation.$LANG.err | $P/tokenizer-no-escape.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err | $P/lowercase.perl 2>lowercase.$LANG.err +$P/no-non-printables | sed "s|[-,\.]\{4,\}|...|g" | $P/htmlentities 2>htmlentities.$LANG.err | $P/normalize-punctuation 2>normalize-punctuation.$LANG.err | $P/tokenizer-no-escape.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err | $P/lowercase.perl 2>lowercase.$LANG.err diff --git a/preprocess-no-lower b/preprocess-no-lower new file mode 100755 index 0000000..afd87e9 --- /dev/null +++ b/preprocess-no-lower @@ -0,0 +1,9 @@ +#!/bin/bash + +pushd `dirname $0` > /dev/null +P=`pwd -P` +popd > /dev/null + +LANG=$1 +$P/no-non-printables | sed "s|[-,\.]\{4,\}|...|g" | $P/htmlentities 2>htmlentities.$LANG.err | $P/normalize-punctuation 2>normalize-punctuation.$LANG.err | $P/tokenizer-no-escape.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err + diff --git a/preprocess_no_lower b/preprocess_no_lower deleted file mode 100755 index 3a4d358..0000000 --- a/preprocess_no_lower +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -pushd `dirname $0` > /dev/null -P=`pwd -P` -popd > /dev/null - -LANG=$1 -$P/no_non_printables | sed "s|[-,\.]\{4,\}|...|g" | $P/htmlentities 2>htmlentities.$LANG.err | $P/normalize_punctuation 2>normalize-punctuation.$LANG.err | $P/tokenizer-no-escape.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err - diff --git a/pt-bloom b/pt-bloom new file mode 100755 index 0000000..5c2cf01 --- /dev/null +++ b/pt-bloom @@ -0,0 +1,24 @@ +#!/usr/bin/env ruby + +require 'bloom-filter' +require 'trollop' + +STDIN.set_encoding 'utf-8' +STDOUT.set_encoding 'utf-8' + +conf = Trollop::options do + opt :size, "number of entries in the filter", :type => :int, :required => true + opt :error_rate, "error rate", :type => :float, :default => 0.01 +end + +f = BloomFilter.new conf[:size], conf[:error_rate] +while line = STDIN.gets + src, tgt = splitpipe(line)[0..1] + src.strip! + tgt.strip! + f.insert(src+" ||| "+tgt) +end + +f.dump('pt.bloom') +f.close + diff --git a/pt_bloom b/pt_bloom deleted file mode 100755 index 5c2cf01..0000000 --- a/pt_bloom +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env ruby - -require 'bloom-filter' -require 'trollop' - -STDIN.set_encoding 'utf-8' -STDOUT.set_encoding 'utf-8' - -conf = Trollop::options do - opt :size, "number of entries in the filter", :type => :int, :required => true - opt :error_rate, "error rate", :type => :float, :default => 0.01 -end - -f = BloomFilter.new conf[:size], conf[:error_rate] -while line = STDIN.gets - src, tgt = splitpipe(line)[0..1] - src.strip! - tgt.strip! - f.insert(src+" ||| "+tgt) -end - -f.dump('pt.bloom') -f.close - diff --git a/push-rules b/push-rules new file mode 100755 index 0000000..c97ab80 --- /dev/null +++ b/push-rules @@ -0,0 +1,24 @@ +#!/usr/bin/env ruby + +require 'zipf' + +a = ReadFile.readlines_strip ARGV[0] +h = {} +a.each { |i| + h[i] = true +} + +f = ARGV[1].to_f +while line = STDIN.gets + line.strip! + s,weight = line.split + weight = weight.to_f + a,_,target = s.rpartition ":" + _,_,source = a.split(":",3) + if (h[source]) + puts "#{s}\t#{weight*f}" + else + puts line + end +end + diff --git a/push_rules b/push_rules deleted file mode 100755 index c97ab80..0000000 --- a/push_rules +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' - -a = ReadFile.readlines_strip ARGV[0] -h = {} -a.each { |i| - h[i] = true -} - -f = ARGV[1].to_f -while line = STDIN.gets - line.strip! - s,weight = line.split - weight = weight.to_f - a,_,target = s.rpartition ":" - _,_,source = a.split(":",3) - if (h[source]) - puts "#{s}\t#{weight*f}" - else - puts line - end -end - diff --git a/ruby-eval b/ruby-eval new file mode 100755 index 0000000..fe0d181 --- /dev/null +++ b/ruby-eval @@ -0,0 +1,6 @@ +#!/usr/bin/env ruby + +while line = STDIN.gets + puts "#{eval line}" +end + diff --git a/ruby_eval b/ruby_eval deleted file mode 100755 index fe0d181..0000000 --- a/ruby_eval +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env ruby - -while line = STDIN.gets - puts "#{eval line}" -end - diff --git a/rule-shapes b/rule-shapes new file mode 100755 index 0000000..589a670 --- /dev/null +++ b/rule-shapes @@ -0,0 +1,29 @@ +#!/usr/bin/env ruby + +STDIN.set_encoding 'utf-8' +STDOUT.set_encoding 'utf-8' + +def shape s + res = [] + in_t = false + s.split.each { |i| + if i.match(/\A\[X,\d\]\z/) + if in_t + in_t = false + end + res << "NT" + next + else + res << "T" if not in_t + in_t = true + end + } + return res +end + +while line = STDIN.gets + f, e = line.split(/\t/) + f.strip!; e.strip! + puts shape(f).join('_')+"-"+shape(e).join('_') +end + diff --git a/rule_shapes b/rule_shapes deleted file mode 100755 index 589a670..0000000 --- a/rule_shapes +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env ruby - -STDIN.set_encoding 'utf-8' -STDOUT.set_encoding 'utf-8' - -def shape s - res = [] - in_t = false - s.split.each { |i| - if i.match(/\A\[X,\d\]\z/) - if in_t - in_t = false - end - res << "NT" - next - else - res << "T" if not in_t - in_t = true - end - } - return res -end - -while line = STDIN.gets - f, e = line.split(/\t/) - f.strip!; e.strip! - puts shape(f).join('_')+"-"+shape(e).join('_') -end - diff --git a/select-from b/select-from new file mode 100755 index 0000000..7ab40e7 --- /dev/null +++ b/select-from @@ -0,0 +1,28 @@ +#!/usr/bin/env ruby + +require 'trollop' +require 'zipf' + +opts = Trollop::options do + banner "select_from [--invert] -i < " + opt :index, "Line numbers to output.", :required => true + opt :invert, "Invert selection.", :type => :bool, :short => '-j', :default => false +end + +accept = {} + +f = ReadFile.new ARGV[0] +f.readlines_strip.each { |line| + accept[line.strip.to_i] = true +} + +i = 0 +while line = STDIN.gets + if accept[i] && !opts[:invert] + STDOUT.write line + elsif !accept[i] && opts[:invert] + STDOUT.write line + end + i += 1 +end + diff --git a/select_from b/select_from deleted file mode 100755 index 7ab40e7..0000000 --- a/select_from +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env ruby - -require 'trollop' -require 'zipf' - -opts = Trollop::options do - banner "select_from [--invert] -i < " - opt :index, "Line numbers to output.", :required => true - opt :invert, "Invert selection.", :type => :bool, :short => '-j', :default => false -end - -accept = {} - -f = ReadFile.new ARGV[0] -f.readlines_strip.each { |line| - accept[line.strip.to_i] = true -} - -i = 0 -while line = STDIN.gets - if accept[i] && !opts[:invert] - STDOUT.write line - elsif !accept[i] && opts[:invert] - STDOUT.write line - end - i += 1 -end - diff --git a/sort-features b/sort-features new file mode 100755 index 0000000..88bd779 --- /dev/null +++ b/sort-features @@ -0,0 +1,10 @@ +#!/usr/bin/env ruby + +h = {} +while line = STDIN.gets + name, value = line.strip.split + h[name] = value.to_f +end + +h.sort_by { |name, value| -value }.each { |name, value| puts "#{name}\t#{value}" } + diff --git a/sort_features b/sort_features deleted file mode 100755 index 88bd779..0000000 --- a/sort_features +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env ruby - -h = {} -while line = STDIN.gets - name, value = line.strip.split - h[name] = value.to_f -end - -h.sort_by { |name, value| -value }.each { |name, value| puts "#{name}\t#{value}" } - diff --git a/source-sides b/source-sides new file mode 100755 index 0000000..b4490c6 --- /dev/null +++ b/source-sides @@ -0,0 +1,4 @@ +#!/bin/zsh -x + +split_pipes -f 2 | sort | uniq | sed "s| |_|g" | sed "s|\[X,[12]\]|NX|g" + diff --git a/source_sides b/source_sides deleted file mode 100755 index b4490c6..0000000 --- a/source_sides +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/zsh -x - -split_pipes -f 2 | sort | uniq | sed "s| |_|g" | sed "s|\[X,[12]\]|NX|g" - diff --git a/split-kbest b/split-kbest new file mode 100755 index 0000000..ab425b0 --- /dev/null +++ b/split-kbest @@ -0,0 +1,24 @@ +#!/usr/bin/env ruby + +require 'zipf' + +def write_kbest l, fn + f = WriteFile.new fn + f.write l.join("") + f.close +end + +dir = ARGV[0] +i = 0 +l = [] +while line = STDIN.gets + j = line.split.first.to_i + if j == 0 && l.size > 0 + write_kbest l, "#{dir}/#{i}.gz" + l = [] + i += 1 + end + l << line +end +write_kbest l, "#{dir}/#{i}.gz" # last one + diff --git a/split-lines b/split-lines new file mode 100755 index 0000000..14b3a0f --- /dev/null +++ b/split-lines @@ -0,0 +1,14 @@ +#!/usr/bin/env ruby + +require 'zipf' + +dir = ARGV[0] +i = 0 +while line = STDIN.gets + src, tgt = line.split " ||| " + f = WriteFile.new "#{dir}/#{i}.src" + f.write line + f.close + i += 1 +end + diff --git a/split-pipes b/split-pipes new file mode 100755 index 0000000..ce8f018 --- /dev/null +++ b/split-pipes @@ -0,0 +1,51 @@ +#!/usr/bin/env ruby + +require 'trollop' + +STDIN.set_encoding 'utf-8' +STDOUT.set_encoding 'utf-8' + +conf = Trollop::options do + banner "splitpipes -f < " + opt :field, "field", :type => :int, :required => true + opt :to, "to", :type => :int, :default => nil +end + + +a = [] +range = false +if conf[:to] + range = true +end + +if range + if conf[:field] >= conf[:to] + STDERR.write "field >= to, exiting\n" + exit + end +end + +if conf[:field]<=0 || (range && conf[:to]<=0) + STDERR.write "field or to <= 0, exiting" + exit +end + +while line = STDIN.gets + j = 1 + line.strip.split(' ||| ').each { |i| + if range && (conf[:field]..conf[:to]).include?(j) + a << i.strip + elsif j == conf[:field] + puts i.strip + break + end + j += 1 + } + if range + puts "#{a.join " ||| "}\n" + end + a.clear +end + + + diff --git a/split_kbest b/split_kbest deleted file mode 100755 index ab425b0..0000000 --- a/split_kbest +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' - -def write_kbest l, fn - f = WriteFile.new fn - f.write l.join("") - f.close -end - -dir = ARGV[0] -i = 0 -l = [] -while line = STDIN.gets - j = line.split.first.to_i - if j == 0 && l.size > 0 - write_kbest l, "#{dir}/#{i}.gz" - l = [] - i += 1 - end - l << line -end -write_kbest l, "#{dir}/#{i}.gz" # last one - diff --git a/split_lines b/split_lines deleted file mode 100755 index 14b3a0f..0000000 --- a/split_lines +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' - -dir = ARGV[0] -i = 0 -while line = STDIN.gets - src, tgt = line.split " ||| " - f = WriteFile.new "#{dir}/#{i}.src" - f.write line - f.close - i += 1 -end - diff --git a/split_pipes b/split_pipes deleted file mode 100755 index ce8f018..0000000 --- a/split_pipes +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env ruby - -require 'trollop' - -STDIN.set_encoding 'utf-8' -STDOUT.set_encoding 'utf-8' - -conf = Trollop::options do - banner "splitpipes -f < " - opt :field, "field", :type => :int, :required => true - opt :to, "to", :type => :int, :default => nil -end - - -a = [] -range = false -if conf[:to] - range = true -end - -if range - if conf[:field] >= conf[:to] - STDERR.write "field >= to, exiting\n" - exit - end -end - -if conf[:field]<=0 || (range && conf[:to]<=0) - STDERR.write "field or to <= 0, exiting" - exit -end - -while line = STDIN.gets - j = 1 - line.strip.split(' ||| ').each { |i| - if range && (conf[:field]..conf[:to]).include?(j) - a << i.strip - elsif j == conf[:field] - puts i.strip - break - end - j += 1 - } - if range - puts "#{a.join " ||| "}\n" - end - a.clear -end - - - diff --git a/stanford-parser-run b/stanford-parser-run new file mode 100755 index 0000000..f8d4210 --- /dev/null +++ b/stanford-parser-run @@ -0,0 +1,13 @@ +#!/bin/bash + +if [ $# != 1 ]; then + echo "$0 text-file" + exit 1 +fi + +export CLASSPATH=:/toolbox/stanfordparser_3_2_0/* + +IN=$1 + +cat $IN | java -server -mx25000m edu.stanford.nlp.parser.lexparser.LexicalizedParser -nthreads 8 -sentences newline -encoding utf-8 -tokenized -outputFormat "typedDependencies" -outputFormatOptions "basicDependencies" edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz - | tr '\n' '\t' | sed 's/\t\t/\n/g' | sed 's/\t/ /g' | sed 's/ *$//' | sed 's/, /,/g' > $IN.stp + diff --git a/stanford_parser_run b/stanford_parser_run deleted file mode 100755 index f8d4210..0000000 --- a/stanford_parser_run +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -if [ $# != 1 ]; then - echo "$0 text-file" - exit 1 -fi - -export CLASSPATH=:/toolbox/stanfordparser_3_2_0/* - -IN=$1 - -cat $IN | java -server -mx25000m edu.stanford.nlp.parser.lexparser.LexicalizedParser -nthreads 8 -sentences newline -encoding utf-8 -tokenized -outputFormat "typedDependencies" -outputFormatOptions "basicDependencies" edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz - | tr '\n' '\t' | sed 's/\t\t/\n/g' | sed 's/\t/ /g' | sed 's/ *$//' | sed 's/, /,/g' > $IN.stp - diff --git a/test/cdec-hg-to-json/cdec.ini b/test/cdec-hg-to-json/cdec.ini new file mode 100644 index 0000000..1ad25b5 --- /dev/null +++ b/test/cdec-hg-to-json/cdec.ini @@ -0,0 +1,5 @@ +formalism=scfg +grammar=test/hg2json/grammar.gz +add_pass_through_rules=true +feature_function=WordPenalty +intersection_strategy=full diff --git a/test/cdec-hg-to-json/grammar.gz b/test/cdec-hg-to-json/grammar.gz new file mode 100644 index 0000000..78dda98 Binary files /dev/null and b/test/cdec-hg-to-json/grammar.gz differ diff --git a/test/cdec-hg-to-json/hg.json.gz b/test/cdec-hg-to-json/hg.json.gz new file mode 100644 index 0000000..ed178c6 Binary files /dev/null and b/test/cdec-hg-to-json/hg.json.gz differ diff --git a/test/cdec-hg-to-json/hg.meta b/test/cdec-hg-to-json/hg.meta new file mode 100644 index 0000000..d33a54c --- /dev/null +++ b/test/cdec-hg-to-json/hg.meta @@ -0,0 +1,7 @@ +input: + 'in dem verzweifelten versuch , an der macht festzuhalten , hat pervez musharraf den rahmen der pakistanischen verfassung verlassen und den notstand ausgerufen .' +viterbi translation: + 'which labor market desperate transformed into attempting gathered by failed to show any the non - is making festzuhalten gathered by pervez musharraf meant to its borders with within than the non - have pakistan 's intelligence relied constitutional for security as a its borders with declared a state of emergency - range missiles .' +# nodes = 220 +# edges = 16640 +viterbi score = 228.95 diff --git a/test/cdec-hg-to-json/in b/test/cdec-hg-to-json/in new file mode 100644 index 0000000..7dc411d --- /dev/null +++ b/test/cdec-hg-to-json/in @@ -0,0 +1 @@ +in dem verzweifelten versuch , an der macht festzuhalten , hat pervez musharraf den rahmen der pakistanischen verfassung verlassen und den notstand ausgerufen . diff --git a/test/cdec-hg-to-json/toy.cdec.ini b/test/cdec-hg-to-json/toy.cdec.ini new file mode 100644 index 0000000..d4a2896 --- /dev/null +++ b/test/cdec-hg-to-json/toy.cdec.ini @@ -0,0 +1,2 @@ +formalism=scfg +grammar=test/hg2json/toy.grammar diff --git a/test/cdec-hg-to-json/toy.grammar b/test/cdec-hg-to-json/toy.grammar new file mode 100644 index 0000000..382c94f --- /dev/null +++ b/test/cdec-hg-to-json/toy.grammar @@ -0,0 +1,12 @@ +[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 +[NP] ||| ich ||| i ||| logp=-0.5 use_i=1.0 +[NP] ||| ein [NN,1] ||| a [1] ||| logp=0 use_a=1.0 +[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 +[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 +[JJ] ||| kleines ||| small ||| logp=0 use_small=1.0 +[JJ] ||| kleines ||| little ||| logp=0 use_little=1.0 +[JJ] ||| grosses ||| big ||| logp=0 +[JJ] ||| grosses ||| large ||| logp=0 +[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 +[V] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0 +[V] ||| fand ||| found ||| logp=0 diff --git a/test/cdec-hg-to-json/toy.in b/test/cdec-hg-to-json/toy.in new file mode 100644 index 0000000..e6df927 --- /dev/null +++ b/test/cdec-hg-to-json/toy.in @@ -0,0 +1 @@ +ich sah ein kleines haus diff --git a/test/cdec-hg-to-json/toy.weights b/test/cdec-hg-to-json/toy.weights new file mode 100644 index 0000000..70075b7 --- /dev/null +++ b/test/cdec-hg-to-json/toy.weights @@ -0,0 +1,3 @@ +logp 2 +use_house 0 +use_shell 1 diff --git a/test/cdec-hg-to-json/weights b/test/cdec-hg-to-json/weights new file mode 100644 index 0000000..7f96f1d --- /dev/null +++ b/test/cdec-hg-to-json/weights @@ -0,0 +1,17 @@ +PhraseModel_0 1.0 +PhraseModel_1 1.0 +PhraseModel_2 1.0 +PhraseModel_3 1.0 +PhraseModel_4 1.0 +PhraseModel_5 1.0 +PhraseModel_6 1.0 +PassThrough -1.0 +PassThrough_1 -1.0 +PassThrough_2 -1.0 +PassThrough_3 -1.0 +PassThrough_4 -1.0 +PassThrough_5 -1.0 +PassThrough_6 -1.0 +Glue 0.1 +LanguageModel 10.0 +LanguageModel_OOV -10 diff --git a/test/cdec_hg_to_json/cdec.ini b/test/cdec_hg_to_json/cdec.ini deleted file mode 100644 index 1ad25b5..0000000 --- a/test/cdec_hg_to_json/cdec.ini +++ /dev/null @@ -1,5 +0,0 @@ -formalism=scfg -grammar=test/hg2json/grammar.gz -add_pass_through_rules=true -feature_function=WordPenalty -intersection_strategy=full diff --git a/test/cdec_hg_to_json/grammar.gz b/test/cdec_hg_to_json/grammar.gz deleted file mode 100644 index 78dda98..0000000 Binary files a/test/cdec_hg_to_json/grammar.gz and /dev/null differ diff --git a/test/cdec_hg_to_json/hg.json.gz b/test/cdec_hg_to_json/hg.json.gz deleted file mode 100644 index ed178c6..0000000 Binary files a/test/cdec_hg_to_json/hg.json.gz and /dev/null differ diff --git a/test/cdec_hg_to_json/hg.meta b/test/cdec_hg_to_json/hg.meta deleted file mode 100644 index d33a54c..0000000 --- a/test/cdec_hg_to_json/hg.meta +++ /dev/null @@ -1,7 +0,0 @@ -input: - 'in dem verzweifelten versuch , an der macht festzuhalten , hat pervez musharraf den rahmen der pakistanischen verfassung verlassen und den notstand ausgerufen .' -viterbi translation: - 'which labor market desperate transformed into attempting gathered by failed to show any the non - is making festzuhalten gathered by pervez musharraf meant to its borders with within than the non - have pakistan 's intelligence relied constitutional for security as a its borders with declared a state of emergency - range missiles .' -# nodes = 220 -# edges = 16640 -viterbi score = 228.95 diff --git a/test/cdec_hg_to_json/in b/test/cdec_hg_to_json/in deleted file mode 100644 index 7dc411d..0000000 --- a/test/cdec_hg_to_json/in +++ /dev/null @@ -1 +0,0 @@ -in dem verzweifelten versuch , an der macht festzuhalten , hat pervez musharraf den rahmen der pakistanischen verfassung verlassen und den notstand ausgerufen . diff --git a/test/cdec_hg_to_json/toy.cdec.ini b/test/cdec_hg_to_json/toy.cdec.ini deleted file mode 100644 index d4a2896..0000000 --- a/test/cdec_hg_to_json/toy.cdec.ini +++ /dev/null @@ -1,2 +0,0 @@ -formalism=scfg -grammar=test/hg2json/toy.grammar diff --git a/test/cdec_hg_to_json/toy.grammar b/test/cdec_hg_to_json/toy.grammar deleted file mode 100644 index 382c94f..0000000 --- a/test/cdec_hg_to_json/toy.grammar +++ /dev/null @@ -1,12 +0,0 @@ -[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 -[NP] ||| ich ||| i ||| logp=-0.5 use_i=1.0 -[NP] ||| ein [NN,1] ||| a [1] ||| logp=0 use_a=1.0 -[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 -[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 -[JJ] ||| kleines ||| small ||| logp=0 use_small=1.0 -[JJ] ||| kleines ||| little ||| logp=0 use_little=1.0 -[JJ] ||| grosses ||| big ||| logp=0 -[JJ] ||| grosses ||| large ||| logp=0 -[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 -[V] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0 -[V] ||| fand ||| found ||| logp=0 diff --git a/test/cdec_hg_to_json/toy.in b/test/cdec_hg_to_json/toy.in deleted file mode 100644 index e6df927..0000000 --- a/test/cdec_hg_to_json/toy.in +++ /dev/null @@ -1 +0,0 @@ -ich sah ein kleines haus diff --git a/test/cdec_hg_to_json/toy.weights b/test/cdec_hg_to_json/toy.weights deleted file mode 100644 index 70075b7..0000000 --- a/test/cdec_hg_to_json/toy.weights +++ /dev/null @@ -1,3 +0,0 @@ -logp 2 -use_house 0 -use_shell 1 diff --git a/test/cdec_hg_to_json/weights b/test/cdec_hg_to_json/weights deleted file mode 100644 index 7f96f1d..0000000 --- a/test/cdec_hg_to_json/weights +++ /dev/null @@ -1,17 +0,0 @@ -PhraseModel_0 1.0 -PhraseModel_1 1.0 -PhraseModel_2 1.0 -PhraseModel_3 1.0 -PhraseModel_4 1.0 -PhraseModel_5 1.0 -PhraseModel_6 1.0 -PassThrough -1.0 -PassThrough_1 -1.0 -PassThrough_2 -1.0 -PassThrough_3 -1.0 -PassThrough_4 -1.0 -PassThrough_5 -1.0 -PassThrough_6 -1.0 -Glue 0.1 -LanguageModel 10.0 -LanguageModel_OOV -10 diff --git a/test/kbest-bleu-oracles/debug.kbests b/test/kbest-bleu-oracles/debug.kbests new file mode 100644 index 0000000..1e9c894 --- /dev/null +++ b/test/kbest-bleu-oracles/debug.kbests @@ -0,0 +1,4 @@ +0 ||| a b c d ||| x=1 ||| 10 +0 ||| a b d c ||| x=1 ||| 9 +0 ||| a d b c ||| x=1 ||| 8 +0 ||| d a b c ||| x=1 ||| 7 diff --git a/test/kbest-bleu-oracles/debug.refs b/test/kbest-bleu-oracles/debug.refs new file mode 100644 index 0000000..8e13e46 --- /dev/null +++ b/test/kbest-bleu-oracles/debug.refs @@ -0,0 +1 @@ +a b c d diff --git a/test/kbest-bleu-oracles/example.kbests b/test/kbest-bleu-oracles/example.kbests new file mode 100644 index 0000000..1126f1f --- /dev/null +++ b/test/kbest-bleu-oracles/example.kbests @@ -0,0 +1,100 @@ +0 ||| europe races house divided ||| WordPenalty=-1.73718 LanguageModel=-18.15 PhraseModel_0=2.2467 PhraseModel_1=4.27323 PhraseModel_2=2.20952 PhraseModel_3=6.01559 PhraseModel_4=1.19831 PhraseModel_5=1 PhraseModel_6=1 ||| -61.4791 +0 ||| europe races divided house ||| WordPenalty=-1.73718 Glue=1 LanguageModel=-18.7337 PhraseModel_0=2.75576 PhraseModel_1=8.10398 PhraseModel_2=5.5382 PhraseModel_3=6.01559 PhraseModel_4=1.19831 PhraseModel_5=0 PhraseModel_6=0 ||| -61.5856 +0 ||| europe after racial house divided ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-21.3699 PhraseModel_0=1.68395 PhraseModel_1=4.27323 PhraseModel_2=2.67025 PhraseModel_3=4.44249 PhraseModel_4=1.87098 PhraseModel_5=1 PhraseModel_6=1 ||| -63.2049 +0 ||| europe after race divided house ||| WordPenalty=-2.17147 Glue=2 LanguageModel=-21.1973 PhraseModel_0=2.47176 PhraseModel_1=8.10398 PhraseModel_2=5.73009 PhraseModel_3=5.07197 PhraseModel_4=2.11131 PhraseModel_5=0 PhraseModel_6=0 ||| -63.4497 +0 ||| europe after races house divided ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-22.0216 PhraseModel_0=1.84876 PhraseModel_1=4.27323 PhraseModel_2=2.51055 PhraseModel_3=3.81707 PhraseModel_4=2.04167 PhraseModel_5=1 PhraseModel_6=1 ||| -63.7649 +0 ||| europe after races divided house ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-22.6053 PhraseModel_0=2.35782 PhraseModel_1=8.10398 PhraseModel_2=5.83923 PhraseModel_3=3.81707 PhraseModel_4=2.04167 PhraseModel_5=0 PhraseModel_6=0 ||| -63.8715 +0 ||| europe after racial divided house ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-22.2498 PhraseModel_0=2.19301 PhraseModel_1=8.10398 PhraseModel_2=5.99893 PhraseModel_3=4.44249 PhraseModel_4=1.87098 PhraseModel_5=0 PhraseModel_6=0 ||| -63.9867 +0 ||| europe following racial house divided ||| WordPenalty=-2.17147 Glue=2 LanguageModel=-21.941 PhraseModel_0=1.60477 PhraseModel_1=4.27323 PhraseModel_2=2.73719 PhraseModel_3=4.67218 PhraseModel_4=2.38101 PhraseModel_5=1 PhraseModel_6=1 ||| -64.7057 +0 ||| divided europe after racial house ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-21.6711 PhraseModel_0=3.23398 PhraseModel_1=8.10398 PhraseModel_2=5.11818 PhraseModel_3=4.44249 PhraseModel_4=1.87098 PhraseModel_5=0 PhraseModel_6=0 ||| -65.0513 +0 ||| europe race divided house ||| WordPenalty=-1.73718 LanguageModel=-19.0747 PhraseModel_0=2.95643 PhraseModel_1=8.10398 PhraseModel_2=5.34994 PhraseModel_3=7.27048 PhraseModel_4=1.26795 PhraseModel_5=0 PhraseModel_6=0 ||| -65.348 +1 ||| a common feature of europe 's extreme right is its racism and the fact that they use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=4 LanguageModel=-36.6093 PhraseModel_0=6.68111 PhraseModel_1=22.5747 PhraseModel_2=16.1531 PhraseModel_3=22.3782 PhraseModel_4=9.65239 PhraseModel_5=1 PhraseModel_6=1 ||| -136.567 +1 ||| a common feature of europe 's extreme right is its racism and the fact that they use the immigration as a political lever . ||| WordPenalty=-10.4231 Glue=5 LanguageModel=-39.0118 PhraseModel_0=5.94865 PhraseModel_1=18.9704 PhraseModel_2=13.3916 PhraseModel_3=22.3782 PhraseModel_4=10.0435 PhraseModel_5=1 PhraseModel_6=1 ||| -137.254 +1 ||| a common feature of europe 's extreme right is its racism and the fact that you use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=5 LanguageModel=-38.0283 PhraseModel_0=6.71292 PhraseModel_1=22.8797 PhraseModel_2=16.4585 PhraseModel_3=22.3803 PhraseModel_4=9.62847 PhraseModel_5=1 PhraseModel_6=1 ||| -140.071 +1 ||| a common feature of europe 's extreme right is its racism , and the fact that they use the immigration as a political lever . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-39.7992 PhraseModel_0=7.42421 PhraseModel_1=21.4489 PhraseModel_2=14.4345 PhraseModel_3=22.3782 PhraseModel_4=11.152 PhraseModel_5=1 PhraseModel_6=1 ||| -142.605 +1 ||| a common feature of europe 's extreme right is its racism , and the fact that you use immigration as a political lever . ||| WordPenalty=-10.4231 Glue=4 LanguageModel=-38.8156 PhraseModel_0=7.75494 PhraseModel_1=22.8797 PhraseModel_2=15.4378 PhraseModel_3=22.3803 PhraseModel_4=10.7369 PhraseModel_5=1 PhraseModel_6=1 ||| -142.999 +1 ||| a common feature of europe 's extreme right is its racism and the fact that you use the immigration as a political lever . ||| WordPenalty=-10.4231 Glue=5 LanguageModel=-40.3141 PhraseModel_0=6.39864 PhraseModel_1=23.0373 PhraseModel_2=16.9021 PhraseModel_3=22.3976 PhraseModel_4=10.0196 PhraseModel_5=1 PhraseModel_6=1 ||| -143.611 +1 ||| one common feature of europe 's extreme right is its racism and the fact that they use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=1 LanguageModel=-37.7197 PhraseModel_0=8.27536 PhraseModel_1=21.7089 PhraseModel_2=13.9878 PhraseModel_3=22.5681 PhraseModel_4=10.7747 PhraseModel_5=1 PhraseModel_6=2 ||| -144.987 +1 ||| one common feature of europe 's extreme right is its racism and the fact that they use the immigration as a political lever . ||| WordPenalty=-10.4231 Glue=1 LanguageModel=-40.1222 PhraseModel_0=7.73842 PhraseModel_1=18.7826 PhraseModel_2=11.6924 PhraseModel_3=22.5681 PhraseModel_4=11.1658 PhraseModel_5=1 PhraseModel_6=2 ||| -146.502 +1 ||| a common feature of europe 's extreme right is its racism , and the fact that you use the immigration as a political lever . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-41.1014 PhraseModel_0=7.44067 PhraseModel_1=23.0373 PhraseModel_2=15.8814 PhraseModel_3=22.3976 PhraseModel_4=11.1281 PhraseModel_5=1 PhraseModel_6=1 ||| -146.539 +1 ||| a shared feature of europe 's extreme right is its racism and the fact that they use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=1 LanguageModel=-40.0778 PhraseModel_0=7.91847 PhraseModel_1=22.5747 PhraseModel_2=14.9464 PhraseModel_3=22.3052 PhraseModel_4=10.5431 PhraseModel_5=1 PhraseModel_6=1 ||| -146.956 +2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.0176 Glue=6 LanguageModel=-100.983 PassThrough=3 PhraseModel_0=15.0383 PhraseModel_1=33.3621 PhraseModel_2=19.8383 PhraseModel_3=32.881 PhraseModel_4=23.2559 PhraseModel_5=0 PhraseModel_6=1 ||| -300.653 +2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of political parties or movements , which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=7 LanguageModel=-100.556 PassThrough=3 PhraseModel_0=16.5071 PhraseModel_1=32.7586 PhraseModel_2=17.7282 PhraseModel_3=33.296 PhraseModel_4=25.589 PhraseModel_5=0 PhraseModel_6=1 ||| -302.029 +2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed a common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.0176 Glue=6 LanguageModel=-99.7968 PassThrough=3 PhraseModel_0=15.033 PhraseModel_1=35.4231 PhraseModel_2=21.6674 PhraseModel_3=33.4947 PhraseModel_4=24.5697 PhraseModel_5=1 PhraseModel_6=1 ||| -302.155 +2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : the rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=6 LanguageModel=-100.68 PassThrough=4 PhraseModel_0=16.6403 PhraseModel_1=35.0625 PhraseModel_2=19.9175 PhraseModel_3=32.7793 PhraseModel_4=24.2261 PhraseModel_5=0 PhraseModel_6=0 ||| -302.466 +2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-22.5833 Glue=6 LanguageModel=-101.623 PassThrough=3 PhraseModel_0=14.5035 PhraseModel_1=33.3891 PhraseModel_2=20.3355 PhraseModel_3=33.2525 PhraseModel_4=22.8878 PhraseModel_5=0 PhraseModel_6=1 ||| -302.743 +2 ||| the lega nord in italy , the vlaams block , the followers of le pen 's national front in france , the netherlands are examples of parties or movements , which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-22.5833 Glue=6 LanguageModel=-97.1998 PassThrough=2 PhraseModel_0=17.8853 PhraseModel_1=39.5922 PhraseModel_2=23.2216 PhraseModel_3=33.6892 PhraseModel_4=23.0537 PhraseModel_5=0 PhraseModel_6=1 ||| -302.874 +2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of the immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=6 LanguageModel=-102.542 PassThrough=3 PhraseModel_0=15.8186 PhraseModel_1=35.0751 PhraseModel_2=20.6755 PhraseModel_3=32.881 PhraseModel_4=23.6471 PhraseModel_5=0 PhraseModel_6=0 ||| -303.305 +2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of immigration and to call for a simplified policy to regulate them . ||| WordPenalty=-22.5833 Glue=11 LanguageModel=-102.736 PassThrough=3 PhraseModel_0=13.1928 PhraseModel_1=35.2776 PhraseModel_2=23.3398 PhraseModel_3=33.1527 PhraseModel_4=21.9207 PhraseModel_5=0 PhraseModel_6=1 ||| -303.344 +2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of immigration and to call for a simplified policy in order to regulate it . ||| WordPenalty=-23.4519 Glue=10 LanguageModel=-104.547 PassThrough=3 PhraseModel_0=13.2351 PhraseModel_1=35.2776 PhraseModel_2=23.3336 PhraseModel_3=32.2759 PhraseModel_4=23.8622 PhraseModel_5=0 PhraseModel_6=1 ||| -303.438 +2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of political parties or movements , which have formed a common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=7 LanguageModel=-99.3692 PassThrough=3 PhraseModel_0=16.5018 PhraseModel_1=34.8196 PhraseModel_2=19.5572 PhraseModel_3=33.9097 PhraseModel_4=26.9028 PhraseModel_5=1 PhraseModel_6=1 ||| -303.531 +3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not to go too soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-15.6346 Glue=4 LanguageModel=-83.9883 PhraseModel_0=10.8504 PhraseModel_1=36.0092 PhraseModel_2=25.6962 PhraseModel_3=18.8196 PhraseModel_4=12.4793 PhraseModel_5=0 PhraseModel_6=0 ||| -236.305 +3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not go too soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-83.8116 PhraseModel_0=10.6743 PhraseModel_1=36.0092 PhraseModel_2=25.8212 PhraseModel_3=18.8196 PhraseModel_4=12.1849 PhraseModel_5=0 PhraseModel_6=0 ||| -236.56 +3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not to go too soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-82.9542 PhraseModel_0=11.3166 PhraseModel_1=35.9808 PhraseModel_2=25.3584 PhraseModel_3=19.1145 PhraseModel_4=12.9314 PhraseModel_5=0 PhraseModel_6=0 ||| -236.57 +3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( sadly not to go too soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-15.6346 Glue=4 LanguageModel=-82.211 PhraseModel_0=11.9448 PhraseModel_1=39.5089 PhraseModel_2=28.0752 PhraseModel_3=18.9139 PhraseModel_4=13.4713 PhraseModel_5=0 PhraseModel_6=0 ||| -236.761 +3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not go too soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-14.766 Glue=4 LanguageModel=-82.7775 PhraseModel_0=11.1405 PhraseModel_1=35.9808 PhraseModel_2=25.4834 PhraseModel_3=19.1145 PhraseModel_4=12.6371 PhraseModel_5=0 PhraseModel_6=0 ||| -236.825 +3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( sadly not to go too soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-81.1769 PhraseModel_0=12.411 PhraseModel_1=39.4805 PhraseModel_2=27.7374 PhraseModel_3=19.2089 PhraseModel_4=13.9234 PhraseModel_5=0 PhraseModel_6=0 ||| -237.026 +3 ||| while individuals like jörg haidar and jean @-@ marie le pen does not , unfortunately , and ( soon ) go , the race will come from the european policy to disappear anytime soon . ||| WordPenalty=-15.2003 Glue=5 LanguageModel=-74.4647 PhraseModel_0=8.03162 PhraseModel_1=32.0087 PhraseModel_2=24.9113 PhraseModel_3=27.2046 PhraseModel_4=19.1921 PhraseModel_5=2 PhraseModel_6=4 ||| -237.241 +3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and go ( unfortunately not soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-14.766 Glue=4 LanguageModel=-80.1276 PhraseModel_0=12.2413 PhraseModel_1=38.9132 PhraseModel_2=27.227 PhraseModel_3=20.0911 PhraseModel_4=10.4871 PhraseModel_5=0 PhraseModel_6=0 ||| -237.267 +3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and go ( unfortunately not soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-14.3317 Glue=4 LanguageModel=-79.0935 PhraseModel_0=12.7075 PhraseModel_1=38.8848 PhraseModel_2=26.8892 PhraseModel_3=20.3861 PhraseModel_4=10.9392 PhraseModel_5=0 PhraseModel_6=0 ||| -237.532 +3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not go too soon ) , will not disappear as soon the race from the european policy . ||| WordPenalty=-14.766 Glue=4 LanguageModel=-79.8077 PhraseModel_0=11.0526 PhraseModel_1=33.8577 PhraseModel_2=23.7301 PhraseModel_3=20.8921 PhraseModel_4=10.9702 PhraseModel_5=0 PhraseModel_6=2 ||| -237.654 +4 ||| an aging population and ever more open borders the racist fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=4 LanguageModel=-34.4131 PhraseModel_0=7.06252 PhraseModel_1=19.7388 PhraseModel_2=13.2138 PhraseModel_3=17.7775 PhraseModel_4=5.47301 PhraseModel_5=0 PhraseModel_6=1 ||| -127.294 +4 ||| an aging population and ever more open borders the racist fragmentation in european countries . ||| WordPenalty=-6.51442 Glue=4 LanguageModel=-33.7446 PhraseModel_0=7.05156 PhraseModel_1=19.8444 PhraseModel_2=13.3428 PhraseModel_3=18.5456 PhraseModel_4=5.26208 PhraseModel_5=0 PhraseModel_6=1 ||| -128.424 +4 ||| an aging population and ever more open borders increase racial fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=3 LanguageModel=-35.0385 PhraseModel_0=8.57304 PhraseModel_1=21.0335 PhraseModel_2=12.975 PhraseModel_3=15.9006 PhraseModel_4=8.12696 PhraseModel_5=0 PhraseModel_6=1 ||| -128.599 +4 ||| an aging population and ever more open borders multiply the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=5 LanguageModel=-39.5074 PhraseModel_0=7.53377 PhraseModel_1=20.0813 PhraseModel_2=13.1161 PhraseModel_3=13.3764 PhraseModel_4=7.95875 PhraseModel_5=0 PhraseModel_6=1 ||| -129.817 +4 ||| an aging population and ever more open borders increase the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=6 LanguageModel=-38.8411 PhraseModel_0=6.66847 PhraseModel_1=20.0813 PhraseModel_2=13.8817 PhraseModel_3=15.1212 PhraseModel_4=7.50646 PhraseModel_5=0 PhraseModel_6=1 ||| -129.94 +4 ||| an aging population and ever more open borders reproduce the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=5 LanguageModel=-39.5442 PhraseModel_0=7.70986 PhraseModel_1=20.0813 PhraseModel_2=12.97 PhraseModel_3=13.3002 PhraseModel_4=8.03794 PhraseModel_5=0 PhraseModel_6=1 ||| -130.137 +4 ||| an aging population and ever more open borders multiplying the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=5 LanguageModel=-39.82 PhraseModel_0=7.53377 PhraseModel_1=20.0813 PhraseModel_2=13.1161 PhraseModel_3=13.2318 PhraseModel_4=7.83382 PhraseModel_5=0 PhraseModel_6=1 ||| -130.257 +4 ||| an aging population and ever more open borders the racial fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=4 LanguageModel=-33.155 PhraseModel_0=8.15791 PhraseModel_1=23.1696 PhraseModel_2=15.5542 PhraseModel_3=18.5569 PhraseModel_4=6.35432 PhraseModel_5=0 PhraseModel_6=1 ||| -130.313 +4 ||| an aging population and ever more open borders grows the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=6 LanguageModel=-39.0359 PhraseModel_0=7.57156 PhraseModel_1=20.0813 PhraseModel_2=13.1283 PhraseModel_3=14.2568 PhraseModel_4=8.43588 PhraseModel_5=0 PhraseModel_6=1 ||| -130.549 +4 ||| an aging population and ever more open borders multiply racist fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=3 LanguageModel=-38.6378 PhraseModel_0=8.05048 PhraseModel_1=21.0335 PhraseModel_2=13.5162 PhraseModel_3=13.3764 PhraseModel_4=7.69795 PhraseModel_5=0 PhraseModel_6=1 ||| -130.598 +5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.766 Glue=5 LanguageModel=-59.9487 PassThrough=1 PhraseModel_0=11.4712 PhraseModel_1=29.458 PhraseModel_2=19.0438 PhraseModel_3=37.8219 PhraseModel_4=21.1861 PhraseModel_5=0 PhraseModel_6=2 ||| -225.247 +5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.766 Glue=7 LanguageModel=-60.8539 PassThrough=2 PhraseModel_0=11.5126 PhraseModel_1=29.759 PhraseModel_2=19.2199 PhraseModel_3=37.2073 PhraseModel_4=20.0207 PhraseModel_5=0 PhraseModel_6=1 ||| -225.823 +5 ||| the big parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.766 Glue=8 LanguageModel=-61.5768 PassThrough=1 PhraseModel_0=12.0156 PhraseModel_1=33.4151 PhraseModel_2=22.0984 PhraseModel_3=37.22 PhraseModel_4=20.669 PhraseModel_5=0 PhraseModel_6=0 ||| -226.173 +5 ||| the big parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.766 Glue=5 LanguageModel=-60.6716 PassThrough=1 PhraseModel_0=11.3742 PhraseModel_1=29.458 PhraseModel_2=19.123 PhraseModel_3=37.5989 PhraseModel_4=21.4664 PhraseModel_5=0 PhraseModel_6=2 ||| -226.174 +5 ||| the major parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.3317 Glue=5 LanguageModel=-58.5133 PassThrough=1 PhraseModel_0=11.8665 PhraseModel_1=28.9017 PhraseModel_2=18.1987 PhraseModel_3=39.1001 PhraseModel_4=20.9503 PhraseModel_5=0 PhraseModel_6=2 ||| -226.221 +5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and counter all prospects have hoped that it will soon disappear . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-63.5417 PassThrough=1 PhraseModel_0=12.3574 PhraseModel_1=29.4845 PhraseModel_2=18.2384 PhraseModel_3=34.0032 PhraseModel_4=21.7025 PhraseModel_5=0 PhraseModel_6=2 ||| -226.609 +5 ||| the major parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.3317 Glue=7 LanguageModel=-59.4184 PassThrough=2 PhraseModel_0=11.9079 PhraseModel_1=29.2027 PhraseModel_2=18.3748 PhraseModel_3=38.4854 PhraseModel_4=19.7848 PhraseModel_5=0 PhraseModel_6=1 ||| -226.796 +5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and allen prospects have hoped that it will soon disappear . ||| WordPenalty=-14.766 Glue=5 LanguageModel=-61.4797 PassThrough=2 PhraseModel_0=11.3037 PhraseModel_1=26.9794 PhraseModel_2=16.7321 PhraseModel_3=36.9453 PhraseModel_4=21.0569 PhraseModel_5=0 PhraseModel_6=2 ||| -227.012 +5 ||| the big parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.3317 Glue=8 LanguageModel=-60.1414 PassThrough=1 PhraseModel_0=12.4109 PhraseModel_1=32.8588 PhraseModel_2=21.2533 PhraseModel_3=38.4981 PhraseModel_4=20.4332 PhraseModel_5=0 PhraseModel_6=0 ||| -227.147 +5 ||| the big parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.3317 Glue=5 LanguageModel=-59.2362 PassThrough=1 PhraseModel_0=11.7696 PhraseModel_1=28.9017 PhraseModel_2=18.2779 PhraseModel_3=38.877 PhraseModel_4=21.2305 PhraseModel_5=0 PhraseModel_6=2 ||| -227.147 +6 ||| but it will not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=1 LanguageModel=-22.9084 PhraseModel_0=4.65824 PhraseModel_1=11.8066 PhraseModel_2=7.53935 PhraseModel_3=10.1842 PhraseModel_4=10.0823 PhraseModel_5=0 PhraseModel_6=1 ||| -74.8416 +6 ||| but that it will not , as is evident from the history of racism in america . ||| WordPenalty=-7.38301 Glue=1 LanguageModel=-25.5326 PhraseModel_0=4.09397 PhraseModel_1=11.8066 PhraseModel_2=8.04227 PhraseModel_3=8.52096 PhraseModel_4=9.91846 PhraseModel_5=0 PhraseModel_6=1 ||| -75.1773 +6 ||| but this will not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=1 LanguageModel=-23.6293 PhraseModel_0=4.59129 PhraseModel_1=11.8066 PhraseModel_2=7.59734 PhraseModel_3=9.74767 PhraseModel_4=9.60367 PhraseModel_5=0 PhraseModel_6=1 ||| -75.5277 +6 ||| but that will not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=1 LanguageModel=-23.682 PhraseModel_0=4.35721 PhraseModel_1=11.8066 PhraseModel_2=7.80819 PhraseModel_3=9.95635 PhraseModel_4=9.51798 PhraseModel_5=0 PhraseModel_6=1 ||| -75.5534 +6 ||| but it is not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=2 LanguageModel=-21.7223 PhraseModel_0=5.33564 PhraseModel_1=14.6359 PhraseModel_2=9.75115 PhraseModel_3=10.8638 PhraseModel_4=10.5328 PhraseModel_5=0 PhraseModel_6=1 ||| -76.6276 +6 ||| but this is not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=2 LanguageModel=-22.3075 PhraseModel_0=5.26869 PhraseModel_1=14.6359 PhraseModel_2=9.80914 PhraseModel_3=10.4272 PhraseModel_4=10.0542 PhraseModel_5=0 PhraseModel_6=1 ||| -77.004 +6 ||| but that is not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=2 LanguageModel=-22.4246 PhraseModel_0=5.03461 PhraseModel_1=14.6359 PhraseModel_2=10.02 PhraseModel_3=10.6359 PhraseModel_4=9.9685 PhraseModel_5=0 PhraseModel_6=1 ||| -77.1766 +6 ||| this but it will not , as is evident from the history of racism in america . ||| WordPenalty=-7.38301 Glue=1 LanguageModel=-27.463 PhraseModel_0=4.16583 PhraseModel_1=14.2852 PhraseModel_2=10.4324 PhraseModel_3=8.31228 PhraseModel_4=10.0042 PhraseModel_5=0 PhraseModel_6=1 ||| -80.7433 +6 ||| but that it will not , as the history of racism in america clearly shows . ||| WordPenalty=-6.94871 Glue=3 LanguageModel=-29.6579 PhraseModel_0=3.45482 PhraseModel_1=14.6359 PhraseModel_2=11.3575 PhraseModel_3=7.1068 PhraseModel_4=5.12435 PhraseModel_5=0 PhraseModel_6=0 ||| -81.1791 +6 ||| but that there will not , as is evident from the history of racism in america . ||| WordPenalty=-7.38301 Glue=1 LanguageModel=-27.2814 PhraseModel_0=4.61416 PhraseModel_1=13.9975 PhraseModel_2=9.83682 PhraseModel_3=8.54324 PhraseModel_4=10.4476 PhraseModel_5=0 PhraseModel_6=1 ||| -81.6067 +7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-192.36 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=16.9337 PhraseModel_1=46.7282 PhraseModel_2=30.7137 PhraseModel_3=41.8218 PhraseModel_4=23.2319 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.5833 Glue=7 ||| -538.223 +7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-190.966 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.3513 PhraseModel_1=46.7282 PhraseModel_2=30.3066 PhraseModel_3=42.4355 PhraseModel_4=22.8408 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.311 +7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was as important as income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-190.031 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.2348 PhraseModel_1=46.7282 PhraseModel_2=30.4157 PhraseModel_3=43.1826 PhraseModel_4=22.1641 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-21.7147 Glue=7 ||| -538.563 +7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as the income - if not even more important - to define political zuneigungen and attitudes . ||| LanguageModel=-191.666 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.2348 PhraseModel_1=46.7282 PhraseModel_2=30.4919 PhraseModel_3=42.4907 PhraseModel_4=23.8489 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.5833 Glue=7 ||| -538.592 +7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-191.425 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=16.8923 PhraseModel_1=46.7282 PhraseModel_2=30.7515 PhraseModel_3=42.5689 PhraseModel_4=22.5552 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.643 +7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as income - if not even more important - to define political zuneigungen and attitudes . ||| LanguageModel=-190.272 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.6523 PhraseModel_1=46.7282 PhraseModel_2=30.0847 PhraseModel_3=43.1044 PhraseModel_4=23.4578 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.681 +7 ||| the relations between the races in the united states for decades - and still do today - at the centre of political debate . the went so far that segregation was just as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-195.148 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=16.8352 PhraseModel_1=42.9613 PhraseModel_2=27.1493 PhraseModel_3=39.4459 PhraseModel_4=20.4386 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.866 +7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was as important as income - if not even more important - to define political zuneigungen and attitudes . ||| LanguageModel=-189.336 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.5358 PhraseModel_1=46.7282 PhraseModel_2=30.1939 PhraseModel_3=43.8515 PhraseModel_4=22.7811 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-21.7147 Glue=7 ||| -538.932 +7 ||| the relations between the races in the united states for decades - and still do today - at the centre of political debate . the went so far that segregation was just as important as income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-193.753 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.2528 PhraseModel_1=42.9613 PhraseModel_2=26.7421 PhraseModel_3=40.0596 PhraseModel_4=20.0474 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-21.7147 Glue=7 ||| -538.954 +7 ||| the relations between the races in the united states for decades - and still do today - at the centre of political debate . it went so far as to say that segregation was just as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-193.555 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=18.5925 PhraseModel_1=48.5668 PhraseModel_2=31.0822 PhraseModel_3=39.825 PhraseModel_4=27.6051 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-23.4519 Glue=8 ||| -538.976 +8 ||| the first step is to deal with the race to understand cause and consequence of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-48.8088 PhraseModel_0=10.2462 PhraseModel_1=27.2091 PhraseModel_2=17.5849 PhraseModel_3=29.1579 PhraseModel_4=12.7191 PhraseModel_5=0 PhraseModel_6=1 ||| -184.302 +8 ||| the first step is to deal with the race to understand cause and consequence of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-47.4794 PhraseModel_0=11.4183 PhraseModel_1=27.831 PhraseModel_2=16.98 PhraseModel_3=29.6069 PhraseModel_4=12.2741 PhraseModel_5=0 PhraseModel_6=1 ||| -185.222 +8 ||| the first step is to deal with the race to understand cause and effects of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-49.7357 PhraseModel_0=10.2462 PhraseModel_1=27.2091 PhraseModel_2=17.5849 PhraseModel_3=28.6034 PhraseModel_4=11.5619 PhraseModel_5=0 PhraseModel_6=1 ||| -185.485 +8 ||| the first step is to deal with the race to understand cause and consequences of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-50.2562 PhraseModel_0=10.5379 PhraseModel_1=30.9739 PhraseModel_2=20.9386 PhraseModel_3=28.0205 PhraseModel_4=10.9116 PhraseModel_5=0 PhraseModel_6=0 ||| -185.879 +8 ||| the first step is to deal with the race to understand cause and consequence of racist hostility , even if this means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-47.9671 PhraseModel_0=11.4465 PhraseModel_1=26.85 PhraseModel_2=16.0501 PhraseModel_3=29.6623 PhraseModel_4=12.3598 PhraseModel_5=0 PhraseModel_6=1 ||| -185.949 +8 ||| the first step is to deal with the race to understand cause and effects of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-48.4063 PhraseModel_0=11.4183 PhraseModel_1=27.831 PhraseModel_2=16.98 PhraseModel_3=29.0524 PhraseModel_4=11.1169 PhraseModel_5=0 PhraseModel_6=1 ||| -186.405 +8 ||| the first step is to deal with the race to understand cause and consequences of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-48.9268 PhraseModel_0=11.7099 PhraseModel_1=31.5959 PhraseModel_2=20.3337 PhraseModel_3=28.4695 PhraseModel_4=10.4666 PhraseModel_5=0 PhraseModel_6=0 ||| -186.799 +8 ||| the first step is to deal with the race to understand cause and effects of racist hostility , even if this means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-48.8939 PhraseModel_0=11.4465 PhraseModel_1=26.85 PhraseModel_2=16.0501 PhraseModel_3=29.1078 PhraseModel_4=11.2026 PhraseModel_5=0 PhraseModel_6=1 ||| -187.132 +8 ||| is the first step to deal with the race to understand cause and consequence of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=3 LanguageModel=-50.3885 PhraseModel_0=10.6908 PhraseModel_1=24.5899 PhraseModel_2=14.5662 PhraseModel_3=29.0596 PhraseModel_4=12.2741 PhraseModel_5=0 PhraseModel_6=1 ||| -187.259 +8 ||| the first step is to deal with the race , to understand cause and consequence of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-11.2917 Glue=5 LanguageModel=-51.3999 PhraseModel_0=9.91899 PhraseModel_1=27.2091 PhraseModel_2=17.8675 PhraseModel_3=28.6105 PhraseModel_4=13.0872 PhraseModel_5=0 PhraseModel_6=1 ||| -187.271 +9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-15.2003 Glue=3 LanguageModel=-66.3972 PhraseModel_0=12.7978 PhraseModel_1=27.4385 PhraseModel_2=16.0726 PhraseModel_3=29.7781 PhraseModel_4=19.6318 PhraseModel_5=1 PhraseModel_6=3 ||| -227.555 +9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-15.2003 Glue=3 LanguageModel=-65.7197 PhraseModel_0=13.5479 PhraseModel_1=27.4385 PhraseModel_2=15.3334 PhraseModel_3=29.9236 PhraseModel_4=20.1665 PhraseModel_5=1 PhraseModel_6=3 ||| -227.898 +9 ||| exactly as in the united states a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-67.8363 PhraseModel_0=12.3302 PhraseModel_1=28.2004 PhraseModel_2=17.0331 PhraseModel_3=29.7781 PhraseModel_4=18.5233 PhraseModel_5=1 PhraseModel_6=2 ||| -228.818 +9 ||| exactly as in the united states a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-67.1588 PhraseModel_0=13.0803 PhraseModel_1=28.2004 PhraseModel_2=16.2939 PhraseModel_3=29.9236 PhraseModel_4=19.058 PhraseModel_5=1 PhraseModel_6=2 ||| -229.161 +9 ||| exactly that in the united states have a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-15.2003 Glue=6 LanguageModel=-69.7395 PassThrough=1 PhraseModel_0=12.3803 PhraseModel_1=28.2004 PhraseModel_2=17.073 PhraseModel_3=27.2846 PhraseModel_4=17.4283 PhraseModel_5=1 PhraseModel_6=2 ||| -229.261 +9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally . this research showed that people of different racial trust each other much less . ||| WordPenalty=-15.2003 Glue=5 LanguageModel=-65.4196 PhraseModel_0=12.7913 PhraseModel_1=24.778 PhraseModel_2=13.4249 PhraseModel_3=32.5845 PhraseModel_4=19.6771 PhraseModel_5=1 PhraseModel_6=3 ||| -229.441 +9 ||| exactly that in the united states have a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-15.2003 Glue=6 LanguageModel=-69.062 PassThrough=1 PhraseModel_0=13.1304 PhraseModel_1=28.2004 PhraseModel_2=16.3338 PhraseModel_3=27.4302 PhraseModel_4=17.963 PhraseModel_5=1 PhraseModel_6=2 ||| -229.604 +9 ||| exactly as in the us , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-67.2845 PhraseModel_0=13.0626 PhraseModel_1=28.2004 PhraseModel_2=16.3641 PhraseModel_3=30.0645 PhraseModel_4=17.7441 PhraseModel_5=1 PhraseModel_6=2 ||| -229.932 +9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally . this research showed that people of different race trust each other much less . ||| WordPenalty=-15.2003 Glue=5 LanguageModel=-66.0971 PhraseModel_0=11.8609 PhraseModel_1=27.4385 PhraseModel_2=16.9754 PhraseModel_3=32.4389 PhraseModel_4=19.1424 PhraseModel_5=1 PhraseModel_6=3 ||| -230.238 +9 ||| exactly as in the us , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-66.607 PhraseModel_0=13.8127 PhraseModel_1=28.2004 PhraseModel_2=15.6249 PhraseModel_3=30.2101 PhraseModel_4=18.2788 PhraseModel_5=1 PhraseModel_6=2 ||| -230.274 diff --git a/test/kbest-bleu-oracles/example.refs b/test/kbest-bleu-oracles/example.refs new file mode 100644 index 0000000..632e27b --- /dev/null +++ b/test/kbest-bleu-oracles/example.refs @@ -0,0 +1,10 @@ +europe 's divided racial house +a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge . +the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them . +while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon . +an aging population at home and ever more open borders imply increasing racial fragmentation in european countries . +mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear . +it will not , as america 's racial history clearly shows . +race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes . +the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths . +this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us . diff --git a/test/kbest-bleu-oracles/example.src b/test/kbest-bleu-oracles/example.src new file mode 100644 index 0000000..f5b910d --- /dev/null +++ b/test/kbest-bleu-oracles/example.src @@ -0,0 +1,10 @@ +europas nach rassen geteiltes haus +ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen . +der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln . +während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden . +eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern . +die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden . +das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt . +die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen . +der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken . +genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen . diff --git a/test/kbest_bleu_oracles/debug.kbests b/test/kbest_bleu_oracles/debug.kbests deleted file mode 100644 index 1e9c894..0000000 --- a/test/kbest_bleu_oracles/debug.kbests +++ /dev/null @@ -1,4 +0,0 @@ -0 ||| a b c d ||| x=1 ||| 10 -0 ||| a b d c ||| x=1 ||| 9 -0 ||| a d b c ||| x=1 ||| 8 -0 ||| d a b c ||| x=1 ||| 7 diff --git a/test/kbest_bleu_oracles/debug.refs b/test/kbest_bleu_oracles/debug.refs deleted file mode 100644 index 8e13e46..0000000 --- a/test/kbest_bleu_oracles/debug.refs +++ /dev/null @@ -1 +0,0 @@ -a b c d diff --git a/test/kbest_bleu_oracles/example.kbests b/test/kbest_bleu_oracles/example.kbests deleted file mode 100644 index 1126f1f..0000000 --- a/test/kbest_bleu_oracles/example.kbests +++ /dev/null @@ -1,100 +0,0 @@ -0 ||| europe races house divided ||| WordPenalty=-1.73718 LanguageModel=-18.15 PhraseModel_0=2.2467 PhraseModel_1=4.27323 PhraseModel_2=2.20952 PhraseModel_3=6.01559 PhraseModel_4=1.19831 PhraseModel_5=1 PhraseModel_6=1 ||| -61.4791 -0 ||| europe races divided house ||| WordPenalty=-1.73718 Glue=1 LanguageModel=-18.7337 PhraseModel_0=2.75576 PhraseModel_1=8.10398 PhraseModel_2=5.5382 PhraseModel_3=6.01559 PhraseModel_4=1.19831 PhraseModel_5=0 PhraseModel_6=0 ||| -61.5856 -0 ||| europe after racial house divided ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-21.3699 PhraseModel_0=1.68395 PhraseModel_1=4.27323 PhraseModel_2=2.67025 PhraseModel_3=4.44249 PhraseModel_4=1.87098 PhraseModel_5=1 PhraseModel_6=1 ||| -63.2049 -0 ||| europe after race divided house ||| WordPenalty=-2.17147 Glue=2 LanguageModel=-21.1973 PhraseModel_0=2.47176 PhraseModel_1=8.10398 PhraseModel_2=5.73009 PhraseModel_3=5.07197 PhraseModel_4=2.11131 PhraseModel_5=0 PhraseModel_6=0 ||| -63.4497 -0 ||| europe after races house divided ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-22.0216 PhraseModel_0=1.84876 PhraseModel_1=4.27323 PhraseModel_2=2.51055 PhraseModel_3=3.81707 PhraseModel_4=2.04167 PhraseModel_5=1 PhraseModel_6=1 ||| -63.7649 -0 ||| europe after races divided house ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-22.6053 PhraseModel_0=2.35782 PhraseModel_1=8.10398 PhraseModel_2=5.83923 PhraseModel_3=3.81707 PhraseModel_4=2.04167 PhraseModel_5=0 PhraseModel_6=0 ||| -63.8715 -0 ||| europe after racial divided house ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-22.2498 PhraseModel_0=2.19301 PhraseModel_1=8.10398 PhraseModel_2=5.99893 PhraseModel_3=4.44249 PhraseModel_4=1.87098 PhraseModel_5=0 PhraseModel_6=0 ||| -63.9867 -0 ||| europe following racial house divided ||| WordPenalty=-2.17147 Glue=2 LanguageModel=-21.941 PhraseModel_0=1.60477 PhraseModel_1=4.27323 PhraseModel_2=2.73719 PhraseModel_3=4.67218 PhraseModel_4=2.38101 PhraseModel_5=1 PhraseModel_6=1 ||| -64.7057 -0 ||| divided europe after racial house ||| WordPenalty=-2.17147 Glue=1 LanguageModel=-21.6711 PhraseModel_0=3.23398 PhraseModel_1=8.10398 PhraseModel_2=5.11818 PhraseModel_3=4.44249 PhraseModel_4=1.87098 PhraseModel_5=0 PhraseModel_6=0 ||| -65.0513 -0 ||| europe race divided house ||| WordPenalty=-1.73718 LanguageModel=-19.0747 PhraseModel_0=2.95643 PhraseModel_1=8.10398 PhraseModel_2=5.34994 PhraseModel_3=7.27048 PhraseModel_4=1.26795 PhraseModel_5=0 PhraseModel_6=0 ||| -65.348 -1 ||| a common feature of europe 's extreme right is its racism and the fact that they use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=4 LanguageModel=-36.6093 PhraseModel_0=6.68111 PhraseModel_1=22.5747 PhraseModel_2=16.1531 PhraseModel_3=22.3782 PhraseModel_4=9.65239 PhraseModel_5=1 PhraseModel_6=1 ||| -136.567 -1 ||| a common feature of europe 's extreme right is its racism and the fact that they use the immigration as a political lever . ||| WordPenalty=-10.4231 Glue=5 LanguageModel=-39.0118 PhraseModel_0=5.94865 PhraseModel_1=18.9704 PhraseModel_2=13.3916 PhraseModel_3=22.3782 PhraseModel_4=10.0435 PhraseModel_5=1 PhraseModel_6=1 ||| -137.254 -1 ||| a common feature of europe 's extreme right is its racism and the fact that you use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=5 LanguageModel=-38.0283 PhraseModel_0=6.71292 PhraseModel_1=22.8797 PhraseModel_2=16.4585 PhraseModel_3=22.3803 PhraseModel_4=9.62847 PhraseModel_5=1 PhraseModel_6=1 ||| -140.071 -1 ||| a common feature of europe 's extreme right is its racism , and the fact that they use the immigration as a political lever . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-39.7992 PhraseModel_0=7.42421 PhraseModel_1=21.4489 PhraseModel_2=14.4345 PhraseModel_3=22.3782 PhraseModel_4=11.152 PhraseModel_5=1 PhraseModel_6=1 ||| -142.605 -1 ||| a common feature of europe 's extreme right is its racism , and the fact that you use immigration as a political lever . ||| WordPenalty=-10.4231 Glue=4 LanguageModel=-38.8156 PhraseModel_0=7.75494 PhraseModel_1=22.8797 PhraseModel_2=15.4378 PhraseModel_3=22.3803 PhraseModel_4=10.7369 PhraseModel_5=1 PhraseModel_6=1 ||| -142.999 -1 ||| a common feature of europe 's extreme right is its racism and the fact that you use the immigration as a political lever . ||| WordPenalty=-10.4231 Glue=5 LanguageModel=-40.3141 PhraseModel_0=6.39864 PhraseModel_1=23.0373 PhraseModel_2=16.9021 PhraseModel_3=22.3976 PhraseModel_4=10.0196 PhraseModel_5=1 PhraseModel_6=1 ||| -143.611 -1 ||| one common feature of europe 's extreme right is its racism and the fact that they use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=1 LanguageModel=-37.7197 PhraseModel_0=8.27536 PhraseModel_1=21.7089 PhraseModel_2=13.9878 PhraseModel_3=22.5681 PhraseModel_4=10.7747 PhraseModel_5=1 PhraseModel_6=2 ||| -144.987 -1 ||| one common feature of europe 's extreme right is its racism and the fact that they use the immigration as a political lever . ||| WordPenalty=-10.4231 Glue=1 LanguageModel=-40.1222 PhraseModel_0=7.73842 PhraseModel_1=18.7826 PhraseModel_2=11.6924 PhraseModel_3=22.5681 PhraseModel_4=11.1658 PhraseModel_5=1 PhraseModel_6=2 ||| -146.502 -1 ||| a common feature of europe 's extreme right is its racism , and the fact that you use the immigration as a political lever . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-41.1014 PhraseModel_0=7.44067 PhraseModel_1=23.0373 PhraseModel_2=15.8814 PhraseModel_3=22.3976 PhraseModel_4=11.1281 PhraseModel_5=1 PhraseModel_6=1 ||| -146.539 -1 ||| a shared feature of europe 's extreme right is its racism and the fact that they use immigration as a political lever . ||| WordPenalty=-9.98877 Glue=1 LanguageModel=-40.0778 PhraseModel_0=7.91847 PhraseModel_1=22.5747 PhraseModel_2=14.9464 PhraseModel_3=22.3052 PhraseModel_4=10.5431 PhraseModel_5=1 PhraseModel_6=1 ||| -146.956 -2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.0176 Glue=6 LanguageModel=-100.983 PassThrough=3 PhraseModel_0=15.0383 PhraseModel_1=33.3621 PhraseModel_2=19.8383 PhraseModel_3=32.881 PhraseModel_4=23.2559 PhraseModel_5=0 PhraseModel_6=1 ||| -300.653 -2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of political parties or movements , which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=7 LanguageModel=-100.556 PassThrough=3 PhraseModel_0=16.5071 PhraseModel_1=32.7586 PhraseModel_2=17.7282 PhraseModel_3=33.296 PhraseModel_4=25.589 PhraseModel_5=0 PhraseModel_6=1 ||| -302.029 -2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed a common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.0176 Glue=6 LanguageModel=-99.7968 PassThrough=3 PhraseModel_0=15.033 PhraseModel_1=35.4231 PhraseModel_2=21.6674 PhraseModel_3=33.4947 PhraseModel_4=24.5697 PhraseModel_5=1 PhraseModel_6=1 ||| -302.155 -2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : the rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=6 LanguageModel=-100.68 PassThrough=4 PhraseModel_0=16.6403 PhraseModel_1=35.0625 PhraseModel_2=19.9175 PhraseModel_3=32.7793 PhraseModel_4=24.2261 PhraseModel_5=0 PhraseModel_6=0 ||| -302.466 -2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-22.5833 Glue=6 LanguageModel=-101.623 PassThrough=3 PhraseModel_0=14.5035 PhraseModel_1=33.3891 PhraseModel_2=20.3355 PhraseModel_3=33.2525 PhraseModel_4=22.8878 PhraseModel_5=0 PhraseModel_6=1 ||| -302.743 -2 ||| the lega nord in italy , the vlaams block , the followers of le pen 's national front in france , the netherlands are examples of parties or movements , which have formed the common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-22.5833 Glue=6 LanguageModel=-97.1998 PassThrough=2 PhraseModel_0=17.8853 PhraseModel_1=39.5922 PhraseModel_2=23.2216 PhraseModel_3=33.6892 PhraseModel_4=23.0537 PhraseModel_5=0 PhraseModel_6=1 ||| -302.874 -2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of the immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=6 LanguageModel=-102.542 PassThrough=3 PhraseModel_0=15.8186 PhraseModel_1=35.0751 PhraseModel_2=20.6755 PhraseModel_3=32.881 PhraseModel_4=23.6471 PhraseModel_5=0 PhraseModel_6=0 ||| -303.305 -2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of immigration and to call for a simplified policy to regulate them . ||| WordPenalty=-22.5833 Glue=11 LanguageModel=-102.736 PassThrough=3 PhraseModel_0=13.1928 PhraseModel_1=35.2776 PhraseModel_2=23.3398 PhraseModel_3=33.1527 PhraseModel_4=21.9207 PhraseModel_5=0 PhraseModel_6=1 ||| -303.344 -2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of parties or movements , which have formed the common theme : rejection of immigration and to call for a simplified policy in order to regulate it . ||| WordPenalty=-23.4519 Glue=10 LanguageModel=-104.547 PassThrough=3 PhraseModel_0=13.2351 PhraseModel_1=35.2776 PhraseModel_2=23.3336 PhraseModel_3=32.2759 PhraseModel_4=23.8622 PhraseModel_5=0 PhraseModel_6=1 ||| -303.438 -2 ||| the lega nord in italy , the vlaams block in the netherlands , the followers of le pen 's national front in france , are examples of political parties or movements , which have formed a common theme : rejection of immigration policy and call for a simplified in order to regulate it . ||| WordPenalty=-23.4519 Glue=7 LanguageModel=-99.3692 PassThrough=3 PhraseModel_0=16.5018 PhraseModel_1=34.8196 PhraseModel_2=19.5572 PhraseModel_3=33.9097 PhraseModel_4=26.9028 PhraseModel_5=1 PhraseModel_6=1 ||| -303.531 -3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not to go too soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-15.6346 Glue=4 LanguageModel=-83.9883 PhraseModel_0=10.8504 PhraseModel_1=36.0092 PhraseModel_2=25.6962 PhraseModel_3=18.8196 PhraseModel_4=12.4793 PhraseModel_5=0 PhraseModel_6=0 ||| -236.305 -3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not go too soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-83.8116 PhraseModel_0=10.6743 PhraseModel_1=36.0092 PhraseModel_2=25.8212 PhraseModel_3=18.8196 PhraseModel_4=12.1849 PhraseModel_5=0 PhraseModel_6=0 ||| -236.56 -3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not to go too soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-82.9542 PhraseModel_0=11.3166 PhraseModel_1=35.9808 PhraseModel_2=25.3584 PhraseModel_3=19.1145 PhraseModel_4=12.9314 PhraseModel_5=0 PhraseModel_6=0 ||| -236.57 -3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( sadly not to go too soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-15.6346 Glue=4 LanguageModel=-82.211 PhraseModel_0=11.9448 PhraseModel_1=39.5089 PhraseModel_2=28.0752 PhraseModel_3=18.9139 PhraseModel_4=13.4713 PhraseModel_5=0 PhraseModel_6=0 ||| -236.761 -3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not go too soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-14.766 Glue=4 LanguageModel=-82.7775 PhraseModel_0=11.1405 PhraseModel_1=35.9808 PhraseModel_2=25.4834 PhraseModel_3=19.1145 PhraseModel_4=12.6371 PhraseModel_5=0 PhraseModel_6=0 ||| -236.825 -3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( sadly not to go too soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-81.1769 PhraseModel_0=12.411 PhraseModel_1=39.4805 PhraseModel_2=27.7374 PhraseModel_3=19.2089 PhraseModel_4=13.9234 PhraseModel_5=0 PhraseModel_6=0 ||| -237.026 -3 ||| while individuals like jörg haidar and jean @-@ marie le pen does not , unfortunately , and ( soon ) go , the race will come from the european policy to disappear anytime soon . ||| WordPenalty=-15.2003 Glue=5 LanguageModel=-74.4647 PhraseModel_0=8.03162 PhraseModel_1=32.0087 PhraseModel_2=24.9113 PhraseModel_3=27.2046 PhraseModel_4=19.1921 PhraseModel_5=2 PhraseModel_6=4 ||| -237.241 -3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and go ( unfortunately not soon ) once , will not disappear as soon the race from the european policy . ||| WordPenalty=-14.766 Glue=4 LanguageModel=-80.1276 PhraseModel_0=12.2413 PhraseModel_1=38.9132 PhraseModel_2=27.227 PhraseModel_3=20.0911 PhraseModel_4=10.4871 PhraseModel_5=0 PhraseModel_6=0 ||| -237.267 -3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and go ( unfortunately not soon ) once , will not disappear as soon the race from european politics . ||| WordPenalty=-14.3317 Glue=4 LanguageModel=-79.0935 PhraseModel_0=12.7075 PhraseModel_1=38.8848 PhraseModel_2=26.8892 PhraseModel_3=20.3861 PhraseModel_4=10.9392 PhraseModel_5=0 PhraseModel_6=0 ||| -237.532 -3 ||| while individuals like jörg haidar and jean @-@ marie le pen may come and ( unfortunately not go too soon ) , will not disappear as soon the race from the european policy . ||| WordPenalty=-14.766 Glue=4 LanguageModel=-79.8077 PhraseModel_0=11.0526 PhraseModel_1=33.8577 PhraseModel_2=23.7301 PhraseModel_3=20.8921 PhraseModel_4=10.9702 PhraseModel_5=0 PhraseModel_6=2 ||| -237.654 -4 ||| an aging population and ever more open borders the racist fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=4 LanguageModel=-34.4131 PhraseModel_0=7.06252 PhraseModel_1=19.7388 PhraseModel_2=13.2138 PhraseModel_3=17.7775 PhraseModel_4=5.47301 PhraseModel_5=0 PhraseModel_6=1 ||| -127.294 -4 ||| an aging population and ever more open borders the racist fragmentation in european countries . ||| WordPenalty=-6.51442 Glue=4 LanguageModel=-33.7446 PhraseModel_0=7.05156 PhraseModel_1=19.8444 PhraseModel_2=13.3428 PhraseModel_3=18.5456 PhraseModel_4=5.26208 PhraseModel_5=0 PhraseModel_6=1 ||| -128.424 -4 ||| an aging population and ever more open borders increase racial fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=3 LanguageModel=-35.0385 PhraseModel_0=8.57304 PhraseModel_1=21.0335 PhraseModel_2=12.975 PhraseModel_3=15.9006 PhraseModel_4=8.12696 PhraseModel_5=0 PhraseModel_6=1 ||| -128.599 -4 ||| an aging population and ever more open borders multiply the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=5 LanguageModel=-39.5074 PhraseModel_0=7.53377 PhraseModel_1=20.0813 PhraseModel_2=13.1161 PhraseModel_3=13.3764 PhraseModel_4=7.95875 PhraseModel_5=0 PhraseModel_6=1 ||| -129.817 -4 ||| an aging population and ever more open borders increase the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=6 LanguageModel=-38.8411 PhraseModel_0=6.66847 PhraseModel_1=20.0813 PhraseModel_2=13.8817 PhraseModel_3=15.1212 PhraseModel_4=7.50646 PhraseModel_5=0 PhraseModel_6=1 ||| -129.94 -4 ||| an aging population and ever more open borders reproduce the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=5 LanguageModel=-39.5442 PhraseModel_0=7.70986 PhraseModel_1=20.0813 PhraseModel_2=12.97 PhraseModel_3=13.3002 PhraseModel_4=8.03794 PhraseModel_5=0 PhraseModel_6=1 ||| -130.137 -4 ||| an aging population and ever more open borders multiplying the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=5 LanguageModel=-39.82 PhraseModel_0=7.53377 PhraseModel_1=20.0813 PhraseModel_2=13.1161 PhraseModel_3=13.2318 PhraseModel_4=7.83382 PhraseModel_5=0 PhraseModel_6=1 ||| -130.257 -4 ||| an aging population and ever more open borders the racial fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=4 LanguageModel=-33.155 PhraseModel_0=8.15791 PhraseModel_1=23.1696 PhraseModel_2=15.5542 PhraseModel_3=18.5569 PhraseModel_4=6.35432 PhraseModel_5=0 PhraseModel_6=1 ||| -130.313 -4 ||| an aging population and ever more open borders grows the racist fragmentation in the european countries . ||| WordPenalty=-7.38301 Glue=6 LanguageModel=-39.0359 PhraseModel_0=7.57156 PhraseModel_1=20.0813 PhraseModel_2=13.1283 PhraseModel_3=14.2568 PhraseModel_4=8.43588 PhraseModel_5=0 PhraseModel_6=1 ||| -130.549 -4 ||| an aging population and ever more open borders multiply racist fragmentation in the european countries . ||| WordPenalty=-6.94871 Glue=3 LanguageModel=-38.6378 PhraseModel_0=8.05048 PhraseModel_1=21.0335 PhraseModel_2=13.5162 PhraseModel_3=13.3764 PhraseModel_4=7.69795 PhraseModel_5=0 PhraseModel_6=1 ||| -130.598 -5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.766 Glue=5 LanguageModel=-59.9487 PassThrough=1 PhraseModel_0=11.4712 PhraseModel_1=29.458 PhraseModel_2=19.0438 PhraseModel_3=37.8219 PhraseModel_4=21.1861 PhraseModel_5=0 PhraseModel_6=2 ||| -225.247 -5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.766 Glue=7 LanguageModel=-60.8539 PassThrough=2 PhraseModel_0=11.5126 PhraseModel_1=29.759 PhraseModel_2=19.2199 PhraseModel_3=37.2073 PhraseModel_4=20.0207 PhraseModel_5=0 PhraseModel_6=1 ||| -225.823 -5 ||| the big parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.766 Glue=8 LanguageModel=-61.5768 PassThrough=1 PhraseModel_0=12.0156 PhraseModel_1=33.4151 PhraseModel_2=22.0984 PhraseModel_3=37.22 PhraseModel_4=20.669 PhraseModel_5=0 PhraseModel_6=0 ||| -226.173 -5 ||| the big parties have the right and the centre left is the problem , in which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.766 Glue=5 LanguageModel=-60.6716 PassThrough=1 PhraseModel_0=11.3742 PhraseModel_1=29.458 PhraseModel_2=19.123 PhraseModel_3=37.5989 PhraseModel_4=21.4664 PhraseModel_5=0 PhraseModel_6=2 ||| -226.174 -5 ||| the major parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.3317 Glue=5 LanguageModel=-58.5133 PassThrough=1 PhraseModel_0=11.8665 PhraseModel_1=28.9017 PhraseModel_2=18.1987 PhraseModel_3=39.1001 PhraseModel_4=20.9503 PhraseModel_5=0 PhraseModel_6=2 ||| -226.221 -5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and counter all prospects have hoped that it will soon disappear . ||| WordPenalty=-15.2003 Glue=4 LanguageModel=-63.5417 PassThrough=1 PhraseModel_0=12.3574 PhraseModel_1=29.4845 PhraseModel_2=18.2384 PhraseModel_3=34.0032 PhraseModel_4=21.7025 PhraseModel_5=0 PhraseModel_6=2 ||| -226.609 -5 ||| the major parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.3317 Glue=7 LanguageModel=-59.4184 PassThrough=2 PhraseModel_0=11.9079 PhraseModel_1=29.2027 PhraseModel_2=18.3748 PhraseModel_3=38.4854 PhraseModel_4=19.7848 PhraseModel_5=0 PhraseModel_6=1 ||| -226.796 -5 ||| the major parties have the right and the centre left is the problem , in which they bury our heads in the sand and allen prospects have hoped that it will soon disappear . ||| WordPenalty=-14.766 Glue=5 LanguageModel=-61.4797 PassThrough=2 PhraseModel_0=11.3037 PhraseModel_1=26.9794 PhraseModel_2=16.7321 PhraseModel_3=36.9453 PhraseModel_4=21.0569 PhraseModel_5=0 PhraseModel_6=2 ||| -227.012 -5 ||| the big parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped , it will soon disappear . ||| WordPenalty=-14.3317 Glue=8 LanguageModel=-60.1414 PassThrough=1 PhraseModel_0=12.4109 PhraseModel_1=32.8588 PhraseModel_2=21.2533 PhraseModel_3=38.4981 PhraseModel_4=20.4332 PhraseModel_5=0 PhraseModel_6=0 ||| -227.147 -5 ||| the big parties have the right and the centre left is the problem , which they bury our heads in the sand and all prospects have hoped that it will soon disappear . ||| WordPenalty=-14.3317 Glue=5 LanguageModel=-59.2362 PassThrough=1 PhraseModel_0=11.7696 PhraseModel_1=28.9017 PhraseModel_2=18.2779 PhraseModel_3=38.877 PhraseModel_4=21.2305 PhraseModel_5=0 PhraseModel_6=2 ||| -227.147 -6 ||| but it will not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=1 LanguageModel=-22.9084 PhraseModel_0=4.65824 PhraseModel_1=11.8066 PhraseModel_2=7.53935 PhraseModel_3=10.1842 PhraseModel_4=10.0823 PhraseModel_5=0 PhraseModel_6=1 ||| -74.8416 -6 ||| but that it will not , as is evident from the history of racism in america . ||| WordPenalty=-7.38301 Glue=1 LanguageModel=-25.5326 PhraseModel_0=4.09397 PhraseModel_1=11.8066 PhraseModel_2=8.04227 PhraseModel_3=8.52096 PhraseModel_4=9.91846 PhraseModel_5=0 PhraseModel_6=1 ||| -75.1773 -6 ||| but this will not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=1 LanguageModel=-23.6293 PhraseModel_0=4.59129 PhraseModel_1=11.8066 PhraseModel_2=7.59734 PhraseModel_3=9.74767 PhraseModel_4=9.60367 PhraseModel_5=0 PhraseModel_6=1 ||| -75.5277 -6 ||| but that will not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=1 LanguageModel=-23.682 PhraseModel_0=4.35721 PhraseModel_1=11.8066 PhraseModel_2=7.80819 PhraseModel_3=9.95635 PhraseModel_4=9.51798 PhraseModel_5=0 PhraseModel_6=1 ||| -75.5534 -6 ||| but it is not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=2 LanguageModel=-21.7223 PhraseModel_0=5.33564 PhraseModel_1=14.6359 PhraseModel_2=9.75115 PhraseModel_3=10.8638 PhraseModel_4=10.5328 PhraseModel_5=0 PhraseModel_6=1 ||| -76.6276 -6 ||| but this is not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=2 LanguageModel=-22.3075 PhraseModel_0=5.26869 PhraseModel_1=14.6359 PhraseModel_2=9.80914 PhraseModel_3=10.4272 PhraseModel_4=10.0542 PhraseModel_5=0 PhraseModel_6=1 ||| -77.004 -6 ||| but that is not , as is evident from the history of racism in america . ||| WordPenalty=-6.94871 Glue=2 LanguageModel=-22.4246 PhraseModel_0=5.03461 PhraseModel_1=14.6359 PhraseModel_2=10.02 PhraseModel_3=10.6359 PhraseModel_4=9.9685 PhraseModel_5=0 PhraseModel_6=1 ||| -77.1766 -6 ||| this but it will not , as is evident from the history of racism in america . ||| WordPenalty=-7.38301 Glue=1 LanguageModel=-27.463 PhraseModel_0=4.16583 PhraseModel_1=14.2852 PhraseModel_2=10.4324 PhraseModel_3=8.31228 PhraseModel_4=10.0042 PhraseModel_5=0 PhraseModel_6=1 ||| -80.7433 -6 ||| but that it will not , as the history of racism in america clearly shows . ||| WordPenalty=-6.94871 Glue=3 LanguageModel=-29.6579 PhraseModel_0=3.45482 PhraseModel_1=14.6359 PhraseModel_2=11.3575 PhraseModel_3=7.1068 PhraseModel_4=5.12435 PhraseModel_5=0 PhraseModel_6=0 ||| -81.1791 -6 ||| but that there will not , as is evident from the history of racism in america . ||| WordPenalty=-7.38301 Glue=1 LanguageModel=-27.2814 PhraseModel_0=4.61416 PhraseModel_1=13.9975 PhraseModel_2=9.83682 PhraseModel_3=8.54324 PhraseModel_4=10.4476 PhraseModel_5=0 PhraseModel_6=1 ||| -81.6067 -7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-192.36 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=16.9337 PhraseModel_1=46.7282 PhraseModel_2=30.7137 PhraseModel_3=41.8218 PhraseModel_4=23.2319 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.5833 Glue=7 ||| -538.223 -7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-190.966 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.3513 PhraseModel_1=46.7282 PhraseModel_2=30.3066 PhraseModel_3=42.4355 PhraseModel_4=22.8408 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.311 -7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was as important as income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-190.031 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.2348 PhraseModel_1=46.7282 PhraseModel_2=30.4157 PhraseModel_3=43.1826 PhraseModel_4=22.1641 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-21.7147 Glue=7 ||| -538.563 -7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as the income - if not even more important - to define political zuneigungen and attitudes . ||| LanguageModel=-191.666 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.2348 PhraseModel_1=46.7282 PhraseModel_2=30.4919 PhraseModel_3=42.4907 PhraseModel_4=23.8489 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.5833 Glue=7 ||| -538.592 -7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-191.425 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=16.8923 PhraseModel_1=46.7282 PhraseModel_2=30.7515 PhraseModel_3=42.5689 PhraseModel_4=22.5552 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.643 -7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was just as important as income - if not even more important - to define political zuneigungen and attitudes . ||| LanguageModel=-190.272 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.6523 PhraseModel_1=46.7282 PhraseModel_2=30.0847 PhraseModel_3=43.1044 PhraseModel_4=23.4578 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.681 -7 ||| the relations between the races in the united states for decades - and still do today - at the centre of political debate . the went so far that segregation was just as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-195.148 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=16.8352 PhraseModel_1=42.9613 PhraseModel_2=27.1493 PhraseModel_3=39.4459 PhraseModel_4=20.4386 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-22.149 Glue=7 ||| -538.866 -7 ||| the relations between the races in the united states for decades - and still do today - at the centre of the political debate , which went so far that segregation was as important as income - if not even more important - to define political zuneigungen and attitudes . ||| LanguageModel=-189.336 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.5358 PhraseModel_1=46.7282 PhraseModel_2=30.1939 PhraseModel_3=43.8515 PhraseModel_4=22.7811 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-21.7147 Glue=7 ||| -538.932 -7 ||| the relations between the races in the united states for decades - and still do today - at the centre of political debate . the went so far that segregation was just as important as income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-193.753 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=17.2528 PhraseModel_1=42.9613 PhraseModel_2=26.7421 PhraseModel_3=40.0596 PhraseModel_4=20.0474 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-21.7147 Glue=7 ||| -538.954 -7 ||| the relations between the races in the united states for decades - and still do today - at the centre of political debate . it went so far as to say that segregation was just as important as the income - if not even more important - to determine political zuneigungen and attitudes . ||| LanguageModel=-193.555 LanguageModel_OOV=1 PassThrough=1 PhraseModel_0=18.5925 PhraseModel_1=48.5668 PhraseModel_2=31.0822 PhraseModel_3=39.825 PhraseModel_4=27.6051 PhraseModel_5=0 PhraseModel_6=1 WordPenalty=-23.4519 Glue=8 ||| -538.976 -8 ||| the first step is to deal with the race to understand cause and consequence of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-48.8088 PhraseModel_0=10.2462 PhraseModel_1=27.2091 PhraseModel_2=17.5849 PhraseModel_3=29.1579 PhraseModel_4=12.7191 PhraseModel_5=0 PhraseModel_6=1 ||| -184.302 -8 ||| the first step is to deal with the race to understand cause and consequence of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-47.4794 PhraseModel_0=11.4183 PhraseModel_1=27.831 PhraseModel_2=16.98 PhraseModel_3=29.6069 PhraseModel_4=12.2741 PhraseModel_5=0 PhraseModel_6=1 ||| -185.222 -8 ||| the first step is to deal with the race to understand cause and effects of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-49.7357 PhraseModel_0=10.2462 PhraseModel_1=27.2091 PhraseModel_2=17.5849 PhraseModel_3=28.6034 PhraseModel_4=11.5619 PhraseModel_5=0 PhraseModel_6=1 ||| -185.485 -8 ||| the first step is to deal with the race to understand cause and consequences of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=4 LanguageModel=-50.2562 PhraseModel_0=10.5379 PhraseModel_1=30.9739 PhraseModel_2=20.9386 PhraseModel_3=28.0205 PhraseModel_4=10.9116 PhraseModel_5=0 PhraseModel_6=0 ||| -185.879 -8 ||| the first step is to deal with the race to understand cause and consequence of racist hostility , even if this means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-47.9671 PhraseModel_0=11.4465 PhraseModel_1=26.85 PhraseModel_2=16.0501 PhraseModel_3=29.6623 PhraseModel_4=12.3598 PhraseModel_5=0 PhraseModel_6=1 ||| -185.949 -8 ||| the first step is to deal with the race to understand cause and effects of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-48.4063 PhraseModel_0=11.4183 PhraseModel_1=27.831 PhraseModel_2=16.98 PhraseModel_3=29.0524 PhraseModel_4=11.1169 PhraseModel_5=0 PhraseModel_6=1 ||| -186.405 -8 ||| the first step is to deal with the race to understand cause and consequences of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-48.9268 PhraseModel_0=11.7099 PhraseModel_1=31.5959 PhraseModel_2=20.3337 PhraseModel_3=28.4695 PhraseModel_4=10.4666 PhraseModel_5=0 PhraseModel_6=0 ||| -186.799 -8 ||| the first step is to deal with the race to understand cause and effects of racist hostility , even if this means unpleasant facts . ||| WordPenalty=-10.8574 Glue=2 LanguageModel=-48.8939 PhraseModel_0=11.4465 PhraseModel_1=26.85 PhraseModel_2=16.0501 PhraseModel_3=29.1078 PhraseModel_4=11.2026 PhraseModel_5=0 PhraseModel_6=1 ||| -187.132 -8 ||| is the first step to deal with the race to understand cause and consequence of racist hostility , even if that means unpleasant facts . ||| WordPenalty=-10.8574 Glue=3 LanguageModel=-50.3885 PhraseModel_0=10.6908 PhraseModel_1=24.5899 PhraseModel_2=14.5662 PhraseModel_3=29.0596 PhraseModel_4=12.2741 PhraseModel_5=0 PhraseModel_6=1 ||| -187.259 -8 ||| the first step is to deal with the race , to understand cause and consequence of racist hostility , even when that means unpleasant facts . ||| WordPenalty=-11.2917 Glue=5 LanguageModel=-51.3999 PhraseModel_0=9.91899 PhraseModel_1=27.2091 PhraseModel_2=17.8675 PhraseModel_3=28.6105 PhraseModel_4=13.0872 PhraseModel_5=0 PhraseModel_6=1 ||| -187.271 -9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-15.2003 Glue=3 LanguageModel=-66.3972 PhraseModel_0=12.7978 PhraseModel_1=27.4385 PhraseModel_2=16.0726 PhraseModel_3=29.7781 PhraseModel_4=19.6318 PhraseModel_5=1 PhraseModel_6=3 ||| -227.555 -9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-15.2003 Glue=3 LanguageModel=-65.7197 PhraseModel_0=13.5479 PhraseModel_1=27.4385 PhraseModel_2=15.3334 PhraseModel_3=29.9236 PhraseModel_4=20.1665 PhraseModel_5=1 PhraseModel_6=3 ||| -227.898 -9 ||| exactly as in the united states a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-67.8363 PhraseModel_0=12.3302 PhraseModel_1=28.2004 PhraseModel_2=17.0331 PhraseModel_3=29.7781 PhraseModel_4=18.5233 PhraseModel_5=1 PhraseModel_6=2 ||| -228.818 -9 ||| exactly as in the united states a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-67.1588 PhraseModel_0=13.0803 PhraseModel_1=28.2004 PhraseModel_2=16.2939 PhraseModel_3=29.9236 PhraseModel_4=19.058 PhraseModel_5=1 PhraseModel_6=2 ||| -229.161 -9 ||| exactly that in the united states have a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-15.2003 Glue=6 LanguageModel=-69.7395 PassThrough=1 PhraseModel_0=12.3803 PhraseModel_1=28.2004 PhraseModel_2=17.073 PhraseModel_3=27.2846 PhraseModel_4=17.4283 PhraseModel_5=1 PhraseModel_6=2 ||| -229.261 -9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally . this research showed that people of different racial trust each other much less . ||| WordPenalty=-15.2003 Glue=5 LanguageModel=-65.4196 PhraseModel_0=12.7913 PhraseModel_1=24.778 PhraseModel_2=13.4249 PhraseModel_3=32.5845 PhraseModel_4=19.6771 PhraseModel_5=1 PhraseModel_6=3 ||| -229.441 -9 ||| exactly that in the united states have a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-15.2003 Glue=6 LanguageModel=-69.062 PassThrough=1 PhraseModel_0=13.1304 PhraseModel_1=28.2004 PhraseModel_2=16.3338 PhraseModel_3=27.4302 PhraseModel_4=17.963 PhraseModel_5=1 PhraseModel_6=2 ||| -229.604 -9 ||| exactly as in the us , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different race trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-67.2845 PhraseModel_0=13.0626 PhraseModel_1=28.2004 PhraseModel_2=16.3641 PhraseModel_3=30.0645 PhraseModel_4=17.7441 PhraseModel_5=1 PhraseModel_6=2 ||| -229.932 -9 ||| exactly as in the united states , a large number of research in economics , sociology , psychology and conventionally . this research showed that people of different race trust each other much less . ||| WordPenalty=-15.2003 Glue=5 LanguageModel=-66.0971 PhraseModel_0=11.8609 PhraseModel_1=27.4385 PhraseModel_2=16.9754 PhraseModel_3=32.4389 PhraseModel_4=19.1424 PhraseModel_5=1 PhraseModel_6=3 ||| -230.238 -9 ||| exactly as in the us , a large number of research in economics , sociology , psychology and conventionally done this research showed that people of different racial trust each other much less . ||| WordPenalty=-14.766 Glue=3 LanguageModel=-66.607 PhraseModel_0=13.8127 PhraseModel_1=28.2004 PhraseModel_2=15.6249 PhraseModel_3=30.2101 PhraseModel_4=18.2788 PhraseModel_5=1 PhraseModel_6=2 ||| -230.274 diff --git a/test/kbest_bleu_oracles/example.refs b/test/kbest_bleu_oracles/example.refs deleted file mode 100644 index 632e27b..0000000 --- a/test/kbest_bleu_oracles/example.refs +++ /dev/null @@ -1,10 +0,0 @@ -europe 's divided racial house -a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge . -the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them . -while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon . -an aging population at home and ever more open borders imply increasing racial fragmentation in european countries . -mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear . -it will not , as america 's racial history clearly shows . -race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes . -the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths . -this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us . diff --git a/test/kbest_bleu_oracles/example.src b/test/kbest_bleu_oracles/example.src deleted file mode 100644 index f5b910d..0000000 --- a/test/kbest_bleu_oracles/example.src +++ /dev/null @@ -1,10 +0,0 @@ -europas nach rassen geteiltes haus -ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen . -der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln . -während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden . -eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern . -die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden . -das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt . -die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen . -der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken . -genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen . diff --git a/test/lin-reg/exptected.txt b/test/lin-reg/exptected.txt new file mode 100644 index 0000000..13de1fc --- /dev/null +++ b/test/lin-reg/exptected.txt @@ -0,0 +1,3 @@ +ran for 2527 iterations + R^2=0.858063223720823 +{0=>0.7501625304145768, 1=>0.06388116702419537} diff --git a/test/lin-reg/input.dat b/test/lin-reg/input.dat new file mode 100644 index 0000000..3d93394 --- /dev/null +++ b/test/lin-reg/input.dat @@ -0,0 +1,50 @@ + 2.0658746e+00 + 2.3684087e+00 + 2.5399929e+00 + 2.5420804e+00 + 2.5490790e+00 + 2.7866882e+00 + 2.9116825e+00 + 3.0356270e+00 + 3.1146696e+00 + 3.1582389e+00 + 3.3275944e+00 + 3.3793165e+00 + 3.4122006e+00 + 3.4215823e+00 + 3.5315732e+00 + 3.6393002e+00 + 3.6732537e+00 + 3.9256462e+00 + 4.0498646e+00 + 4.2483348e+00 + 4.3440052e+00 + 4.3826531e+00 + 4.4230602e+00 + 4.6102443e+00 + 4.6881183e+00 + 4.9777333e+00 + 5.0359967e+00 + 5.0684536e+00 + 5.4161491e+00 + 5.4395623e+00 + 5.4563207e+00 + 5.5698458e+00 + 5.6015729e+00 + 5.6877617e+00 + 5.7215602e+00 + 5.8538914e+00 + 6.1978026e+00 + 6.3510941e+00 + 6.4797033e+00 + 6.7383791e+00 + 6.8637686e+00 + 7.0223387e+00 + 7.0782373e+00 + 7.1514232e+00 + 7.4664023e+00 + 7.5973874e+00 + 7.7440717e+00 + 7.7729662e+00 + 7.8264514e+00 + 7.9306356e+00 diff --git a/test/lin-reg/output.dat b/test/lin-reg/output.dat new file mode 100644 index 0000000..1f4f963 --- /dev/null +++ b/test/lin-reg/output.dat @@ -0,0 +1,50 @@ + 7.7918926e-01 + 9.1596757e-01 + 9.0538354e-01 + 9.0566138e-01 + 9.3898890e-01 + 9.6684740e-01 + 9.6436824e-01 + 9.1445939e-01 + 9.3933944e-01 + 9.6074971e-01 + 8.9837094e-01 + 9.1209739e-01 + 9.4238499e-01 + 9.6624578e-01 + 1.0526500e+00 + 1.0143791e+00 + 9.5969426e-01 + 9.6853716e-01 + 1.0766065e+00 + 1.1454978e+00 + 1.0340625e+00 + 1.0070009e+00 + 9.6683648e-01 + 1.0895919e+00 + 1.0634462e+00 + 1.1237239e+00 + 1.0323374e+00 + 1.0874452e+00 + 1.0702988e+00 + 1.1606493e+00 + 1.0778037e+00 + 1.1069758e+00 + 1.0971875e+00 + 1.1648603e+00 + 1.1411796e+00 + 1.0844156e+00 + 1.1252493e+00 + 1.1168341e+00 + 1.1970789e+00 + 1.2069462e+00 + 1.1251046e+00 + 1.1235672e+00 + 1.2132829e+00 + 1.2522652e+00 + 1.2497065e+00 + 1.1799706e+00 + 1.1897299e+00 + 1.3029934e+00 + 1.2601134e+00 + 1.2562267e+00 diff --git a/test/lin_reg/exptected.txt b/test/lin_reg/exptected.txt deleted file mode 100644 index 13de1fc..0000000 --- a/test/lin_reg/exptected.txt +++ /dev/null @@ -1,3 +0,0 @@ -ran for 2527 iterations - R^2=0.858063223720823 -{0=>0.7501625304145768, 1=>0.06388116702419537} diff --git a/test/lin_reg/input.dat b/test/lin_reg/input.dat deleted file mode 100644 index 3d93394..0000000 --- a/test/lin_reg/input.dat +++ /dev/null @@ -1,50 +0,0 @@ - 2.0658746e+00 - 2.3684087e+00 - 2.5399929e+00 - 2.5420804e+00 - 2.5490790e+00 - 2.7866882e+00 - 2.9116825e+00 - 3.0356270e+00 - 3.1146696e+00 - 3.1582389e+00 - 3.3275944e+00 - 3.3793165e+00 - 3.4122006e+00 - 3.4215823e+00 - 3.5315732e+00 - 3.6393002e+00 - 3.6732537e+00 - 3.9256462e+00 - 4.0498646e+00 - 4.2483348e+00 - 4.3440052e+00 - 4.3826531e+00 - 4.4230602e+00 - 4.6102443e+00 - 4.6881183e+00 - 4.9777333e+00 - 5.0359967e+00 - 5.0684536e+00 - 5.4161491e+00 - 5.4395623e+00 - 5.4563207e+00 - 5.5698458e+00 - 5.6015729e+00 - 5.6877617e+00 - 5.7215602e+00 - 5.8538914e+00 - 6.1978026e+00 - 6.3510941e+00 - 6.4797033e+00 - 6.7383791e+00 - 6.8637686e+00 - 7.0223387e+00 - 7.0782373e+00 - 7.1514232e+00 - 7.4664023e+00 - 7.5973874e+00 - 7.7440717e+00 - 7.7729662e+00 - 7.8264514e+00 - 7.9306356e+00 diff --git a/test/lin_reg/output.dat b/test/lin_reg/output.dat deleted file mode 100644 index 1f4f963..0000000 --- a/test/lin_reg/output.dat +++ /dev/null @@ -1,50 +0,0 @@ - 7.7918926e-01 - 9.1596757e-01 - 9.0538354e-01 - 9.0566138e-01 - 9.3898890e-01 - 9.6684740e-01 - 9.6436824e-01 - 9.1445939e-01 - 9.3933944e-01 - 9.6074971e-01 - 8.9837094e-01 - 9.1209739e-01 - 9.4238499e-01 - 9.6624578e-01 - 1.0526500e+00 - 1.0143791e+00 - 9.5969426e-01 - 9.6853716e-01 - 1.0766065e+00 - 1.1454978e+00 - 1.0340625e+00 - 1.0070009e+00 - 9.6683648e-01 - 1.0895919e+00 - 1.0634462e+00 - 1.1237239e+00 - 1.0323374e+00 - 1.0874452e+00 - 1.0702988e+00 - 1.1606493e+00 - 1.0778037e+00 - 1.1069758e+00 - 1.0971875e+00 - 1.1648603e+00 - 1.1411796e+00 - 1.0844156e+00 - 1.1252493e+00 - 1.1168341e+00 - 1.1970789e+00 - 1.2069462e+00 - 1.1251046e+00 - 1.1235672e+00 - 1.2132829e+00 - 1.2522652e+00 - 1.2497065e+00 - 1.1799706e+00 - 1.1897299e+00 - 1.3029934e+00 - 1.2601134e+00 - 1.2562267e+00 diff --git a/test/log-reg/expected.txt b/test/log-reg/expected.txt new file mode 100644 index 0000000..46a03ef --- /dev/null +++ b/test/log-reg/expected.txt @@ -0,0 +1,2 @@ +ran for 15 iterations +Vector[-16.378743410287445, 0.1483407737248737, 0.1589084517934473] diff --git a/test/log-reg/input.dat b/test/log-reg/input.dat new file mode 100644 index 0000000..eed0ab1 --- /dev/null +++ b/test/log-reg/input.dat @@ -0,0 +1,80 @@ + 5.5500000e+01 6.9500000e+01 + 4.1000000e+01 8.1500000e+01 + 5.3500000e+01 8.6000000e+01 + 4.6000000e+01 8.4000000e+01 + 4.1000000e+01 7.3500000e+01 + 5.1500000e+01 6.9000000e+01 + 5.1000000e+01 6.2500000e+01 + 4.2000000e+01 7.5000000e+01 + 5.3500000e+01 8.3000000e+01 + 5.7500000e+01 7.1000000e+01 + 4.2500000e+01 7.2500000e+01 + 4.1000000e+01 8.0000000e+01 + 4.6000000e+01 8.2000000e+01 + 4.6000000e+01 6.0500000e+01 + 4.9500000e+01 7.6000000e+01 + 4.1000000e+01 7.6000000e+01 + 4.8500000e+01 7.2500000e+01 + 5.1500000e+01 8.2500000e+01 + 4.4500000e+01 7.0500000e+01 + 4.4000000e+01 6.6000000e+01 + 3.3000000e+01 7.6500000e+01 + 3.3500000e+01 7.8500000e+01 + 3.1500000e+01 7.2000000e+01 + 3.3000000e+01 8.1500000e+01 + 4.2000000e+01 5.9500000e+01 + 3.0000000e+01 6.4000000e+01 + 6.1000000e+01 4.5000000e+01 + 4.9000000e+01 7.9000000e+01 + 2.6500000e+01 6.4500000e+01 + 3.4000000e+01 7.1500000e+01 + 4.2000000e+01 8.3500000e+01 + 2.9500000e+01 7.4500000e+01 + 3.9500000e+01 7.0000000e+01 + 5.1500000e+01 6.6000000e+01 + 4.1500000e+01 7.1500000e+01 + 4.2500000e+01 7.9500000e+01 + 3.5000000e+01 5.9500000e+01 + 3.8500000e+01 7.3500000e+01 + 3.2000000e+01 8.1500000e+01 + 4.6000000e+01 6.0500000e+01 + 3.6500000e+01 5.3000000e+01 + 3.6500000e+01 5.3500000e+01 + 2.4000000e+01 6.0500000e+01 + 1.9000000e+01 5.7500000e+01 + 3.4500000e+01 6.0000000e+01 + 3.7500000e+01 6.4500000e+01 + 3.5500000e+01 5.1000000e+01 + 3.7000000e+01 5.0500000e+01 + 2.1500000e+01 4.2000000e+01 + 3.5500000e+01 5.8500000e+01 + 2.6500000e+01 6.8500000e+01 + 2.6500000e+01 5.5500000e+01 + 1.8500000e+01 6.7000000e+01 + 4.0000000e+01 6.7000000e+01 + 3.2500000e+01 7.1500000e+01 + 3.9000000e+01 7.1500000e+01 + 4.3000000e+01 5.5500000e+01 + 2.2000000e+01 5.4000000e+01 + 3.6000000e+01 6.2500000e+01 + 3.1000000e+01 5.5500000e+01 + 3.8500000e+01 7.6000000e+01 + 4.0000000e+01 7.5000000e+01 + 3.7500000e+01 6.3000000e+01 + 2.4500000e+01 5.8000000e+01 + 3.0000000e+01 6.7000000e+01 + 3.3000000e+01 5.6000000e+01 + 5.6500000e+01 6.1000000e+01 + 4.1000000e+01 5.7000000e+01 + 4.9500000e+01 6.3000000e+01 + 3.4500000e+01 7.2500000e+01 + 3.2500000e+01 6.9000000e+01 + 3.6000000e+01 7.3000000e+01 + 2.7000000e+01 5.3500000e+01 + 4.1000000e+01 6.3500000e+01 + 2.9500000e+01 5.2500000e+01 + 2.0000000e+01 6.5500000e+01 + 3.8000000e+01 6.5000000e+01 + 1.8500000e+01 7.4500000e+01 + 1.6000000e+01 7.2500000e+01 + 3.3500000e+01 6.8000000e+01 diff --git a/test/log-reg/output.dat b/test/log-reg/output.dat new file mode 100644 index 0000000..51283c0 --- /dev/null +++ b/test/log-reg/output.dat @@ -0,0 +1,80 @@ + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 diff --git a/test/log_reg/expected.txt b/test/log_reg/expected.txt deleted file mode 100644 index 46a03ef..0000000 --- a/test/log_reg/expected.txt +++ /dev/null @@ -1,2 +0,0 @@ -ran for 15 iterations -Vector[-16.378743410287445, 0.1483407737248737, 0.1589084517934473] diff --git a/test/log_reg/input.dat b/test/log_reg/input.dat deleted file mode 100644 index eed0ab1..0000000 --- a/test/log_reg/input.dat +++ /dev/null @@ -1,80 +0,0 @@ - 5.5500000e+01 6.9500000e+01 - 4.1000000e+01 8.1500000e+01 - 5.3500000e+01 8.6000000e+01 - 4.6000000e+01 8.4000000e+01 - 4.1000000e+01 7.3500000e+01 - 5.1500000e+01 6.9000000e+01 - 5.1000000e+01 6.2500000e+01 - 4.2000000e+01 7.5000000e+01 - 5.3500000e+01 8.3000000e+01 - 5.7500000e+01 7.1000000e+01 - 4.2500000e+01 7.2500000e+01 - 4.1000000e+01 8.0000000e+01 - 4.6000000e+01 8.2000000e+01 - 4.6000000e+01 6.0500000e+01 - 4.9500000e+01 7.6000000e+01 - 4.1000000e+01 7.6000000e+01 - 4.8500000e+01 7.2500000e+01 - 5.1500000e+01 8.2500000e+01 - 4.4500000e+01 7.0500000e+01 - 4.4000000e+01 6.6000000e+01 - 3.3000000e+01 7.6500000e+01 - 3.3500000e+01 7.8500000e+01 - 3.1500000e+01 7.2000000e+01 - 3.3000000e+01 8.1500000e+01 - 4.2000000e+01 5.9500000e+01 - 3.0000000e+01 6.4000000e+01 - 6.1000000e+01 4.5000000e+01 - 4.9000000e+01 7.9000000e+01 - 2.6500000e+01 6.4500000e+01 - 3.4000000e+01 7.1500000e+01 - 4.2000000e+01 8.3500000e+01 - 2.9500000e+01 7.4500000e+01 - 3.9500000e+01 7.0000000e+01 - 5.1500000e+01 6.6000000e+01 - 4.1500000e+01 7.1500000e+01 - 4.2500000e+01 7.9500000e+01 - 3.5000000e+01 5.9500000e+01 - 3.8500000e+01 7.3500000e+01 - 3.2000000e+01 8.1500000e+01 - 4.6000000e+01 6.0500000e+01 - 3.6500000e+01 5.3000000e+01 - 3.6500000e+01 5.3500000e+01 - 2.4000000e+01 6.0500000e+01 - 1.9000000e+01 5.7500000e+01 - 3.4500000e+01 6.0000000e+01 - 3.7500000e+01 6.4500000e+01 - 3.5500000e+01 5.1000000e+01 - 3.7000000e+01 5.0500000e+01 - 2.1500000e+01 4.2000000e+01 - 3.5500000e+01 5.8500000e+01 - 2.6500000e+01 6.8500000e+01 - 2.6500000e+01 5.5500000e+01 - 1.8500000e+01 6.7000000e+01 - 4.0000000e+01 6.7000000e+01 - 3.2500000e+01 7.1500000e+01 - 3.9000000e+01 7.1500000e+01 - 4.3000000e+01 5.5500000e+01 - 2.2000000e+01 5.4000000e+01 - 3.6000000e+01 6.2500000e+01 - 3.1000000e+01 5.5500000e+01 - 3.8500000e+01 7.6000000e+01 - 4.0000000e+01 7.5000000e+01 - 3.7500000e+01 6.3000000e+01 - 2.4500000e+01 5.8000000e+01 - 3.0000000e+01 6.7000000e+01 - 3.3000000e+01 5.6000000e+01 - 5.6500000e+01 6.1000000e+01 - 4.1000000e+01 5.7000000e+01 - 4.9500000e+01 6.3000000e+01 - 3.4500000e+01 7.2500000e+01 - 3.2500000e+01 6.9000000e+01 - 3.6000000e+01 7.3000000e+01 - 2.7000000e+01 5.3500000e+01 - 4.1000000e+01 6.3500000e+01 - 2.9500000e+01 5.2500000e+01 - 2.0000000e+01 6.5500000e+01 - 3.8000000e+01 6.5000000e+01 - 1.8500000e+01 7.4500000e+01 - 1.6000000e+01 7.2500000e+01 - 3.3500000e+01 6.8000000e+01 diff --git a/test/log_reg/output.dat b/test/log_reg/output.dat deleted file mode 100644 index 51283c0..0000000 --- a/test/log_reg/output.dat +++ /dev/null @@ -1,80 +0,0 @@ - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 1.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 - 0.0000000e+00 diff --git a/to-ascii b/to-ascii new file mode 100755 index 0000000..10fd1c2 --- /dev/null +++ b/to-ascii @@ -0,0 +1,12 @@ +#!/usr/bin/env ruby + +while line = STDIN.gets + encoding_options = { + :invalid => :replace, + :undef => :replace, + :replace => '?', + :universal_newline => true + } + puts line.encode 'ASCII', encoding_options +end + diff --git a/to_ascii b/to_ascii deleted file mode 100755 index 10fd1c2..0000000 --- a/to_ascii +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env ruby - -while line = STDIN.gets - encoding_options = { - :invalid => :replace, - :undef => :replace, - :replace => '?', - :universal_newline => true - } - puts line.encode 'ASCII', encoding_options -end - diff --git a/toks-per-line b/toks-per-line new file mode 100755 index 0000000..012caac --- /dev/null +++ b/toks-per-line @@ -0,0 +1,12 @@ +#!/usr/bin/env ruby + +uniq = false +uniq = true if ARGV[0] + +while line = STDIN.gets + a = line.strip.split + a.uniq! if uniq + a.sort! + puts a.join " " +end + diff --git a/toks_per_line b/toks_per_line deleted file mode 100755 index 012caac..0000000 --- a/toks_per_line +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env ruby - -uniq = false -uniq = true if ARGV[0] - -while line = STDIN.gets - a = line.strip.split - a.uniq! if uniq - a.sort! - puts a.join " " -end - diff --git a/train-test-split b/train-test-split new file mode 100755 index 0000000..4d8153a --- /dev/null +++ b/train-test-split @@ -0,0 +1,50 @@ +#!/usr/bin/env ruby + +require 'zipf' +require 'trollop' + +conf = Trollop::options do + opt :foreign, "foreign file", :type => :string, :required => true + opt :english, "english file", :type => :string, :required => true + opt :size, "one size", :type => :int, :required => true + opt :repeat, "number of repetitions", :type => :int, :default => 1 + opt :prefix, "prefix for output files", :type => :string +end +fn = conf[:foreign] +fn_ext = fn.split('.').last +f = ReadFile.readlines fn +en = conf[:english] +en_ext = en.split('.').last +e = ReadFile.readlines en +size = conf[:size] +nlines_f = `wc -l #{fn}`.split()[0].to_i +nlines_e = `wc -l #{en}`.split()[0].to_i +if nlines_f != nlines_e + STDERR.write "Unbalanced files (#{nlines_f} vs. #{nlines_e}), exiting!\n" + exit 1 +end + +prefix = conf[:prefix] +a = (0..nlines_e-1).to_a +i = 0 +conf[:repeat].times { + b = a.sample(size) + ax = a.reject{|j| b.include? j} + `mkdir split_#{i}` + new_f = WriteFile.new "split_#{i}/#{prefix}.train.#{i}.#{fn_ext}" + new_e = WriteFile.new "split_#{i}/#{prefix}.train.#{i}.#{en_ext}" + ax.each { |j| + new_f.write f[j] + new_e.write e[j] + } + new_f.close; new_e.close + new_f = WriteFile.new "split_#{i}/#{prefix}.test.#{i}.#{fn_ext}" + new_e = WriteFile.new "split_#{i}/#{prefix}.test.#{i}.#{en_ext}" + b.each { |j| + new_f.write f[j] + new_e.write e[j] + } + new_f.close; new_e.close + i += 1 +} + diff --git a/train_test_split b/train_test_split deleted file mode 100755 index 4d8153a..0000000 --- a/train_test_split +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' -require 'trollop' - -conf = Trollop::options do - opt :foreign, "foreign file", :type => :string, :required => true - opt :english, "english file", :type => :string, :required => true - opt :size, "one size", :type => :int, :required => true - opt :repeat, "number of repetitions", :type => :int, :default => 1 - opt :prefix, "prefix for output files", :type => :string -end -fn = conf[:foreign] -fn_ext = fn.split('.').last -f = ReadFile.readlines fn -en = conf[:english] -en_ext = en.split('.').last -e = ReadFile.readlines en -size = conf[:size] -nlines_f = `wc -l #{fn}`.split()[0].to_i -nlines_e = `wc -l #{en}`.split()[0].to_i -if nlines_f != nlines_e - STDERR.write "Unbalanced files (#{nlines_f} vs. #{nlines_e}), exiting!\n" - exit 1 -end - -prefix = conf[:prefix] -a = (0..nlines_e-1).to_a -i = 0 -conf[:repeat].times { - b = a.sample(size) - ax = a.reject{|j| b.include? j} - `mkdir split_#{i}` - new_f = WriteFile.new "split_#{i}/#{prefix}.train.#{i}.#{fn_ext}" - new_e = WriteFile.new "split_#{i}/#{prefix}.train.#{i}.#{en_ext}" - ax.each { |j| - new_f.write f[j] - new_e.write e[j] - } - new_f.close; new_e.close - new_f = WriteFile.new "split_#{i}/#{prefix}.test.#{i}.#{fn_ext}" - new_e = WriteFile.new "split_#{i}/#{prefix}.test.#{i}.#{en_ext}" - b.each { |j| - new_f.write f[j] - new_e.write e[j] - } - new_f.close; new_e.close - i += 1 -} - -- cgit v1.2.3