#!/usr/bin/env ruby
#
# Reads rules from standard input via read_rules_from_file1 (loaded from
# util.rb) and, for every rule yielded, prints one line to standard output
# per element of the rule's `fe_word_pairs`:
#
#   <word pair>\t<rule id> ||| <rule>
#
# After each rule a progress line is written to standard error.
#
# Provenance: recovered from a single-line rendering of the commit that
# created overlapping_rules/word_pair_keys.rb (26c490f, "init", 2014-06-14).

require_relative './util.rb'

# Denominator for the progress percentage below; it is not used to limit
# or split the input in this script.
# NOTE(review): the original line ended with a commented-out "/23",
# presumably a per-shard divisor that was switched off -- confirm.
approx_lines_per_shard = 12_285_856 # /23

# Flush every output line immediately instead of buffering.
STDOUT.sync = true

rules_seen = 0
read_rules_from_file1(STDIN, 'stdin') do |rule|
  rules_seen += 1
  percent_done = (rules_seen * 100.0 / approx_lines_per_shard).round(2)
  # NOTE(review): the "reporter:status:" prefix looks like the Hadoop
  # Streaming status convention -- confirm against the job setup.
  STDERR.write "reporter:status:word pair key ##{rules_seen} #{percent_done}%\n"
  rule.fe_word_pairs.each do |pair|
    puts "#{pair}\t#{rule.id} ||| #{rule}"
  end
end