summary | refs | log | tree | commit | diff
path: root/overlapping_rules/word_pair_keys.rb
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-06-14 16:46:27 +0200
committer Patrick Simianer <p@simianer.de> 2014-06-14 16:46:27 +0200
commit 26c490f404731d053a6205719b6246502c07b449 (patch)
tree 3aa721098f1251dfbf2249ecd2736434c13b1d48 /overlapping_rules/word_pair_keys.rb
init
Diffstat (limited to 'overlapping_rules/word_pair_keys.rb')
-rwxr-xr-x overlapping_rules/word_pair_keys.rb 19
1 files changed, 19 insertions, 0 deletions
diff --git a/overlapping_rules/word_pair_keys.rb b/overlapping_rules/word_pair_keys.rb
new file mode 100755
index 0000000..8b54441
--- /dev/null
+++ b/overlapping_rules/word_pair_keys.rb
@@ -0,0 +1,19 @@
+#!/usr/bin/env ruby
+
+require_relative './util.rb'
+
+# Streams rules from STDIN and prints one "<word pair>\t<rule id> ||| <rule>" line per word pair.
+fn = 'newstest2008-grammar+index' # NOTE(review): assigned but not referenced again in this script
+approx_lines_per_shard = 12285856#/23  (the "/23" is commented out, so the full count is the denominator below)
+STDOUT.sync = true # write output immediately instead of buffering it
+# i counts the rules yielded so far; it drives the progress percentage.
+i = 0
+read_rules_from_file1(STDIN, 'stdin') { |r| # helper is not defined here; presumably from util.rb -- yields one rule object per call
+ i += 1
+ shard_percentage = ((i*100.0)/approx_lines_per_shard).round 2 # percent of the approximate line count, rounded to 2 decimals
+ STDERR.write "reporter:status:word pair key ##{i} #{shard_percentage}%\n" # progress goes to STDERR; NOTE(review): "reporter:status:" looks like the Hadoop Streaming status convention -- confirm
+ r.fe_word_pairs.each { |p| # one output line per word pair of this rule
+ puts "#{p}\t#{r.id} ||| #{r.to_s}" # word pair, TAB, then "<id> ||| <rule as string>"
+ }
+}
+