summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2017-11-10 16:39:41 +0100
committerPatrick Simianer <p@simianer.de>2017-11-10 16:39:41 +0100
commit93075eaf07540ee5c51a17371e3ddc1b680adda6 (patch)
tree39598121e16b5f2ea00c17eb305c973522408eb9
parent8840c569c862e10c79f30015875bae1e03ae38b7 (diff)
rr
-rw-r--r--rr28
1 files changed, 28 insertions, 0 deletions
diff --git a/rr b/rr
new file mode 100644
index 0000000..30bf2d3
--- /dev/null
+++ b/rr
@@ -0,0 +1,28 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
+# Groups STDIN lines into windows of at least 1000 whitespace-separated
+# tokens, then prints one hash of n-gram counts per window.
+windows = []
+cur = []
+cur_sz = 0
+STDIN.each_line do |line|
+  cur << line.strip
+  cur_sz += cur.last.split.size
+  next if cur_sz < 1000
+  windows << cur # window is full: store it, then reset buffer and counter
+  cur = []
+  cur_sz = 0
+end
+windows << cur unless cur.empty? # keep the trailing partial window
+
+windows.each do |w|
+  h = Hash.new(0)
+  w.each do |seg|
+    # NOTE(review): ngrams comes from zipf; assumed to yield n-grams of seg.
+    ngrams(seg, 4) { |ng| h[ng] += 1 }
+  end
+  puts h
+end
+