diff options
author | Patrick Simianer <p@simianer.de> | 2017-11-10 16:39:41 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2017-11-10 16:39:41 +0100 |
commit | 93075eaf07540ee5c51a17371e3ddc1b680adda6 (patch) | |
tree | 39598121e16b5f2ea00c17eb305c973522408eb9 | |
parent | 8840c569c862e10c79f30015875bae1e03ae38b7 (diff) |
rr
-rw-r--r-- | rr | 28 |
1 files changed, 28 insertions, 0 deletions
#!/usr/bin/env ruby
# frozen_string_literal: true

# Reads segments (one per line) from standard input, groups consecutive
# segments into windows of roughly WINDOW_TOKENS whitespace-separated tokens,
# and prints one n-gram count table per window.
#
# `ngrams` is provided by the `zipf` gem; it is called with a segment string
# and an order and yields once per n-gram.
# NOTE(review): the exact shape of the yielded n-gram (array of tokens vs.
# string) is defined by zipf -- confirm against the gem if the output format
# matters downstream.

require 'zipf'

# A window is closed as soon as it holds at least this many tokens.
WINDOW_TOKENS = 1000
# Order passed to `ngrams`.
NGRAM_ORDER = 4

# Splits the lines of +io+ into windows of stripped segments.
#
# A window is closed once its running token count (whitespace-separated)
# reaches +min_tokens+. The segment that crosses the threshold stays in the
# window it completes, so no input line is ever dropped. A trailing window
# that never reaches the threshold is still returned, so the end of the input
# is not silently lost.
#
# @param io [#each_line] input stream
# @param min_tokens [Integer] token count at which a window is closed
# @return [Array<Array<String>>] windows of stripped segments, in input order
def read_windows(io, min_tokens = WINDOW_TOKENS)
  windows = []
  current = []
  token_count = 0

  io.each_line do |line|
    segment = line.strip
    current << segment
    token_count += segment.split.size
    next if token_count < min_tokens

    windows << current
    current = []
    # Reset the counter together with the buffer; otherwise every following
    # line would immediately close an (empty) window.
    token_count = 0
  end

  windows << current unless current.empty?
  windows
end

# Counts the n-grams yielded by `ngrams` over all segments of one window.
#
# @param segments [Array<String>] segments belonging to one window
# @param order [Integer] order passed through to `ngrams`
# @return [Hash] n-gram => number of occurrences (missing keys default to 0)
def count_ngrams(segments, order = NGRAM_ORDER)
  counts = Hash.new(0)
  segments.each do |segment|
    ngrams(segment, order) { |ngram| counts[ngram] += 1 }
  end
  counts
end

read_windows($stdin).each { |window| puts count_ngrams(window) }