From 93075eaf07540ee5c51a17371e3ddc1b680adda6 Mon Sep 17 00:00:00 2001 From: Patrick Simianer
#!/usr/bin/env ruby

# rr -- reads text segments (one per line) from standard input, groups them
# into consecutive windows of roughly WINDOW_TOKENS whitespace-separated
# tokens, and prints one n-gram count table per window.

require 'zipf'

# A window is closed as soon as it holds at least this many tokens.
WINDOW_TOKENS = 1000

# Second argument handed to zipf's `ngrams`.
# NOTE(review): presumably the (maximum) n-gram order -- confirm against zipf.
NGRAM_ORDER = 4

# Splits +lines+ into consecutive windows of stripped segments.
#
# Every line ends up in exactly one window. A window is closed once its
# running token count reaches +limit+; the trailing, partially filled window
# is kept as well so that no input is silently dropped.
#
# @param lines [Enumerable<String>] raw input lines
# @param limit [Integer] token count at which a window is closed
# @return [Array<Array<String>>] windows of stripped segments, in input order
def token_windows(lines, limit = WINDOW_TOKENS)
  windows = []
  current = []
  tokens = 0

  lines.each do |line|
    segment = line.strip
    current << segment
    tokens += segment.split.size
    next if tokens < limit

    # Threshold reached: close this window and start counting afresh.
    windows << current
    current = []
    tokens = 0
  end

  # Flush whatever is left over after the last full window.
  windows << current unless current.empty?
  windows
end

token_windows($stdin.each_line).each do |window|
  counts = Hash.new(0)
  window.each do |segment|
    # `ngrams` is provided by the zipf gem and yields each n-gram to the block.
    ngrams(segment, NGRAM_ORDER) { |ngram| counts[ngram] += 1 }
  end
  puts counts
end