From f91c57877a60d69c8e779101e1c245656ee6bc8f Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Fri, 10 Nov 2017 22:28:48 +0100
Subject: rr: fix

---
Review note: the added `ng_by_n` initialisation below builds one Hash per
n-gram order with Array.new(4) { {} }. Multiplying a one-element array that
holds a Hash literal by 4 yields four references to the SAME Hash, which
would merge the counts of all n-gram orders into a single table. Because
this line differs from the recorded commit, the post-image blob id on the
`index` line no longer matches the resulting file.

 rr | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/rr b/rr
index 30bf2d3..2faec7b 100755
--- a/rr
+++ b/rr
@@ -9,20 +9,37 @@ while line = STDIN.gets
   if cur_sz >= 1000
     windows << cur
     cur = []
-  else
-    cur << line.strip
-    cur_size += cur[-1].split.size
+    cur_sz = 0
   end
+  cur << line.strip
+  cur_sz += cur.last.split.size
 end
 
+enums = [0.0]*4
+denoms = [0.0]*4
 windows.each { |w|
-  h = {}
-  h.default = 0
+  ng_by_n = Array.new(4) { {} } # one independent count table per n-gram order (index = order-1)
   w.each { |seg|
     ngrams(seg, 4) { |ng|
-      h[ng] += 1
+      if ng_by_n[ng.size-1].has_key? ng
+        ng_by_n[ng.size-1][ng] += 1
+      else
+        ng_by_n[ng.size-1][ng] = 1
+      end
     }
   }
-  puts h
+  ng_by_n.each_with_index { |ng,j|
+    puts ng.to_s
+    singletons = ng.reject { |k,v| v > 1 }.size
+    enums[j] += ng.size - singletons
+    denoms[j] += ng.size.to_f
+  }
+}
+
+rr = 1.0
+enums.each_with_index { |i,j|
+  rr *= i/denoms[j]
 }
+puts ((rr**0.25)*100).round 2
+
-- 
cgit v1.2.3