diff options
author | Patrick Simianer <p@simianer.de> | 2017-11-11 12:04:57 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2017-11-11 12:04:57 +0100 |
commit | 4bf6ab567b2358122139130dc02932048a2882e8 (patch) | |
tree | 41df47bf583652f7346338bb91f7a7a34272a73b /rr | |
parent | c9c9f14ee768be723013ad850473541fabfdbe13 (diff) |
repetition rate
Diffstat (limited to 'rr')
-rwxr-xr-x | rr | 44 |
1 files changed, 0 insertions, 44 deletions
#!/usr/bin/env ruby

# Reads whitespace-tokenized segments from STDIN (one per line), groups them
# into consecutive windows of at least `window_size` tokens, and prints a
# repetition-rate score: for every n-gram order 1..max_order the share of
# distinct n-grams that occur more than once is accumulated over all windows,
# and the geometric mean of those per-order shares is printed as a percentage
# rounded to two decimals.
#
# If no window reaches `window_size` tokens, all sums stay at zero and the
# ratios are undefined (0.0/0.0).

require 'zipf'

window_size = 1000 # a window is complete once it holds this many tokens
max_order   = 4    # highest n-gram order taken into account

windows = []
cur = []
cur_sz = 0
while (line = STDIN.gets)
  if cur_sz >= window_size
    windows << cur
    cur = []
    cur_sz = 0
  end
  cur << line.strip
  cur_sz += cur.last.split.size
end
# The loop only closes a window when the *next* line arrives, so a window that
# became complete on the very last line has to be flushed here. Incomplete
# trailing windows are still dropped.
windows << cur if cur_sz >= window_size

enums  = [0.0] * max_order # per order: number of distinct n-grams seen more than once
denoms = [0.0] * max_order # per order: number of distinct n-grams
windows.each do |w|
  # One separate count table per order. `[{}] * 4` would put the *same* Hash
  # object into every slot, merging all orders into a single table.
  ng_by_n = Array.new(max_order) { Hash.new(0) }
  w.each do |seg|
    # NOTE(review): `ngrams` comes from the zipf gem; presumably it yields every
    # n-gram of length 1..max_order as an array of tokens - confirm.
    ngrams(seg, max_order) { |ng| ng_by_n[ng.size - 1][ng] += 1 }
  end
  ng_by_n.each_with_index do |counts, j|
    singletons = counts.count { |_, v| v == 1 }
    enums[j]  += counts.size - singletons
    denoms[j] += counts.size.to_f
  end
end

# Geometric mean of the per-order ratios.
rr = 1.0
enums.each_with_index { |e, j| rr *= e / denoms[j] }

puts(((rr**(1.0 / max_order)) * 100).round(2))