summaryrefslogtreecommitdiff
path: root/rr
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2017-11-11 12:04:57 +0100
committer Patrick Simianer <p@simianer.de> 2017-11-11 12:04:57 +0100
commit 4bf6ab567b2358122139130dc02932048a2882e8 (patch)
tree 41df47bf583652f7346338bb91f7a7a34272a73b /rr
parent c9c9f14ee768be723013ad850473541fabfdbe13 (diff)
repetition rate
Diffstat (limited to 'rr')
-rwxr-xr-x rr 44
1 files changed, 0 insertions, 44 deletions
diff --git a/rr b/rr
deleted file mode 100755
index 87938ae..0000000
--- a/rr
+++ /dev/null
@@ -1,44 +0,0 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Repetition rate of a tokenized text read from STDIN (one segment per line).
#
# Segments are grouped into consecutive windows; a window is closed once it
# holds at least WINDOW_SIZE whitespace-separated tokens. For every window and
# every n-gram order 1..MAX_N the script counts the distinct n-grams and how
# many of them occur more than once. The printed score is the geometric mean,
# over the n-gram orders, of (repeated distinct n-grams / distinct n-grams),
# expressed as a percentage rounded to two decimals.

# `ngrams` is not defined in this file; presumably it is supplied by zipf.
require 'zipf'

WINDOW_SIZE = 1000 # minimum number of tokens before a window is closed
MAX_N = 4          # highest n-gram order taken into account

windows = []
current = []
current_size = 0
while (line = STDIN.gets)
  # The size check happens before the new segment is added, so a window may
  # exceed WINDOW_SIZE by up to one segment's worth of tokens.
  if current_size >= WINDOW_SIZE
    windows << current
    current = []
    current_size = 0
  end
  segment = line.strip
  current << segment
  current_size += segment.split.size
end
# NOTE(review): whatever is left in `current` (the trailing, incomplete
# window) is not scored. Input without a single complete window therefore
# yields 0.0 / 0.0 below and the script prints NaN -- confirm this is intended.

repeated = [0.0] * MAX_N # per order: distinct n-grams seen more than once
distinct = [0.0] * MAX_N # per order: distinct n-grams
windows.each do |window|
  # One independent count table per n-gram order. `[{}] * MAX_N` would put the
  # *same* Hash into every slot and merge the counts of all orders.
  counts_by_n = Array.new(MAX_N) { Hash.new(0) }
  window.each do |seg|
    # Each yielded n-gram is filed under its own length (1..MAX_N).
    ngrams(seg, MAX_N) { |ng| counts_by_n[ng.size - 1][ng] += 1 }
  end
  counts_by_n.each_with_index do |counts, j|
    repeated[j] += counts.count { |_ngram, occurrences| occurrences > 1 }
    distinct[j] += counts.size
  end
end

rr = 1.0
repeated.each_with_index { |r, j| rr *= r / distinct[j] }

# Geometric mean over the MAX_N orders, reported as a percentage.
puts(((rr**(1.0 / MAX_N)) * 100).round(2))