summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x rr 31
1 files changed, 24 insertions, 7 deletions
diff --git a/rr b/rr
index 30bf2d3..2faec7b 100755
--- a/rr
+++ b/rr
@@ -9,20 +9,37 @@ while line = STDIN.gets
if cur_sz >= 1000
windows << cur
cur = []
- else
- cur << line.strip
- cur_size += cur[-1].split.size
+ cur_sz = 0
end
+ cur << line.strip
+ cur_sz += cur.last.split.size
end
+enums = [0.0]*4
+denoms = [0.0]*4
windows.each { |w|
- h = {}
- h.default = 0
+ ng_by_n = [{}]*4
w.each { |seg|
ngrams(seg, 4) { |ng|
- h[ng] += 1
+ if ng_by_n[ng.size-1].has_key? ng
+ ng_by_n[ng.size-1][ng] += 1
+ else
+ ng_by_n[ng.size-1][ng] = 1
+ end
}
}
- puts h
+ ng_by_n.each_with_index { |ng,j|
+ puts ng.to_s
+ singletons = ng.reject { |k,v| v > 1 }.size
+ enums[j] += ng.size - singletons
+ denoms[j] += ng.size.to_f
+ }
+}
+
+rr = 1.0
+enums.each_with_index { |i,j|
+ rr *= i/denoms[j]
}
+puts ((rr**0.25)*100).round 2
+