diff options
author | Patrick Simianer <p@simianer.de> | 2017-11-10 22:28:48 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2017-11-10 22:28:48 +0100 |
commit | f91c57877a60d69c8e779101e1c245656ee6bc8f (patch) | |
tree | c0fabd8f4054978c8a7207955d89d89733516193 | |
parent | 114d9eeb7cfc90061026bc09181abf0614fe485e (diff) |
rr: fix
-rwxr-xr-x | rr | 31 |
1 files changed, 24 insertions, 7 deletions
```diff
@@ -9,20 +9,37 @@ while line = STDIN.gets
   if cur_sz >= 1000
     windows << cur
     cur = []
-  else
-    cur << line.strip
-    cur_size += cur[-1].split.size
+    cur_sz = 0
   end
+  cur << line.strip
+  cur_sz += cur.last.split.size
 end
+enums = [0.0]*4
+denoms = [0.0]*4
 windows.each { |w|
-  h = {}
-  h.default = 0
+  ng_by_n = [{}]*4
   w.each { |seg|
     ngrams(seg, 4) { |ng|
-      h[ng] += 1
+      if ng_by_n[ng.size-1].has_key? ng
+        ng_by_n[ng.size-1][ng] += 1
+      else
+        ng_by_n[ng.size-1][ng] = 1
+      end
     }
   }
-  puts h
+  ng_by_n.each_with_index { |ng,j|
+    puts ng.to_s
+    singletons = ng.reject { |k,v| v > 1 }.size
+    enums[j] += ng.size - singletons
+    denoms[j] += ng.size.to_f
+  }
+}
+
+rr = 1.0
+enums.each_with_index { |i,j|
+  rr *= i/denoms[j]
 }
+puts ((rr**0.25)*100).round 2
+
```