diff options
-rwxr-xr-x | rr | 31 |
1 files changed, 24 insertions, 7 deletions
@@ -9,20 +9,37 @@ while line = STDIN.gets if cur_sz >= 1000 windows << cur cur = [] - else - cur << line.strip - cur_size += cur[-1].split.size + cur_sz = 0 end + cur << line.strip + cur_sz += cur.last.split.size end +enums = [0.0]*4 +denoms = [0.0]*4 windows.each { |w| - h = {} - h.default = 0 + ng_by_n = [{}]*4 w.each { |seg| ngrams(seg, 4) { |ng| - h[ng] += 1 + if ng_by_n[ng.size-1].has_key? ng + ng_by_n[ng.size-1][ng] += 1 + else + ng_by_n[ng.size-1][ng] = 1 + end } } - puts h + ng_by_n.each_with_index { |ng,j| + puts ng.to_s + singletons = ng.reject { |k,v| v > 1 }.size + enums[j] += ng.size - singletons + denoms[j] += ng.size.to_f + } +} + +rr = 1.0 +enums.each_with_index { |i,j| + rr *= i/denoms[j] } +puts ((rr**0.25)*100).round 2 + |