diff options
author | Patrick Simianer <p@simianer.de> | 2013-12-05 07:56:38 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2013-12-05 07:56:38 +0100 |
commit | db6a6ecfa350cae29739c59df1210d8f76a479c9 (patch) | |
tree | f137a001f57f170455c28ce97b5abb2726006cf6 /ng |
init
Diffstat (limited to 'ng')
-rwxr-xr-x | ng | 39 |
1 files changed, 39 insertions, 0 deletions
#!/usr/bin/env ruby

# Yields every n-gram (as an Array of tokens) of the whitespace-tokenized
# string +s+, position by position and from shortest to longest.
#
# @param s   [String]  input line; tokens are separated by whitespace
# @param n   [Integer] maximum n-gram order
# @param fix [Boolean] when true, only n-grams of exactly +n+ tokens are
#   yielded; when false, all orders from 1 up to +n+ are yielded
# @yieldparam gram [Array<String>] the tokens of one n-gram
# @return [Array<String>] the token list of +s+
def ngrams_it(s, n, fix=false)
  # String#split without arguments already discards surrounding whitespace,
  # so the tokens need no further stripping.
  tokens = s.strip.split
  tokens.each_index do |start|
    longest = [n, tokens.size - start].min
    1.upto(longest) do |len|
      # The original test `!(fix ^ (size == n))` dropped the order-n n-grams
      # whenever fix was false; without fix every order up to n is wanted.
      yield tokens[start, len] if !fix || len == n
    end
  end
end

# Reads lines from STDIN and prints the n-grams of each line to STDOUT,
# joined by +sep+. Lines that produce no n-grams print nothing.
#
# @param n   [Integer] maximum n-gram order
# @param fix [Boolean] only output n-grams of exactly +n+ tokens
# @param sep [String]  string placed between the n-grams of one line
def main(n, fix, sep)
  STDIN.set_encoding 'utf-8'
  STDOUT.set_encoding 'utf-8'
  while line = STDIN.gets
    grams = []
    ngrams_it(line, n, fix) { |ng| grams << ng.join(' ') }
    grams.reject! { |g| g.strip.empty? }
    puts grams.join(sep) unless grams.empty?
  end
end

# Prints a short usage message to STDERR and terminates with exit status 1.
def usage
  STDERR.write "./ng [-n <n>] [--fix] [--separator <s>] < <one number per line>\n"
  exit 1
end

if __FILE__ == $PROGRAM_NAME
  require 'trollop'
  opts = Trollop::options do
    opt :n, "Ngrams", :type => :int, :default => 4
    opt :fix, "Don't output lower order Ngrams.", :type => :bool, :default => true
    opt :separator, "separate ngrams of a line by this string", :type => :string, :default => "\n"
  end
  # NOTE(review): Trollop appears to remove the options it parsed from ARGV,
  # so this check likely only sees leftover arguments -- confirm the intent.
  usage unless [0, 2, 4, 6].include?(ARGV.size)
  main(opts[:n], opts[:fix], opts[:separator])
end