diff options
author | Patrick Simianer <p@simianer.de> | 2014-01-29 19:14:08 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-01-29 19:14:08 +0100 |
commit | 68acbb9a0c7967cb90a7e3756fc94fdd8a73d154 (patch) | |
tree | 3b445131dcb203e94473ae1d8aa82a1798585276 /ng | |
parent | 49158e721bfaf6423dca9fc633873218f691c83a (diff) |
make use of nlp_ruby, LICENSE
Diffstat (limited to 'ng')
-rwxr-xr-x | ng | 44 |
1 file changed, 12 insertions, 32 deletions
```diff
@@ -1,39 +1,19 @@
 #!/usr/bin/env ruby
 
-def ngrams_it(s, n, fix=false)
-  a = s.strip.split
-  a.each_with_index { |tok, i|
-    tok.strip!
-    0.upto([n-1, a.size-i-1].min) { |m|
-      yield a[i..i+m] if !(fix||(a[i..i+m].size>n))
-    }
-  }
-end
-
-def main(n, fix, sep)
-  STDIN.set_encoding 'utf-8'
-  STDOUT.set_encoding 'utf-8'
-  while line = STDIN.gets
-    a = []
-    ngrams_it(line, n, fix) {|ng| a << ng.join(' ')}
-    a.reject! {|i| i.strip.size==0 }
-    puts a.join sep if a.size > 0
-  end
-end
+require 'nlp_ruby'
+require 'trollop'
 
-def usage
-  STDERR.write "./ng [-n <n>] [--fix] [--separator <s>] < <one number per line>\n"
-  exit 1
+cfg = Trollop::options do
+  banner "ng < <input>"
+  opt :n, "n for Ngrams", :type => :int, :default => 4
+  opt :fix, "Don't output lower order Ngrams.", :type => :bool, :default => false
+  opt :separator, "separte ngrams of a line by this string", :type => :string, :default => "\n"
 end
 
-if __FILE__ == $0
-  require 'trollop'
-  opts = Trollop::options do
-    opt :n, "Ngrams", :type => :int, :default => 4
-    opt :fix, "Don't output lower order Ngrams.", :type => :bool, :default => false
-    opt :separator, "separte ngrams of a line by this string", :type => :string, :default => "\n"
-  end
-  usage if not [0,2,4,6].include? ARGV.size
-  main(opts[:n], opts[:fix], opts[:separator])
+while line = STDIN.gets
+  a = []
+  ngrams(line, cfg[:n], cfg[:fix]) { |ng| a << ng.join(' ') }
+  a.reject! { |i| i.strip.size==0 }
+  puts a.join cfg[:separator] if a.size>0
 end
 
```