From 68acbb9a0c7967cb90a7e3756fc94fdd8a73d154 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Wed, 29 Jan 2014 19:14:08 +0100 Subject: make use of nlp_ruby, LICENSE --- ng | 44 ++++++++++++-------------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) (limited to 'ng') diff --git a/ng b/ng index de314b8..dbc59eb 100755 --- a/ng +++ b/ng @@ -1,39 +1,19 @@ #!/usr/bin/env ruby -def ngrams_it(s, n, fix=false) - a = s.strip.split - a.each_with_index { |tok, i| - tok.strip! - 0.upto([n-1, a.size-i-1].min) { |m| - yield a[i..i+m] if !(fix||(a[i..i+m].size>n)) - } - } -end - -def main(n, fix, sep) - STDIN.set_encoding 'utf-8' - STDOUT.set_encoding 'utf-8' - while line = STDIN.gets - a = [] - ngrams_it(line, n, fix) {|ng| a << ng.join(' ')} - a.reject! {|i| i.strip.size==0 } - puts a.join sep if a.size > 0 - end -end +require 'nlp_ruby' +require 'trollop' -def usage - STDERR.write "./ng [-n ] [--fix] [--separator ] < \n" - exit 1 +cfg = Trollop::options do + banner "ng < " + opt :n, "n for Ngrams", :type => :int, :default => 4 + opt :fix, "Don't output lower order Ngrams.", :type => :bool, :default => false + opt :separator, "separte ngrams of a line by this string", :type => :string, :default => "\n" end -if __FILE__ == $0 - require 'trollop' - opts = Trollop::options do - opt :n, "Ngrams", :type => :int, :default => 4 - opt :fix, "Don't output lower order Ngrams.", :type => :bool, :default => false - opt :separator, "separte ngrams of a line by this string", :type => :string, :default => "\n" - end - usage if not [0,2,4,6].include? ARGV.size - main(opts[:n], opts[:fix], opts[:separator]) +while line = STDIN.gets + a = [] + ngrams(line, cfg[:n], cfg[:fix]) { |ng| a << ng.join(' ') } + a.reject! { |i| i.strip.size==0 } + puts a.join cfg[:separator] if a.size>0 end -- cgit v1.2.3