diff options
author | Patrick Simianer <p@simianer.de> | 2014-06-16 17:44:07 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-06-16 17:44:07 +0200 |
commit | 4059a5d048cb0f72872c98073ef1ce120a30d78c (patch) | |
tree | 4fbff0dc62c5ef3deea0ffdec578e3f2c0ed74b6 /lib/nlp_ruby/stringutil.rb | |
parent | 912ff6aebcf4f89f9e64b5f59956dbf7d8f624e3 (diff) |
renaming to zipf
Diffstat (limited to 'lib/nlp_ruby/stringutil.rb')
-rw-r--r-- | lib/nlp_ruby/stringutil.rb | 22 |
1 files changed, 0 insertions, 22 deletions
diff --git a/lib/nlp_ruby/stringutil.rb b/lib/nlp_ruby/stringutil.rb deleted file mode 100644 index aa9be00..0000000 --- a/lib/nlp_ruby/stringutil.rb +++ /dev/null @@ -1,22 +0,0 @@ -def tokenize s - s.strip.split -end - -def ngrams(s, n, fix=false) - a = tokenize s - a.each_with_index { |tok, i| - tok.strip! - 0.upto([n-1, a.size-i-1].min) { |m| - yield a[i..i+m] if !fix||(fix&&a[i..i+m].size==n) - } - } -end - -def bag_of_words s, stopwords=[] - s.strip.split.uniq.sort.reject{ |w| stopwords.include? w } -end - -def splitpipe s, n=3 - s.strip.split("|"*n) -end - |