summary | refs | log | tree | commit | diff
path: root/lib/nlp_ruby/stringutil.rb
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de> 2014-06-16 17:44:07 +0200
committer: Patrick Simianer <p@simianer.de> 2014-06-16 17:44:07 +0200
commit: 4059a5d048cb0f72872c98073ef1ce120a30d78c (patch)
tree: 4fbff0dc62c5ef3deea0ffdec578e3f2c0ed74b6 /lib/nlp_ruby/stringutil.rb
parent: 912ff6aebcf4f89f9e64b5f59956dbf7d8f624e3 (diff)
renaming to zipf
Diffstat (limited to 'lib/nlp_ruby/stringutil.rb')
-rw-r--r-- lib/nlp_ruby/stringutil.rb 22
1 files changed, 0 insertions, 22 deletions
diff --git a/lib/nlp_ruby/stringutil.rb b/lib/nlp_ruby/stringutil.rb
deleted file mode 100644
index aa9be00..0000000
--- a/lib/nlp_ruby/stringutil.rb
+++ /dev/null
@@ -1,22 +0,0 @@
-def tokenize s
- s.strip.split
-end
-
-def ngrams(s, n, fix=false)
- a = tokenize s
- a.each_with_index { |tok, i|
- tok.strip!
- 0.upto([n-1, a.size-i-1].min) { |m|
- yield a[i..i+m] if !fix||(fix&&a[i..i+m].size==n)
- }
- }
-end
-
-def bag_of_words s, stopwords=[]
- s.strip.split.uniq.sort.reject{ |w| stopwords.include? w }
-end
-
-def splitpipe s, n=3
- s.strip.split("|"*n)
-end
-