summaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r-- python/cjk.py 46
-rwxr-xr-x python/cmp.py 16
-rw-r--r-- python/finally.py 10
-rwxr-xr-x python/kwargs.py 10
-rwxr-xr-x python/kwargs1.py 10
-rw-r--r-- python/linear-log.py 11
-rwxr-xr-x python/linked_list.py 31
-rwxr-xr-x python/lowercase.py 8
-rwxr-xr-x python/mapred.py 7
-rw-r--r-- python/mp.py 31
-rwxr-xr-x python/pairs.py 7
-rw-r--r-- python/psb.py 9
-rwxr-xr-x python/script_path.py 7
-rw-r--r-- python/shards.py 23
-rw-r--r-- python/sum-first-n.py 10
-rw-r--r-- python/transpose.py 11
-rwxr-xr-x python/uniform.py 8
17 files changed, 255 insertions, 0 deletions
diff --git a/python/cjk.py b/python/cjk.py
new file mode 100644
index 0000000..9a417d1
--- /dev/null
+++ b/python/cjk.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+# Chinese
+ranges = [{"from":"\u4E00", "to":"\u9FFF"}, # CJK Unified Ideographs
+ {"from":"\u3400", "to":"\u4DBF"}, # CJK Unified Ideographs Extension A
+ #{"from":"\u20000", "to":"\u2A6DF"}, # CJK Unified Ideographs Extension B
+ #{"from":"\u2A700", "to":"\u2B73F"}, # CJK Unified Ideographs Extension C
+ #{"from":"\u2B740", "to":"\u2B81F"}, # CJK Unified Ideographs Extension D
+ #{"from":"\u2B820", "to":"\u2CEAF"}, # CJK Unified Ideographs Extension E
+ #{"from":"\u2CEB0", "to":"\u2EBEF"}, # CJK Unified Ideographs Extension F
+ {"from":"\u2E80", "to":"\u2EFF"}, # CJK Radicals Supplement
+ {"from":"\u2F00", "to":"\u2FDF"}, # Kangxi Radicals
+ {"from":"\u2FF0", "to":"\u2FFF"}, # Ideographic Description Characters
+ {"from":"\u31C0", "to":"\u31EF"}, # CJK Strokes
+ {"from":"\u3200", "to":"\u32FF"}, # Enclosed CJK Letters and Months
+ {"from":"\u3300", "to":"\u33FF"}, # CJK Compatibility
+ {"from":"\uF900", "to":"\uFAFF"}, # CJK Compatibility Ideographs
+ #{"from":"\u1F200", "to":"\u1F2FF"}, # Enclosed Ideographic Supplement
+ #{"from":"\u2F800", "to":"\u2FA1F"} # CJK Compatibility Ideographs Supplement
+ ]
+
+# Japanese
+ranges.extend([{"from":"\u3040", "to":"\u309F"}, # Hiragana
+ #{"from":"\u1B100", "to":"\u1B12F"}, # Kana Extended-A
+ #{"from":"\u1B000", "to":"\u1B0FF"}, # Kana Supplement
+ {"from":"\u30A0", "to":"\u30FF"}, # Katakana
+ {"from":"\u31F0", "to":"\u31FF"}]) # Katakana Phonetic Extensions
+
+# Korean
+ranges.extend([{"from":"\u1100", "to":"\u11FF"}, # Hangul Jamo
+ {"from":"\uA960", "to":"\uA97F"}, # Hangul Jamo Extended-A
+ {"from":"\uD7B0", "to":"\uD7FF"}, # Hangul Jamo Extended-B
+ {"from":"\u3130", "to":"\u318F"}, # Hangul Compatibility Jamo
+ {"from":"\uAC00", "to":"\uD7AF"}]) # Hangul Syllables
+
+# Punctuation, etc.
+ranges.extend([{"from":"\u3000", "to":"\u303F"}, # CJK Symbols and Punctuation
+ #{"from":"\u16FE0", "to":"\u16FFF"}, # Ideographic Symbols and Punctuation
+ {"from":"\uFE30", "to":"\uFE4F"}, # CJK Compatibility Forms
+ {"from":"\uFE50", "to":"\uFE6F"}, # Small Form Variants
+ {"from":"\uFE10", "to":"\uFE1F"}, # Vertical Forms
+ {"from":"\uFF00", "to":"\uFFEF"}]) # Halfwidth and Fullwidth Forms
+
+def is_cjk(char):
+ return any([ord(range["from"]) <= ord(char) <= ord(range["to"]) for range in ranges])
+
diff --git a/python/cmp.py b/python/cmp.py
new file mode 100755
index 0000000..9bc6871
--- /dev/null
+++ b/python/cmp.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python2
+
+
+class A:
+ def __init__(self, a):
+ self.a = a
+
+ def __cmp__(self, b):
+ if self.a==b.a: return 0
+ return 1
+
+a = A("a")
+b = A("b")
+c = A("a")
+print a == c
+
diff --git a/python/finally.py b/python/finally.py
new file mode 100644
index 0000000..a6f14ab
--- /dev/null
+++ b/python/finally.py
@@ -0,0 +1,10 @@
+def f():
+ try:
+ raise Exception
+ except:
+ return
+ finally:
+ print("exit")
+
+f()
+
diff --git a/python/kwargs.py b/python/kwargs.py
new file mode 100755
index 0000000..d1a81ea
--- /dev/null
+++ b/python/kwargs.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python2
+
+
+def fun(*args, **kwargs):
+ print args
+ print kwargs
+
+if __name__=="__main__":
+ fun(1,2,3,4,a=2,b=4,c=4)
+
diff --git a/python/kwargs1.py b/python/kwargs1.py
new file mode 100755
index 0000000..0917965
--- /dev/null
+++ b/python/kwargs1.py
@@ -0,0 +1,10 @@
+def a(x, y=None, z=3):
+ print(x)
+ print(y)
+ print(z)
+
+def b(*args, **kwargs):
+ return a(*args, **kwargs)
+
+b(2, y=-1, z=1)
+
diff --git a/python/linear-log.py b/python/linear-log.py
new file mode 100644
index 0000000..6863b2c
--- /dev/null
+++ b/python/linear-log.py
@@ -0,0 +1,11 @@
+import math
+
+def f(n):
+ x = n/1000.0
+ if x > 10:
+ x = 10 + math.log(x**3)
+ return min(100, max(1, int(x)))
+
+for i in [1,5,10,500,1000,5000,10000,50000,100000,500000,1000000,10000000]:
+ print("%d --- %d"%(i,f(i)))
+
diff --git a/python/linked_list.py b/python/linked_list.py
new file mode 100755
index 0000000..8754e04
--- /dev/null
+++ b/python/linked_list.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python2
+
+
+class El():
+ def __init__(self, val=None, nxt=None):
+ self.val = val
+ self.nxt = nxt
+
+ def getNext(self):
+ return self.nxt
+
+ def getVal(self):
+ return self.val
+
+
+class Ll():
+ def __init__(self, first):
+ self.first = first
+
+ def iterv(self):
+ el = self.first
+ while True:
+ yield el.getVal()
+ el = el.getNext()
+ if not el: break
+
+a = Ll(El("first", El("second")))
+
+for i in a.iterv():
+ print i
+
diff --git a/python/lowercase.py b/python/lowercase.py
new file mode 100755
index 0000000..a096d43
--- /dev/null
+++ b/python/lowercase.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python2
+
+import sys
+
+
+for line in sys.stdin:
+ sys.stdout.write(line.lower())
+
diff --git a/python/mapred.py b/python/mapred.py
new file mode 100755
index 0000000..c61f7cc
--- /dev/null
+++ b/python/mapred.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python2
+
+
+a = [1, 2, 3, 4, 5]
+b = reduce(lambda x,y: x+y, map(lambda x: x**2, a))
+print b
+
diff --git a/python/mp.py b/python/mp.py
new file mode 100644
index 0000000..93fa864
--- /dev/null
+++ b/python/mp.py
@@ -0,0 +1,31 @@
+import threading as mp
+from Queue import Queue
+from time import sleep
+from random import randint
+
+workers = []
+queues = []
+master_queue = Queue()
+
+def f(i, master, queue):
+ while True:
+ inp = master_queue.get(True)
+ print("got input %s"%inp)
+ sleep(randint(1,10))
+ queue.put(">>>> %d %s\n"%(i, inp))
+
+for i in range(10):
+ queue = Queue(1)
+ workers.append(mp.Thread(target=f, args=(i,master_queue,queue)))
+ queues.append(queue)
+ workers[i].start()
+
+for i in range(10000):
+ master_queue.put("msg-"+str(randint(1,1000)))
+
+while True:
+ for i in range(10):
+ if not queues[i].empty():
+ data = queues[i].get()
+ print data
+
diff --git a/python/pairs.py b/python/pairs.py
new file mode 100755
index 0000000..8372775
--- /dev/null
+++ b/python/pairs.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python2
+
+
+a=[1,2,3,4,5,6,7,8,9,10]
+b=[(a[i], a[j]) for i in range(len(a)) for j in range(i+1,len(a))]
+print len(b)
+
diff --git a/python/psb.py b/python/psb.py
new file mode 100644
index 0000000..9d963f1
--- /dev/null
+++ b/python/psb.py
@@ -0,0 +1,9 @@
+import nltk
+from nltk.translate.bleu_score import SmoothingFunction
+smoothing = SmoothingFunction()
+
+hypothesis = open('in').read().strip()
+reference = open('ref').read().strip()
+score = nltk.translate.bleu_score.sentence_bleu([reference.split()], hypothesis.split(), smoothing_function=smoothing.method2)
+print("%f"%(score*100))
+
diff --git a/python/script_path.py b/python/script_path.py
new file mode 100755
index 0000000..c199583
--- /dev/null
+++ b/python/script_path.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python2
+
+import os
+
+
+print os.path.dirname(os.path.abspath(__file__))
+
diff --git a/python/shards.py b/python/shards.py
new file mode 100644
index 0000000..f47921f
--- /dev/null
+++ b/python/shards.py
@@ -0,0 +1,23 @@
+total = 100000
+data=range(total)
+jobSize = 5000
+numJobs = (total // jobSize) + 1
+numSegmentsPerJob = total // numJobs
+print numSegmentsPerJob
+print(numJobs)
+print('---')
+
+start = 0
+count = 0
+for i in range(numJobs):
+ if i == numJobs-1:
+ end = total
+ else:
+ end = start+numSegmentsPerJob
+ shard = data[start:end]
+ print(len(shard))
+ count += len(shard)
+ start += numSegmentsPerJob
+
+print(count)
+
diff --git a/python/sum-first-n.py b/python/sum-first-n.py
new file mode 100644
index 0000000..7a4edd1
--- /dev/null
+++ b/python/sum-first-n.py
@@ -0,0 +1,10 @@
+def s(n):
+ i = n-1
+ sum = 0
+ while i >= 1:
+ sum += i
+ i -= 1
+ return sum
+
+print s(1500)
+
diff --git a/python/transpose.py b/python/transpose.py
new file mode 100644
index 0000000..464194a
--- /dev/null
+++ b/python/transpose.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+import numpy as np
+
+m = np.matrix('1 2; 4 5; 7 8; 9 10') # 4 x 2
+v = np.matrix('1 2 3 4') # 1 x 4
+print(m)
+print(v)
+print(v*m)
+print((m.transpose()*v.transpose()).transpose())
+
diff --git a/python/uniform.py b/python/uniform.py
new file mode 100755
index 0000000..e2e3af1
--- /dev/null
+++ b/python/uniform.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python2
+
+import numpy as np
+
+n=10000.0
+s=100.0
+print "should be ~ %d: %d"%(int(s),len([x for x in (np.random.uniform(0,1,int(n)) >= (1-(s/n))) if x]))
+