diff options
Diffstat (limited to 'python')
-rw-r--r-- | python/cjk.py | 46 | ||||
-rwxr-xr-x | python/cmp.py | 16 | ||||
-rw-r--r-- | python/finally.py | 10 | ||||
-rwxr-xr-x | python/kwargs.py | 10 | ||||
-rwxr-xr-x | python/kwargs1.py | 10 | ||||
-rw-r--r-- | python/linear-log.py | 11 | ||||
-rwxr-xr-x | python/linked_list.py | 31 | ||||
-rwxr-xr-x | python/lowercase.py | 8 | ||||
-rwxr-xr-x | python/mapred.py | 7 | ||||
-rw-r--r-- | python/mp.py | 31 | ||||
-rwxr-xr-x | python/pairs.py | 7 | ||||
-rw-r--r-- | python/psb.py | 9 | ||||
-rwxr-xr-x | python/script_path.py | 7 | ||||
-rw-r--r-- | python/shards.py | 23 | ||||
-rw-r--r-- | python/sum-first-n.py | 10 | ||||
-rw-r--r-- | python/transpose.py | 11 | ||||
-rwxr-xr-x | python/uniform.py | 8 |
17 files changed, 255 insertions, 0 deletions
diff --git a/python/cjk.py b/python/cjk.py new file mode 100644 index 0000000..9a417d1 --- /dev/null +++ b/python/cjk.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python + +# Chinese +ranges = [{"from":"\u4E00", "to":"\u9FFF"}, # CJK Unified Ideographs + {"from":"\u3400", "to":"\u4DBF"}, # CJK Unified Ideographs Extension A + #{"from":"\u20000", "to":"\u2A6DF"}, # CJK Unified Ideographs Extension B + #{"from":"\u2A700", "to":"\u2B73F"}, # CJK Unified Ideographs Extension C + #{"from":"\u2B740", "to":"\u2B81F"}, # CJK Unified Ideographs Extension D + #{"from":"\u2B820", "to":"\u2CEAF"}, # CJK Unified Ideographs Extension E + #{"from":"\u2B820", "to":"\u2CEAF"}, # CJK Unified Ideographs Extension F + {"from":"\u2E80", "to":"\u2EFF"}, # CJK Radicals Supplement + {"from":"\u2F00", "to":"\u2FDF"}, # Kangxi Radicals + {"from":"\u2FF0", "to":"\u2FFF"}, # Ideographic Description Characters + {"from":"\u31C0", "to":"\u31EF"}, # CJK Strokes + {"from":"\u3200", "to":"\u32FF"}, # Enclosed CJK Letters and Months + {"from":"\u3300", "to":"\u33FF"}, # CJK Compatibility + {"from":"\uF900", "to":"\uFAFF"}, # CJK Compatibility Ideographs + #{"from":"\u1F200", "to":"\u1F2FF"}, # Enclosed Ideographic Supplement + #{"from":"\u2F800", "to":"\u2FA1F"} # CJK Compatibility Ideographs Supplement + ] + +# Japanese +ranges.extend([{"from":"\u3040", "to":"\u309F"}, # Hiragana + #{"from":"\u1B100", "to":"\u1B12F"}, # Kana Extended-A + #{"from":"\u1B000", "to":"\u1B0FF"}, # Kana Supplement + {"from":"\u30A0", "to":"\u30FF"}, # Katakana + {"from":"\u31F0", "to":"\u31FF"}]) # Katakana Phonetic Extensions + +# Korean +ranges.extend([{"from":"\u1100", "to":"\u11FF"}, # Hangul Jamo + {"from":"\uA960", "to":"\uA97F"}, # Hangul Jamo Extended-A + {"from":"\uD7B0", "to":"\uD7FF"}, # Hangul Jamo Extended-B + {"from":"\u3130", "to":"\u318F"}, # Hangul Compatibility Jamo + {"from":"\uAC00", "to":"\uD7AF"}]) # Hangul Syllables + +# Punctuation, etc. +ranges.extend([{"from":"\u3000", "to":"\u303F"}, # CJK Symbols and Punctuation + #{"from":"\u16FE0", "to":"\u16FFF"}, # Ideographic Symbols and Punctuation + {"from":"\uFE30", "to":"\uFE4F"}, # CJK Compatibility Forms + {"from":"\uFE50", "to":"\uFE6F"}, # Small Form Variants + {"from":"\uFE10", "to":"\uFE1F"}, # Vertical Forms + {"from":"\uFF00", "to":"\uFFEF"}]) # Halfwidth and Fullwidth Forms + +def is_cjk(char): + return any([ord(range["from"]) <= ord(char) <= ord(range["to"]) for range in ranges]) + diff --git a/python/cmp.py b/python/cmp.py new file mode 100755 index 0000000..9bc6871 --- /dev/null +++ b/python/cmp.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python2 + + +class A: + def __init__(self, a): + self.a = a + + def __cmp__(self, b): + if self.a==b.a: return 0 + return 1 + +a = A("a") +b = A("b") +c = A("a") +print a == c + diff --git a/python/finally.py b/python/finally.py new file mode 100644 index 0000000..a6f14ab --- /dev/null +++ b/python/finally.py @@ -0,0 +1,10 @@ +def f(): + try: + raise Exception + except: + return + finally: + print("exit") + +f() + diff --git a/python/kwargs.py b/python/kwargs.py new file mode 100755 index 0000000..d1a81ea --- /dev/null +++ b/python/kwargs.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python2 + + +def fun(*args, **kwargs): + print args + print kwargs + +if __name__=="__main__": + fun(1,2,3,4,a=2,b=4,c=4) + diff --git a/python/kwargs1.py b/python/kwargs1.py new file mode 100755 index 0000000..0917965 --- /dev/null +++ b/python/kwargs1.py @@ -0,0 +1,10 @@ +def a(x, y=None, z=3): + print(x) + print(y) + print(z) + +def b(*args, **kwargs): + return a(*args, **kwargs) + +b(2, y=-1, z=1) + diff --git a/python/linear-log.py b/python/linear-log.py new file mode 100644 index 0000000..6863b2c --- /dev/null +++ b/python/linear-log.py @@ -0,0 +1,11 @@ +import math + +def f(n): + x = n/1000.0 + if x > 10: + x = 10 + math.log(x**3) + return min(100, max(1, int(x))) + +for i in [1,5,10,500,1000,5000,10000,50000,100000,500000,1000000,10000000]: + print("%d --- %d"%(i,f(i))) + diff --git a/python/linked_list.py b/python/linked_list.py new file mode 100755 index 0000000..8754e04 --- /dev/null +++ b/python/linked_list.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python2 + + +class El(): + def __init__(self, val=None, nxt=None): + self.val = val + self.nxt = nxt + + def getNext(self): + return self.nxt + + def getVal(self): + return self.val + + +class Ll(): + def __init__(self, first): + self.first = first + + def iterv(self): + el = self.first + while True: + yield el.getVal() + el = el.getNext() + if not el: break + +a = Ll(El("first", El("second"))) + +for i in a.iterv(): + print i + diff --git a/python/lowercase.py b/python/lowercase.py new file mode 100755 index 0000000..a096d43 --- /dev/null +++ b/python/lowercase.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python2 + +import sys + + +for line in sys.stdin: + sys.stdout.write(line.lower()) + diff --git a/python/mapred.py b/python/mapred.py new file mode 100755 index 0000000..c61f7cc --- /dev/null +++ b/python/mapred.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python2 + + +a = [1, 2, 3, 4, 5] +b = reduce(lambda x,y: x+y, map(lambda x: x**2, a)) +print b + diff --git a/python/mp.py b/python/mp.py new file mode 100644 index 0000000..93fa864 --- /dev/null +++ b/python/mp.py @@ -0,0 +1,31 @@ +import threading as mp +from Queue import Queue +from time import sleep +from random import randint + +workers = [] +queues = [] +master_queue = Queue() + +def f(i, master, queue): + while True: + inp = master_queue.get(True) + print("got input %s"%inp) + sleep(randint(1,10)) + queue.put(">>>> %d %s\n"%(i, inp)) + +for i in range(10): + queue = Queue(1) + workers.append(mp.Thread(target=f, args=(i,master_queue,queue))) + queues.append(queue) + workers[i].start() + +for i in range(10000): + master_queue.put("msg-"+str(randint(1,1000))) + +while True: + for i in range(10): + if not queues[i].empty(): + data = queues[i].get() + print data + diff --git a/python/pairs.py b/python/pairs.py new file mode 100755 index 0000000..8372775 --- /dev/null +++ b/python/pairs.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python2 + + +a=[1,2,3,4,5,6,7,8,9,10] +b=[(a[i], a[j]) for i in range(len(a)) for j in range(i+1,len(a))] +print len(b) + diff --git a/python/psb.py b/python/psb.py new file mode 100644 index 0000000..9d963f1 --- /dev/null +++ b/python/psb.py @@ -0,0 +1,9 @@ +import nltk +from nltk.translate.bleu_score import SmoothingFunction +smoothing = SmoothingFunction() + +hypothesis = open('in').read().strip() +reference = open('ref').read().strip() +score = nltk.translate.bleu_score.sentence_bleu([reference.split()], hypothesis.split(), smoothing_function=smoothing.method2) +print("%f"%(score*100)) + diff --git a/python/script_path.py b/python/script_path.py new file mode 100755 index 0000000..c199583 --- /dev/null +++ b/python/script_path.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python2 + +import os + + +print os.path.dirname(os.path.abspath(__file__)) + diff --git a/python/shards.py b/python/shards.py new file mode 100644 index 0000000..f47921f --- /dev/null +++ b/python/shards.py @@ -0,0 +1,23 @@ +total = 100000 +data=range(total) +jobSize = 5000 +numJobs = (total // jobSize) + 1 +numSegmentsPerJob = total // numJobs +print numSegmentsPerJob +print(numJobs) +print('---') + +start = 0 +count = 0 +for i in range(numJobs): + if i == numJobs-1: + end = total + else: + end = start+numSegmentsPerJob + shard = data[start:end] + print(len(shard)) + count += len(shard) + start += numSegmentsPerJob + +print(count) + diff --git a/python/sum-first-n.py b/python/sum-first-n.py new file mode 100644 index 0000000..7a4edd1 --- /dev/null +++ b/python/sum-first-n.py @@ -0,0 +1,10 @@ +def s(n): + i = n-1 + sum = 0 + while i >= 1: + sum += i + i -= 1 + return sum + +print s(1500) + diff --git a/python/transpose.py b/python/transpose.py new file mode 100644 index 0000000..464194a --- /dev/null +++ b/python/transpose.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python + +import numpy as np + +m = np.matrix('1 2; 4 5; 7 8; 9 10') # 3 x 2 +v = np.matrix('1 2 3 4') # 1 x 3 +print(m) +print(v) +print(v*m) +print((m.transpose()*v.transpose()).transpose()) + diff --git a/python/uniform.py b/python/uniform.py new file mode 100755 index 0000000..e2e3af1 --- /dev/null +++ b/python/uniform.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python2 + +import numpy as np + +n=10000.0 +s=100.0 +print "should be ~ %d: %d"%(int(s),len([x for x in (np.random.uniform(0,1,int(n)) >= (1-(s/n))) if x])) + |