diff options
-rw-r--r-- | python/transpose.py | 11 | ||||
-rw-r--r-- | tensorflow/transformer-attention.py | 51 | ||||
-rw-r--r-- | tensorflow/transformer-attention2.py | 36 |
3 files changed, 98 insertions, 0 deletions
# ===========================================================================
# File: python/transpose.py  (new file, mode 100644, blob 464194a)
# Original first line: #!/usr/bin/env python
#
# Demonstrates the transpose identity  v @ m == (m.T @ v.T).T
# ===========================================================================

import numpy as np

# Plain 2-D arrays with the ``@`` operator are used instead of ``np.matrix``,
# which NumPy no longer recommends; the printed output is the same.
m = np.array([[1, 2], [4, 5], [7, 8], [9, 10]])  # 4 x 2
v = np.array([[1, 2, 3, 4]])  # 1 x 4
print(m)
print(v)
print(v @ m)  # 1 x 2
print((m.T @ v.T).T)  # same 1 x 2 result, computed through the transposes


# ===========================================================================
# File: tensorflow/transformer-attention.py  (new file, mode 100644,
# blob 6f82549)
#
# Toy multi-head scaled dot-product attention, one head per loop iteration,
# with random (untrained) projection weights.
# ===========================================================================

import numpy as np
import math

dmodel = 32
embedding_dim = 8
nwords = 3
num_heads = 4

# Explicit check instead of ``assert`` so it still runs under ``python -O``.
if dmodel % num_heads != 0 or dmodel // num_heads != embedding_dim:
    raise ValueError("dmodel / num_heads must equal embedding_dim")
head_dim = dmodel // num_heads  # width of each head's query/key/value vectors

# nwords x embedding_dim; row i is a constant vector filled with i + 1.
states = np.array(
    [np.ones(shape=[embedding_dim]) * (i + 1) for i in range(nwords)]
)

Wqs = []
Wks = []
Wvs = []
scores = []


def softmax(m):
    """Return the softmax of *m* taken along its last axis.

    For a 2-D input every row of the result sums to 1.

    The reduction keeps its axis (``keepdims=True``) so that each row is
    divided by its own sum; dropping the axis would broadcast the sums
    across columns and normalise entry (i, j) by the sum of row j.  The
    row maximum is subtracted first, which leaves the result unchanged
    mathematically but keeps ``np.exp`` from overflowing on large logits.
    """
    shifted = m - np.max(m, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)


for _ in range(num_heads):
    # Independent random projections for this head.
    Wq = np.random.rand(embedding_dim, head_dim)
    Wk = np.random.rand(embedding_dim, head_dim)
    Wv = np.random.rand(embedding_dim, head_dim)

    queries = np.matmul(states, Wq)  # nwords x head_dim
    keys = np.matmul(states, Wk)  # nwords x head_dim
    values = np.matmul(states, Wv)  # nwords x head_dim

    # Scaled dot-product attention: the logits are divided by the square
    # root of the key width (the per-head dimension), matching the scaling
    # used in transformer-attention2.py.
    out = np.matmul(queries, np.transpose(keys)) / math.sqrt(head_dim)
    out = softmax(out)  # nwords x nwords, rows sum to 1
    out = np.matmul(out, values)  # nwords x head_dim

    Wqs.append(Wq)
    Wks.append(Wk)
    Wvs.append(Wv)
    scores.append(out)

# Heads are concatenated along the feature axis, giving one dmodel-wide row
# per word, and then mixed by a (random) output projection applied on the
# right so that words are not blended into each other.
out = np.concatenate(scores, axis=1)  # nwords x dmodel
Wo = np.random.rand(out.shape[1], dmodel)
out = np.matmul(out, Wo)  # nwords x dmodel
print(out.shape)
print(out)


# ===========================================================================
# File: tensorflow/transformer-attention2.py  (new file, mode 100644,
# blob c214934)
#
# Same toy multi-head attention, but all heads are projected with a single
# matrix multiplication and then split into heads.
# ===========================================================================

import numpy as np
import math

dmodel = 32
embedding_dim = 8
nwords = 3
num_heads = 4

# Explicit check instead of ``assert`` so it still runs under ``python -O``.
if dmodel % num_heads != 0 or dmodel // num_heads != embedding_dim:
    raise ValueError("dmodel / num_heads must equal embedding_dim")
head_dim = dmodel // num_heads  # width of each head's query/key/value vectors

states = np.random.rand(nwords, embedding_dim)  # nwords x embedding_dim


def softmax(m):
    """Return the softmax of *m* taken along its last axis.

    For a 2-D input every row of the result sums to 1; for a stack of
    matrices each matrix is normalised row by row.

    The reduction keeps its axis (``keepdims=True``) so that each row is
    divided by its own sum, and the row maximum is subtracted first so that
    ``np.exp`` cannot overflow on large logits.
    """
    shifted = m - np.max(m, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)


def split_heads(x):
    """Reshape an (nwords, dmodel) array to (num_heads, nwords, head_dim)."""
    return x.reshape(nwords, num_heads, head_dim).transpose(1, 0, 2)


# The projections of all heads sit side by side in the columns.
Wqs = np.random.rand(embedding_dim, dmodel)
Wks = np.random.rand(embedding_dim, dmodel)
Wvs = np.random.rand(embedding_dim, dmodel)

queries = np.matmul(states, Wqs)  # nwords x dmodel
keys = np.matmul(states, Wks)  # nwords x dmodel
values = np.matmul(states, Wvs)  # nwords x dmodel

print(values)

# Split into heads so that each head attends with its own head_dim-wide
# queries and keys; this is what the sqrt(head_dim) scale factor assumes.
q_heads = split_heads(queries)
k_heads = split_heads(keys)
v_heads = split_heads(values)

out = np.matmul(q_heads, k_heads.transpose(0, 2, 1)) / math.sqrt(head_dim)

out = softmax(out)  # num_heads x nwords x nwords attention weights
print(out)
out = np.matmul(out, v_heads)  # num_heads x nwords x head_dim

# Put the heads back next to each other (nwords x dmodel) and apply a
# (random) output projection on the right.
out = out.transpose(1, 0, 2).reshape(nwords, dmodel)
Wo = np.random.rand(dmodel, dmodel)
out = np.matmul(out, Wo)  # nwords x dmodel
print(out.shape)
print(out)