summaryrefslogtreecommitdiff
path: root/tensorflow
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow')
-rw-r--r-- tensorflow/transformer-attention.py 51
-rw-r--r-- tensorflow/transformer-attention2.py 36
2 files changed, 87 insertions, 0 deletions
diff --git a/tensorflow/transformer-attention.py b/tensorflow/transformer-attention.py
new file mode 100644
index 0000000..6f82549
--- /dev/null
+++ b/tensorflow/transformer-attention.py
@@ -0,0 +1,70 @@
+"""Toy NumPy walk-through of multi-head scaled dot-product attention.
+
+Every head owns its query/key/value projections and attends over the same
+input states; the head outputs are concatenated along the feature axis and
+mixed by an output projection.
+"""
+import numpy as np
+import math
+
+dmodel = 32  # total width of all head outputs concatenated together
+embedding_dim = 8  # width of one input word vector
+nwords = 3
+num_heads = 4
+
+# Integer arithmetic keeps this sanity check exact (no float comparison).
+assert dmodel % num_heads == 0
+head_dim = dmodel // num_heads
+assert head_dim == embedding_dim
+
+states = np.array([np.ones(shape=[embedding_dim])*(i+1) for i in range(nwords)]) # num. words x embedding dim
+
+Wqs = []
+Wks = []
+Wvs = []
+scores = []
+
+
+def softmax(m):
+    """Return the softmax of ``m`` along its last axis.
+
+    Every row of a 2-D input (or a whole 1-D input) is normalised on its
+    own so that it sums to 1.
+    """
+    # Subtracting the row maximum does not change the result but keeps
+    # np.exp from overflowing on large scores.
+    e = np.exp(m - np.max(m, axis=-1, keepdims=True))
+    # keepdims=True makes each row divide by its own sum when broadcasting.
+    return e / np.sum(e, axis=-1, keepdims=True)
+
+
+for h in range(num_heads):
+    Wq = np.random.rand(embedding_dim, head_dim)
+    Wk = np.random.rand(embedding_dim, head_dim)
+    Wv = np.random.rand(embedding_dim, head_dim)
+
+    queries = np.matmul(states, Wq)  # nwords x head_dim
+    keys = np.matmul(states, Wk)
+    values = np.matmul(states, Wv)
+
+    # Scale by sqrt(head_dim): the dot products run over head_dim entries.
+    out = np.matmul(queries, np.transpose(keys))
+    out = out/math.sqrt(head_dim)
+
+    out = softmax(out)  # nwords x nwords, every row sums to 1
+    out = np.matmul(out, values)  # nwords x head_dim
+
+    Wqs.append(Wq)
+    Wks.append(Wk)
+    Wvs.append(Wv)
+    scores.append(out)
+
+# Heads sit side by side on the feature axis: nwords x dmodel.
+out = np.concatenate(scores, axis=1)
+# Output projection back to the input width; right-multiplying keeps the
+# word positions separate instead of mixing them.
+Wo = np.random.rand(dmodel, embedding_dim)
+out = np.matmul(out, Wo)
+print(out.shape)
+print(out)
+
diff --git a/tensorflow/transformer-attention2.py b/tensorflow/transformer-attention2.py
new file mode 100644
index 0000000..c214934
--- /dev/null
+++ b/tensorflow/transformer-attention2.py
@@ -0,0 +1,69 @@
+"""Toy NumPy walk-through of multi-head attention with fused projections.
+
+A single query/key/value matrix holds the weights of every head; the
+projected activations are split into heads, attended per head, merged back
+together and mixed by an output projection.
+"""
+import numpy as np
+import math
+
+dmodel = 32  # total width of all heads together
+embedding_dim = 8  # width of one input word vector
+nwords = 3
+num_heads = 4
+
+# Integer arithmetic keeps this sanity check exact (no float comparison).
+assert dmodel % num_heads == 0
+head_dim = dmodel // num_heads
+assert head_dim == embedding_dim
+
+states = np.array([np.random.rand(embedding_dim) for i in range(nwords)]) # num. words x embedding dim
+
+
+def softmax(m):
+    """Return the softmax of ``m`` along its last axis.
+
+    Every slice along the last axis is normalised on its own so that it
+    sums to 1, whatever the number of leading dimensions.
+    """
+    # Subtracting the maximum does not change the result but keeps
+    # np.exp from overflowing on large scores.
+    e = np.exp(m - np.max(m, axis=-1, keepdims=True))
+    # keepdims=True makes each row divide by its own sum when broadcasting.
+    return e / np.sum(e, axis=-1, keepdims=True)
+
+
+def split_heads(x):
+    """Reshape ``x`` from (rows, dmodel) to (num_heads, rows, head_dim)."""
+    return x.reshape(x.shape[0], num_heads, head_dim).transpose(1, 0, 2)
+
+
+Wqs = np.random.rand(embedding_dim, dmodel)
+Wks = np.random.rand(embedding_dim, dmodel)
+Wvs = np.random.rand(embedding_dim, dmodel)
+
+queries = np.matmul(states, Wqs)  # nwords x dmodel
+keys = np.matmul(states, Wks)
+values = np.matmul(states, Wvs)
+
+print(values)
+
+# Per-head scores: each dot product runs over head_dim entries, which is
+# what the sqrt(head_dim) scale assumes.
+key_heads_t = split_heads(keys).transpose(0, 2, 1)
+out = np.matmul(split_heads(queries), key_heads_t)
+out = out/math.sqrt(head_dim)
+
+out = softmax(out)  # num_heads x nwords x nwords, every row sums to 1
+print(out)
+out = np.matmul(out, split_heads(values))  # num_heads x nwords x head_dim
+
+# Put the heads side by side again: nwords x dmodel.
+out = out.transpose(1, 0, 2).reshape(nwords, dmodel)
+# Output projection; right-multiplying keeps the word positions separate
+# instead of mixing them.
+Wo = np.random.rand(dmodel, dmodel)
+out = np.matmul(out, Wo)
+print(out.shape)
+print(out)
+