-rw-r--r--  tensorflow/transformer-attention1.py | 54
1 file changed, 54 insertions, 0 deletions
diff --git a/tensorflow/transformer-attention1.py b/tensorflow/transformer-attention1.py
new file mode 100644
index 0000000..32fe739
--- /dev/null
+++ b/tensorflow/transformer-attention1.py
@@ -0,0 +1,54 @@
+import numpy as np
+import math
+
+dmodel = 32
+num_heads = 2
+embedding_dim = dmodel  # input embedding size; each head projects down to dmodel // num_heads
+nwords = 4
+
+dk = dmodel // num_heads  # per-head dimension
+
+states = np.array([np.ones(shape=[embedding_dim]) * (i * 0.1) for i in range(nwords)])  # num. words x embedding dim
+
+Wqs = []
+Wks = []
+Wvs = []
+scores = []
+
+
+def softmax(m):
+    # row-wise softmax; keepdims so each row is divided by its own sum
+    return np.exp(m) / np.sum(np.exp(m), axis=1, keepdims=True)
+
+
+for h in range(num_heads):
+    # per-head projections: embedding_dim -> dk
+    Wq = np.random.rand(embedding_dim, dk)
+    Wk = np.random.rand(embedding_dim, dk)
+    Wv = np.random.rand(embedding_dim, dk)
+
+    queries = np.matmul(states, Wq)  # nwords x dk
+    keys    = np.matmul(states, Wk)  # nwords x dk
+    values  = np.matmul(states, Wv)  # nwords x dk
+
+    # scaled dot-product attention: softmax(Q K^T / sqrt(dk)) V
+    out = np.matmul(queries, np.transpose(keys))
+    out = out / math.sqrt(dk)
+
+    # manual row-by-row alternative (would need a 1-D softmax), kept for reference
+    #out_max = []
+    #for i in range(out.shape[0]):
+    #    out_max.append(softmax(out[i]))
+    #out = np.array(out_max)
+
+    out = softmax(out)
+    out = np.matmul(out, values)  # nwords x dk
+
+    Wqs.append(Wq)
+    Wks.append(Wk)
+    Wvs.append(Wv)
+    scores.append(out)
+
+# concatenate the heads along the feature axis and apply the output projection
+out = np.concatenate(scores, axis=1)                   # nwords x dmodel
+out = np.matmul(out, np.random.rand(dmodel, dmodel))   # stand-in for the learned W_O
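
For reference (not part of the commit): a minimal sketch of the same scaled dot-product attention step written as a single function, useful for sanity-checking shapes against the script above. The function name attention and the random test tensors are illustrative assumptions, not code from this repository.

import numpy as np

def attention(queries, keys, values):
    # softmax(Q K^T / sqrt(dk)) V, with a row-wise softmax over the key axis
    dk = queries.shape[-1]
    logits = queries @ keys.T / np.sqrt(dk)
    weights = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
    return weights @ values

nwords, dk = 4, 16
Q, K, V = (np.random.rand(nwords, dk) for _ in range(3))
print(attention(Q, K, V).shape)  # (4, 16): one head's output, nwords x dk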
