diff options
author | karpathy <andrej.karpathy@gmail.com> | 2014-12-18 12:29:45 -0800 |
---|---|---|
committer | karpathy <andrej.karpathy@gmail.com> | 2014-12-18 12:29:45 -0800 |
commit | 3e62bba15db6dbbc83514bfcbf6cb49f0b4fe2ec (patch) | |
tree | e7ea56ab152480ad346de30b3cde664e5b822640 /src |
first commit
Diffstat (limited to 'src')
-rw-r--r-- | src/recurrent.js | 528 | ||||
-rw-r--r-- | src/vis.js | 88 |
2 files changed, 616 insertions, 0 deletions
diff --git a/src/recurrent.js b/src/recurrent.js new file mode 100644 index 0000000..4c20a39 --- /dev/null +++ b/src/recurrent.js @@ -0,0 +1,528 @@ +var R = {}; // the Recurrent library + +(function(global) { + "use strict"; + + // Utility fun + function assert(condition, message) { + // from http://stackoverflow.com/questions/15313418/javascript-assert + if (!condition) { + message = message || "Assertion failed"; + if (typeof Error !== "undefined") { + throw new Error(message); + } + throw message; // Fallback + } + } + + // Random numbers utils + var return_v = false; + var v_val = 0.0; + var gaussRandom = function() { + if(return_v) { + return_v = false; + return v_val; + } + var u = 2*Math.random()-1; + var v = 2*Math.random()-1; + var r = u*u + v*v; + if(r == 0 || r > 1) return gaussRandom(); + var c = Math.sqrt(-2*Math.log(r)/r); + v_val = v*c; // cache this + return_v = true; + return u*c; + } + var randf = function(a, b) { return Math.random()*(b-a)+a; } + var randi = function(a, b) { return Math.floor(Math.random()*(b-a)+a); } + var randn = function(mu, std){ return mu+gaussRandom()*std; } + + // helper function returns array of zeros of length n + // and uses typed arrays if available + var zeros = function(n) { + if(typeof(n)==='undefined' || isNaN(n)) { return []; } + if(typeof ArrayBuffer === 'undefined') { + // lacking browser support + var arr = new Array(n); + for(var i=0;i<n;i++) { arr[i] = 0; } + return arr; + } else { + return new Float64Array(n); + } + } + + // Mat holds a matrix + var Mat = function(n,d) { + // n is number of rows d is number of columns + this.n = n; + this.d = d; + this.w = zeros(n * d); + this.dw = zeros(n * d); + } + Mat.prototype = { + get: function(row, col) { + // slow but careful accessor function + // we want row-major order + var ix = (this.d * row) + col; + assert(ix >= 0 && ix < this.w.length); + return this.w[ix]; + }, + set: function(row, col, v) { + // slow but careful accessor function + var ix = (this.d * row) + col; + assert(ix >= 0 && ix < this.w.length); + this.w[ix] = v; + }, + toJSON: function() { + var json = {}; + json['n'] = this.n; + json['d'] = this.d; + json['w'] = this.w; + return json; + }, + fromJSON: function(json) { + this.n = json.n; + this.d = json.d; + this.w = zeros(this.n * this.d); + this.dw = zeros(this.n * this.d); + for(var i=0,n=this.n * this.d;i<n;i++) { + this.w[i] = json.w[i]; // copy over weights + } + } + } + + // return Mat but filled with random numbers from gaussian + var RandMat = function(n,d,mu,std) { + var m = new Mat(n, d); + //fillRandn(m,mu,std); + fillRand(m,-std,std); // kind of :P + return m; + } + + // Mat utils + // fill matrix with random gaussian numbers + var fillRandn = function(m, mu, std) { for(var i=0,n=m.w.length;i<n;i++) { m.w[i] = randn(mu, std); } } + var fillRand = function(m, lo, hi) { for(var i=0,n=m.w.length;i<n;i++) { m.w[i] = randf(lo, hi); } } + + // Transformer definitions + var Graph = function(needs_backprop) { + if(typeof needs_backprop === 'undefined') { needs_backprop = true; } + this.needs_backprop = needs_backprop; + + // this will store a list of functions that perform backprop, + // in their forward pass order. So in backprop we will go + // backwards and evoke each one + this.backprop = []; + } + Graph.prototype = { + backward: function() { + for(var i=this.backprop.length-1;i>=0;i--) { + this.backprop[i](); // tick! + } + }, + rowPluck: function(m, ix) { + // pluck a row of m with index ix and return it as col vector + assert(ix >= 0 && ix < m.n); + var d = m.d; + var out = new Mat(d, 1); + for(var i=0,n=d;i<n;i++){ out.w[i] = m.w[d * ix + i]; } // copy over the data + + if(this.needs_backprop) { + var backward = function() { + for(var i=0,n=d;i<n;i++){ m.dw[d * ix + i] += out.dw[i]; } + } + this.backprop.push(backward); + } + return out; + }, + tanh: function(m) { + // tanh nonlinearity + var out = new Mat(m.n, m.d); + var n = m.w.length; + for(var i=0;i<n;i++) { + out.w[i] = Math.tanh(m.w[i]); + } + + if(this.needs_backprop) { + var backward = function() { + for(var i=0;i<n;i++) { + // grad for z = tanh(x) is (1 - z^2) + var mwi = out.w[i]; + m.dw[i] += (1.0 - mwi * mwi) * out.dw[i]; + } + } + this.backprop.push(backward); + } + return out; + }, + sigmoid: function(m) { + // sigmoid nonlinearity + var out = new Mat(m.n, m.d); + var n = m.w.length; + for(var i=0;i<n;i++) { + out.w[i] = sig(m.w[i]); + } + + if(this.needs_backprop) { + var backward = function() { + for(var i=0;i<n;i++) { + // grad for z = tanh(x) is (1 - z^2) + var mwi = out.w[i]; + m.dw[i] += mwi * (1.0 - mwi) * out.dw[i]; + } + } + this.backprop.push(backward); + } + return out; + }, + relu: function(m) { + var out = new Mat(m.n, m.d); + var n = m.w.length; + for(var i=0;i<n;i++) { + out.w[i] = Math.max(0, m.w[i]); // relu + } + if(this.needs_backprop) { + var backward = function() { + for(var i=0;i<n;i++) { + m.dw[i] += m.w[i] > 0 ? out.dw[i] : 0.0; + } + } + this.backprop.push(backward); + } + return out; + }, + mul: function(m1, m2) { + // multiply matrices m1 * m2 + assert(m1.d === m2.n, 'matmul dimensions misaligned'); + + var n = m1.n; + var d = m2.d; + var out = new Mat(n,d); + for(var i=0;i<m1.n;i++) { // loop over rows of m1 + for(var j=0;j<m2.d;j++) { // loop over cols of m2 + var dot = 0.0; + for(var k=0;k<m1.d;k++) { // dot product loop + dot += m1.w[m1.d*i+k] * m2.w[m2.d*k+j]; + } + out.w[d*i+j] = dot; + } + } + + if(this.needs_backprop) { + var backward = function() { + for(var i=0;i<m1.n;i++) { // loop over rows of m1 + for(var j=0;j<m2.d;j++) { // loop over cols of m2 + for(var k=0;k<m1.d;k++) { // dot product loop + var b = out.dw[d*i+j]; + m1.dw[m1.d*i+k] += m2.w[m2.d*k+j] * b; + m2.dw[m2.d*k+j] += m1.w[m1.d*i+k] * b; + } + } + } + } + this.backprop.push(backward); + } + return out; + }, + add: function(m1, m2) { + assert(m1.w.length === m2.w.length); + + var out = new Mat(m1.n, m1.d); + for(var i=0,n=m1.w.length;i<n;i++) { + out.w[i] = m1.w[i] + m2.w[i]; + } + if(this.needs_backprop) { + var backward = function() { + for(var i=0,n=m1.w.length;i<n;i++) { + m1.dw[i] += out.dw[i]; + m2.dw[i] += out.dw[i]; + } + } + this.backprop.push(backward); + } + return out; + }, + eltmul: function(m1, m2) { + assert(m1.w.length === m2.w.length); + + var out = new Mat(m1.n, m1.d); + for(var i=0,n=m1.w.length;i<n;i++) { + out.w[i] = m1.w[i] * m2.w[i]; + } + if(this.needs_backprop) { + var backward = function() { + for(var i=0,n=m1.w.length;i<n;i++) { + m1.dw[i] += m2.w[i] * out.dw[i]; + m2.dw[i] += m1.w[i] * out.dw[i]; + } + } + this.backprop.push(backward); + } + return out; + }, + } + + var softmax = function(m) { + var out = new Mat(m.n, m.d); // probability volume + var maxval = -999999; + for(var i=0,n=m.w.length;i<n;i++) { if(m.w[i] > maxval) maxval = m.w[i]; } + + var s = 0.0; + for(var i=0,n=m.w.length;i<n;i++) { + out.w[i] = Math.exp(m.w[i] - maxval); + s += out.w[i]; + } + for(var i=0,n=m.w.length;i<n;i++) { out.w[i] /= s; } + + // no backward pass here needed + // since we will use the computed probabilities outside + // to set gradients directly on m + return out; + } + + var Solver = function() { + this.decay_rate = 0.999; + this.smooth_eps = 1e-8; + this.step_cache = {}; + } + Solver.prototype = { + step: function(model, step_size, regc, clipval) { + // perform parameter update + var solver_stats = {}; + var num_clipped = 0; + var num_tot = 0; + for(var k in model) { + if(model.hasOwnProperty(k)) { + var m = model[k]; // mat ref + if(!(k in this.step_cache)) { this.step_cache[k] = new Mat(m.n, m.d); } + var s = this.step_cache[k]; + for(var i=0,n=m.w.length;i<n;i++) { + + // rmsprop adaptive learning rate + var mdwi = m.dw[i]; + s.w[i] = s.w[i] * this.decay_rate + (1.0 - this.decay_rate) * mdwi * mdwi; + + // gradient clip + if(mdwi > clipval) { + mdwi = clipval; + num_clipped++; + } + if(mdwi < -clipval) { + mdwi = -clipval; + num_clipped++; + } + num_tot++; + + // update (and regularize) + m.w[i] += - step_size * mdwi / Math.sqrt(s.w[i] + this.smooth_eps) - regc * m.w[i]; + m.dw[i] = 0; // reset gradients for next iteration + } + } + } + solver_stats['ratio_clipped'] = num_clipped*1.0/num_tot; + return solver_stats; + } + } + + var initLSTM = function(input_size, hidden_sizes, output_size) { + // hidden size should be a list + + var model = {}; + for(var d=0;d<hidden_sizes.length;d++) { // loop over depths + var prev_size = d === 0 ? input_size : hidden_sizes[d - 1]; + var hidden_size = hidden_sizes[d]; + + // gates parameters + model['Wix'+d] = new RandMat(hidden_size, prev_size , 0, 0.08); + model['Wih'+d] = new RandMat(hidden_size, hidden_size , 0, 0.08); + model['bi'+d] = new Mat(hidden_size, 1); + model['Wfx'+d] = new RandMat(hidden_size, prev_size , 0, 0.08); + model['Wfh'+d] = new RandMat(hidden_size, hidden_size , 0, 0.08); + model['bf'+d] = new Mat(hidden_size, 1); + model['Wox'+d] = new RandMat(hidden_size, prev_size , 0, 0.08); + model['Woh'+d] = new RandMat(hidden_size, hidden_size , 0, 0.08); + model['bo'+d] = new Mat(hidden_size, 1); + // cell write params + model['Wcx'+d] = new RandMat(hidden_size, prev_size , 0, 0.08); + model['Wch'+d] = new RandMat(hidden_size, hidden_size , 0, 0.08); + model['bc'+d] = new Mat(hidden_size, 1); + } + // decoder params + model['Whd'] = new RandMat(output_size, hidden_size, 0, 0.08); + model['bd'] = new Mat(output_size, 1); + return model; + } + + var forwardLSTM = function(G, model, hidden_sizes, x, prev) { + // forward prop for a single tick of LSTM + // G is graph to append ops to + // model contains LSTM parameters + // x is 1D column vector with observation + // prev is a struct containing hidden and cell + // from previous iteration + + if(typeof prev.h === 'undefined') { + var hidden_prevs = []; + var cell_prevs = []; + for(var d=0;d<hidden_sizes.length;d++) { + hidden_prevs.push(new R.Mat(hidden_sizes[d],1)); + cell_prevs.push(new R.Mat(hidden_sizes[d],1)); + } + } else { + var hidden_prevs = prev.h; + var cell_prevs = prev.c; + } + + var hidden = []; + var cell = []; + for(var d=0;d<hidden_sizes.length;d++) { + + var input_vector = d === 0 ? x : hidden[d-1]; + var hidden_prev = hidden_prevs[d]; + var cell_prev = cell_prevs[d]; + + // input gate + var h0 = G.mul(model['Wix'+d], input_vector); + var h1 = G.mul(model['Wih'+d], hidden_prev); + var input_gate = G.sigmoid(G.add(G.add(h0,h1),model['bi'+d])); + + // forget gate + var h2 = G.mul(model['Wfx'+d], input_vector); + var h3 = G.mul(model['Wfh'+d], hidden_prev); + var forget_gate = G.sigmoid(G.add(G.add(h2, h3),model['bf'+d])); + + // output gate + var h4 = G.mul(model['Wox'+d], input_vector); + var h5 = G.mul(model['Woh'+d], hidden_prev); + var output_gate = G.sigmoid(G.add(G.add(h4, h5),model['bo'+d])); + + // write operation on cells + var h6 = G.mul(model['Wcx'+d], input_vector); + var h7 = G.mul(model['Wch'+d], hidden_prev); + var cell_write = G.tanh(G.add(G.add(h6, h7),model['bc'+d])); + + // compute new cell activation + var retain_cell = G.eltmul(forget_gate, cell_prev); // what do we keep from cell + var write_cell = G.eltmul(input_gate, cell_write); // what do we write to cell + var cell_d = G.add(retain_cell, write_cell); // new cell contents + + // compute hidden state as gated, saturated cell activations + var hidden_d = G.eltmul(output_gate, G.tanh(cell_d)); + + hidden.push(hidden_d); + cell.push(cell_d); + } + + // one decoder to outputs at end + var output = G.add(G.mul(model['Whd'], hidden[hidden.length - 1]),model['bd']); + + // return cell memory, hidden representation and output + return {'h':hidden, 'c':cell, 'o' : output}; + } + + var initRNN = function(input_size, hidden_sizes, output_size) { + // hidden size should be a list + + var model = {}; + for(var d=0;d<hidden_sizes.length;d++) { // loop over depths + var prev_size = d === 0 ? input_size : hidden_sizes[d - 1]; + var hidden_size = hidden_sizes[d]; + model['Wxh'+d] = new R.RandMat(hidden_size, prev_size , 0, 0.08); + model['Whh'+d] = new R.RandMat(hidden_size, hidden_size, 0, 0.08); + model['bhh'+d] = new R.Mat(hidden_size, 1); + } + // decoder params + model['Whd'] = new RandMat(output_size, hidden_size, 0, 0.08); + model['bd'] = new Mat(output_size, 1); + return model; + } + + var forwardRNN = function(G, model, hidden_sizes, x, prev) { + // forward prop for a single tick of RNN + // G is graph to append ops to + // model contains RNN parameters + // x is 1D column vector with observation + // prev is a struct containing hidden activations from last step + + if(typeof prev.h === 'undefined') { + var hidden_prevs = []; + for(var d=0;d<hidden_sizes.length;d++) { + hidden_prevs.push(new R.Mat(hidden_sizes[d],1)); + } + } else { + var hidden_prevs = prev.h; + } + + var hidden = []; + for(var d=0;d<hidden_sizes.length;d++) { + + var input_vector = d === 0 ? x : hidden[d-1]; + var hidden_prev = hidden_prevs[d]; + + var h0 = G.mul(model['Wxh'+d], input_vector); + var h1 = G.mul(model['Whh'+d], hidden_prev); + var hidden_d = G.relu(G.add(G.add(h0, h1), model['bhh'+d])); + + hidden.push(hidden_d); + } + + // one decoder to outputs at end + var output = G.add(G.mul(model['Whd'], hidden[hidden.length - 1]),model['bd']); + + // return cell memory, hidden representation and output + return {'h':hidden, 'o' : output}; + } + + var sig = function(x) { + // helper function for computing sigmoid + return 1.0/(1+Math.exp(-x)); + } + + var maxi = function(w) { + // argmax of array w + var maxv = w[0]; + var maxix = 0; + for(var i=1,n=w.length;i<n;i++) { + var v = w[i]; + if(v > maxv) { + maxix = i; + maxv = v; + } + } + return maxix; + } + + var samplei = function(w) { + // sample argmax from w, assuming w are + // probabilities that sum to one + var r = randf(0,1); + var x = 0.0; + var i = 0; + while(true) { + x += w[i]; + if(x > r) { return i; } + i++; + } + return w.length - 1; // pretty sure we should never get here? + } + + // various utils + global.maxi = maxi; + global.samplei = samplei; + global.randi = randi; + global.softmax = softmax; + global.assert = assert; + + // classes + global.Mat = Mat; + global.RandMat = RandMat; + + global.forwardLSTM = forwardLSTM; + global.initLSTM = initLSTM; + global.forwardRNN = forwardRNN; + global.initRNN = initRNN; + + // optimization + global.Solver = Solver; + global.Graph = Graph; + +})(R); diff --git a/src/vis.js b/src/vis.js new file mode 100644 index 0000000..d501f41 --- /dev/null +++ b/src/vis.js @@ -0,0 +1,88 @@ + +// contains various utility functions +var Rvis = (function(exports){ + + // can be used to graph loss, or accuract over time + var Graph = function(options) { + var options = options || {}; + this.step_horizon = options.step_horizon || 1000; + + this.pts = []; + + this.maxy = -9999; + this.miny = 9999; + } + + Graph.prototype = { + // canv is the canvas we wish to update with this new datapoint + add: function(step, y) { + var time = new Date().getTime(); // in ms + if(y>this.maxy*0.99) this.maxy = y*1.05; + if(y<this.miny*1.01) this.miny = y*0.95; + + this.pts.push({step: step, time: time, y: y}); + if(step > this.step_horizon) this.step_horizon *= 2; + }, + // elt is a canvas we wish to draw into + drawSelf: function(canv) { + + var pad = 25; + var H = canv.height; + var W = canv.width; + var ctx = canv.getContext('2d'); + + ctx.clearRect(0, 0, W, H); + ctx.font="10px Georgia"; + + var f2t = function(x) { + var dd = 1.0 * Math.pow(10, 2); + return '' + Math.floor(x*dd)/dd; + } + + // draw guidelines and values + ctx.strokeStyle = "#999"; + ctx.beginPath(); + var ng = 10; + for(var i=0;i<=ng;i++) { + var xpos = i/ng*(W-2*pad)+pad; + ctx.moveTo(xpos, pad); + ctx.lineTo(xpos, H-pad); + ctx.fillText(f2t(i/ng*this.step_horizon/1000)+'k',xpos,H-pad+14); + } + for(var i=0;i<=ng;i++) { + var ypos = i/ng*(H-2*pad)+pad; + ctx.moveTo(pad, ypos); + ctx.lineTo(W-pad, ypos); + ctx.fillText(f2t((ng-i)/ng*(this.maxy-this.miny) + this.miny), 0, ypos); + } + ctx.stroke(); + + var N = this.pts.length; + if(N<2) return; + + // draw the actual curve + var t = function(x, y, s) { + var tx = x / s.step_horizon * (W-pad*2) + pad; + var ty = H - ((y-s.miny) / (s.maxy-s.miny) * (H-pad*2) + pad); + return {tx:tx, ty:ty} + } + + ctx.strokeStyle = "red"; + ctx.beginPath() + for(var i=0;i<N;i++) { + // draw line from i-1 to i + var p = this.pts[i]; + var pt = t(p.step, p.y, this); + if(i===0) ctx.moveTo(pt.tx, pt.ty); + else ctx.lineTo(pt.tx, pt.ty); + } + ctx.stroke(); + } + } + + exports = exports || {}; + exports.Graph = Graph; + return exports; + +})(typeof module != 'undefined' && module.exports); // add exports to module.exports if in node.js + |