
var R = {}; // the Recurrent library

(function(global) {
"use strict";
// Utility fun
function assert(condition, message) {
// from http://stackoverflow.com/questions/15313418/javascript-assert
if (!condition) {
message = message || "Assertion failed";
if (typeof Error !== "undefined") {
throw new Error(message);
}
throw message; // Fallback
}
}
// Random numbers utils
var return_v = false;
var v_val = 0.0;
var gaussRandom = function() {
if (return_v) {
return_v = false;
return v_val;
}
var u = 2 * Math.random() - 1;
var v = 2 * Math.random() - 1;
var r = u * u + v * v;
if (r == 0 || r > 1) return gaussRandom();
var c = Math.sqrt(-2 * Math.log(r) / r);
v_val = v * c; // cache this
return_v = true;
return u * c;
}
var randf = function(a, b) {
return Math.random() * (b - a) + a;
}
var randi = function(a, b) {
return Math.floor(Math.random() * (b - a) + a);
}
var randn = function(mu, std) {
return mu + gaussRandom() * std;
}
// helper function returns array of zeros of length n
// and uses typed arrays if available
var zeros = function(n) {
if (typeof(n) === 'undefined' || isNaN(n)) {
return [];
}
if (typeof ArrayBuffer === 'undefined') {
// lacking browser support
var arr = new Array(n);
for (var i = 0; i < n; i++) {
arr[i] = 0;
}
return arr;
} else {
return new Float64Array(n);
}
}
// Mat holds a matrix
var Mat = function(n, d) {
// n is number of rows, d is number of columns
this.n = n;
this.d = d;
this.w = zeros(n * d);
this.dw = zeros(n * d);
}
Mat.prototype = {
get: function(row, col) {
// slow but careful accessor function
// we want row-major order
var ix = (this.d * row) + col;
assert(ix >= 0 && ix < this.w.length);
return this.w[ix];
},
set: function(row, col, v) {
// slow but careful accessor function
var ix = (this.d * row) + col;
assert(ix >= 0 && ix < this.w.length);
this.w[ix] = v;
},
setFrom: function(arr) {
for (var i = 0, n = arr.length; i < n; i++) {
this.w[i] = arr[i];
}
},
setColumn: function(m, i) {
for (var q = 0, n = m.w.length; q < n; q++) {
this.w[(this.d * q) + i] = m.w[q];
}
},
toJSON: function() {
var json = {};
json['n'] = this.n;
json['d'] = this.d;
json['w'] = this.w;
return json;
},
fromJSON: function(json) {
this.n = json.n;
this.d = json.d;
this.w = zeros(this.n * this.d);
this.dw = zeros(this.n * this.d);
for (var i = 0, n = this.n * this.d; i < n; i++) {
this.w[i] = json.w[i]; // copy over weights
}
}
}
var copyMat = function(b) {
var a = new Mat(b.n, b.d);
a.setFrom(b.w);
return a;
}
var copyNet = function(net) {
// nets are (k,v) pairs with k = string key, v = Mat()
var new_net = {};
for (var p in net) {
if (net.hasOwnProperty(p)) {
new_net[p] = copyMat(net[p]);
}
}
return new_net;
}
var updateMat = function(m, alpha) {
// updates in place
for (var i = 0, n = m.n * m.d; i < n; i++) {
if (m.dw[i] !== 0) {
m.w[i] += -alpha * m.dw[i];
m.dw[i] = 0;
}
}
}
var updateNet = function(net, alpha) {
for (var p in net) {
if (net.hasOwnProperty(p)) {
updateMat(net[p], alpha);
}
}
}
var netToJSON = function(net) {
var j = {};
for (var p in net) {
if (net.hasOwnProperty(p)) {
j[p] = net[p].toJSON();
}
}
return j;
}
var netFromJSON = function(j) {
var net = {};
for (var p in j) {
if (j.hasOwnProperty(p)) {
net[p] = new Mat(1, 1); // not proud of this
net[p].fromJSON(j[p]);
}
}
return net;
}
var netZeroGrads = function(net) {
for (var p in net) {
if (net.hasOwnProperty(p)) {
var mat = net[p];
gradFillConst(mat, 0);
}
}
}
var netFlattenGrads = function(net) {
var n = 0;
for (var p in net) {
if (net.hasOwnProperty(p)) {
var mat = net[p];
n += mat.dw.length;
}
}
var g = new Mat(n, 1);
var ix = 0;
for (var p in net) {
if (net.hasOwnProperty(p)) {
var mat = net[p];
for (var i = 0, m = mat.dw.length; i < m; i++) {
g.w[ix] = mat.dw[i];
ix++;
}
}
}
return g;
}
// return Mat but filled with random numbers from gaussian
var RandMat = function(n, d, mu, std) {
var m = new Mat(n, d);
fillRandn(m, mu, std);
//fillRand(m,-std,std); // kind of :P
return m;
}
// Mat utils
// fill matrix with random gaussian numbers
var fillRandn = function(m, mu, std) {
for (var i = 0, n = m.w.length; i < n; i++) {
m.w[i] = randn(mu, std);
}
}
var fillRand = function(m, lo, hi) {
for (var i = 0, n = m.w.length; i < n; i++) {
m.w[i] = randf(lo, hi);
}
}
var gradFillConst = function(m, c) {
for (var i = 0, n = m.dw.length; i < n; i++) {
m.dw[i] = c
}
}
// Transformer definitions
var Graph = function(needs_backprop) {
if (typeof needs_backprop === 'undefined') {
needs_backprop = true;
}
this.needs_backprop = needs_backprop;
// this will store a list of functions that perform backprop,
// in their forward pass order. So in backprop we will go
// backwards and evoke each one
this.backprop = [];
}
Graph.prototype = {
backward: function() {
for (var i = this.backprop.length - 1; i >= 0; i--) {
this.backprop[i](); // tick!
}
},
rowPluck: function(m, ix) {
// pluck a row of m with index ix and return it as col vector
assert(ix >= 0 && ix < m.n);
var d = m.d;
var out = new Mat(d, 1);
for (var i = 0, n = d; i < n; i++) {
out.w[i] = m.w[d * ix + i];
} // copy over the data
if (this.needs_backprop) {
var backward = function() {
for (var i = 0, n = d; i < n; i++) {
m.dw[d * ix + i] += out.dw[i];
}
}
this.backprop.push(backward);
}
return out;
},
tanh: function(m) {
// tanh nonlinearity
var out = new Mat(m.n, m.d);
var n = m.w.length;
for (var i = 0; i < n; i++) {
out.w[i] = Math.tanh(m.w[i]);
}
if (this.needs_backprop) {
var backward = function() {
for (var i = 0; i < n; i++) {
// grad for z = tanh(x) is (1 - z^2)
var mwi = out.w[i];
m.dw[i] += (1.0 - mwi * mwi) * out.dw[i];
}
}
this.backprop.push(backward);
}
return out;
},
sigmoid: function(m) {
// sigmoid nonlinearity
var out = new Mat(m.n, m.d);
var n = m.w.length;
for (var i = 0; i < n; i++) {
out.w[i] = sig(m.w[i]);
}
if (this.needs_backprop) {
var backward = function() {
for (var i = 0; i < n; i++) {
// grad for z = sigmoid(x) is z * (1 - z)
var mwi = out.w[i];
m.dw[i] += mwi * (1.0 - mwi) * out.dw[i];
}
}
this.backprop.push(backward);
}
return out;
},
relu: function(m) {
var out = new Mat(m.n, m.d);
var n = m.w.length;
for (var i = 0; i < n; i++) {
out.w[i] = Math.max(0, m.w[i]); // relu
}
if (this.needs_backprop) {
var backward = function() {
for (var i = 0; i < n; i++) {
m.dw[i] += m.w[i] > 0 ? out.dw[i] : 0.0;
}
}
this.backprop.push(backward);
}
return out;
},
mul: function(m1, m2) {
// multiply matrices m1 * m2
assert(m1.d === m2.n, 'matmul dimensions misaligned');
var n = m1.n;
var d = m2.d;
var out = new Mat(n, d);
for (var i = 0; i < m1.n; i++) { // loop over rows of m1
for (var j = 0; j < m2.d; j++) { // loop over cols of m2
var dot = 0.0;
for (var k = 0; k < m1.d; k++) { // dot product loop
dot += m1.w[m1.d * i + k] * m2.w[m2.d * k + j];
}
out.w[d * i + j] = dot;
}
}
if (this.needs_backprop) {
var backward = function() {
for (var i = 0; i < m1.n; i++) { // loop over rows of m1
for (var j = 0; j < m2.d; j++) { // loop over cols of m2
for (var k = 0; k < m1.d; k++) { // dot product loop
var b = out.dw[d * i + j];
m1.dw[m1.d * i + k] += m2.w[m2.d * k + j] * b;
m2.dw[m2.d * k + j] += m1.w[m1.d * i + k] * b;
}
}
}
}
this.backprop.push(backward);
}
return out;
},
add: function(m1, m2) {
assert(m1.w.length === m2.w.length);
var out = new Mat(m1.n, m1.d);
for (var i = 0, n = m1.w.length; i < n; i++) {
out.w[i] = m1.w[i] + m2.w[i];
}
if (this.needs_backprop) {
var backward = function() {
for (var i = 0, n = m1.w.length; i < n; i++) {
m1.dw[i] += out.dw[i];
m2.dw[i] += out.dw[i];
}
}
this.backprop.push(backward);
}
return out;
},
dot: function(m1, m2) {
// m1 m2 are both column vectors
assert(m1.w.length === m2.w.length);
var out = new Mat(1, 1);
var dot = 0.0;
for (var i = 0, n = m1.w.length; i < n; i++) {
dot += m1.w[i] * m2.w[i];
}
out.w[0] = dot;
if (this.needs_backprop) {
var backward = function() {
for (var i = 0, n = m1.w.length; i < n; i++) {
m1.dw[i] += m2.w[i] * out.dw[0];
m2.dw[i] += m1.w[i] * out.dw[0];
}
}
this.backprop.push(backward);
}
return out;
},
eltmul: function(m1, m2) {
assert(m1.w.length === m2.w.length);
var out = new Mat(m1.n, m1.d);
for (var i = 0, n = m1.w.length; i < n; i++) {
out.w[i] = m1.w[i] * m2.w[i];
}
if (this.needs_backprop) {
var backward = function() {
for (var i = 0, n = m1.w.length; i < n; i++) {
m1.dw[i] += m2.w[i] * out.dw[i];
m2.dw[i] += m1.w[i] * out.dw[i];
}
}
this.backprop.push(backward);
}
return out;
},
}
var softmax = function(m) {
var out = new Mat(m.n, m.d); // probability volume
var maxval = -999999;
for (var i = 0, n = m.w.length; i < n; i++) {
if (m.w[i] > maxval) maxval = m.w[i];
}
var s = 0.0;
for (var i = 0, n = m.w.length; i < n; i++) {
out.w[i] = Math.exp(m.w[i] - maxval);
s += out.w[i];
}
for (var i = 0, n = m.w.length; i < n; i++) {
out.w[i] /= s;
}
// no backward pass here needed
// since we will use the computed probabilities outside
// to set gradients directly on m
return out;
}
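// Illustrative sketch (added for clarity, not part of the original library and
// never invoked): how softmax is typically paired with a cross-entropy loss.
// Because softmax() records no backward pass, the caller writes the gradient
// (probs - one_hot(target)) straight into the logits' dw and then calls
// graph.backward(). All parameter names here are hypothetical.
var exampleSoftmaxCrossEntropy = function(graph, logits, target_ix) {
var probs = softmax(logits); // probabilities over the entries of logits
for (var i = 0, n = logits.w.length; i < n; i++) {
logits.dw[i] = probs.w[i]; // dL/dlogit_i = p_i ...
}
logits.dw[target_ix] -= 1.0; // ... minus 1 at the target index
graph.backward(); // propagate into whatever produced the logits
return -Math.log(probs.w[target_ix]); // cross-entropy loss value
}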
var Solver = function() {
this.decay_rate = 0.999;
this.smooth_eps = 1e-8;
this.step_cache = {};
}
Solver.prototype = {
step: function(model, step_size, regc, clipval) {
// perform parameter update
var solver_stats = {};
var num_clipped = 0;
var num_tot = 0;
for (var k in model) {
if (model.hasOwnProperty(k)) {
var m = model[k]; // mat ref
if (!(k in this.step_cache)) {
this.step_cache[k] = new Mat(m.n, m.d);
}
var s = this.step_cache[k];
for (var i = 0, n = m.w.length; i < n; i++) {
// rmsprop adaptive learning rate
var mdwi = m.dw[i];
s.w[i] = s.w[i] * this.decay_rate + (1.0 - this.decay_rate) * mdwi * mdwi;
// gradient clip
if (mdwi > clipval) {
mdwi = clipval;
num_clipped++;
}
if (mdwi < -clipval) {
mdwi = -clipval;
num_clipped++;
}
num_tot++;
// update (and regularize)
m.w[i] += -step_size * mdwi / Math.sqrt(s.w[i] + this.smooth_eps) - regc * m.w[i];
m.dw[i] = 0; // reset gradients for next iteration
}
}
}
solver_stats['ratio_clipped'] = num_clipped * 1.0 / num_tot;
return solver_stats;
}
}
var initLSTM = function(input_size, hidden_sizes, output_size) {
// hidden size should be a list
var model = {};
for (var d = 0; d < hidden_sizes.length; d++) { // loop over depths
var prev_size = d === 0 ? input_size : hidden_sizes[d - 1];
var hidden_size = hidden_sizes[d];
// gates parameters
model['Wix' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
model['Wih' + d] = new RandMat(hidden_size, hidden_size, 0,
0.08);
model['bi' + d] = new Mat(hidden_size, 1);
model['Wfx' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
model['Wfh' + d] = new RandMat(hidden_size, hidden_size, 0,
0.08);
model['bf' + d] = new Mat(hidden_size, 1);
model['Wox' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
model['Woh' + d] = new RandMat(hidden_size, hidden_size, 0,
0.08);
model['bo' + d] = new Mat(hidden_size, 1);
// cell write params
model['Wcx' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
model['Wch' + d] = new RandMat(hidden_size, hidden_size, 0,
0.08);
model['bc' + d] = new Mat(hidden_size, 1);
}
// decoder params
model['Whd'] = new RandMat(output_size, hidden_size, 0, 0.08);
model['bd'] = new Mat(output_size, 1);
return model;
}
var forwardLSTM = function(G, model, hidden_sizes, x, prev) {
// forward prop for a single tick of LSTM
// G is graph to append ops to
// model contains LSTM parameters
// x is 1D column vector with observation
// prev is a struct containing hidden and cell
// from previous iteration
if (prev == null || typeof prev.h === 'undefined') {
var hidden_prevs = [];
var cell_prevs = [];
for (var d = 0; d < hidden_sizes.length; d++) {
hidden_prevs.push(new R.Mat(hidden_sizes[d], 1));
cell_prevs.push(new R.Mat(hidden_sizes[d], 1));
}
} else {
var hidden_prevs = prev.h;
var cell_prevs = prev.c;
}
var hidden = [];
var cell = [];
for (var d = 0; d < hidden_sizes.length; d++) {
var input_vector = d === 0 ? x : hidden[d - 1];
var hidden_prev = hidden_prevs[d];
var cell_prev = cell_prevs[d];
// input gate
var h0 = G.mul(model['Wix' + d], input_vector);
var h1 = G.mul(model['Wih' + d], hidden_prev);
var input_gate = G.sigmoid(G.add(G.add(h0, h1), model['bi' +
d]));
// forget gate
var h2 = G.mul(model['Wfx' + d], input_vector);
var h3 = G.mul(model['Wfh' + d], hidden_prev);
var forget_gate = G.sigmoid(G.add(G.add(h2, h3), model['bf' +
d]));
// output gate
var h4 = G.mul(model['Wox' + d], input_vector);
var h5 = G.mul(model['Woh' + d], hidden_prev);
var output_gate = G.sigmoid(G.add(G.add(h4, h5), model['bo' +
d]));
// write operation on cells
var h6 = G.mul(model['Wcx' + d], input_vector);
var h7 = G.mul(model['Wch' + d], hidden_prev);
var cell_write = G.tanh(G.add(G.add(h6, h7), model['bc' + d]));
// compute new cell activation
var retain_cell = G.eltmul(forget_gate, cell_prev); // what do we keep from cell
var write_cell = G.eltmul(input_gate, cell_write); // what do we write to cell
var cell_d = G.add(retain_cell, write_cell); // new cell contents
// compute hidden state as gated, saturated cell activations
var hidden_d = G.eltmul(output_gate, G.tanh(cell_d));
hidden.push(hidden_d);
cell.push(cell_d);
}
// one decoder to outputs at end
var output = G.add(G.mul(model['Whd'], hidden[hidden.length - 1]),
model['bd']);
// return cell memory, hidden representation and output
return {
'h': hidden,
'c': cell,
'o': output
};
}
var sig = function(x) {
// helper function for computing sigmoid
return 1.0 / (1 + Math.exp(-x));
}
var maxi = function(w) {
// argmax of array w
var maxv = w[0];
var maxix = 0;
for (var i = 1, n = w.length; i < n; i++) {
var v = w[i];
if (v > maxv) {
maxix = i;
maxv = v;
}
}
return maxix;
}
var samplei = function(w) {
// sample an index from w, assuming w are
// probabilities that sum to one
var r = randf(0, 1);
var x = 0.0;
var i = 0;
while (true) {
x += w[i];
if (x > r) {
return i;
}
i++;
}
return w.length - 1; // pretty sure we should never get here?
}
// various utils
global.assert = assert;
global.zeros = zeros;
global.maxi = maxi;
global.samplei = samplei;
global.randi = randi;
global.randn = randn;
global.softmax = softmax;
// classes
global.Mat = Mat;
global.RandMat = RandMat;
global.forwardLSTM = forwardLSTM;
global.initLSTM = initLSTM;
// more utils
global.updateMat = updateMat;
global.updateNet = updateNet;
global.copyMat = copyMat;
global.copyNet = copyNet;
global.netToJSON = netToJSON;
global.netFromJSON = netFromJSON;
global.netZeroGrads = netZeroGrads;
global.netFlattenGrads = netFlattenGrads;
// optimization
global.Solver = Solver;
global.Graph = Graph;
})(R);
// END OF RECURRENTJS
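// Illustrative usage sketch (added for clarity, never invoked): unroll the LSTM
// over a short sequence of one-hot symbols, set a cross-entropy gradient on each
// output, backprop through the whole graph and take one rmsprop step. The sizes
// and the example sequence below are made up.
var exampleLSTMTrainingStep = function() {
var input_size = 10, hidden_sizes = [20], output_size = 10;
var model = R.initLSTM(input_size, hidden_sizes, output_size);
var solver = new R.Solver();
var G = new R.Graph(true); // needs_backprop = true
var prev = {};
var sequence = [3, 1, 4, 1, 5]; // hypothetical symbol indices
for (var t = 0; t < sequence.length - 1; t++) {
var x = new R.Mat(input_size, 1);
x.w[sequence[t]] = 1.0; // one-hot encode the current symbol
prev = R.forwardLSTM(G, model, hidden_sizes, x, prev);
var probs = R.softmax(prev.o);
for (var i = 0; i < output_size; i++) prev.o.dw[i] = probs.w[i];
prev.o.dw[sequence[t + 1]] -= 1.0; // cross-entropy gradient w.r.t. the logits
}
G.backward(); // accumulate gradients through the unrolled graph
solver.step(model, 0.01, 1e-6, 5.0); // step_size, L2 regularization, gradient clip
return model;
}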
var RL = {};
(function(global) {
"use strict";
// syntactic sugar function for getting default parameter values
var getopt = function(opt, field_name, default_value) {
if (typeof opt === 'undefined') {
return default_value;
}
return (typeof opt[field_name] !== 'undefined') ? opt[field_name] :
default_value;
}
var zeros = R.zeros; // inherit these
var assert = R.assert;
var randi = R.randi;
var randf = R.randf;
var setConst = function(arr, c) {
for (var i = 0, n = arr.length; i < n; i++) {
arr[i] = c;
}
}
var sampleWeighted = function(p) {
var r = Math.random();
var c = 0.0;
for (var i = 0, n = p.length; i < n; i++) {
c += p[i];
if (c >= r) {
return i;
}
}
assert(false, 'wtf');
}
// ------
// AGENTS
// ------
// DPAgent performs Value Iteration
// - can also be used for Policy Iteration if you really wanted to
// - requires model of the environment :(
// - does not learn from experience :(
// - assumes finite MDP :(
var DPAgent = function(env, opt) {
this.V = null; // state value function
this.P = null; // policy distribution \pi(s,a)
this.env = env; // store pointer to environment
this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
this.reset();
}
DPAgent.prototype = {
reset: function() {
// reset the agent's policy and value function
this.ns = this.env.getNumStates();
this.na = this.env.getMaxNumActions();
this.V = zeros(this.ns);
this.P = zeros(this.ns * this.na);
// initialize uniform random policy
for (var s = 0; s < this.ns; s++) {
var poss = this.env.allowedActions(s);
for (var i = 0, n = poss.length; i < n; i++) {
this.P[poss[i] * this.ns + s] = 1.0 / poss.length;
}
}
},
act: function(s) {
// behave according to the learned policy
var poss = this.env.allowedActions(s);
var ps = [];
for (var i = 0, n = poss.length; i < n; i++) {
var a = poss[i];
var prob = this.P[a * this.ns + s];
ps.push(prob);
}
var maxi = sampleWeighted(ps);
return poss[maxi];
},
learn: function() {
// perform a single round of value iteration
this.evaluatePolicy(); // writes this.V
this.updatePolicy(); // writes this.P
},
evaluatePolicy: function() {
// perform a synchronous update of the value function
var Vnew = zeros(this.ns);
for (var s = 0; s < this.ns; s++) {
// integrate over actions in a stochastic policy
// note that we assume that policy probability mass over allowed actions sums to one
var v = 0.0;
var poss = this.env.allowedActions(s);
for (var i = 0, n = poss.length; i < n; i++) {
var a = poss[i];
var prob = this.P[a * this.ns + s]; // probability of
taking action under policy
if (prob === 0) {
continue;
} // no contribution, skip for speed
var ns = this.env.nextStateDistribution(s, a);
var rs = this.env.reward(s, a, ns); // reward for s->a->ns transition
v += prob * (rs + this.gamma * this.V[ns]);
}
Vnew[s] = v;
}
this.V = Vnew; // swap
},
updatePolicy: function() {
// update policy to be greedy w.r.t. learned Value function
for (var s = 0; s < this.ns; s++) {
var poss = this.env.allowedActions(s);
// compute value of taking each allowed action
var vmax, nmax;
var vs = [];
for (var i = 0, n = poss.length; i < n; i++) {
var a = poss[i];
var ns = this.env.nextStateDistribution(s, a);
var rs = this.env.reward(s, a, ns);
var v = rs + this.gamma * this.V[ns];
vs.push(v);
if (i === 0 || v > vmax) {
vmax = v;
nmax = 1;
} else if (v === vmax) {
nmax += 1;
}
}
// update policy smoothly across all argmaxy actions
for (var i = 0, n = poss.length; i < n; i++) {
var a = poss[i];
this.P[a * this.ns + s] = (vs[i] === vmax) ? 1.0 /
nmax : 0.0;
}
}
},
}
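// Illustrative sketch (added for clarity, never invoked): the environment
// interface DPAgent relies on. This toy two-state chain is made up; action 0
// stays in place, action 1 jumps to the other state, and landing in state 1
// pays a reward of 1. Note that despite its name, nextStateDistribution is
// expected to return a single (deterministic) next state index.
var exampleChainEnv = {
getNumStates: function() { return 2; },
getMaxNumActions: function() { return 2; },
allowedActions: function(s) { return [0, 1]; }, // both actions allowed everywhere
nextStateDistribution: function(s, a) { return a === 0 ? s : 1 - s; },
reward: function(s, a, ns) { return ns === 1 ? 1.0 : 0.0; }
};
// usage would look roughly like:
// var agent = new DPAgent(exampleChainEnv, { gamma: 0.9 });
// for (var k = 0; k < 20; k++) { agent.learn(); } // value iteration sweeps
// var a = agent.act(0); // act greedily w.r.t. the learned values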
// TDAgent uses TD learning (Q-Learning, SARSA)
// - does not require environment model :)
// - learns from experience :)
var TDAgent = function(env, opt) {
this.update = getopt(opt, 'update', 'qlearn'); // qlearn | sarsa
this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
this.alpha = getopt(opt, 'alpha', 0.01); // value function learning rate
// class allows non-deterministic policy, and smoothly regressing towards the optimal policy based on Q
this.smooth_policy_update = getopt(opt, 'smooth_policy_update', false);
this.beta = getopt(opt, 'beta', 0.01); // learning rate for policy, if smooth updates are on
// eligibility traces
this.lambda = getopt(opt, 'lambda', 0); // eligibility trace decay. 0 = no eligibility traces used
this.replacing_traces = getopt(opt, 'replacing_traces', true);
// optional optimistic initial values
this.q_init_val = getopt(opt, 'q_init_val', 0);
this.planN = getopt(opt, 'planN', 0); // number of planning steps per learning iteration (0 = no planning)
this.Q = null; // state action value function
this.P = null; // policy distribution \pi(s,a)
this.e = null; // eligibility trace
this.env_model_s = null; // environment model (s,a) -> s'
this.env_model_r = null; // environment model (s,a) -> r
this.env = env; // store pointer to environment
this.reset();
}
TDAgent.prototype = {
reset: function() {
// reset the agent's policy and value function
this.ns = this.env.getNumStates();
this.na = this.env.getMaxNumActions();
this.Q = zeros(this.ns * this.na);
if (this.q_init_val !== 0) {
setConst(this.Q, this.q_init_val);
}
this.P = zeros(this.ns * this.na);
this.e = zeros(this.ns * this.na);
// model/planning vars
this.env_model_s = zeros(this.ns * this.na);
setConst(this.env_model_s, -1); // init to -1 so we can test if we saw the state before
this.env_model_r = zeros(this.ns * this.na);
this.sa_seen = [];
this.pq = zeros(this.ns * this.na);
// initialize uniform random policy
for (var s = 0; s < this.ns; s++) {
var poss = this.env.allowedActions(s);
for (var i = 0, n = poss.length; i < n; i++) {
this.P[poss[i] * this.ns + s] = 1.0 / poss.length;
}
}
// agent memory, needed for streaming updates
// (s0,a0,r0,s1,a1,r1,...)
this.r0 = null;
this.s0 = null;
this.s1 = null;
this.a0 = null;
this.a1 = null;
},
resetEpisode: function() {
// an episode finished
},
act: function(s) {
// act according to epsilon greedy policy
var poss = this.env.allowedActions(s);
var probs = [];
for (var i = 0, n = poss.length; i < n; i++) {
probs.push(this.P[poss[i] * this.ns + s]);
}
// epsilon greedy policy
if (Math.random() < this.epsilon) {
var a = poss[randi(0, poss.length)]; // random available action
this.explored = true;
} else {
var a = poss[sampleWeighted(probs)];
this.explored = false;
}
// shift state memory
this.s0 = this.s1;
this.a0 = this.a1;
this.s1 = s;
this.a1 = a;
return a;
},
learn: function(r1) {
// takes reward for previous action, which came from a call to act()
if (!(this.r0 == null)) {
this.learnFromTuple(this.s0, this.a0, this.r0, this.s1,
this.a1, this.lambda);
if (this.planN > 0) {
this.updateModel(this.s0, this.a0, this.r0, this.s1);
this.plan();
}
}
this.r0 = r1; // store this for next update
},
updateModel: function(s0, a0, r0, s1) {
// transition (s0,a0) -> (r0,s1) was observed. Update environment model
var sa = a0 * this.ns + s0;
if (this.env_model_s[sa] === -1) {
// first time we see this state action
this.sa_seen.push(a0 * this.ns + s0); // add as seen state
}
this.env_model_s[sa] = s1;
this.env_model_r[sa] = r0;
},
plan: function() {
// order the states based on current priority queue information
var spq = [];
for (var i = 0, n = this.sa_seen.length; i < n; i++) {
var sa = this.sa_seen[i];
var sap = this.pq[sa];
if (sap > 1e-5) { // gain a bit of efficiency
spq.push({
sa: sa,
p: sap
});
}
}
spq.sort(function(a, b) {
return a.p < b.p ? 1 : -1
});
// perform the updates
var nsteps = Math.min(this.planN, spq.length);
for (var k = 0; k < nsteps; k++) {
// random exploration
//var i = randi(0, this.sa_seen.length); // pick random prev seen state action
//var s0a0 = this.sa_seen[i];
var s0a0 = spq[k].sa;
this.pq[s0a0] = 0; // erase priority, since we're backing up this state
var s0 = s0a0 % this.ns;
var a0 = Math.floor(s0a0 / this.ns);
var r0 = this.env_model_r[s0a0];
var s1 = this.env_model_s[s0a0];
var a1 = -1; // not used for Q learning
if (this.update === 'sarsa') {
// generate random action?...
var poss = this.env.allowedActions(s1);
var a1 = poss[randi(0, poss.length)];
}
this.learnFromTuple(s0, a0, r0, s1, a1, 0); // note lambda = 0 - shouldn't use eligibility trace here
}
},
learnFromTuple: function(s0, a0, r0, s1, a1, lambda) {
var sa = a0 * this.ns + s0;
// calculate the target for Q(s,a)
if (this.update === 'qlearn') {
// Q learning target is Q(s0,a0) = r0 + gamma * max_a Q[s1,a]
var poss = this.env.allowedActions(s1);
var qmax = 0;
for (var i = 0, n = poss.length; i < n; i++) {
var s1a = poss[i] * this.ns + s1;
var qval = this.Q[s1a];
if (i === 0 || qval > qmax) {
qmax = qval;
}
}
var target = r0 + this.gamma * qmax;
} else if (this.update === 'sarsa') {
// SARSA target is Q(s0,a0) = r0 + gamma * Q[s1,a1]
var s1a1 = a1 * this.ns + s1;
var target = r0 + this.gamma * this.Q[s1a1];
}
if (lambda > 0) {
// perform an eligibility trace update
if (this.replacing_traces) {
this.e[sa] = 1;
} else {
this.e[sa] += 1;
}
var edecay = lambda * this.gamma;
var state_update = zeros(this.ns);
for (var s = 0; s < this.ns; s++) {
var poss = this.env.allowedActions(s);
for (var i = 0; i < poss.length; i++) {
var a = poss[i];
var saloop = a * this.ns + s;
var esa = this.e[saloop];
var update = this.alpha * esa * (target -
this.Q[saloop]);
this.Q[saloop] += update;
this.updatePriority(s, a, update);
this.e[saloop] *= edecay;
var u = Math.abs(update);
if (u > state_update[s]) {
state_update[s] = u;
}
}
}
for (var s = 0; s < this.ns; s++) {
if (state_update[s] > 1e-5) { // save efficiency here
this.updatePolicy(s);
}
}
if (this.explored && this.update === 'qlearn') {
// have to wipe the trace since q learning is off-policy :(
this.e = zeros(this.ns * this.na);
}
} else {
// simpler and faster update without eligibility trace
// update Q[sa] towards it with some step size
var update = this.alpha * (target - this.Q[sa]);
this.Q[sa] += update;
this.updatePriority(s0, a0, update);
// update the policy to reflect the change (if appropriate)
this.updatePolicy(s0);
}
},
updatePriority: function(s, a, u) {
// used in planning. Invoked when Q[sa] += update
// we should find all states that lead to (s,a) and upgrade their priority
// of being updated in the next planning step
u = Math.abs(u);
if (u < 1e-5) {
return;
} // for efficiency skip small updates
if (this.planN === 0) {
return;
} // there is no planning to be done, skip.
for (var si = 0; si < this.ns; si++) {
// note we are also iterating over impossible actions at all states,
// but this should be okay because their env_model_s should simply be -1
// as initialized, so they will never be predicted to point to any state
// because they will never be observed, and hence never be added to the model
for (var ai = 0; ai < this.na; ai++) {
var siai = ai * this.ns + si;
if (this.env_model_s[siai] === s) {
// this state leads to s, add it to priority
queue
this.pq[siai] += u;
}
}
}
},
updatePolicy: function(s) {
var poss = this.env.allowedActions(s);
// set policy at s to be the action that achieves max_a Q(s,a)
// first find the maxy Q values
var qmax, nmax;
var qs = [];
for (var i = 0, n = poss.length; i < n; i++) {
var a = poss[i];
var qval = this.Q[a * this.ns + s];
qs.push(qval);
if (i === 0 || qval > qmax) {
qmax = qval;
nmax = 1;
} else if (qval === qmax) {
nmax += 1;
}
}
// now update the policy smoothly towards the argmaxy actions
var psum = 0.0;
for (var i = 0, n = poss.length; i < n; i++) {
var a = poss[i];
var target = (qs[i] === qmax) ? 1.0 / nmax : 0.0;
var ix = a * this.ns + s;
if (this.smooth_policy_update) {
// slightly hacky :p
this.P[ix] += this.beta * (target - this.P[ix]);
psum += this.P[ix];
} else {
// set hard target
this.P[ix] = target;
}
}
if (this.smooth_policy_update) {
// renomalize P if we're using smooth policy updates
for (var i = 0, n = poss.length; i < n; i++) {
var a = poss[i];
this.P[a * this.ns + s] /= psum;
}
}
}
}
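// Illustrative sketch (added for clarity, never invoked): driving a TDAgent.
// The environment must expose getNumStates, getMaxNumActions and
// allowedActions(s); the caller owns the transition dynamics and feeds rewards
// back after each act(). The chain dynamics and rewards below are made up.
var exampleTDLoop = function(steps) {
var env = {
getNumStates: function() { return 2; },
getMaxNumActions: function() { return 2; },
allowedActions: function(s) { return [0, 1]; }
};
var agent = new TDAgent(env, { update: 'qlearn', gamma: 0.9, epsilon: 0.1, alpha: 0.05 });
var s = 0;
for (var t = 0; t < steps; t++) {
var a = agent.act(s); // epsilon-greedy action for current state
var ns = a === 0 ? s : 1 - s; // made-up deterministic transition
var r = ns === 1 ? 1.0 : 0.0; // made-up reward
agent.learn(r); // reward credited to the previous action
s = ns;
}
return agent;
}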
var DQNAgent = function(env, opt) {
this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
this.alpha = getopt(opt, 'alpha', 0.01); // value function learning rate
this.experience_add_every = getopt(opt, 'experience_add_every', 25); // number of time steps before we add another experience to replay memory
this.experience_size = getopt(opt, 'experience_size', 5000); // size of experience replay
this.learning_steps_per_iteration = getopt(opt, 'learning_steps_per_iteration', 10);
this.tderror_clamp = getopt(opt, 'tderror_clamp', 1.0);
this.num_hidden_units = getopt(opt, 'num_hidden_units', 100);
this.env = env;
this.reset();
}
DQNAgent.prototype = {
reset: function() {
this.nh = this.num_hidden_units; // number of hidden units
this.ns = this.env.getNumStates();
this.na = this.env.getMaxNumActions();
// nets are hardcoded for now as key (str) -> Mat
// not proud of this. better solution is to have a whole Net object
// on top of Mats, but for now sticking with this
this.net = {};
this.net.W1 = new R.RandMat(this.nh, this.ns, 0, 0.01);
this.net.b1 = new R.Mat(this.nh, 1, 0, 0.01);
this.net.W2 = new R.RandMat(this.na, this.nh, 0, 0.01);
this.net.b2 = new R.Mat(this.na, 1, 0, 0.01);
this.exp = []; // experience
this.expi = 0; // where to insert
this.t = 0;
this.r0 = null;
this.s0 = null;
this.s1 = null;
this.a0 = null;
this.a1 = null;
this.tderror = 0; // for visualization only...
},
toJSON: function() {
// save function
var j = {};
j.nh = this.nh;
j.ns = this.ns;
j.na = this.na;
j.net = R.netToJSON(this.net);
return j;
},
fromJSON: function(j) {
// load function
this.nh = j.nh;
this.ns = j.ns;
this.na = j.na;
this.net = R.netFromJSON(j.net);
},
forwardQ: function(net, s, needs_backprop) {
var G = new R.Graph(needs_backprop);
var a1mat = G.add(G.mul(net.W1, s), net.b1);
var h1mat = G.tanh(a1mat);
var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
this.lastG = G; // back this up. Kind of hacky isn't it
return a2mat;
},
act: function(slist) {
// convert to a Mat column vector
var s = new R.Mat(this.ns, 1);
s.setFrom(slist);
// epsilon greedy policy
if (Math.random() < this.epsilon) {
var a = randi(0, this.na);
} else {
// greedy wrt Q function
var amat = this.forwardQ(this.net, s, false);
var a = R.maxi(amat.w); // returns index of argmax action
}
// shift state memory
this.s0 = this.s1;
this.a0 = this.a1;
this.s1 = s;
this.a1 = a;
return a;
},
learn: function(r1) {
// perform an update on Q function
if (!(this.r0 == null) && this.alpha > 0) {
// learn from this tuple to get a sense of how "surprising" it is to the agent
var tderror = this.learnFromTuple(this.s0, this.a0,
this.r0, this.s1, this.a1);
this.tderror = tderror; // a measure of surprise
// decide if we should keep this experience in the replay
if (this.t % this.experience_add_every === 0) {
this.exp[this.expi] = [this.s0, this.a0, this.r0,
this.s1, this.a1];
this.expi += 1;
if (this.expi > this.experience_size) {
this.expi = 0;
} // roll over when we run out
}
this.t += 1;
// sample some additional experience from replay memory and learn from it
for (var k = 0; k < this.learning_steps_per_iteration; k++) {
var ri = randi(0, this.exp.length); // todo: priority sweeps?
var e = this.exp[ri];
this.learnFromTuple(e[0], e[1], e[2], e[3], e[4])
}
}
this.r0 = r1; // store for next update
},
learnFromTuple: function(s0, a0, r0, s1, a1) {
// want: Q(s,a) = r + gamma * max_a' Q(s',a')
// compute the target Q value
var tmat = this.forwardQ(this.net, s1, false);
var qmax = r0 + this.gamma * tmat.w[R.maxi(tmat.w)];
// now predict
var pred = this.forwardQ(this.net, s0, true);
var tderror = pred.w[a0] - qmax;
var clamp = this.tderror_clamp;
if (Math.abs(tderror) > clamp) { // huber loss to robustify
if (tderror > clamp) tderror = clamp;
if (tderror < -clamp) tderror = -clamp;
}
pred.dw[a0] = tderror;
this.lastG.backward(); // compute gradients on net params
// update net
R.updateNet(this.net, this.alpha);
return tderror;
}
}
// buggy implementation, doesn't work...
var SimpleReinforceAgent = function(env, opt) {
this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
this.epsilon = getopt(opt, 'epsilon', 0.75); // for epsilon-greedy policy
this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
this.beta = getopt(opt, 'beta', 0.01); // baseline net learning rate
this.env = env;
this.reset();
}
SimpleReinforceAgent.prototype = {
reset: function() {
this.ns = this.env.getNumStates();
this.na = this.env.getMaxNumActions();
this.nh = 100; // number of hidden units
this.nhb = 100; // and also in the baseline lstm
this.actorNet = {};
this.actorNet.W1 = new R.RandMat(this.nh, this.ns, 0, 0.01);
this.actorNet.b1 = new R.Mat(this.nh, 1, 0, 0.01);
this.actorNet.W2 = new R.RandMat(this.na, this.nh, 0, 0.1);
this.actorNet.b2 = new R.Mat(this.na, 1, 0, 0.01);
this.actorOutputs = [];
this.actorGraphs = [];
this.actorActions = []; // sampled ones
this.rewardHistory = [];
this.baselineNet = {};
this.baselineNet.W1 = new R.RandMat(this.nhb, this.ns, 0, 0.01);
this.baselineNet.b1 = new R.Mat(this.nhb, 1, 0, 0.01);
this.baselineNet.W2 = new R.RandMat(this.na, this.nhb, 0, 0.01);
this.baselineNet.b2 = new R.Mat(this.na, 1, 0, 0.01);
this.baselineOutputs = [];
this.baselineGraphs = [];
this.t = 0;
},
forwardActor: function(s, needs_backprop) {
var net = this.actorNet;
var G = new R.Graph(needs_backprop);
var a1mat = G.add(G.mul(net.W1, s), net.b1);
var h1mat = G.tanh(a1mat);
var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
return {
'a': a2mat,
'G': G
}
},
forwardValue: function(s, needs_backprop) {
var net = this.baselineNet;
var G = new R.Graph(needs_backprop);
var a1mat = G.add(G.mul(net.W1, s), net.b1);
var h1mat = G.tanh(a1mat);
var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
return {
'a': a2mat,
'G': G
}
},
act: function(slist) {
// convert to a Mat column vector
var s = new R.Mat(this.ns, 1);
s.setFrom(slist);
// forward the actor to get action output
var ans = this.forwardActor(s, true);
var amat = ans.a;
var ag = ans.G;
this.actorOutputs.push(amat);
this.actorGraphs.push(ag);
// forward the baseline estimator
var ans = this.forwardValue(s, true);
var vmat = ans.a;
var vg = ans.G;
this.baselineOutputs.push(vmat);
this.baselineGraphs.push(vg);
// sample action from the stochastic gaussian policy
var a = R.copyMat(amat);
var gaussVar = 0.02;
a.w[0] = R.randn(0, gaussVar);
a.w[1] = R.randn(0, gaussVar);
this.actorActions.push(a);
// shift state memory
this.s0 = this.s1;
this.a0 = this.a1;
this.s1 = s;
this.a1 = a;
return a;
},
learn: function(r1) {
// perform an update on Q function
this.rewardHistory.push(r1);
var n = this.rewardHistory.length;
var baselineMSE = 0.0;
var nup = 100; // what chunk of experience to take
var nuse = 80; // what chunk to update from
if (n >= nup) {
// lets learn and flush
// first: compute the sample values at all points
var vs = [];
for (var t = 0; t < nuse; t++) {
var mul = 1;
// compute the actual discounted reward for this time step
var V = 0;
for (var t2 = t; t2 < n; t2++) {
V += mul * this.rewardHistory[t2];
mul *= this.gamma;
if (mul < 1e-5) {
break;
} // efficiency savings
}
// get the predicted baseline at this time step
var b = this.baselineOutputs[t].w[0];
for (var i = 0; i < this.na; i++) {
// [the action delta] * [the desirability]
var update = -(V - b) *
(this.actorActions[t].w[i] - this.actorOutputs[t].w[i]);
if (update > 0.1) {
update = 0.1;
}
if (update < -0.1) {
update = -0.1;
}
this.actorOutputs[t].dw[i] += update;
}
var update = -(V - b);
if (update > 0.1) {
update = 0.1;
}
if (update < -0.1) {
update = -0.1;
}
this.baselineOutputs[t].dw[0] += update;
baselineMSE += (V - b) * (V - b);
vs.push(V);
}
baselineMSE /= nuse;
// backprop all the things
for (var t = 0; t < nuse; t++) {
this.actorGraphs[t].backward();
this.baselineGraphs[t].backward();
}
R.updateNet(this.actorNet, this.alpha); // update actor network
R.updateNet(this.baselineNet, this.beta); // update baseline network
// flush
this.actorOutputs = [];
this.rewardHistory = [];
this.actorActions = [];
this.baselineOutputs = [];
this.actorGraphs = [];
this.baselineGraphs = [];
this.tderror = baselineMSE;
}
this.t += 1;
this.r0 = r1; // store for next update
},
}
// buggy implementation as well, doesn't work
var RecurrentReinforceAgent = function(env, opt) {
this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
this.beta = getopt(opt, 'beta', 0.01); // baseline net learning rate
this.env = env;
this.reset();
}
RecurrentReinforceAgent.prototype = {
reset: function() {
this.ns = this.env.getNumStates();
this.na = this.env.getMaxNumActions();
this.nh = 40; // number of hidden units
this.nhb = 40; // and also in the baseline lstm
this.actorLSTM = R.initLSTM(this.ns, [this.nh], this.na);
this.actorG = new R.Graph();
this.actorPrev = null;
this.actorOutputs = [];
this.rewardHistory = [];
this.actorActions = [];
this.baselineLSTM = R.initLSTM(this.ns, [this.nhb], 1);
this.baselineG = new R.Graph();
this.baselinePrev = null;
this.baselineOutputs = [];
this.t = 0;
this.r0 = null;
this.s0 = null;
this.s1 = null;
this.a0 = null;
this.a1 = null;
},
act: function(slist) {
// convert to a Mat column vector
var s = new R.Mat(this.ns, 1);
s.setFrom(slist);
// forward the LSTM to get action distribution
var actorNext = R.forwardLSTM(this.actorG, this.actorLSTM,
[this.nh], s, this.actorPrev);
this.actorPrev = actorNext;
var amat = actorNext.o;
this.actorOutputs.push(amat);
// forward the baseline LSTM
var baselineNext = R.forwardLSTM(this.baselineG,
this.baselineLSTM, [this.nhb], s, this.baselinePrev);
this.baselinePrev = baselineNext;
this.baselineOutputs.push(baselineNext.o);
// sample action from actor policy
var gaussVar = 0.05;
var a = R.copyMat(amat);
for (var i = 0, n = a.w.length; i < n; i++) {
a.w[i] += R.randn(0, gaussVar); // gaussian exploration noise on each action dimension
}
this.actorActions.push(a);
// shift state memory
this.s0 = this.s1;
this.a0 = this.a1;
this.s1 = s;
this.a1 = a;
return a;
},
learn: function(r1) {
// perform an update on Q function
this.rewardHistory.push(r1);
var n = this.rewardHistory.length;
var baselineMSE = 0.0;
var nup = 100; // what chunk of experience to take
var nuse = 80; // what chunk to also update
if (n >= nup) {
// lets learn and flush
// first: compute the sample values at all points
var vs = [];
for (var t = 0; t < nuse; t++) {
var mul = 1;
var V = 0;
for (var t2 = t; t2 < n; t2++) {
V += mul * this.rewardHistory[t2];
mul *= this.gamma;
if (mul < 1e-5) {
break;
} // efficiency savings
}
var b = this.baselineOutputs[t].w[0];
// todo: take out the constants etc.
for (var i = 0; i < this.na; i++) {
// [the action delta] * [the desirability]
var update = -(V - b) *
(this.actorActions[t].w[i] - this.actorOutputs[t].w[i]);
if (update > 0.1) {
update = 0.1;
}
if (update < -0.1) {
update = -0.1;
}
this.actorOutputs[t].dw[i] += update;
}
var update = -(V - b);
if (update > 0.1) {
update = 0.1;
}
if (update < -0.1) {
update = -0.1;
}
this.baselineOutputs[t].dw[0] += update;
baselineMSE += (V - b) * (V - b);
vs.push(V);
}
baselineMSE /= nuse;
this.actorG.backward(); // update params! woohoo!
this.baselineG.backward();
R.updateNet(this.actorLSTM, this.alpha); // update actor network
R.updateNet(this.baselineLSTM, this.beta); // update baseline network
// flush
this.actorG = new R.Graph();
this.actorPrev = null;
this.actorOutputs = [];
this.rewardHistory = [];
this.actorActions = [];
this.baselineG = new R.Graph();
this.baselinePrev = null;
this.baselineOutputs = [];
this.tderror = baselineMSE;
}
this.t += 1;
this.r0 = r1; // store for next update
},
}
// Currently buggy implementation, doesn't work
var DeterministPG = function(env, opt) {
this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
this.epsilon = getopt(opt, 'epsilon', 0.5); // for epsilon-greedy policy
this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
this.beta = getopt(opt, 'beta', 0.01); // baseline net learning rate
this.env = env;
this.reset();
}
DeterministPG.prototype = {
reset: function() {
this.ns = this.env.getNumStates();
this.na = this.env.getMaxNumActions();
this.nh = 100; // number of hidden units
// actor
this.actorNet = {};
this.actorNet.W1 = new R.RandMat(this.nh, this.ns, 0, 0.01);
this.actorNet.b1 = new R.Mat(this.nh, 1, 0, 0.01);
this.actorNet.W2 = new R.RandMat(this.na, this.ns, 0, 0.1);
this.actorNet.b2 = new R.Mat(this.na, 1, 0, 0.01);
this.ntheta = this.na * this.ns + this.na; // number of params in actor
// critic
this.criticw = new R.RandMat(1, this.ntheta, 0, 0.01); // row vector
this.r0 = null;
this.s0 = null;
this.s1 = null;
this.a0 = null;
this.a1 = null;
this.t = 0;
},
forwardActor: function(s, needs_backprop) {
var net = this.actorNet;
var G = new R.Graph(needs_backprop);
var a1mat = G.add(G.mul(net.W1, s), net.b1);
var h1mat = G.tanh(a1mat);
var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
return {
'a': a2mat,
'G': G
}
},
act: function(slist) {
// convert to a Mat column vector
var s = new R.Mat(this.ns, 1);
s.setFrom(slist);
// forward the actor to get action output
var ans = this.forwardActor(s, false);
var amat = ans.a;
var ag = ans.G;
// sample action from the stochastic gaussian policy
var a = R.copyMat(amat);
if (Math.random() < this.epsilon) {
var gaussVar = 0.02;
a.w[0] = R.randn(0, gaussVar);
a.w[1] = R.randn(0, gaussVar);
}
var clamp = 0.25;
if (a.w[0] > clamp) a.w[0] = clamp;
if (a.w[0] < -clamp) a.w[0] = -clamp;
if (a.w[1] > clamp) a.w[1] = clamp;
if (a.w[1] < -clamp) a.w[1] = -clamp;
// shift state memory
this.s0 = this.s1;
this.a0 = this.a1;
this.s1 = s;
this.a1 = a;
return a;
},
utilJacobianAt: function(s) {
var ujacobian = new R.Mat(this.ntheta, this.na);
for (var a = 0; a < this.na; a++) {
R.netZeroGrads(this.actorNet);
var ag = this.forwardActor(this.s0, true);
ag.a.dw[a] = 1.0;
ag.G.backward();
var gflat = R.netFlattenGrads(this.actorNet);
ujacobian.setColumn(gflat, a);
}
return ujacobian;
},
learn: function(r1) {
// perform an update on Q function
//this.rewardHistory.push(r1);
if (!(this.r0 == null)) {
var Gtmp = new R.Graph(false);
// dpg update:
// first compute the features psi:
// the jacobian matrix of the actor for s
var ujacobian0 = this.utilJacobianAt(this.s0);
// now form the features \psi(s,a)
var psi_sa0 = Gtmp.mul(ujacobian0, this.a0); // should be [this.ntheta x 1] "feature" vector
var qw0 = Gtmp.mul(this.criticw, psi_sa0); // 1x1
// now do the same thing because we need \psi(s_{t+1}, \mu_\theta(s_{t+1}))
var ujacobian1 = this.utilJacobianAt(this.s1);
var ag = this.forwardActor(this.s1, false);
var psi_sa1 = Gtmp.mul(ujacobian1, ag.a);
var qw1 = Gtmp.mul(this.criticw, psi_sa1); // 1x1
// get the td error finally
var tderror = this.r0 + this.gamma * qw1.w[0] -
qw0.w[0]; // lol
if (tderror > 0.5) tderror = 0.5; // clamp
if (tderror < -0.5) tderror = -0.5;
this.tderror = tderror;
// update actor policy with natural gradient
var net = this.actorNet;
var ix = 0;
for (var p in net) {
var mat = net[p];
if (net.hasOwnProperty(p)) {
for (var i = 0, n = mat.w.length; i < n; i++) {
mat.w[i] += this.alpha *
this.criticw.w[ix]; // natural gradient update
ix += 1;
}
}
}
// update the critic parameters too
for (var i = 0; i < this.ntheta; i++) {
var update = this.beta * tderror * psi_sa0.w[i];
this.criticw.w[i] += update;
}
}
this.r0 = r1; // store for next update
},
}
// exports
global.DPAgent = DPAgent;
global.TDAgent = TDAgent;
global.DQNAgent = DQNAgent;
//global.SimpleReinforceAgent = SimpleReinforceAgent;
//global.RecurrentReinforceAgent = RecurrentReinforceAgent;
//global.DeterministPG = DeterministPG;
})(RL);
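// Illustrative sketch (added for clarity, never invoked): how a DQNAgent is
// driven. Only getNumStates() and getMaxNumActions() are required of the
// environment; the caller supplies state arrays and rewards. The two-armed
// bandit below and all names in it are made up.
var exampleDQNLoop = function(steps) {
var banditEnv = {
getNumStates: function() { return 1; }, // one dummy state feature
getMaxNumActions: function() { return 2; } // two possible actions
};
var agent = new RL.DQNAgent(banditEnv, { gamma: 0.9, epsilon: 0.1, alpha: 0.01 });
for (var t = 0; t < steps; t++) {
var action = agent.act([0]); // state: plain array of length getNumStates()
var reward = action === 1 ? 1.0 : 0.0; // hypothetical payoff: action 1 is better
agent.learn(reward); // reward credited to the previous action
}
return agent;
}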
var Trevel = {
//settings you can change
stop: true,
maxBet: 0.00001,
minBet: 0.00000002,
swap: true,
betSpeed: 2,//change this on init
verbose: true,
isTesting: false,
showEvery:100,//log details to console after bets if verbose is false
seedEvery:100, // change client seed after?
//money management
useKelly: false,//martingale performs better on live account!
kellyPercent: 5, //can't be more than 100 or less than 1
useMartingale: true, //if kelly is true this won't work
martingaleMultiplier: 2,
//bot settings, these are set automatically don't bother
currentBalance: 0,
startingBalance: 0,
betAmount: 0,
profit: 0,
totalBets: 0,
totalWins: 0,
winRate: 0,
betHistory: [], //this is a sequence of all winning bets, not the sequence of bets we placed
betOutcomes: [],
hbProbability: 0,
lbProbability: 0,
hbCount: 0,
lbcount: 0,
nextBet: "",
previousReward:0,
nextLog:0,
addBet: function(bet, outcome) {
if (bet === "LB" && outcome === "Win") {
Trevel.betHistory.push("LO");
Trevel.betOutcomes.push("W");
Trevel.totalWins++;
Trevel.lbcount++;
}
if (bet === "LB" && outcome === "Loose") {
Trevel.betHistory.push("HI");
Trevel.hbCount++;
Trevel.betOutcomes.push("L");
}
if (bet === "HB" && outcome === "Win") {
Trevel.betHistory.push("HI");
Trevel.totalWins++;
Trevel.hbCount++;
Trevel.betOutcomes.push("W");
}
if (bet === "HB" && outcome === "Loose") {
Trevel.betHistory.push("LO");
Trevel.lbcount++;
Trevel.betOutcomes.push("L");
}
Trevel.totalBets++;
},
calculateProbabilities: function() {
Trevel.hbProbability = Trevel.hbCount / Trevel.betHistory.length;
Trevel.lbProbability = Trevel.lbcount / Trevel.betHistory.length;
Trevel.winRate = Trevel.totalWins / Trevel.totalBets;
if(Trevel.isTesting===false){
Trevel.profit = Trevel.getProfit();
}
},
getCurrentBalance: function() {
return parseFloat($('#balance').html());
},
placeHighBet: function() {
$('#double_your_btc_bet_hi_button').click();
},
placeLowBet: function() {
$('#double_your_btc_bet_lo_button').click();
},
setBetAmount: function(amount) {
var elem = document.getElementById("double_your_btc_stake");
elem.value = amount;
},
changeSeed:function(){
$('#next_client_seed').val(Trevel.getNewSeed());
},
getNewSeed:function(){
var result = '';
var length = 16;
var chars =
'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz';
for (var i = length; i > 0; --i) {
result += chars[Math.floor(Math.random() * chars.length)];
}
return result;
},
setOutcome: function(bet) {
if ($('#double_your_btc_bet_lose').html() !== '') {
Trevel.addBet(bet, "Loose");
} else {
Trevel.addBet(bet, "Win");
}
},
prepareBet: function() {
Trevel.calculateProbabilities();
if (Trevel.betHistory.length < 10) {
if (Trevel.useMartingale === true && Trevel.betHistory.length>12)
{
if ($('#double_your_btc_bet_lose').html() !== '' &&
parseFloat($('#double_your_btc_stake').val()) * Trevel.martingaleMultiplier <
Trevel.maxBet) {
Trevel.setBetAmount((parseFloat($
('#double_your_btc_stake').val()) * Trevel.martingaleMultiplier).toFixed(8));
} else {
Trevel.setBetAmount(Trevel.minBet);
}
}
} else {
if (Trevel.useKelly === true && Trevel.betHistory.length>12) {
Trevel.currentBalance = Trevel.getCurrentBalance();
var currMulty =
document.getElementById("double_your_btc_payout_multiplier").value;
var kellyAmount = (((Trevel.currentBalance *
Trevel.kellyPercent) / 100) * ((Trevel.winRate * currMulty - 1)) / (currMulty -
1)).toFixed(8);
if (kellyAmount > 0 && kellyAmount < Trevel.maxBet) {
Trevel.setBetAmount(kellyAmount);
} else {
Trevel.setBetAmount(Trevel.minBet);
}
} else if (Trevel.useMartingale === true &&
Trevel.betHistory.length>12) {
if ($('#double_your_btc_bet_lose').html() !== '' &&
parseFloat($('#double_your_btc_stake').val()) * Trevel.martingaleMultiplier <
Trevel.maxBet) {
Trevel.setBetAmount((parseFloat($
('#double_your_btc_stake').val()) * Trevel.martingaleMultiplier).toFixed(8));
} else {
Trevel.setBetAmount(Trevel.minBet);
}
}
}
},
placeBet: function() {
if (Trevel.nextBet === "HB") {
Trevel.placeHighBet();
} else if (Trevel.nextBet === "LB") {
Trevel.placeLowBet();
} else if (Trevel.betHistory.length > 0 && Trevel.swap === true) {
var prev = Trevel.betHistory[Trevel.betHistory.length - 1];
if (prev === "LO") {
Trevel.placeHighBet();
} else {
Trevel.placeLowBet();
}
} else {
Trevel.placeLowBet();
}
},
getProfit: function() {
return (Trevel.getCurrentBalance() -
Trevel.startingBalance).toFixed(8);
},
getNumStates: function() {
return 8;
},
getMaxNumActions: function() {
return 2;
},
getSentiment: function(bet) {
if (bet === "HI") {
return 1;
} else {
return 0;
}
},
getPreviousBets: function() {
var hist = [];
if (Trevel.betHistory.length > 12) {
// push the sentiment of the 8 most recent outcomes, newest first
for (var i = 1; i <= 8; i++) {
hist.push(Trevel.getSentiment(Trevel.betHistory[Trevel.betHistory.length - i]));
}
} else {
hist = [0, 1, 0, 1, 0, 1, 0, 1]; //in case we just started...
}
return hist;
},
getAgentState: function() { //we'll observe the last 8 bets
var s = Trevel.getPreviousBets();
return s;
},
getReward: function() {
var reward = 0;
var out1=Trevel.betOutcomes[Trevel.betOutcomes.length - 1];
var out2=Trevel.betOutcomes[Trevel.betOutcomes.length - 2];
if(out1==="L"){
if(Trevel.previousReward<0){
reward=Trevel.previousReward;
reward+=-0.03;
if(out2==="L"){
reward+=-0.03;
}
}
else{
reward=-0.03;
if(out2==="L"){
reward+=-0.03;
}
}
}
else{
if(Trevel.previousReward>0){
reward=Trevel.previousReward;
reward+=0.01;
if(out2==="W"){
reward+=0.01;
}
}
else{
reward=0.01;
if(out2==="W"){
reward+=0.01;
}
}
}
return reward;
},
//initialize Trevel
init: function() {
Trevel.startingBalance = Trevel.currentBalance = parseFloat($
('#balance').html());
Trevel.setBetAmount(Trevel.minBet);
Trevel.stop = true;
Trevel.swap = true;
Trevel.betSpeed=3000;
Trevel.nextLog=Trevel.showEvery;
}
};
RefreshPageAfterFreePlayTimerEnds = function() {};
//Deep Q learning with reinforceJS
var spec = {}
spec.update = 'qlearn';
spec.gamma = 0.9;
//if you are loading a trained network the values here should match your training values
spec.epsilon = 0.20;
spec.alpha = 0.1;
spec.experience_add_every = 5;
spec.experience_size = 999999;
spec.learning_steps_per_iteration = 5;
spec.tderror_clamp = 1.0;
spec.num_hidden_units = 100;
// create an environment object
var env = Trevel;
var rolls=0;
if (env.isTesting === false) {
env.init();
}
// create the DQN agent
agent = new RL.DQNAgent(env, spec);
setInterval(function() {
if (env.stop === false) {
var state = env.getAgentState();
var action = agent.act(state);
var outcome = "";
if (env.isTesting === false) {
if (action === 0) {
env.nextBet = "LB";
env.prepareBet();
env.placeBet();
env.setOutcome("LB");
outcome = env.betOutcomes[env.betOutcomes.length - 1];
} else if (action === 1) {
env.nextBet = "HB";
env.prepareBet();
env.placeBet();
env.setOutcome("HB");
outcome = env.betOutcomes[env.betOutcomes.length - 1];
}
rolls++;
if(rolls>=env.seedEvery){
env.changeSeed();
}
env.calculateProbabilities();
if (env.verbose === true) {
console.log("Profit: " + env.profit+" WinRate: " +
(env.winRate*100).toFixed(2)+" Bets: "+env.betHistory.length);
}
else{
if(env.betHistory.length>=env.nextLog){
console.log("Profit: " + env.profit+" WinRate: " +
(env.winRate*100).toFixed(2)+" Bets: "+env.betHistory.length);
env.nextLog+=env.showEvery;
}
}
}
else {
console.log("To test this bot or Train a network use free-simulator.js");
env.stop=true;
}
var reward = env.getReward();
if (reward == 0) {
if (outcome === "L") {
reward = -0.03;
} else {
reward = 0.01;
}
}
agent.learn(reward);
env.previousReward=reward;
}
}, env.betSpeed);
//type all this in the console, pressing enter after every line
env.maxBet=0.0001;// set your maximum bet in BTC
env.minBet=0.00000002;//set your minimum bet in BTC cannot be less than 0.00000001
env.showEvery=100;//Log betting info to console after
env.seedEvery=100;//change seed after n bets
env.useMartingale=true;// martingale?
env.martingaleMultiplier=2;//multiplier used to increase the bet after a loss
env.stop=false;//start betting
