Dynamic variables in Node.js
For those of you unfamiliar with it, Common Lisp has this concept of "dynamic variables", which are nothing more than _global_ variables with _dynamic_ scope; but what does it mean for a variable to have dynamic scope? That you can bind a new a value to it, and all the _subsequent_ accesses to that variable (within the scope of the binding operation), will return _that_ new value instead of the _previous_ one. Let's take a look at an example, as I hope it will make things a little bit easier to understand (the asterisks around a variable name indeed mark the variable as dynamic / special):
(defvar *x* 5)
(defun foo ()
*x*)
(defun bar ()
(let ((*x* 42))
(foo)))
> *x*
5
> (foo)
5
> (bar)
42
> *x*
5
Step by step explanation:
- `*x*` was initially bound to `5`, so when we access it we get `5` back -- all good
- We call FOO, which all it does is returning the value of `*x*`, and without much surprise we get `5` back -- so all good
- We call BAR, which binds `*x*` to `42` before calling FOO and returning its return value, and interestingly enough we now get back `42` instead of `5` -- there it is the _dynamic_ scope I was talking about earlier
- Lastly, we access `*x*` and we get `5` back -- we are outside BAR's new binding scope, so the value of `*x*` has been restored to its previous value
Let's go back the definition that I gave earlier: _global_ variables, with _dynamic_ scope. I know that the first rule of global variables is: "thou shalt not use global variables", but it's just that _sometimes_ they appear to be right tool for the job, especially in the context of Web applications; think about the following use cases:
- getting a hold of the currently logged in user
- automatically adding the content of the `X-Request-Id` header to each log trace
- querying the _right_ database, based on the logged in user's tenant
How would you implement these? Either you shove all this information up into a context object, and pass it around, _everywhere_; or maybe you forget about all the bad things you read about global variables, and consciously and carefully agree to use them where it really matters, where it really makes a difference.
Anyways, enough talking about global variables, I do not want this to be an essay about its pros or cons (I am sure the Internet is full of material and opinions about it); instead, let's go back to our original goal: trying to implement dynamic variables in Node.js.
It all begins with the definition of a type representing a bag of bindings (we are going to wrap the standard `Map` type for this):
var Bindings = function (kvpairs) {
this.data = new Map(kvpairs);
};
> new Bindings().data
Map(0) {}
> new Bindings([['a', 1]]).data
Map(1) { 'a' => 1 }
> new Bindings([['a', 1], ['b', 2]]).data
Map(2) { 'a' => 1, 'b' => 2 }
(Don't worry about that _ugly_ syntax for now, we will deal with it later)
Getting a binding (i.e. getting the value bound to a variable), should be as easy as calling `Map.prototype.get` (plus some additional logic to validate user input):
Bindings.prototype.get = function (name) {
assert.ok(name, `Dynamic variable name, invalid: ${name}`);
if (!this.data.has(name)) {
throw new Error(`Dynamic variable, unbound: '${name}'`);
}
return this.data.get(name);
};
First we check that the name of the variable is indeed _valid_, then we confirm that a binding for that variable actually exists, and finally we return the value bound to that variable. Let's play with it an confirm it's all working fine:
var bb = new Bindings([['a', 1], ['b', 2]])
> bb.get('a')
1
> bb.get('b')
2
> bb.get('c')
Uncaught Error: Dynamic variable, unbound: 'c'
at getBinding (repl:4:11)
> bb.get()
Uncaught AssertionError [ERR_ASSERTION]: Dynamic variable name, invalid: undefined
at getBinding (repl:2:10)
at repl:1:1
at Script.runInThisContext (vm.js:131:18)
at REPLServer.defaultEval (repl.js:472:29)
at bound (domain.js:430:14)
at REPLServer.runBound [as eval] (domain.js:443:12)
at REPLServer.onLine (repl.js:794:10)
at REPLServer.emit (events.js:326:22)
at REPLServer.EventEmitter.emit (domain.js:486:12)
at REPLServer.Interface._onLine (readline.js:337:10) {
generatedMessage: false,
code: 'ERR_ASSERTION',
actual: undefined,
expected: true,
operator: '=='
}
Lastly, to set new bindings, we will go about and create a new `Binding` object and initialize it with the existing bindings and the newly defined ones..._merged_ together:
Bindings.prototype.set = function (kvpairs) {
return new Bindings([...this.data, ...kvpairs]);
};
Again, let's can play with this to confirm that it's all working as expected:
var bb = new Bindings([['a', 1], ['b', 2]])
> bb.set([['c', 3]]).data
Map(3) { 'a' => 1, 'b' => 2, 'c' => 3 }
> bb.set([['a', 3]]).data
Map(3) { 'a' => 3, 'b' => 2 }
> bb.data
Map(2) { 'a' => 1, 'b' => 2 }
All good, great! Before we dive into the details of the implementation of dynamic variables, let's first implement a couple of functions which will come in handy down the line. The first one is to simplify the _syntax_ for creating new bindings; we don't want users to specify new bindings via nested lists (i.e. `[['a', 1], ['b', 2]]`); instead, we would like them to use a _flattened_ list instead (i.e. `['a', 1, 'b', 2]`):
function parseKVPairs(flatBindings) {
assert.ok(
flatBindings.length % 2 === 0,
`Bindings arguments, expected even number of elements, but got: ${flatBindings.length}`
);
const kvpairs = [];
for (var i = 0; i < flatBindings.length; i += 2) {
kvpairs.push(flatBindings.slice(i, i + 2));
}
return kvpairs;
};
Nothing crazy about this: we first confirm that the number of bindings is indeed even, and then wrap every pair of adjacent elements into a nested list. Let's give it a go:
> parseKVPairs([])
[]
> parseKVPairs(['a', 1])
[ [ 'a', 1 ] ]
> parseKVPairs(['a', 1, 'b', 2])
[ [ 'a', 1 ], [ 'b', 2 ] ]
Perfect! The second utility function might look a bit cryptic at first, simply because I am yet to show you what is the problem that it tries to solve, but hopefully soon it will all make more sense. We want to feed it with a list of elements representing a flattened list of bindings followed by a callback function, and we expect it to return a list whose first element is a list of key-value pairs created from the list of flattened bindings, and the second is the given callback function:
function parseDynamicEnvironmentSetArguments(args) {
assert.ok(args, `Function arguments, invalid: ${args}`);
assert.ok(
args.length % 2 === 1,
`Function arguments, expected odd number of elements, but got: ${args.length}`
);
const kvpairs = parseKVPairs(args.slice(0, args.length - 1));
const body = args[args.length - 1];
return [kvpairs, body];
};
> parseDynamicEnvironmentSetArguments([() => {}])
[ [], [Function (anonymous)] ]
> parseDynamicEnvironmentSetArguments(['a', 1, () => {}])
[ [ [ 'a', 1 ] ], [Function (anonymous)] ]
> parseDynamicEnvironmentSetArguments(['a', 1, 'b', 2, () => {}])
[ [ [ 'a', 1 ], [ 'b', 2 ] ], [Function (anonymous)] ]
Alright, it's all working as expected, and with all of this defined and taken care for, it's time we took a look at a possible implementation for a _dynamic environment_, i.e. environment getting a hold of a bunch of dynamic variables.
The biggest challenge in implementing dynamic variables in Node.js is figuring out a way to persist _state_ across changes of asynchronous context: you set `*x*` to a `5`, invoke `setTimeout`, and when the callback is invoked you expect `*x*` to still be bound to `5`. Similarly, if two asynchronous operations happen to re-bind the same dynamic variable, you don't any of them to step on each others toes.
Luckily for us, the Node.js core team has been working on this _problem_ for quite some time now, and you can see the result of their effort in the [`async_hooks`](https://nodejs.org/api/async_hooks.html) module. I am not going to bore you with its implementation details (mostly because I am not familiar with it myself), but for what we are trying to achieve here, all we need to know is that:
- Each piece of running code (_user_ code), can have an ID attached, identifying its asynchronous execution context
- Each piece of running code (_user_ code), can have another ID attached, identifying the asynchronous context that _triggered_, directly or indirectly, the current one (i.e. if you create three nested promises, each callback, when executed, will probably have a different `asyncId` value but same `triggerAsyncId` one)
- There is a low-level API, [`createHooks`](https://nodejs.org/api/async_hooks.html#async_hooks_async_hooks_createhook_callbacks), that can be used to get notified when an asynchronous execution context is created or destroyed; with it, one could think of attaching some _payload_ to the current execution context, and then expose another API for user code to access it
- There is a high-level API, [`AsyncLocalStorage`](https://nodejs.org/api/async_hooks.html#async_hooks_class_asynclocalstorage), that shields the user from all the above complexity, and offers a simple way of running user code with a given piece of _payload_ attached to the current execution context
It goes without saying it that `AsyncLocalStorage` is what we will use to implement our dynamic environment:
- Getting a binding translates to getting a hold of the current execution context's payload (i.e. the bindings), and returning whichever value is currently bound to the given variable name
- Setting a binding translates to creating a new set of bindings, attaching it to the current execution context, and running user code within it -- old bindings will be automatically restored after user code (synchronous or asynchronous) has finished running
Alright, let's get our hands dirty. Let's start by creating a new type for the dynamic environment:
var { AsyncLocalStorage } = require("async_hooks");
var DynamicEnvironment = function (...flatBindings) {
this.ctx = new AsyncLocalStorage();
this.ctx.enterWith(new Bindings(parseKVPairs(flatBindings)));
};
Here, all we do, is creating the asynchronous context object (i.e. an instance of `AsyncLocalStorage`), and then initialize it with some user defined bindings (e.g. `'a', 1, 'b', 2`). Let's give it a go to see what happens when we call the constructor (note: `ctx.getStore()` is how you access the _payload_ of the current asynchronous context):
> new DynamicEnvironment().ctx.getStore()
Bindings { data: Map(0) {} }
> new DynamicEnvironment('a', 1).ctx.getStore()
Bindings { data: Map(1) { 'a' => 1 } }
> new DynamicEnvironment('a', 1, 'b', 2).ctx.getStore()
Bindings { data: Map(1) { 'a' => 1, 'b' => 2 } }
Let's now define a method to get the value of a specific binding (note how `Bindings`, our previously defined type, is doing all the heavy lifting here):
DynamicEnvironment.prototype.get = function (name) {
return this.ctx.getStore().get(name);
};
var env = new DynamicEnvironment('a', 1, 'b', 2)
> env.get('a')
1
> env.get('b')
2
> env.get('c')
Uncaught Error: Dynamic variable, unbound: 'c'
at Bindings.get (repl:4:11)
at DynamicEnvironment.get (repl:2:30)
> env.get()
Uncaught AssertionError [ERR_ASSERTION]: Dynamic variable name, invalid: undefined
at Bindings.get (repl:2:3)
at DynamicEnvironment.get (repl:2:30)
at repl:1:5
at Script.runInThisContext (vm.js:131:18)
at REPLServer.defaultEval (repl.js:472:29)
at bound (domain.js:430:14)
at REPLServer.runBound [as eval] (domain.js:443:12)
at REPLServer.onLine (repl.js:794:10)
at REPLServer.emit (events.js:326:22)
at REPLServer.EventEmitter.emit (domain.js:486:12) {
generatedMessage: false,
code: 'ERR_ASSERTION',
actual: undefined,
expected: true,
operator: '=='
}
The last piece of the puzzle, is a mean of setting a new binding (or bindings), and run some user code within the scope of these new bindings; thanks to `Bindings`, `AsyncLocalStorage`, and the cryptic `parseDynamicEnvironmentSetArguments` I showed you before, this could not have been any easier to implement:
DynamicEnvironment.prototype.set = function (...args) {
const [kvpairs, body] = parseDynamicEnvironmentSetArguments(args);
const bindings = this.ctx.getStore().set(kvpairs);
return this.ctx.run(bindings, body);
};
First we parse function arguments into key-value pairs and the callback inside of which the new bindings will be active; then we create a new `Bindings` object merging new bindings with any existing ones; lastly we tell `AsyncLocalStorage` to _do its magic_ (i.e. attach new bindings to the execution context, and run user code). Let's try this out, and see if it works or not:
async function test(body) {
try {
await body();
console.log("A-OK!!!");
} catch (err) {
console.error(err);
}
}
> test(async () => {
var env = new DynamicEnvironment("x", 5);
var foo = function () {
return env.get("x");
};
var bar = function () {
return env.set("x", 42, () => foo());
};
assert.equal(env.get("x"), 5);
assert.equal(foo(), 5);
assert.equal(await bar(), 42);
assert.equal(env.get("x"), 5);
});
Promise { <pending> }
A-OK!!!
It seems like it _is_ indeed working; but what if we added some asynchronous operations within the scope of the `set` call?
> test(async () => {
var env = new DynamicEnvironment("x", 5);
var foo = function () {
return env.get("x");
};
var bar = function () {
return env.set("x", 42, () => {
return new Promise((resolve) => {
setTimeout(() => resolve(foo()), 2000);
});
});
};
assert.equal(env.get("x"), 5);
assert.equal(foo(), 5);
assert.equal(await bar(), 42);
assert.equal(env.get("x"), 5);
});
Promise { <pending> }
A-OK!!!
Still working, great! What about multiple asynchronous operations at the same time?
> test(async () => {
var env = new DynamicEnvironment("x", 5);
var foo = function () {
return env.get("x");
};
var bar = function () {
return env.set("x", 42, () => {
return Promise.all([
foo(),
env.set(
"x",
52,
() =>
new Promise((resolve) => {
setTimeout(() => resolve(foo()), 1000);
})
),
env.set(
"x",
72,
() =>
new Promise((resolve) => {
setTimeout(() => resolve(foo()), 2000);
})
),
]);
});
};
assert.equal(env.get("x"), 5);
assert.equal(foo(), 5);
assert.deepEqual(await bar(), [42, 52, 72]);
assert.equal(env.get("x"), 5);
});
Promise { <pending> }
A-OK!!!
It works, and by the look of it it appears we were indeed able to implement "dynamic variables" in Node.js.
I added all the above into a new repository, [`dynamic_variables.js`](https://github.com/iamFIREcracker/dynamic_variables.js), so feel free to play with it in your REPLs and do let me know if anything breaks for you.
Also, it's worth remembering that `async_hooks` is still considered _experimental_, so its API might suddenly change with a new release of Node.js; well, that and the fact that the current implementation might still contain some nasty bugs that might cause your dynamic bindings to get lost across switches of execution context. This might not be a big deal if you were just to messing around with this, or if you were just planning to use this to enhance your logging capabilities; but if instead, you were planning anything more _serious_ than that, like selecting the "right" database connection based on the logged-in user's tenant, then I would strongly recommend that you tested as many execution paths as possible and confirmed that no binding got lost in the process. You know, it works...until it doesn't!
PS. For educational purposes, here I am going to show you a different implementation of a dynamic environment, one that does _not_ use `AsyncLocalStorage` to keep track of re-binds (it does that with a stack of _active_ bindings) and because of that, one that most surely is going to fail the expectations in case of multiple nested, asynchronous, re-binds:
var UnsafeDynamicEnvironment = function (...flatBindings) {
this.snapshots = [new Bindings(parseKVPairs(flatBindings))];
};
UnsafeDynamicEnvironment.prototype.get = function (name) {
return this._getActiveSnapshot().get(name);
};
UnsafeDynamicEnvironment.prototype.set = function (...args) {
const [kvpairs, body] = parseDynamicEnvironmentSetArguments(args);
const bindings = this._getActiveSnapshot().set(kvpairs);
return this._runWithBindings(bindings, body);
};
UnsafeDynamicEnvironment.prototype._getActiveSnapshot = function () {
return this.snapshots[this.snapshots.length - 1];
};
UnsafeDynamicEnvironment.prototype._runWithBindings = async function (bindings, body) {
this.snapshots.push(bindings);
try {
return await body();
} finally {
this.snapshots.pop();
}
};
> test(async () => {
var env = new UnsafeDynamicEnvironment("x", 5);
var foo = function () {
return env.get("x");
};
var bar = function () {
return env.set("x", 42, () => foo());
};
assert.equal(env.get("x"), 5);
assert.equal(foo(), 5);
assert.equal(await bar(), 42);
assert.equal(env.get("x"), 5);
});
Promise { <pending> }
A-OK!!!
> test(async () => {
var env = new UnsafeDynamicEnvironment("x", 5);
var foo = function () {
return env.get("x");
};
var bar = function () {
return env.set("x", 42, () => {
return new Promise((resolve) => {
setTimeout(() => resolve(foo()), 2000);
});
});
};
assert.equal(env.get("x"), 5);
assert.equal(foo(), 5);
assert.equal(await bar(), 42);
assert.equal(env.get("x"), 5);
});
Promise { <pending> }
A-OK!!!
> test(async () => {
var env = new UnsafeDynamicEnvironment("x", 5);
var foo = function () {
return env.get("x");
};
var bar = function () {
return env.set("x", 42, () => {
return Promise.all([
foo(),
env.set(
"x",
52,
() =>
new Promise((resolve) => {
setTimeout(() => resolve(foo()), 1000);
})
),
env.set(
"x",
72,
() =>
new Promise((resolve) => {
setTimeout(() => resolve(foo()), 2000);
})
),
]);
});
};
assert.equal(env.get("x"), 5);
assert.equal(foo(), 5);
assert.deepEqual(await bar(), [42, 52, 72]);
assert.equal(env.get("x"), 5);
});
Promise { <pending> }
AssertionError [ERR_ASSERTION]: Expected values to be loosely deep-equal:
[
42,
72,
52
]
should loosely deep-equal
[
42,
52,
72
]
at repl:34:14
at async test (repl:3:9) {
generatedMessage: true,
code: 'ERR_ASSERTION',
actual: [Array],
expected: [Array],
operator: 'deepEqual'
}