The Dyna-Q Algorithm
- model based temporal difference algorithm which uses real and simulated (planned) experience
- the model implements a (state, action) to (new state, reward) mapping
- real experience is sampled from the environment
- simulated experience is generated using already collected real experience
- both real and simulated experience are used to optimize the value function and policy of the agent
Dyna-Q Example Implementation
Please see my Svelte TD Learning Repository for the complete code and the interactive Gridworld Examples for more information.
let envModel =
Array.from({ length: numX }, () =>
Array.from({ length: numY }, () =>
Array.from({ length: numA }, () => null
)
)
);
let seenStateActions = [];
const DynaQModelUpdate = (state, a, r, stateNext) => {
let x = state[0];
let y = state[1];
let seen = false;
for (let n = 0; n < seenStateActions.length; n++) {
if (
seenStateActions[n][0] == x &&
seenStateActions[n][1] == y &&
seenStateActions[n][2] == a
) {
seen = true;
break;
}
}
if (!seen) {
seenStateActions.push([state, a]);
}
envModel[x][y][a] = [stateNext, r];
};
const DynaQGetModelStateAction = () => {
let i = mazeComp.getRandomInt(seenStateActions.length);
return seenStateActions[i];
};
const runDynaQEpisodeStep = (state) => {
let stateNext;
let a, r;
if (mazeComp.isTerminal(state)) {
runEpisode(); // run next episode (calls runDynaQEpisode)
} else {
stepTimer = setTimeout(() => {
a = mazeComp.getEpsilonGreedyAction(state, epsilon);
[stateNext, r] = mazeComp.step(state, a);
QLearningQTableUpdate(state, a, r, stateNext);
DynaQModelUpdate(state, a, r, stateNext);
state = [...stateNext];
// planning steps (model based Q table update steps)
for (let n = 0; n < planningSteps; n++) {
let mState;
let ma, mr;
let mStateNext;
[mState, ma] = DynaQGetModelStateAction();
[mStateNext, mr] = envModel[mState[0]][mState[1]][ma];
QLearningQTableUpdate(mState, ma, mr, mStateNext);
}
runDynaQEpisodeStep(state);
}, 0);
}
};
const runDynaQEpisode = () => {
let state = mazeComp.getRandomStartState();
runDynaQEpisodeStep(state);
};