\@writefile{lof}{\contentsline{figure}{\numberline{5}{\ignorespaces ARL during a sleep phase. Initially, the learner $\mathcal S$ (see text) is copied to a frozen long-term memory instance. The long-term memory selectively replays observations and pseudo-targets during sleep phase learning, while the short-term memory (both generator and readout layer) is updated on high-TD-error samples collected in previous wake phases. }}{6}{figure.5}\protected@file@percent }
\newlabel{fig:sleep}{{5}{6}{ARL during a sleep phase. Initially, the learner $\mathcal S$ (see text) is copied to a frozen long-term memory instance. The long-term memory selectively replays observations and pseudo-targets during sleep phase learning, while the short-term memory (both generator and readout layer) is updated on high-TD-error samples collected in previous wake phases}{figure.5}{}}
\@writefile{lot}{\contentsline{table}{\numberline{1}{\ignorespaces Tabulated values of $P_{nm}$, averaged over three identical runs, for the DQN baselines as a function of replay buffer size. Shown is the performance measured on task $m<n$ after training on task $n$. To follow the performance evolution of a given task (rows in boxes) over the course of a given experiment (boxes), move along a row from left to right.}}{7}{table.1}\protected@file@percent }
\newlabel{tab:results}{{1}{7}{Tabulated values of $P_{nm}$, averaged over three identical runs, for the DQN baselines as a function of replay buffer size. Shown is the performance measured on task $m<n$ after training on task $n$. To follow the performance evolution of a given task (rows in boxes) over the course of a given experiment (boxes), move along a row from left to right}{table.1}{}}
\@writefile{lot}{\contentsline{table}{\numberline{2}{\ignorespaces High-level performance measures for all benchmarks and baselines.}}{7}{table.2}\protected@file@percent }
\newlabel{tab:hl-perf}{{2}{7}{High-level performance measures for all benchmarks and baselines}{table.2}{}}
\@writefile{lof}{\contentsline{figure}{\numberline{6}{\ignorespaces Left to right: GMM centroids visualized at the end of the babbling phase, task 1, task 2 and task 3. Task 4 is omitted since very few new centroids are learned. At each task, we observe the gradual embedding of new knowledge (blocks of new colors) into existing centroids. Best viewed in color and under magnification. }}{8}{figure.6}\protected@file@percent }
\newlabel{fig:protos}{{6}{8}{Left to right: GMM centroids visualized at the end of the babbling phase, task 1, task 2 and task 3. Task 4 is omitted since very few new centroids are learned. At each task, we observe the gradual embedding of new knowledge (blocks of new colors) into existing centroids. Best viewed in color and under magnification}{figure.6}{}}
\@writefile{lof}{\contentsline{figure}{\numberline{7}{\ignorespaces From left to right: the 60 highest TD-error samples collected during the first wake phase of PO tasks 1-4. }}{9}{figure.7}\protected@file@percent }
\newlabel{fig:samples}{{7}{9}{From left to right: the 60 highest TD-error samples collected during the first wake phase of PO tasks 1-4}{figure.7}{}}
\@writefile{lot}{\contentsline{table}{\numberline{3}{\ignorespaces ARL ablation study results. Tabulated values of $P_{nm}$ for ARL as a function of the indicated parameters (see text). Shown is the performance measured on task $m<n$ after training on task $n$. To follow the performance evolution of a given task (rows in boxes) over the course of a given experiment (boxes), move along a row from left to right.}}{9}{table.3}\protected@file@percent }
\newlabel{tab:arl-results}{{3}{9}{ARL ablation study results. Tabulated values of $P_{nm}$ for ARL as a function of the indicated parameters (see text). Shown is the performance measured on task $m<n$ after training on task $n$. To follow the performance evolution of a given task (rows in boxes) over the course of a given experiment (boxes), move along a row from left to right}{table.3}{}}
\@writefile{toc}{\contentsline{section}{\numberline{3}Conclusions, generality of results}{10}{section.3}\protected@file@percent }
\bibdata{bibs/merged.bib}
\bibcite{Atkinson2018a}{{1}{2018{a}}{{Atkinson et~al.}}{{Atkinson, McCane, Szymanski, and Robins}}}