% Select Git revision
% tables_outsourced.tex 14.82 KiB
%%%%%%%%%%%% MNIST %%%%%%%%%%%
\section{Full results: MNIST}
%
% Full per-sub-task results on MNIST.
% Layout: a single "base" row (the DGR baseline value is shared by both DGR
% columns via \multicolumn), followed by one group of rows per CL problem.
% Each group starts with the result on all classes (T_ALL) and then lists one
% row per sub-task T_1..T_k.
% The problem label is emitted with \multirow{2} on a middle row of its group
% (presumably to place it near the group's vertical centre -- TODO confirm).
\begin{table}[h!]
  %\setlength{\arrayrulewidth}{0.1mm}
  %\renewcommand{\arraystretch}{1.}
  %\setlength{\tabcolsep}{6pt}
  \small
  \centering
  \begin{tabular}{c c | c | c | c}
    \hline
    \multicolumn{5}{c}{\textbf{MNIST}} \\
    \hline
    \multicolumn{2}{c}{} & \multicolumn{3}{|c}{\textbf{test acc./std. (\%) for $D_i$ after $T_i$}} \\
    \textbf{task} & \textbf{test on} & \textbf{AR} & \textbf{DGR(c.)} & \textbf{DGR(b.)} \\
    \hline\hline
    \textit{base} & $T_{ALL}$ & $89.8$ $\pm 0.6$ & \multicolumn{2}{c}{$97.7$ $\pm 0.3$} \\
    \hline\hline
    % 7-1A
    & $T_{ALL}$ & 78.0 $\pm0.4$ & 74.9 $\pm2.5$ & 90.3 $\pm0.9$ \\
    & $T_1$ & 79.9 $\pm1.7$ & 67.1 $\pm2.9$ & 90.7 $\pm1.7$ \\
    \multirow{2}{*}{\textbf{D7-$1^3$a}} & $T_2$ & 64.7 $\pm8.1$ & 86.3 $\pm3.4$ & 82.3 $\pm2.7$ \\
    & $T_3$ & 67.8 $\pm0.8$ & 93.9 $\pm2.1$ & 89.2 $\pm3.2$ \\
    & $T_4$ & 80.2 $\pm5.9$ & 98.2 $\pm0.6$ & 97.3 $\pm0.4$ \\
    \hline
    % 7-1B
    & $T_{ALL}$ & 83.1 $\pm0.4$ & 82.5 $\pm0.7$ & 94.9 $\pm0.3$ \\
    & $T_1$ & 82.8 $\pm1.2$ & 75.2 $\pm0.9$ & 93.8 $\pm0.6$ \\
    \multirow{2}{*}{\textbf{D7-$1^3$b}} & $T_2$ & 84.6 $\pm2.9$ & 97.9 $\pm0.7$ & 97.3 $\pm0.6$ \\
    & $T_3$ & 94.2 $\pm4.1$ & 99.1 $\pm0.2$ & 98.1 $\pm0.4$ \\
    & $T_4$ & 68.6 $\pm4.4$ & 98.2 $\pm0.7$ & 96.6 $\pm1.3$ \\
    \hline\hline
    % 6-1A
    & $T_{ALL}$ & 76.1 $\pm1.7$ & 66.2 $\pm3.5$ & 88.3 $\pm1.2$ \\
    & $T_1$ & 79.7 $\pm2.7$ & 49.4 $\pm6.2$ & 87.1 $\pm1.4$ \\
    & $T_2$ & 83.5 $\pm4.1$ & 90.9 $\pm2.6$ & 91.4 $\pm2.9$ \\
    \multirow{2}{*}{\textbf{D6-$1^4$a}} & $T_3$ & 62.8 $\pm5.9$ & 82.7 $\pm3.5$ & 82.6 $\pm1.6$ \\
    & $T_4$ & 60.7 $\pm6.1$ & 93.8 $\pm0.6$ & 88.9 $\pm1.9$ \\
    & $T_5$ & 78.5 $\pm1.7$ & 98.7 $\pm0.4$ & 97.5 $\pm0.4$ \\
    \hline
    % 6-1B
    & $T_{ALL}$ & 79.9 $\pm0.8$ & 70.2 $\pm0.3$ & 90.7 $\pm0.6$ \\
    & $T_1$ & 80.3 $\pm1.1$ & 51.2 $\pm0.8$ & 86.7 $\pm0.9$ \\
    & $T_2$ & 81.9 $\pm1.6$ & 95.6 $\pm1.2$ & 95.3 $\pm1.9$ \\
    \multirow{2}{*}{\textbf{D6-$1^4$b}} & $T_3$ & 96.6 $\pm2.1$ & 98.8 $\pm0.2$ & 97.9 $\pm0.7$ \\
    & $T_4$ & 71.9 $\pm8.2$ & 93.9 $\pm2.9$ & 92.9 $\pm1.0$ \\
    & $T_5$ & 72.5 $\pm2.9$ & 99.1 $\pm0.4$ & 98.7 $\pm0.4$ \\
    \hline\hline
    % 5-1A
    % Sub-task rows are numbered T_1..T_6: the problem has six sub-tasks (cf. the
    % caption's i=[4,5,6]); they were previously mislabelled T_2..T_7.
    & $T_{ALL}$ & 73.9 $\pm0.6$ & 62.5 $\pm2.2$ & 83.3 $\pm3.4$ \\
    & $T_1$ & 75.2 $\pm3.2$ & 40.3 $\pm3.4$ & 77.8 $\pm6.1$ \\
    & $T_2$ & 64.2 $\pm4.5$ & 60.2 $\pm2.0$ & 79.3 $\pm3.9$ \\
    \multirow{2}{*}{\textbf{D5-$1^5$a}} & $T_3$ & 84.6 $\pm4.8$ & 90.6 $\pm1.6$ & 92.7 $\pm1.8$ \\
    & $T_4$ & 64.8 $\pm3.2$ & 84.9 $\pm2.2$ & 83.7 $\pm1.7$ \\
    & $T_5$ & 59.9 $\pm4.4$ & 93.9 $\pm1.3$ & 91.5 $\pm1.1$ \\
    & $T_6$ & 81.4 $\pm1.5$ & 98.6 $\pm0.2$ & 97.9 $\pm0.5$ \\
    \hline
    % 5-1B (same T_1..T_6 numbering as 5-1A)
    & $T_{ALL}$ & 75.5 $\pm0.4$ & 70.0 $\pm3.1$ & 89.7 $\pm0.5$ \\
    & $T_1$ & 72.1 $\pm3.2$ & 42.9 $\pm5.3$ & 83.5 $\pm1.6$ \\
    & $T_2$ & 82.9 $\pm7.5$ & 88.9 $\pm5.9$ & 93.6 $\pm0.9$ \\
    \multirow{2}{*}{\textbf{D5-$1^5$b}} & $T_3$ & 93.2 $\pm4.3$ & 98.1 $\pm0.7$ & 96.9 $\pm0.5$ \\
    & $T_4$ & 69.4 $\pm10.6$ & 93.1 $\pm0.3$ & 92.6 $\pm1.6$ \\
    & $T_5$ & 69.3 $\pm2.4$ & 97.9 $\pm0.4$ & 96.7 $\pm0.7$ \\
    & $T_6$ & 78.1 $\pm1.9$ & 99.5 $\pm0.1$ & 97.5 $\pm0.8$ \\
  \end{tabular}
  % The \label lives inside the \caption argument so that it picks up the table number.
  \caption{MNIST: Shows experimental results for AR and DGR (balanced and constant-time training scenario).
    Lists the baseline performance on all dataset classes $T_{ALL}$ for comparison, as well as the evaluation metrics for $T_i$, and $T_{ALL}$ after completion of all sub-tasks ($i=[4,5,6]$).
    Details about the CL-problems can be found in \cref{tab:slts}.
    All metrics are calculated after completion of each sub-task and represent the resulting values of the trained model tested on the evaluation data corresponding to the sub-task; metrics are averaged over all experimental runs.
    \label{tab:expres_long_mnist}
  }
\end{table}
%
\newpage
%%%%%%%%%%%% FMNIST %%%%%%%%%%%
\section{Full results: FashionMNIST}
%
% Full per-sub-task results on FashionMNIST.
% Layout: a single "base" row (the DGR baseline value is shared by both DGR
% columns via \multicolumn), followed by one group of rows per CL problem.
% Each group starts with the result on all classes (T_ALL) and then lists one
% row per sub-task T_1..T_k.
% The problem label is emitted with \multirow{2} on a middle row of its group
% (presumably to place it near the group's vertical centre -- TODO confirm).
\begin{table}[h!]
  %\setlength{\arrayrulewidth}{0.1mm}
  %\renewcommand{\arraystretch}{1.}
  %\setlength{\tabcolsep}{6pt}
  \small
  \centering
  \begin{tabular}{c c | c | c | c}
    \hline
    \multicolumn{5}{c}{\textbf{FashionMNIST}} \\
    \hline
    \multicolumn{2}{c}{} & \multicolumn{3}{|c}{\textbf{test acc./std. (\%) for $D_i$ after $T_i$}} \\
    \textbf{task} & \textbf{test on} & \textbf{AR} & \textbf{DGR(c.)} & \textbf{DGR(b.)} \\
    \hline\hline
    \textit{base} & $T_{ALL}$ & $74.1$ $\pm 0.8$ & \multicolumn{2}{c}{$88.6$ $\pm 0.2$} \\
    \hline\hline
    % 7-1A
    & $T_{ALL}$ & 70.3 $\pm1.2$ & 68.9 $\pm1.9$ & 79.9 $\pm1.2$ \\
    & $T_1$ & 64.0 $\pm2.4$ & 59.9 $\pm2.9$ & 76.7 $\pm1.4$ \\
    \multirow{2}{*}{\textbf{D7-$1^3$a}} & $T_2$ & 73.4 $\pm3.8$ & 72.9 $\pm5.4$ & 69.9 $\pm4.2$ \\
    & $T_3$ & 88.7 $\pm1.2$ & 97.6 $\pm1.0$ & 94.7 $\pm0.4$ \\
    & $T_4$ & 92.1 $\pm0.1$ & 99.2 $\pm0.5$ & 98.6 $\pm0.5$ \\
    \hline
    % 7-1B
    & $T_{ALL}$ & 69.3 $\pm0.3$ & 65.8 $\pm0.7$ & 75.1 $\pm2.8$ \\
    & $T_1$ & 66.0 $\pm2.4$ & 56.0 $\pm1.7$ & 73.1 $\pm4.4$ \\
    \multirow{2}{*}{\textbf{D7-$1^3$b}} & $T_2$ & 73.1 $\pm1.2$ & 70.4 $\pm5.9$ & 48.6 $\pm1.9$ \\
    & $T_3$ & 82.0 $\pm0.4$ & 97.1 $\pm0.3$ & 96.1 $\pm0.3$ \\
    & $T_4$ & 76.2 $\pm5.5$ & 97.9 $\pm0.6$ & 94.9 $\pm4.7$ \\
    \hline\hline
    % 6-1A
    & $T_{ALL}$ & 67.6 $\pm0.7$ & 65.7 $\pm1.7$ & 76.9 $\pm1.2$ \\
    & $T_1$ & 64.5 $\pm2.8$ & 61.6 $\pm5.1$ & 82.3 $\pm0.4$ \\
    & $T_2$ & 52.8 $\pm7.3$ & 23.1 $\pm20.8$ & 6.2 $\pm8.7$ \\
    \multirow{2}{*}{\textbf{D6-$1^4$a}} & $T_3$ & 60.2 $\pm4.4$ & 67.7 $\pm11.5$ & 75.6 $\pm4.2$ \\
    & $T_4$ & 79.3 $\pm4.1$ & 96.9 $\pm0.7$ & 94.1 $\pm0.5$ \\
    & $T_5$ & 91.9 $\pm3.6$ & 99.6 $\pm0.3$ & 98.8 $\pm0.6$ \\
    \hline
    % 6-1B
    & $T_{ALL}$ & 69.9 $\pm0.7$ & 59.2 $\pm1.0$ & 75.6 $\pm1.3$ \\
    & $T_1$ & 66.1 $\pm4.1$ & 42.4 $\pm2.6$ & 73.5 $\pm1.4$ \\
    & $T_2$ & 53.5 $\pm2.9$ & 57.9 $\pm8.5$ & 48.9 $\pm13.5$ \\
    \multirow{2}{*}{\textbf{D6-$1^4$b}} & $T_3$ & 84.2 $\pm3.6$ & 92.8 $\pm1.9$ & 93.6 $\pm1.0$ \\
    & $T_4$ & 78.8 $\pm12.4$ & 88.9 $\pm1.0$ & 75.9 $\pm4.9$ \\
    & $T_5$ & 80.9 $\pm1.7$ & 98.0 $\pm1.2$ & 97.2 $\pm0.3$ \\
    \hline\hline
    % 5-1A
    & $T_{ALL}$ & 68.6 $\pm0.7$ & 63.5 $\pm0.8$ & 77.3 $\pm1.4$ \\
    & $T_1$ & 72.3 $\pm2.8$ & 53.1 $\pm4.3$ & 81.7 $\pm2.8$ \\
    & $T_2$ & 41.9 $\pm6.9$ & 44.3 $\pm4.3$ & 74.3 $\pm4.5$ \\
    \multirow{2}{*}{\textbf{D5-$1^5$a}} & $T_3$ & 44.1 $\pm8.6$ & 52.7 $\pm17.4$ & 17.3 $\pm13.3$ \\
    & $T_4$ & 60.8 $\pm3.9$ & 76.0 $\pm7.3$ & 77.8 $\pm3.1$ \\
    & $T_5$ & 84.3 $\pm3.5$ & 97.1 $\pm0.4$ & 96.1 $\pm0.6$ \\
    & $T_6$ & 95.6 $\pm1.5$ & 99.1 $\pm0.7$ & 99.0 $\pm0.3$ \\
    \hline
    % 5-1B
    & $T_{ALL}$ & 66.9 $\pm0.2$ & 55.7 $\pm5.5$ & 72.0 $\pm0.9$ \\
    & $T_1$ & 73.4 $\pm1.6$ & 43.7 $\pm7.9$ & 73.5 $\pm2.4$ \\
    & $T_2$ & 47.9 $\pm4.2$ & 43.4 $\pm11.8$ & 59.4 $\pm14.2$ \\
    \multirow{2}{*}{\textbf{D5-$1^5$b}} & $T_3$ & 80.3 $\pm3.9$ & 93.6 $\pm1.0$ & 94.6 $\pm1.2$ \\
    & $T_4$ & 19.3 $\pm2.9$ & 30.2 $\pm2.4$ & 31.4 $\pm12.6$ \\
    & $T_5$ & 74.2 $\pm5.2$ & 72.6 $\pm1.8$ & 71.6 $\pm2.3$ \\
    & $T_6$ & 84.5 $\pm2.1$ & 98.2 $\pm0.6$ & 95.5 $\pm4.6$ \\
  \end{tabular}
  % The \label lives inside the \caption argument so that it picks up the table number.
  \caption{FashionMNIST: Shows experimental results for AR and DGR (balanced and constant-time training scenario).
    Lists the baseline performance on all dataset classes $T_{ALL}$ for comparison, as well as the evaluation metrics for $T_i$, and $T_{ALL}$ after completion of all sub-tasks ($i=[4,5,6]$).
    Details about the CL-problems can be found in \cref{tab:slts}.
    All metrics are calculated after completion of each sub-task and represent the resulting values of the trained model tested on the evaluation data corresponding to the sub-task; metrics are averaged over all experimental runs.
    \label{tab:expres_long_fmnist}
  }
\end{table}
%
\newpage
%%%%%%%%%%%% EMNIST %%%%%%%%%%%
\section{Full results: EMNIST}
%
% Full per-sub-task results on EMNIST.
% Layout: a single "base" row (the DGR baseline value is shared by both DGR
% columns via \multicolumn), followed by one group of rows per CL problem.
% Each group starts with the result on all classes (T_ALL) and then lists one
% row per sub-task T_1..T_6.
% The problem label is emitted with \multirow{2} on a middle row of its group
% (presumably to place it near the group's vertical centre -- TODO confirm).
\begin{table}[h!]
  %\setlength{\arrayrulewidth}{0.1mm}
  %\renewcommand{\arraystretch}{1.}
  %\setlength{\tabcolsep}{6pt}
  \small
  \centering
  \begin{tabular}{c c | c | c | c}
    \hline
    \multicolumn{5}{c}{\textbf{EMNIST}} \\
    \hline
    \multicolumn{2}{c}{} & \multicolumn{3}{|c}{\textbf{test acc./std. (\%) for $D_i$ after $T_i$}} \\
    \textbf{task} & \textbf{test on} & \textbf{AR} & \textbf{DGR(c.)} & \textbf{DGR(b.)} \\
    \hline\hline
    \textit{base} & $T_{ALL}$ & $44.1$ $\pm 3.3$ & \multicolumn{2}{c}{$84.6$ $\pm 0.1$} \\
    % Double rule after the baseline row, matching the MNIST and FashionMNIST tables.
    \hline\hline
    % 20-1A
    & $T_{ALL}$ & 32.6 $\pm1.1$ & 12.7 $\pm1.0$ & 50.6 $\pm20.9$ \\
    & $T_1$ & 61.9 $\pm3.6$ & 9.5 $\pm2.3$ & 70.1 $\pm5.9$ \\
    & $T_2$ & 58.4 $\pm3.5$ & 57.3 $\pm7.1$ & 38.1 $\pm29.3$ \\
    \multirow{2}{*}{\textbf{D20-$1^5$a}} & $T_3$ & 25.3 $\pm9.5$ & 65.3 $\pm6.9$ & 36.9 $\pm45.5$ \\
    & $T_4$ & 73.6 $\pm1.7$ & 91.3 $\pm2.2$ & 87.5 $\pm1.6$ \\
    & $T_5$ & 47.1 $\pm6.2$ & 94.1 $\pm1.0$ & 92.7 $\pm5.3$ \\
    & $T_6$ & 37.8 $\pm2.9$ & 99.2 $\pm0.8$ & 94.9 $\pm0.9$ \\
    \hline
    % 20-1B
    & $T_{ALL}$ & 31.2 $\pm0.5$ & 14.6 $\pm1.3$ & 36.3 $\pm1.3$ \\
    & $T_1$ & 62.7 $\pm4.5$ & 13.9 $\pm1.9$ & 69.8 $\pm4.1$ \\
    & $T_2$ & 33.7 $\pm7.5$ & 46.6 $\pm26.1$ & 15.5 $\pm26.9$ \\
    \multirow{2}{*}{\textbf{D20-$1^5$b}} & $T_3$ & 31.5 $\pm0.8$ & 74.8 $\pm13.6$ & 24.8 $\pm33.9$ \\
    & $T_4$ & 63.5 $\pm3.1$ & 92.2 $\pm2.1$ & 83.3 $\pm5.6$ \\
    & $T_5$ & 59.5 $\pm6.2$ & 97.0 $\pm0.7$ & 92.9 $\pm1.3$ \\
    & $T_6$ & 39.8 $\pm8.5$ & 99.4 $\pm0.3$ & 95.2 $\pm1.7$ \\
  \end{tabular}
  % The \label lives inside the \caption argument so that it picks up the table number.
  % NOTE(review): the label key uses upper-case "EMNIST"; it is kept unchanged
  % because references to it may exist outside this file.
  \caption{EMNIST: Shows experimental results for AR and DGR (balanced and constant-time training scenario).
    Lists the baseline performance on all dataset classes $T_{ALL}$ for comparison, as well as the evaluation metrics for $T_i$, and $T_{ALL}$ after completion of all sub-tasks ($i=6$).
    Details about the CL-problems can be found in \cref{tab:slts}.
    All metrics are calculated after completion of each sub-task and represent the resulting values of the trained model tested on the evaluation data corresponding to the sub-task; metrics are averaged over all experimental runs.
    \label{tab:expres_long_EMNIST}
  }
\end{table}
%