diff --git a/iclr2024_conference.pdf b/iclr2024_conference.pdf
index a6b5b9ca416e78b0bdb87f1923eda67e831a83ae..f29cb6d816da00098c56285a75f7aa9989909901 100644
Binary files a/iclr2024_conference.pdf and b/iclr2024_conference.pdf differ
diff --git a/iclr2024_conference.tex b/iclr2024_conference.tex
index 45e10e4f87e7a403a3c1e37390c48023e8a8f1e5..d839abd843bff366ccd2f893e26b42f20fff070a 100644
--- a/iclr2024_conference.tex
+++ b/iclr2024_conference.tex
@@ -5,6 +5,7 @@
 \documentclass{article} % For LaTeX2e
 
 \usepackage{iclr2024_conference,times}
+\usepackage{algorithm2e}
 
 % Optional math commands from https://github.com/goodfeli/dlbook_notation.
 \input{math_commands.tex}
@@ -186,7 +187,40 @@
 	A new sample will cause adaptation of the scholar in a localized region of data space. Variants generated by that sample will, due to similarity, cause adaptation in the same region. Knowledge in the overlap region will therefore be adapted to represent both, while dissimilar regions stay unaffected (see \cref{fig:var} for a visual impression).
 	
     None of these requirements are fulfilled by DNNs, which is why we implement the scholar by a \enquote{flat} GMM layer (generator/feature encoder) followed by a linear classifier (solver). Both are independently trained via SGD according to \cite{gepperth2021gradient}. Extensions to deep convolutional GMMs (DCGMMs) \cite{gepperth2021new} for higher sampling capacity can be incorporated as drop-in replacements for the generator.
-    
+    % ------
+    \begin{figure}[ht]
+    \centering
+    \begin{minipage}{.6\linewidth}
+    \begin{algorithm}[H]
+    	\small
+    	\SetAlgoLined
+    	\caption{Adiabatic Replay}\label{alg:two}
+    	\KwData{AR scholar $\Phi$, per-task real data $\mathcal{D}_{R_t}$}
+    	\For{$t \in \{2, \ldots, T\}$}{ % iterate over tasks
+    		$\mathcal{D}_{G_t} \gets \emptyset$\; % generated samples for the current task
+    		\For{$\mathcal{B}_{N} \sim \mathcal{D}_{R_t}$}{ % iterate over batches of new data from task $t$
+    			\tcp{Query scholar $\Phi_{t-1}$.}
+    			$\sigma_{\mathcal{B}_{N}} \gets \mathrm{Forward}(\Phi_{t-1}, \mathcal{B}_{N})$\;
+    			% forward pass of the previous scholar's GMM on the batch $\mathcal{B}_{N}$; returns the top-layer component responses (logits)
+    			\tcp{Sample from the GMM of $\Phi_{t-1}$.}
+    			$\mathcal{B}_{G} \gets \mathrm{SampleOp}(\Phi_{t-1}, \sigma_{\mathcal{B}_{N}})$\;
+    			% sampling op.: traverse the layers backwards from the GMM density; returns a batch of samples conditioned on the component responses of the forward pass
+    			\tcp{Add generated samples to $\mathcal{D}_{G_t}$.}
+    			$\mathcal{D}_{G_t} \gets \mathcal{D}_{G_t} \cup \mathcal{B}_{G}$\;
+    		}
+    		\For{$\mathcal{B}_{M} \sim (\mathcal{D}_{R_t} \cup \mathcal{D}_{G_t})$}{ % iterate over merged batches
+    			\tcp{Update the best-matching GMM components of $\Phi_{t}$.}
+    			% selective update: only components responding to $\mathcal{B}_{M}$ change noticeably
+    			$\Phi_{t}(\pi_{k}, \vmu_{k}, \mSigma_{k}) \gets \mathrm{SGD}(\mathcal{B}_{M})$\;
+    			\tcp{Update the AR solver (linear classifier).}
+    			% classifier step on the merged batch
+    			$\Theta \gets \mathrm{SGD}(\Phi_{t}, \mathcal{B}_{M})$\;
+    		}
+    	}
+    \end{algorithm}
+    \end{minipage}
+    \end{figure}
+    % ------------
 	% GMMs
 	\par\noindent\textbf{Selective updating}
 	is an intrinsic property of GMMs. They describe data distributions by a set of $K$ \textit{components}, each consisting of a component weight $\pi_k$, a centroid $\vmu_k$ and a covariance matrix $\mSigma_k$. A data sample $\vx$ is assigned a probability $p(\vx) = \sum_k \pi_k \mathcal N(\vx ; \vmu_k, \mSigma_k)$ as a weighted sum of normal distributions $\mathcal N(\vx; \vmu_k, \mSigma_k)$. Training of GMMs is performed as detailed in \cite{gepperth2021gradient} by adapting centroids, covariance matrices and component weights through the SGD-based minimization of the negative log-likelihood $\mathcal L = -\sum_n \log \sum_k \pi_k \mathcal N(\vx_n; \vmu_k, \mSigma_k)$.
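As a worked step, the selective-updating claim can be made concrete from the gradient of this loss; the sketch below is standard mixture-model calculus rather than an excerpt from \cite{gepperth2021gradient}, and the responsibilities $\gamma_{nk}$ are introduced here only for illustration:
\begin{equation*}
\frac{\partial \mathcal L}{\partial \vmu_k} = -\sum_n \gamma_{nk}\, \mSigma_k^{-1} \left(\vx_n - \vmu_k\right),
\qquad
\gamma_{nk} = \frac{\pi_k\, \mathcal N(\vx_n; \vmu_k, \mSigma_k)}{\sum_j \pi_j\, \mathcal N(\vx_n; \vmu_j, \mSigma_j)}.
\end{equation*}
Components with negligible responsibility for a sample therefore receive a vanishing centroid update (the covariance gradient carries the same $\gamma_{nk}$ factor), which is exactly the localized adaptation that adiabatic replay relies on.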