diff --git a/ex5.ipynb b/ex5.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..da82da42f62b48e6feac680b98695f46d9412df4
--- /dev/null
+++ b/ex5.ipynb
@@ -0,0 +1,326 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## IPython notebook for exercise sheet 5"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Import all Python libraries we will need"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "%matplotlib inline\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from scipy import sparse\n",
+    "import scipy.sparse.linalg\n",
+    "from IPython.display import set_matplotlib_formats\n",
+    "set_matplotlib_formats('svg')\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Gradient descent with Armijo rule\n",
+    "\n",
+    "The Armijo rule accepts a step size $\tau$ if $E(x+\tau d)\le E(x)+\sigma\tau\langle DE(x),d\rangle$, i.e. if the ratio of the secant slope $(E(x+\tau d)-E(x))/\tau$ to the tangent slope $\langle DE(x),d\rangle$ is at least $\sigma$; this is exactly the condition checked in `ArmijoCondition` below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "def GradientDescent(x0, E, DE, beta, sigma, maxIter, stopEpsilon, inverseSPMatrix=None):\n",
+    "\n",
+    "    def ArmijoRule(x, initialTau, beta, sigma, energy, descentDir, gradientAtX, E):\n",
+    "\n",
+    "        # Directional derivative <DE(x), d>; negative for a descent direction.\n",
+    "        tangentSlope = np.dot(descentDir.ravel(), gradientAtX.ravel())\n",
+    "\n",
+    "        def ArmijoCondition(tau):\n",
+    "            secantSlope = (E(x + tau * descentDir) - energy) / tau\n",
+    "            cond = (secantSlope / tangentSlope) >= sigma\n",
+    "            return cond\n",
+    "\n",
+    "        # Clamp the initial step size to [tauMin, tauMax].\n",
+    "        tauMin = 1e-6\n",
+    "        tauMax = 1e5\n",
+    "        tau = max(min(initialTau, tauMax), tauMin)\n",
+    "\n",
+    "        condition = ArmijoCondition(tau)\n",
+    "        if condition:\n",
+    "            # Condition holds: enlarge tau as long as it keeps holding.\n",
+    "            while (condition and (tau < tauMax)):\n",
+    "                tau = tau / beta\n",
+    "                condition = ArmijoCondition(tau)\n",
+    "\n",
+    "            # Step back to the last accepted tau.\n",
+    "            tau = beta * tau\n",
+    "        else:\n",
+    "            # Condition violated: shrink tau until it holds.\n",
+    "            while ((not condition) and (tau > tauMin)):\n",
+    "                tau = beta * tau\n",
+    "                condition = ArmijoCondition(tau)\n",
+    "\n",
+    "        return tau\n",
+    "\n",
+    "    x = x0\n",
+    "    energyNew = E(x)\n",
+    "    tau = 1\n",
+    "\n",
+    "    print('Initial energy {:.6f}'.format(energyNew))\n",
+    "    for i in range(maxIter):\n",
+    "        energy = energyNew\n",
+    "        gradient = DE(x)\n",
+    "        descentDir = -gradient\n",
+    "        # Optionally precondition the descent direction, e.g. with the inverse of the system matrix.\n",
+    "        if (inverseSPMatrix is not None):\n",
+    "            descentDir = inverseSPMatrix * descentDir\n",
+    "\n",
+    "        tau = ArmijoRule(x, tau, beta, sigma, energy, descentDir, gradient, E)\n",
+    "        x = x + tau * descentDir\n",
+    "        energyNew = E(x)\n",
+    "\n",
+    "        print('{} steps, tau: {:.6f}, energy {:.6f}'.format(i + 1, tau, energyNew))\n",
+    "\n",
+    "        # Stop once the energy decrease falls below the threshold.\n",
+    "        if ((energy - energyNew) < stopEpsilon):\n",
+    "            break\n",
+    "\n",
+    "    return x\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "$$J_a[x]=\sum_{i=1}^n(x_i-f_i)^2+\lambda\sum_{i=1}^{n-1}\frac1{h^2}(x_{i+1}-x_i)^2$$"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "def Ja(x, f, h, lambda_):\n",
+    "    # Data term plus quadratic smoothness term.\n",
+    "    energy = np.sum(np.square(x - f)) + lambda_ / h**2 * np.sum(np.square(x[1:] - x[0:-1]))\n",
+    "    return energy\n",
+    "\n",
+    "\n",
+    "def DJa(x, f, h, lambda_):\n",
+    "    grad = 2 * (x - f)\n",
+    "    # Each difference (x_{i+1} - x_i) contributes to the gradient at both index i and i+1.\n",
+    "    grad[0:-1] = grad[0:-1] - 2 * lambda_ / h**2 * (x[1:] - x[0:-1])\n",
+    "    grad[1:] = grad[1:] - 2 * lambda_ / h**2 * (x[0:-1] - x[1:])\n",
+    "    return grad\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "$$J_b[x]=\sum_{i=1}^n(x_i-f_i)^2+\lambda\sum_{i=1}^{n-1}\frac1{h}\left\vert x_{i+1}-x_i\right\vert_\epsilon.$$"
+   ]
+  },
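+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here $\left\vert t\right\vert_\epsilon:=\sqrt{t^2+\epsilon^2}$ denotes the regularized absolute value, which keeps $J_b$ differentiable; its derivative $t/\sqrt{t^2+\epsilon^2}$ is exactly the expression used in `DJb` below."
+   ]
+  },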
"source": [ + "def Jb(x, f, h, lambda_, epsilon):\n", + " energy = np.sum(np.square(x - f)) + lambda_ / h * np.sum(np.sqrt(np.square(x[1:] - x[0:-1]) + epsilon**2))\n", + " return energy\n", + "\n", + "\n", + "def DJb(x, f, h, lambda_, epsilon):\n", + " grad = 2 * (x - f)\n", + " grad[0:-1] = grad[0:-1] - lambda_ / h * np.divide(x[1:] - x[0:-1], np.sqrt(np.square(x[1:] - x[0:-1]) + epsilon**2))\n", + " grad[1:] = grad[1:] - lambda_ / h * np.divide(x[0:-1] - x[1:], np.sqrt(np.square(x[0:-1] - x[1:]) + epsilon**2))\n", + " return grad\n" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a noisy input signal" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "N = 200\n", + "coords = np.linspace(0, 2*np.pi, N)\n", + "f0 = np.sin(coords)\n", + "np.random.seed(42)\n", + "f = f0 + 0.2 * (np.random.rand(coords.size) - 0.5)\n", + "x0 = f\n", + "plt.plot(coords, f, label='f')\n", + "plt.plot(coords, f0, label='f0')\n", + "plt.legend()\n", + "plt.show()" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set parameters" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "lambda_ = 0.005\n", + "sigma = 0.5\n", + "beta = 0.5\n", + "h = 1/(N - 1)\n", + "epsilon = .001" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Minimize $J_a$ using gradient descent." + ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "def Ea(x): return Ja(x, f, h, lambda_)\n", + "def DEa(x): return DJa(x, f, h, lambda_)\n", + "xa = GradientDescent(x0, Ea, DEa, beta, sigma, 1000, 0.001)\n", + "plt.plot(coords, xa, label='xa')\n", + "plt.legend()\n", + "plt.show()" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Minimize $J_b$ using gradient descent." + ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "def Eb(x): return Jb(x, f, h, lambda_, epsilon)\n", + "def DEb(x): return DJb(x, f, h, lambda_, epsilon)\n", + "xb = GradientDescent(x0, Eb, DEb, beta, sigma, 1000, 0.001)\n", + "plt.plot(coords, xb, label='xb')\n", + "plt.legend()\n", + "plt.show()" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Minimize $J_a$ the system of linear equations" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "L = 1/h * sparse.csr_matrix(sparse.diags([-1, 1], [0, 1], shape=(N, N)))\n", + "L[N-1, N-1] = 0\n", + "identityMatrix = sparse.eye(N)\n", + "A = identityMatrix + lambda_*L.transpose()*L\n", + "# For such a low number of unknowns, computing the inverse is fine.\n", + "invA = scipy.sparse.linalg.inv(sparse.csc_matrix(A))\n", + "xaLE = invA*x0\n", + "plt.plot(coords, xaLE, label='xaLE')\n", + "plt.legend()\n", + "plt.show()" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Minimize $J_a$ using gradient flow." 
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Forward difference matrix; build in LIL format so that zeroing the last row is cheap.\n",
+    "L = sparse.lil_matrix(sparse.diags([-1, 1], [0, 1], shape=(N, N)))\n",
+    "L[N-1, N-1] = 0  # the last node has no forward neighbour\n",
+    "L = 1/h * sparse.csr_matrix(L)\n",
+    "identityMatrix = sparse.eye(N)\n",
+    "A = identityMatrix + lambda_ * L.transpose() * L\n",
+    "# For such a low number of unknowns, computing the inverse is fine.\n",
+    "invA = scipy.sparse.linalg.inv(sparse.csc_matrix(A))\n",
+    "xaLE = invA * f\n",
+    "plt.plot(coords, xaLE, label='xaLE')\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Minimize $J_a$ using gradient flow."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Precondition the descent direction with invA; use a small sigma so that the Armijo rule\n",
+    "# accepts the nearly optimal step size in this case.\n",
+    "xaG = GradientDescent(x0, Ea, DEa, beta, 0.1, 1000, 0.001, invA)\n",
+    "plt.plot(coords, xaG, label='xaG')\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Plot all results in one figure."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "plt.plot(coords, xa, label='xa')\n",
+    "plt.plot(coords, xaLE, label='xaLE')\n",
+    "plt.plot(coords, xaG, label='xaG')\n",
+    "plt.plot(coords, xb, label='xb')\n",
+    "plt.plot(coords, f, label='f')\n",
+    "plt.plot(coords, f0, label='f0')\n",
+    "plt.legend()\n",
+    "plt.show()\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  }
+ ],
+ "metadata": {
+  "anaconda-cloud": {},
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
\ No newline at end of file