{ "cells": [
 { "cell_type": "code", "execution_count": 277, "metadata": {}, "outputs": [], "source": [
  "# imports\n",
  "import numpy as np\n",
  "import scipy as sc\n",
  "import numpy.linalg as LA\n",
  "import matplotlib.pyplot as plt\n",
  "from sklearn.utils import shuffle\n",
  "\n",
  "# this allows plots to appear directly in the notebook\n",
  "%matplotlib inline" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [
  "## Generating Data: two classes are drawn from two different Gaussian clusters" ] },
 { "cell_type": "code", "execution_count": 278, "metadata": {}, "outputs": [], "source": [
  "# generating data\n",
  "np.random.seed(0) # seed the global NumPy RNG so that a fresh Run All reproduces the same samples\n",
  "d = 2 # dimension\n",
  "m = 100 # number of training samples\n",
  "c = 2.7 # parameter controlling separation between the clusters\n",
  "mu1 = c*np.array([1,1])\n",
  "mu2 = -mu1\n",
  "sigma_squared_1 = 1.5\n",
  "sigma_squared_2 = sigma_squared_1\n",
  "\n",
  "# m//2 samples per class, each drawn from an isotropic Gaussian around its own mean\n",
  "X_train_1 = np.random.multivariate_normal(mu1,sigma_squared_1*np.identity(d),m//2)\n",
  "X_train_2 = np.random.multivariate_normal(mu2,sigma_squared_2*np.identity(d),m//2)\n",
  "X_train = np.concatenate((X_train_1,X_train_2),axis=0)\n",
  "\n",
  "# labels: 1 for the cluster around mu1, 0 for the cluster around mu2\n",
  "Y_train_1 = np.ones((m//2,1))\n",
  "Y_train_2 = np.zeros((m//2,1))\n",
  "Y_train = np.concatenate((Y_train_1,Y_train_2),axis=0)" ] },
 { "cell_type": "code", "execution_count": 279, "metadata": {}, "outputs": [], "source": [
  "# adding 1 as X0 (bias feature)\n",
  "# the column-count guard keeps this cell idempotent: re-running it does not prepend a second column of ones\n",
  "if X_train.shape[1] == d:\n",
  "    X_train = np.column_stack((np.ones((X_train.shape[0],1)),X_train))" ] },
 { "cell_type": "code", "execution_count": 280, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 280, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
  "plt.scatter(X_train_1[:,0],X_train_1[:,1], color='b',marker='o')\n",
  "plt.scatter(X_train_2[:,0],X_train_2[:,1], color='r',marker='o')\n",
  "plt.title('Two Gaussian clusters with different labels')\n",
  "plt.xlabel('x1')\n",
"plt.ylabel('x2')" ] },
 { "cell_type": "code", "execution_count": 281, "metadata": {}, "outputs": [], "source": [
  "def sigmoid(x):\n",
  "    \"\"\"Logistic function 1 / (1 + exp(-x)), evaluated elementwise.\n",
  "\n",
  "    Computed as exp(-log(1 + exp(-x))) through np.logaddexp, so large\n",
  "    negative inputs do not overflow inside np.exp.\n",
  "    \"\"\"\n",
  "    return np.exp(-np.logaddexp(0, -x))" ] },
 { "cell_type": "code", "execution_count": 282, "metadata": {}, "outputs": [], "source": [
  "def cross_entropy(y,y_est,eps=1e-12):\n",
  "    \"\"\"Binary cross-entropy -y*log(y_est) - (1-y)*log(1-y_est), elementwise.\n",
  "\n",
  "    y     : true label(s)\n",
  "    y_est : predicted probability (or array of probabilities)\n",
  "    eps   : y_est is clipped to [eps, 1-eps] before taking logs, so a saturated\n",
  "            prediction (exactly 0 or 1) gives a large finite loss instead of inf/nan\n",
  "    \"\"\"\n",
  "    y_est = np.clip(y_est, eps, 1 - eps)\n",
  "    return -y*np.log(y_est)-(1-y)*np.log(1-y_est)" ] },
 { "cell_type": "code", "execution_count": 283, "metadata": {}, "outputs": [], "source": [
  "def compute_cost(X,Y,theta):\n",
  "    \"\"\"Cross-entropy of the logistic model, summed (not averaged) over all rows of X.\n",
  "\n",
  "    X     : (m, n) design matrix\n",
  "    Y     : m labels, shaped (m,) or (m, 1)\n",
  "    theta : (n,) parameter vector\n",
  "    Returns the summed loss: shape (1,) when Y is a column vector, scalar when Y is 1-D.\n",
  "    \"\"\"\n",
  "    Y = np.asarray(Y)\n",
  "    # give the predictions the same shape as Y so that (m,1) labels do not broadcast against (m,) predictions\n",
  "    y_est = sigmoid(np.dot(X,theta)).reshape(Y.shape)\n",
  "    return np.sum(cross_entropy(Y,y_est),axis=0)" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [
  "## Gradient Descent " ] },
 { "cell_type": "code", "execution_count": 284, "metadata": {}, "outputs": [], "source": [
  "def gradient_step_log_reg(theta_current, X, Y, learning_rate):\n",
  "    \"\"\"Perform one batch gradient-descent step for logistic regression.\n",
  "\n",
  "    theta_current : (n,) current parameter vector (left unmodified)\n",
  "    X             : (m, n) design matrix\n",
  "    Y             : m labels, shaped (m,) or (m, 1)\n",
  "    learning_rate : step size\n",
  "    Returns a new (n,) array theta_current - learning_rate * grad, where grad is the\n",
  "    gradient of the summed cross-entropy, X^T (sigmoid(X theta) - Y).\n",
  "    \"\"\"\n",
  "    X = np.asarray(X)\n",
  "    # flatten (m,1) labels to (m,) so the residual stays one value per sample\n",
  "    residual = sigmoid(np.dot(X,theta_current)) - np.asarray(Y).reshape(-1)\n",
  "    grad = np.dot(X.T,residual)\n",
  "    # build a new array instead of updating in place, so the caller's theta is never mutated\n",
  "    return theta_current - learning_rate*grad" ] },
 { "cell_type": "code", "execution_count": 285, "metadata": {}, "outputs": [], "source": [
  "# initializations\n",
  "theta = np.random.randn(X_train.shape[1]) # one parameter per column of X_train\n",
  "learning_rate = 1e-5\n",
  "num_iterations = 6000" ] },
 { "cell_type": "code", "execution_count": 286, "metadata": {}, "outputs": [], "source": [
  "# shuffle the data set (X_train and Y_train are permuted together)\n",
  "X_train, Y_train = shuffle(X_train, Y_train)" ] },
 { "cell_type": "code", "execution_count": 287, "metadata": {}, "outputs": [], "source": [
  "# gradient descent\n",
  "cost_vec = []\n",
  "for t in range(num_iterations):\n",
  "    theta = gradient_step_log_reg(theta, X_train, Y_train, learning_rate)\n",
  "    # record the cost after iterations 0, 50, 100, ...\n",
  "    if t%50==0:\n",
  "        cost_vec.append(compute_cost(X_train, Y_train, theta))\n" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [
  "## Plot cost vs iteration" ] },
 { "cell_type": "code", "execution_count": 288, "metadata": {}, "outputs": [ { "data": {
"text/plain": [ "" ] }, "execution_count": 288, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
  "# the training loop appends one cost value at iterations 0, 50, 100, ...,\n",
  "# so the x positions are multiples of that interval (not an even split of [0, num_iterations])\n",
  "cost_every = 50 # must match the recording interval used in the gradient-descent loop\n",
  "iterations = cost_every*np.arange(len(cost_vec))\n",
  "plt.plot(iterations,cost_vec)\n",
  "plt.xlabel('iterations')\n",
  "plt.ylabel('cost')" ] },
 { "cell_type": "code", "execution_count": 289, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([-0.50290993, 1.00720295, 0.79526924])" ] }, "execution_count": 289, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "theta" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [
  "## Visualize the final separating line" ] },
 { "cell_type": "code", "execution_count": 290, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 290, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
  "plt.scatter(X_train_1[:,0],X_train_1[:,1], color='b',marker='o')\n",
  "plt.scatter(X_train_2[:,0],X_train_2[:,1], color='r',marker='o')\n",
  "plt.title('Logistic regression line seperating the two classes')\n",
  "# decision boundary: theta[0] + theta[1]*x1 + theta[2]*x2 = 0, solved for x2 at the two ends of the x1 range\n",
  "x1_line = np.array([-4,4])\n",
  "plt.plot(x1_line, (x1_line*-theta[1]-theta[0])/theta[2],'r')\n",
  "plt.xlabel('x1')\n",
  "plt.ylabel('x2')" ] }
 ],
 "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.15" } },
 "nbformat": 4, "nbformat_minor": 2 }