diff --git a/LogisticRegression.ipynb b/LogisticRegression.ipynb new file mode 100644 index 000000000..2d8c1ee4c --- /dev/null +++ b/LogisticRegression.ipynb @@ -0,0 +1,484 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# used for manipulating directory paths\n", + "import os\n", + "\n", + "# used for mathematical operations of elements\n", + "import math\n", + "\n", + "# Scientific and vector computation for python\n", + "import numpy as np\n", + "\n", + "# Plotting library\n", + "from matplotlib import pyplot\n", + "\n", + "# Optimization module in scipy\n", + "from scipy import optimize\n", + "\n", + "# library written for this exercise providing additional functions for assignment submission, and others\n", + "import utils\n", + "\n", + "# define the submission/grader object for this exercise\n", + "grader = utils.Grader()\n", + "\n", + "# tells matplotlib to embed plots within the notebook\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Load data\n", + "# The first two columns contains the exam scores and the third column\n", + "# contains the label.\n", + "data = np.loadtxt('ex2data1.txt', delimiter=',')\n", + "X, y = data[:, 0:2], data[:, 2]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def plotData(X, y):\n", + " \"\"\"\n", + " Plots the data points X and y into a new figure. Plots the data \n", + " points with * for the positive examples and o for the negative examples.\n", + " \n", + " Parameters\n", + " ----------\n", + " X : array_like\n", + " An Mx2 matrix representing the dataset. \n", + " \n", + " y : array_like\n", + " Label values for the dataset. 
A vector of size (M, ).\n", + " \n", + " Instructions\n", + " ------------\n", + " Plot the positive and negative examples on a 2D plot, using the\n", + " option 'k*' for the positive examples and 'ko' for the negative examples. \n", + " \"\"\"\n", + " # Create New Figure\n", + " fig = pyplot.figure()\n", + "\n", + " # ====================== YOUR CODE HERE ======================\n", + " # Find Indices of Positive and Negative Examples\n", + " pos = y == 1\n", + " neg = y == 0\n", + " \n", + " pyplot.plot(X[neg,0],X[neg,1],'ko', mfc='y', ms=8, mec='k', mew=1)\n", + "\n", + " pyplot.plot(X[pos,0],X[pos,1],'k*', lw=2, ms=10)\n", + "\n", + " \n", + " # ============================================================" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plotData(X,y)\n", + "pyplot.xlabel('Exam 1 Score')\n", + "pyplot.ylabel('Exam 2 Score')\n", + "pyplot.legend(['Not Admitted','Admitted'])\n", + "pass" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def sigmoid(z):\n", + " \"\"\"\n", + " Compute sigmoid function given the input z.\n", + " \n", + " Parameters\n", + " ----------\n", + " z : array_like\n", + " The input to the sigmoid function. This can be a 1-D vector \n", + " or a 2-D matrix. \n", + " \n", + " Returns\n", + " -------\n", + " g : array_like\n", + " The computed sigmoid function. g has the same shape as z, since\n", + " the sigmoid is computed element-wise on z.\n", + " \n", + " Instructions\n", + " ------------\n", + " Compute the sigmoid of each value of z (z can be a matrix, vector or scalar).\n", + " \"\"\"\n", + " # convert input to a numpy array\n", + " z = np.array(z)\n", + " \n", + " # You need to return the following variables correctly \n", + " g = np.zeros(z.shape)\n", + "\n", + " # ====================== YOUR CODE HERE ======================\n", + " g = 1/(1+np.exp((-z)))\n", + " \n", + " # =============================================================\n", + " return g" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "g( [0, 100] ) = [0.5 1. 
]\n" + ] + } + ], + "source": [ + "# Test the implementation of sigmoid function here\n", + "z = [0,100]\n", + "g = sigmoid(z)\n", + "\n", + "print('g(', z, ') = ', g)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Setup the data matrix appropriately, and add ones for the intercept term\n", + "m, n = X.shape\n", + "\n", + "# Add intercept term to X\n", + "X = np.concatenate([np.ones((m, 1)), X], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def costFunction(theta, X, y):\n", + " \"\"\"\n", + " Compute cost and gradient for logistic regression. \n", + " \n", + " Parameters\n", + " ----------\n", + " theta : array_like\n", + " The parameters for logistic regression. This is a vector\n", + " of shape (n+1, ).\n", + " \n", + " X : array_like\n", + " The input dataset of shape (m x n+1) where m is the total number\n", + " of data points and n is the number of features. We assume the \n", + " intercept has already been added to the input.\n", + " \n", + " y : array_like\n", + " Labels for the input. This is a vector of shape (m, ).\n", + " \n", + " Returns\n", + " -------\n", + " J : float\n", + " The computed value for the cost function. \n", + " \n", + " grad : array_like\n", + " A vector of shape (n+1, ) which is the gradient of the cost\n", + " function with respect to theta, at the current values of theta.\n", + " \n", + " Instructions\n", + " ------------\n", + " Compute the cost of a particular choice of theta. You should set J to \n", + " the cost. Compute the partial derivatives and set grad to the partial\n", + " derivatives of the cost w.r.t. 
each parameter in theta.\n", + " \n", + " Notes\n", + " -----\n", + " Predicted probabilities are clipped to [eps, 1 - eps] inside the\n", + " logarithms only, so a saturated sigmoid cannot produce log(0).\n", + " \"\"\"\n", + " # Initialize some useful values\n", + " m = y.size # number of training examples\n", + "\n", + " # You need to return the following variables correctly \n", + " J = 0\n", + " grad = np.zeros(theta.shape)\n", + "\n", + " # ====================== YOUR CODE HERE ======================\n", + " # Hypothesis for all m examples at once; X.dot(theta) has shape (m, ).\n", + " h = sigmoid(X.dot(theta))\n", + " \n", + " # sigmoid saturates to exactly 0.0 or 1.0 in float64 for large |z|, and\n", + " # math.log(0) raises ValueError, which would propagate out of the\n", + " # optimizer call. Clip h for the logarithms; the gradient uses h as is.\n", + " eps = np.finfo(float).eps\n", + " h_safe = np.clip(h, eps, 1 - eps)\n", + " J = -(y.dot(np.log(h_safe)) + (1 - y).dot(np.log(1 - h_safe))) / m\n", + " \n", + " # Gradient of the cost: (1/m) * X^T (h - y), one entry per parameter.\n", + " grad = X.T.dot(h - y) / m\n", + "\n", + " # =============================================================\n", + " return J, grad" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cost at initial theta (zeros): 0.693\n", + "Expected cost (approx): 0.693\n", + "\n", + "Gradient at initial theta (zeros):\n", + "\t[-0.1000, -12.0092, -11.2628]\n", + "Expected gradients (approx):\n", + "\t[-0.1000, -12.0092, -11.2628]\n", + "\n", + "Cost at test theta: 0.218\n", + "Expected cost (approx): 0.218\n", + "\n", + "Gradient at test theta:\n", + "\t[0.043, 2.566, 2.647]\n", + "Expected gradients (approx):\n", + "\t[0.043, 2.566, 2.647]\n" + ] + } + ], + "source": [ + "# Initialize fitting parameters\n", + "initial_theta = np.zeros(n+1)\n", + "\n", + "cost, grad = costFunction(initial_theta, X, y)\n", + "\n", + "print('Cost at initial theta (zeros): {:.3f}'.format(cost))\n", + "print('Expected cost (approx): 0.693\\n')\n", + "\n", + "print('Gradient at initial theta (zeros):')\n", + "print('\\t[{:.4f}, {:.4f}, {:.4f}]'.format(*grad))\n", + "print('Expected gradients (approx):\\n\\t[-0.1000, -12.0092, -11.2628]\\n')\n", + "\n", + "# Compute and display cost and gradient with non-zero theta\n", + "test_theta = np.array([-24, 0.2, 0.2])\n", + "cost, grad = costFunction(test_theta, X, y)\n", + "\n", + "print('Cost at test theta: 
{:.3f}'.format(cost))\n", + "print('Expected cost (approx): 0.218\\n')\n", + "\n", + "print('Gradient at test theta:')\n", + "print('\\t[{:.3f}, {:.3f}, {:.3f}]'.format(*grad))\n", + "print('Expected gradients (approx):\\n\\t[0.043, 2.566, 2.647]')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cost at theta found by optimize.minimize: 0.203\n", + "Expected cost (approx): 0.203\n", + "\n", + "theta:\n", + "\t[-25.161, 0.206, 0.201]\n", + "Expected theta (approx):\n", + "\t[-25.161, 0.206, 0.201]\n" + ] + } + ], + "source": [ + "# set options for optimize.minimize\n", + "options= {'maxiter': 400}\n", + "\n", + "# see documention for scipy's optimize.minimize for description about\n", + "# the different parameters\n", + "# The function returns an object `OptimizeResult`\n", + "# We use truncated Newton algorithm for optimization which is \n", + "# equivalent to MATLAB's fminunc\n", + "# See https://stackoverflow.com/questions/18801002/fminunc-alternate-in-numpy\n", + "res = optimize.minimize(costFunction,\n", + " initial_theta,\n", + " (X, y),\n", + " jac=True,\n", + " method='TNC',\n", + " options=options)\n", + "\n", + "# the fun property of `OptimizeResult` object returns\n", + "# the value of costFunction at optimized theta\n", + "cost = res.fun\n", + "\n", + "# the optimized theta is in the x property\n", + "theta = res.x\n", + "\n", + "# Print theta to screen\n", + "print('Cost at theta found by optimize.minimize: {:.3f}'.format(cost))\n", + "print('Expected cost (approx): 0.203\\n');\n", + "\n", + "print('theta:')\n", + "print('\\t[{:.3f}, {:.3f}, {:.3f}]'.format(*theta))\n", + "print('Expected theta (approx):\\n\\t[-25.161, 0.206, 0.201]')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Plot Boundary\n", + "utils.plotDecisionBoundary(plotData, theta, X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "def predict(theta, X):\n", + " \"\"\"\n", + " Predict whether the label is 0 or 1 using learned logistic regression.\n", + " Computes the predictions for X using a threshold at 0.5 \n", + " (i.e., if sigmoid(theta.T*x) >= 0.5, predict 1)\n", + " \n", + " Parameters\n", + " ----------\n", + " theta : array_like\n", + " Parameters for logistic regression. A vector of shape (n+1, ).\n", + " \n", + " X : array_like\n", + " The data to use for computing predictions. The rows is the number \n", + " of points to compute predictions, and columns is the number of\n", + " features.\n", + "\n", + " Returns\n", + " -------\n", + " p : array_like\n", + " Predictions and 0 or 1 for each row in X. \n", + " \n", + " Instructions\n", + " ------------\n", + " Complete the following code to make predictions using your learned \n", + " logistic regression parameters.You should set p to a vector of 0's and 1's \n", + " \"\"\"\n", + " m = X.shape[0] # Number of training examples\n", + "\n", + " # You need to return the following variables correctly\n", + " p = np.zeros(m)\n", + "\n", + " # ====================== YOUR CODE HERE ======================\n", + " for i in range(m):\n", + " if sigmoid(theta.dot(X.transpose()))[i]>=0.5 :\n", + " p[i]=1\n", + " else :\n", + " p[i]=0\n", + "\n", + " \n", + " # ============================================================\n", + " return p" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For a student with scores 45 and 85,we predict an admission probability of 0.776\n", + "Expected value: 0.775 +/- 0.002\n", + "\n", + "Train Accuracy: 
89.00 %\n", + "Expected accuracy (approx): 89.00 %\n" + ] + } + ], + "source": [ + "# Predict probability for a student with score 45 on exam 1 \n", + "# and score 85 on exam 2 \n", + "prob = sigmoid(np.dot([1, 45, 85], theta))\n", + "print('For a student with scores 45 and 85,'\n", + " 'we predict an admission probability of {:.3f}'.format(prob))\n", + "print('Expected value: 0.775 +/- 0.002\\n')\n", + "\n", + "# Compute accuracy on our training set\n", + "p = predict(theta, X)\n", + "print('Train Accuracy: {:.2f} %'.format(np.mean(p == y) * 100))\n", + "print('Expected accuracy (approx): 89.00 %')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/ex2data1.txt b/ex2data1.txt new file mode 100644 index 000000000..3a5f95245 --- /dev/null +++ b/ex2data1.txt @@ -0,0 +1,100 @@ +34.62365962451697,78.0246928153624,0 +30.28671076822607,43.89499752400101,0 +35.84740876993872,72.90219802708364,0 +60.18259938620976,86.30855209546826,1 +79.0327360507101,75.3443764369103,1 +45.08327747668339,56.3163717815305,0 +61.10666453684766,96.51142588489624,1 +75.02474556738889,46.55401354116538,1 +76.09878670226257,87.42056971926803,1 +84.43281996120035,43.53339331072109,1 +95.86155507093572,38.22527805795094,0 +75.01365838958247,30.60326323428011,0 +82.30705337399482,76.48196330235604,1 +69.36458875970939,97.71869196188608,1 +39.53833914367223,76.03681085115882,0 +53.9710521485623,89.20735013750205,1 +69.07014406283025,52.74046973016765,1 +67.94685547711617,46.67857410673128,0 
+70.66150955499435,92.92713789364831,1 +76.97878372747498,47.57596364975532,1 +67.37202754570876,42.83843832029179,0 +89.67677575072079,65.79936592745237,1 +50.534788289883,48.85581152764205,0 +34.21206097786789,44.20952859866288,0 +77.9240914545704,68.9723599933059,1 +62.27101367004632,69.95445795447587,1 +80.1901807509566,44.82162893218353,1 +93.114388797442,38.80067033713209,0 +61.83020602312595,50.25610789244621,0 +38.78580379679423,64.99568095539578,0 +61.379289447425,72.80788731317097,1 +85.40451939411645,57.05198397627122,1 +52.10797973193984,63.12762376881715,0 +52.04540476831827,69.43286012045222,1 +40.23689373545111,71.16774802184875,0 +54.63510555424817,52.21388588061123,0 +33.91550010906887,98.86943574220611,0 +64.17698887494485,80.90806058670817,1 +74.78925295941542,41.57341522824434,0 +34.1836400264419,75.2377203360134,0 +83.90239366249155,56.30804621605327,1 +51.54772026906181,46.85629026349976,0 +94.44336776917852,65.56892160559052,1 +82.36875375713919,40.61825515970618,0 +51.04775177128865,45.82270145776001,0 +62.22267576120188,52.06099194836679,0 +77.19303492601364,70.45820000180959,1 +97.77159928000232,86.7278223300282,1 +62.07306379667647,96.76882412413983,1 +91.56497449807442,88.69629254546599,1 +79.94481794066932,74.16311935043758,1 +99.2725269292572,60.99903099844988,1 +90.54671411399852,43.39060180650027,1 +34.52451385320009,60.39634245837173,0 +50.2864961189907,49.80453881323059,0 +49.58667721632031,59.80895099453265,0 +97.64563396007767,68.86157272420604,1 +32.57720016809309,95.59854761387875,0 +74.24869136721598,69.82457122657193,1 +71.79646205863379,78.45356224515052,1 +75.3956114656803,85.75993667331619,1 +35.28611281526193,47.02051394723416,0 +56.25381749711624,39.26147251058019,0 +30.05882244669796,49.59297386723685,0 +44.66826172480893,66.45008614558913,0 +66.56089447242954,41.09209807936973,0 +40.45755098375164,97.53518548909936,1 +49.07256321908844,51.88321182073966,0 +80.27957401466998,92.11606081344084,1 
+66.74671856944039,60.99139402740988,1 +32.72283304060323,43.30717306430063,0 +64.0393204150601,78.03168802018232,1 +72.34649422579923,96.22759296761404,1 +60.45788573918959,73.09499809758037,1 +58.84095621726802,75.85844831279042,1 +99.82785779692128,72.36925193383885,1 +47.26426910848174,88.47586499559782,1 +50.45815980285988,75.80985952982456,1 +60.45555629271532,42.50840943572217,0 +82.22666157785568,42.71987853716458,0 +88.9138964166533,69.80378889835472,1 +94.83450672430196,45.69430680250754,1 +67.31925746917527,66.58935317747915,1 +57.23870631569862,59.51428198012956,1 +80.36675600171273,90.96014789746954,1 +68.46852178591112,85.59430710452014,1 +42.0754545384731,78.84478600148043,0 +75.47770200533905,90.42453899753964,1 +78.63542434898018,96.64742716885644,1 +52.34800398794107,60.76950525602592,0 +94.09433112516793,77.15910509073893,1 +90.44855097096364,87.50879176484702,1 +55.48216114069585,35.57070347228866,0 +74.49269241843041,84.84513684930135,1 +89.84580670720979,45.35828361091658,1 +83.48916274498238,48.38028579728175,1 +42.2617008099817,87.10385094025457,1 +99.31500880510394,68.77540947206617,1 +55.34001756003703,64.9319380069486,1 +74.77589300092767,89.52981289513276,1 diff --git a/ex2data2.txt b/ex2data2.txt new file mode 100644 index 000000000..a88899234 --- /dev/null +++ b/ex2data2.txt @@ -0,0 +1,118 @@ +0.051267,0.69956,1 +-0.092742,0.68494,1 +-0.21371,0.69225,1 +-0.375,0.50219,1 +-0.51325,0.46564,1 +-0.52477,0.2098,1 +-0.39804,0.034357,1 +-0.30588,-0.19225,1 +0.016705,-0.40424,1 +0.13191,-0.51389,1 +0.38537,-0.56506,1 +0.52938,-0.5212,1 +0.63882,-0.24342,1 +0.73675,-0.18494,1 +0.54666,0.48757,1 +0.322,0.5826,1 +0.16647,0.53874,1 +-0.046659,0.81652,1 +-0.17339,0.69956,1 +-0.47869,0.63377,1 +-0.60541,0.59722,1 +-0.62846,0.33406,1 +-0.59389,0.005117,1 +-0.42108,-0.27266,1 +-0.11578,-0.39693,1 +0.20104,-0.60161,1 +0.46601,-0.53582,1 +0.67339,-0.53582,1 +-0.13882,0.54605,1 +-0.29435,0.77997,1 +-0.26555,0.96272,1 +-0.16187,0.8019,1 
+-0.17339,0.64839,1 +-0.28283,0.47295,1 +-0.36348,0.31213,1 +-0.30012,0.027047,1 +-0.23675,-0.21418,1 +-0.06394,-0.18494,1 +0.062788,-0.16301,1 +0.22984,-0.41155,1 +0.2932,-0.2288,1 +0.48329,-0.18494,1 +0.64459,-0.14108,1 +0.46025,0.012427,1 +0.6273,0.15863,1 +0.57546,0.26827,1 +0.72523,0.44371,1 +0.22408,0.52412,1 +0.44297,0.67032,1 +0.322,0.69225,1 +0.13767,0.57529,1 +-0.0063364,0.39985,1 +-0.092742,0.55336,1 +-0.20795,0.35599,1 +-0.20795,0.17325,1 +-0.43836,0.21711,1 +-0.21947,-0.016813,1 +-0.13882,-0.27266,1 +0.18376,0.93348,0 +0.22408,0.77997,0 +0.29896,0.61915,0 +0.50634,0.75804,0 +0.61578,0.7288,0 +0.60426,0.59722,0 +0.76555,0.50219,0 +0.92684,0.3633,0 +0.82316,0.27558,0 +0.96141,0.085526,0 +0.93836,0.012427,0 +0.86348,-0.082602,0 +0.89804,-0.20687,0 +0.85196,-0.36769,0 +0.82892,-0.5212,0 +0.79435,-0.55775,0 +0.59274,-0.7405,0 +0.51786,-0.5943,0 +0.46601,-0.41886,0 +0.35081,-0.57968,0 +0.28744,-0.76974,0 +0.085829,-0.75512,0 +0.14919,-0.57968,0 +-0.13306,-0.4481,0 +-0.40956,-0.41155,0 +-0.39228,-0.25804,0 +-0.74366,-0.25804,0 +-0.69758,0.041667,0 +-0.75518,0.2902,0 +-0.69758,0.68494,0 +-0.4038,0.70687,0 +-0.38076,0.91886,0 +-0.50749,0.90424,0 +-0.54781,0.70687,0 +0.10311,0.77997,0 +0.057028,0.91886,0 +-0.10426,0.99196,0 +-0.081221,1.1089,0 +0.28744,1.087,0 +0.39689,0.82383,0 +0.63882,0.88962,0 +0.82316,0.66301,0 +0.67339,0.64108,0 +1.0709,0.10015,0 +-0.046659,-0.57968,0 +-0.23675,-0.63816,0 +-0.15035,-0.36769,0 +-0.49021,-0.3019,0 +-0.46717,-0.13377,0 +-0.28859,-0.060673,0 +-0.61118,-0.067982,0 +-0.66302,-0.21418,0 +-0.59965,-0.41886,0 +-0.72638,-0.082602,0 +-0.83007,0.31213,0 +-0.72062,0.53874,0 +-0.59389,0.49488,0 +-0.48445,0.99927,0 +-0.0063364,0.99927,0 +0.63265,-0.030612,0 diff --git a/regularisedLogisticRegression.ipynb b/regularisedLogisticRegression.ipynb new file mode 100644 index 000000000..29006e75c --- /dev/null +++ b/regularisedLogisticRegression.ipynb @@ -0,0 +1,440 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + 
"metadata": {}, + "outputs": [], + "source": [ + "# used for manipulating directory paths\n", + "import os\n", + "\n", + "# used for mathematical operations of elements\n", + "import math\n", + "\n", + "# Scientific and vector computation for python\n", + "import numpy as np\n", + "\n", + "# Plotting library\n", + "from matplotlib import pyplot\n", + "\n", + "# Optimization module in scipy\n", + "from scipy import optimize\n", + "\n", + "# library written for this exercise providing additional functions for assignment submission, and others\n", + "import utils\n", + "\n", + "# define the submission/grader object for this exercise\n", + "grader = utils.Grader()\n", + "\n", + "# tells matplotlib to embed plots within the notebook\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Load Data\n", + "# The first two columns contains the X values and the third column\n", + "# contains the label (y).\n", + "data = np.loadtxt('ex2data2.txt', delimiter=',')\n", + "X = data[:, :2]\n", + "y = data[:, 2]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def sigmoid(z):\n", + " \"\"\"\n", + " Compute sigmoid function given the input z.\n", + " \n", + " Parameters\n", + " ----------\n", + " z : array_like\n", + " The input to the sigmoid function. This can be a 1-D vector \n", + " or a 2-D matrix. \n", + " \n", + " Returns\n", + " -------\n", + " g : array_like\n", + " The computed sigmoid function. 
g has the same shape as z, since\n", + " the sigmoid is computed element-wise on z.\n", + " \n", + " Instructions\n", + " ------------\n", + " Compute the sigmoid of each value of z (z can be a matrix, vector or scalar).\n", + " \"\"\"\n", + " # convert input to a numpy array\n", + " z = np.array(z)\n", + " \n", + " # You need to return the following variables correctly \n", + " g = np.zeros(z.shape)\n", + "\n", + " # ====================== YOUR CODE HERE ======================\n", + " g = 1/(1+np.exp((-z)))\n", + " \n", + " # =============================================================\n", + " return g" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def plotData(X, y):\n", + " \"\"\"\n", + " Plots the data points X and y into a new figure. Plots the data \n", + " points with * for the positive examples and o for the negative examples.\n", + " The positive examples are drawn first, then the negative examples.\n", + " \n", + " Parameters\n", + " ----------\n", + " X : array_like\n", + " An Mx2 matrix representing the dataset. \n", + " \n", + " y : array_like\n", + " Label values for the dataset. A vector of size (M, ).\n", + " \n", + " Instructions\n", + " ------------\n", + " Plot the positive and negative examples on a 2D plot, using the\n", + " option 'k*' for the positive examples and 'ko' for the negative examples. \n", + " \"\"\"\n", + " # Create New Figure\n", + " fig = pyplot.figure()\n", + "\n", + " # ====================== YOUR CODE HERE ======================\n", + " # Find Indices of Positive and Negative Examples\n", + " pos = y == 1\n", + " neg = y == 0\n", + " \n", + " # Draw the positive class first: the cells that call this function label\n", + " # the artists in plot order with legend(['y = 1', 'y = 0']).\n", + " pyplot.plot(X[pos,0],X[pos,1],'k*',lw=2, ms=10)\n", + "\n", + " pyplot.plot(X[neg,0],X[neg,1],'ko',mfc='y', ms=8, mec='k', mew=1)\n", + "\n", + " \n", + " # ============================================================" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plotData(X, y)\n", + "# Labels and Legend\n", + "pyplot.xlabel('Microchip Test 1')\n", + "pyplot.ylabel('Microchip Test 2')\n", + "\n", + "# Specified in plot order\n", + "pyplot.legend(['y = 1', 'y = 0'], loc='upper right')\n", + "pass" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Note that mapFeature also adds a column of ones for us, so the intercept\n", + "# term is handled\n", + "X = utils.mapFeature(X[:, 0], X[:, 1])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def costFunctionReg(theta, X, y, lambda_):\n", + " \"\"\"\n", + " Compute cost and gradient for logistic regression with regularization.\n", + " \n", + " Parameters\n", + " ----------\n", + " theta : array_like\n", + " Logistic regression parameters. A vector with shape (n, ). n is \n", + " the number of features including any intercept. If we have mapped\n", + " our initial features into polynomial features, then n is the total \n", + " number of polynomial features. \n", + " \n", + " X : array_like\n", + " The data set with shape (m x n). m is the number of examples, and\n", + " n is the number of features (after feature mapping).\n", + " \n", + " y : array_like\n", + " The data labels. A vector with shape (m, ).\n", + " \n", + " lambda_ : float\n", + " The regularization parameter. \n", + " \n", + " Returns\n", + " -------\n", + " J : float\n", + " The computed value for the regularized cost function. 
\n", + " \n", + " grad : array_like\n", + " A vector of shape (n, ) which is the gradient of the cost\n", + " function with respect to theta, at the current values of theta.\n", + " \n", + " Instructions\n", + " ------------\n", + " Compute the cost `J` of a particular choice of theta.\n", + " Compute the partial derivatives and set `grad` to the partial\n", + " derivatives of the cost w.r.t. each parameter in theta.\n", + " \"\"\"\n", + " # Initialize some useful values\n", + " m = y.size # number of training examples\n", + "\n", + " # You need to return the following variables correctly \n", + " J = 0\n", + " grad = np.zeros(theta.shape)\n", + "\n", + " # ===================== YOUR CODE HERE ======================\n", + " z=theta.dot(X.transpose())\n", + " h=sigmoid(z)\n", + " \n", + " for i in range(m):\n", + " J=J+((-1*(y[i]*math.log(h[i])+(1-y[i])*math.log(1-h[i])))/m)\n", + " \n", + " for i in range(1,theta.shape[0]):\n", + " J=J+(lambda_*theta[i]*theta[i])/(2*m)\n", + " \n", + " \n", + " for i in range(theta.shape[0]):\n", + " grad[i]=(((h-y).dot(X[:,i]))/m) \n", + " \n", + " for i in range(1,theta.shape[0]):\n", + " grad[i]=grad[i]+(lambda_*theta[i])/m\n", + " \n", + " # =============================================================\n", + " return J, grad" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cost at initial theta (zeros): 0.693\n", + "Expected cost (approx) : 0.693\n", + "\n", + "Gradient at initial theta (zeros) - first five values only:\n", + "\t[0.0085, 0.0188, 0.0001, 0.0503, 0.0115]\n", + "Expected gradients (approx) - first five values only:\n", + "\t[0.0085, 0.0188, 0.0001, 0.0503, 0.0115]\n", + "\n", + "------------------------------\n", + "\n", + "Cost at test theta : 3.16\n", + "Expected cost (approx): 3.16\n", + "\n", + "Gradient at initial theta (zeros) - first five values only:\n", + "\t[0.3460, 0.1614, 0.1948, 0.2269, 
0.0922]\n", + "Expected gradients (approx) - first five values only:\n", + "\t[0.3460, 0.1614, 0.1948, 0.2269, 0.0922]\n" + ] + } + ], + "source": [ + "# Initialize fitting parameters\n", + "initial_theta = np.zeros(X.shape[1])\n", + "\n", + "# Set regularization parameter lambda to 1\n", + "# DO NOT use `lambda` as a variable name in python\n", + "# because it is a python keyword\n", + "lambda_ = 1\n", + "\n", + "# Compute and display initial cost and gradient for regularized logistic\n", + "# regression\n", + "cost, grad = costFunctionReg(initial_theta, X, y, lambda_)\n", + "\n", + "print('Cost at initial theta (zeros): {:.3f}'.format(cost))\n", + "print('Expected cost (approx) : 0.693\\n')\n", + "\n", + "print('Gradient at initial theta (zeros) - first five values only:')\n", + "print('\\t[{:.4f}, {:.4f}, {:.4f}, {:.4f}, {:.4f}]'.format(*grad[:5]))\n", + "print('Expected gradients (approx) - first five values only:')\n", + "print('\\t[0.0085, 0.0188, 0.0001, 0.0503, 0.0115]\\n')\n", + "\n", + "\n", + "# Compute and display cost and gradient\n", + "# with all-ones theta and lambda = 10\n", + "test_theta = np.ones(X.shape[1])\n", + "cost, grad = costFunctionReg(test_theta, X, y, 10)\n", + "\n", + "print('------------------------------\\n')\n", + "print('Cost at test theta : {:.2f}'.format(cost))\n", + "print('Expected cost (approx): 3.16\\n')\n", + "\n", + "print('Gradient at initial theta (zeros) - first five values only:')\n", + "print('\\t[{:.4f}, {:.4f}, {:.4f}, {:.4f}, {:.4f}]'.format(*grad[:5]))\n", + "print('Expected gradients (approx) - first five values only:')\n", + "print('\\t[0.3460, 0.1614, 0.1948, 0.2269, 0.0922]')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def predict(theta, X):\n", + " \"\"\"\n", + " Predict whether the label is 0 or 1 using learned logistic regression.\n", + " Computes the predictions for X using a threshold at 0.5 \n", + " (i.e., if sigmoid(theta.T*x) >= 0.5, 
predict 1)\n", + " \n", + " Parameters\n", + " ----------\n", + " theta : array_like\n", + " Parameters for logistic regression. A vector of shape (n+1, ).\n", + " \n", + " X : array_like\n", + " The data to use for computing predictions. The number of rows is \n", + " the number of points to compute predictions for, and the number of\n", + " columns is the number of features.\n", + "\n", + " Returns\n", + " -------\n", + " p : array_like\n", + " Predictions of 0 or 1 for each row in X. \n", + " \n", + " Instructions\n", + " ------------\n", + " Complete the following code to make predictions using your learned \n", + " logistic regression parameters. You should set p to a vector of 0's and 1's \n", + " \"\"\"\n", + " m = X.shape[0] # Number of training examples\n", + "\n", + " # You need to return the following variables correctly\n", + " p = np.zeros(m)\n", + "\n", + " # ====================== YOUR CODE HERE ======================\n", + " # Evaluate the hypothesis once for all rows rather than once per loop\n", + " # iteration; the boolean mask is stored into the float array p as 1.0 / 0.0.\n", + " p[:] = sigmoid(X.dot(theta)) >= 0.5\n", + "\n", + " \n", + " # ============================================================\n", + " return p" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train Accuracy: 83.1 %\n", + "Expected accuracy (with lambda = 1): 83.1 % (approx)\n", + "\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Initialize fitting parameters\n", + "initial_theta = np.zeros(X.shape[1])\n", + "\n", + "# Set regularization parameter lambda to 1 (you should vary this)\n", + "lambda_ = 1\n", + "\n", + "# set options for optimize.minimize\n", + "options= {'maxiter': 100}\n", + "\n", + "res = optimize.minimize(costFunctionReg,\n", + " initial_theta,\n", + " (X, y, lambda_),\n", + " jac=True,\n", + " method='TNC',\n", + " options=options)\n", + "\n", + "# the fun property of OptimizeResult object returns\n", + "# the value of costFunction at optimized theta\n", + "cost = res.fun\n", + "\n", + "# the optimized theta is in the x property of the result\n", + "theta = res.x\n", + "\n", + "utils.plotDecisionBoundary(plotData, theta, X, y)\n", + "pyplot.xlabel('Microchip Test 1')\n", + "pyplot.ylabel('Microchip Test 2')\n", + "pyplot.legend(['y = 1', 'y = 0'])\n", + "pyplot.grid(False)\n", + "pyplot.title('lambda = %0.2f' % lambda_)\n", + "\n", + "# Compute accuracy on our training set\n", + "p = predict(theta, X)\n", + "\n", + "print('Train Accuracy: %.1f %%' % (np.mean(p == y) * 100))\n", + "print('Expected accuracy (with lambda = 1): 83.1 % (approx)\\n')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}