Skip to main content

Linear Regression

· 6 min read
Shaurya Singhal

Source: View original notebook on GitHub

Category: Machine Learning / Learn ML

Linear Regression

Objective of supervised Learning

- we want to generate a function or hypothesis from training data using some algorithm and then from the function we can predict the output for our test data.

-

Objective of linear Regression

- let the function be a type of linear function i.e for vector x(x1,x2,x3,.....,xn) we have function as 
- f(x) = a(x1) + b(x2)+ c(x3)...............+ alpha(xn) + constant

- now we have to choose (a,b,c,d,.......,alpha) in such a way that we predict as closely as possible to the real outcome
- for that we defined our error/cost function as in figure and our goal is to minimize it : -
- here let data be univariate
- cost function is convex function so local minima is global minima.

Method 1 (minimizing the error function by setting its derivative to 0 and solving the resulting two equations for slope and intercept)

- using closed-form formulas to calculate the slope and intercept values
- for slope and intercept we minimize the error function using differentiation
    - slope = cov(x,y)/cov(x,x)
- intercept = E(y) - slope*E(x)
cov(x,y) = E(xy) - E(x)E(y)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the univariate dataset: one feature column and one target column,
# each converted from a DataFrame to a plain numpy array.
X = pd.read_csv('Datasets/linearX.csv').values
Y = pd.read_csv('Datasets/linearY.csv').values
X.shape

Output:

(99, 1)
# Target has the same (99, 1) shape as X — one value per sample.
Y.shape

Output:

(99, 1)

Visualization and Normalization/Standardization

%matplotlib inline
plt.style.use('seaborn') # just changing graph style — NOTE(review): the 'seaborn' style name was removed in matplotlib >= 3.6 (use 'seaborn-v0_8'); confirm the installed version
# Standardize X to zero mean and unit variance (z-score); Y is left unscaled.
X = (X-np.mean(X))/np.std(X)
plt.scatter(X,Y,marker='*',s=100)

Output

Output:

<matplotlib.collections.PathCollection at 0x11b8fef0>
# Method 1: closed-form OLS for a single feature, using population moments.
#   slope (theta1)     = cov(x, y) / var(x) = (E[xy] - E[x]E[y]) / (E[x^2] - E[x]^2)
#   intercept (theta0) = E[y] - slope * E[x]
mean_x = np.mean(X)
mean_y = np.mean(Y)
theta1 = (np.mean(X*Y) - mean_x*mean_y) / (np.mean(X*X) - mean_x*mean_x)
theta0 = mean_y - theta1*mean_x

theta1,theta0

Output:

(0.0013579397686593838, 0.9966341414141414)
# Scatter of the standardized data with the closed-form fit line overlaid in red.
plt.scatter(X,Y,c='blue',marker='*',s=100)
plt.plot(X,theta0+theta1*X, 'r',lw=2)

Output

Output:

[<matplotlib.lines.Line2D at 0x11cef3f0>]
from sklearn.metrics import r2_score
# R^2 of the closed-form fit evaluated on the training data (1.0 = perfect fit).
r2_score(Y,theta0 + theta1*X)

Output:

0.43818504557919435

Method 2 (gradient descent) (Important)

what is Gradient Descent?
  • an algorithm that iteratively finds the minimum of a convex function by updating the parameters so that each step moves toward the minimum point.

  • Gradient descent is a first-order iterative optimization algorithm for finding the minimum of a function. To find a local minimum of a function using gradient descent, one takes steps proportional to the negative of the gradient(slope) (or approximate gradient) of the function at the current point.

# Sanity check: X is still (99, 1) after standardization.
X.shape

Output:

(99, 1)
# Sanity check: Y is unchanged at (99, 1).
Y.shape

Output:

(99, 1)

steps of gradient descent

- start from random point (theta0,theta1)
- update thetas using gradient
- move toward minimum till convergence

convergence criteria

  • fixed number of iteration
  • change in error &lt; some small number (e.g. 0.001) --> a better criterion than the above

# Gradient descent for univariate data.
#
# Cost function: J(theta) = (1/2) * sum_i (h(x_i) - y_i)^2
# The usual 1/m factor is deliberately omitted — it only rescales the
# gradient by a constant, which the learning rate absorbs.
#
# BUG FIX: the export had stripped the function-body indentation, which is a
# SyntaxError in Python; restored here.
def hypothesis(X, theta):
    """Linear model prediction theta[0] + theta[1]*X (elementwise on arrays)."""
    return theta[0] + theta[1]*X
def gradient(X, Y, theta):
    """Gradient of J(theta) = (1/2)*sum((h(x)-y)^2) w.r.t. (theta0, theta1).

    dJ/dtheta0 = sum(h(x) - y)
    dJ/dtheta1 = sum((h(x) - y) * x)

    BUG FIX: restored the body indentation lost in the export (SyntaxError).
    """
    grad = np.zeros(2)
    hx = hypothesis(X, theta)

    grad[0] = np.sum(hx - Y)
    grad[1] = np.sum((hx - Y) * X)

    return grad


def error(X, Y, theta):
    """Sum-of-squares cost (1/2)*sum((h(x)-y)^2); the 1/m factor is intentionally omitted.

    BUG FIX: restored the body indentation lost in the export (SyntaxError).
    """
    hx = hypothesis(X, theta)
    return (1/2)*(np.sum((hx - Y)**2))
def gradientDescent(X, Y, learning_rate=0.0001):
    """Batch gradient descent from theta = (0, 0) for a fixed iteration budget.

    Returns (theta, error_list): the final (theta0, theta1) and the cost
    recorded at the START of each of the 1000 iterations.

    BUG FIX: restored the body indentation lost in the export (SyntaxError).
    """
    # starting point (0, 0)
    theta = np.zeros(2)

    # convergence criterion: fixed number of iterations
    max_itr = 1000

    error_list = []
    for _ in range(max_itr):
        grad = gradient(X, Y, theta)
        error_list.append(error(X, Y, theta))
        theta[0] = theta[0] - learning_rate * grad[0]  # grad[0] is dJ/d(theta0)
        theta[1] = theta[1] - learning_rate * grad[1]  # grad[1] is dJ/d(theta1)

    return theta, error_list
# Run gradient descent; theta_ans is close to the closed-form (theta0, theta1)
# found by method 1 (note the order here is [intercept, slope]).
theta_ans,error_list = gradientDescent(X,Y)
theta_ans

Output:

array([0.99658654, 0.00135787])
# generating testing data: evenly spaced inputs spanning (and slightly
# beyond) the standardized X range, to draw a smooth fit line
X_test = np.linspace(-2,5,50)
plt.scatter(X,Y, marker = '*',s=100)
plt.plot(X_test, hypothesis(X_test, theta_ans),'r',lw=3)
plt.show()

Output

# plotting error function vs iteration
plt.plot(error_list)
plt.xlim(0,400)  # zoom in on the early iterations where the cost changes most
plt.xlabel('NUMBER OF ITERATION ')
plt.ylabel('ERROR')

Output

Output:

Text(0, 0.5, 'ERROR')
def gradientDescent_2(X, Y, learning_rate=0.0001):
    """Gradient descent that stops when the change in cost drops below 1e-14.

    Starts from a deliberately far-away point (-3.9, 3.9) so the optimization
    trajectory is visible when plotted over the cost surface.

    Returns (theta, error_list, theta_list). theta_list records theta at the
    start of every iteration; the final cost (the one that triggered
    convergence) is NOT appended to error_list, so the two lists end up the
    same length — the 3D trajectory plot below relies on that.

    BUG FIX: restored the body indentation lost in the export (SyntaxError).
    """
    # starting point, chosen far from the optimum
    theta = np.array([-3.9, 3.9])

    # convergence criterion: change in error becomes very small
    error_list = []
    error_list.append(error(X, Y, theta))

    # theta_list for plotting trajectory
    theta_list = []
    while True:
        grad = gradient(X, Y, theta)
        theta_list.append((theta[0], theta[1]))
        theta[0] = theta[0] - learning_rate * grad[0]  # grad[0] is dJ/d(theta0)
        theta[1] = theta[1] - learning_rate * grad[1]  # grad[1] is dJ/d(theta1)
        error_temp = error(X, Y, theta)
        change = abs(error_list[-1] - error_temp)
        if change < 0.00000000000001:
            break
        error_list.append(error_temp)

    return theta, error_list, theta_list
# Run from the far-away start; converges to essentially the same optimum as
# the fixed-iteration run above.
min_theta, error_list,theta_list = gradientDescent_2(X,Y)
min_theta

Output:

array([0.99663406, 0.001358  ])
# Fit line from the convergence-based run over the standardized data.
plt.scatter(X,Y, marker = '*',s=100)
plt.plot(X_test, hypothesis(X_test, min_theta),'r',lw=3)
plt.show()

Output

# Error vs. iteration for the convergence-based run.
plt.plot(error_list)
# plt.xlim(0,len(error_list))

plt.xlabel('NUMBER OF ITERATION ')
plt.ylabel('ERROR')

Output

Output:

Text(0, 0.5, 'ERROR')

Visualizing the error function in 3D space and the trajectory of theta vector

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Re-import and re-standardize the data so this section is self-contained.

import numpy as np
import pandas as pd

X = pd.read_csv('Datasets/linearX.csv')
Y = pd.read_csv('Datasets/linearY.csv')
X = X.values
Y = Y.values
X = (X-np.mean(X))/np.std(X)


# Grid of candidate (theta0, theta1) values and the cost surface over it.
T0 = np.linspace(-4,4,X.shape[0])
T1 = np.linspace(-4,4,X.shape[0])

# BUG FIX: the original built J from a nested list of Python ints, giving an
# integer array that silently truncated every float cost on assignment.
# Use a float array instead. (Also restored the loop indentation lost in the
# export.) NOTE(review): this surface omits the 1/2 factor used in error(),
# a constant scale that does not change the surface's shape — confirm that's
# acceptable before comparing z-values against error_list.
J = np.zeros((X.shape[0], X.shape[0]))
for i in range(X.shape[0]):
    for j in range(X.shape[0]):
        J[i][j] = np.sum((T0[i] + T1[j]*X - Y)**2)
T0,T1 = np.meshgrid(T0,T1)
# creating 3d figure and axes
# creating 3d figure and axes
fig = plt.figure()
# BUG FIX: fig.gca(projection='3d') was deprecated in matplotlib 3.4 and
# removed in 3.6; add_subplot is the supported way to create a 3D axes.
ax = fig.add_subplot(projection='3d')

# Cost surface, slightly transparent so the trajectory shows through.
ax.plot_surface(T0,T1,J,cmap = 'coolwarm',alpha=0.7)

theta_list = np.array(theta_list)

# trajectory in black
ax.scatter(theta_list[:,0], theta_list[:,1], error_list,marker='>', c='k')

Output

Output:

<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x18c9270>

Creating plot, 3D/2D contour,surface, wireframe

# Four views of the same cost surface, each overlaid with the descent
# trajectory in black: 3D surface, 3D wireframe, 3D contour, 2D contour.
fig = plt.figure(figsize=(15,10))

surf_ax = plt.subplot(221, projection='3d')
surf_ax.scatter(theta_list[:,0], theta_list[:,1], error_list, marker='>', c='k')
surf_ax.plot_surface(T0, T1, J, cmap='coolwarm')


wire_ax = plt.subplot(222, projection='3d')
wire_ax.scatter(theta_list[:,0], theta_list[:,1], error_list, marker='>', c='k')
wire_ax.plot_wireframe(T0, T1, J)

contour3d_ax = plt.subplot(223, projection='3d')
contour3d_ax.scatter(theta_list[:,0], theta_list[:,1], error_list, marker='>', c='k')
contour3d_ax.contour(T0, T1, J, cmap='rainbow')

contour2d_ax = plt.subplot(224)
contour2d_ax.scatter(theta_list[:,0], theta_list[:,1], error_list, marker='^', c='k')
contour2d_ax.contour(T0, T1, J, cmap='rainbow')

Output

Gradient Descent algo in sklearn

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Reload the raw (unstandardized) data for the sklearn comparison.
X = pd.read_csv('Datasets/linearX.csv')
Y = pd.read_csv('Datasets/linearY.csv')
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# NOTE(review): no random_state, so the split (and the scores below) change
# on every run — pass random_state=<int> for reproducibility.
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2)
# BUG FIX: the `normalize` parameter of LinearRegression was deprecated in
# sklearn 1.0 and removed in 1.2, so normalize=True now raises TypeError.
# For plain OLS, feature normalization does not change the fitted
# predictions, so it is simply dropped; use a Pipeline with StandardScaler
# if scaling is actually wanted.
lr_model = LinearRegression()
lr_model.fit(X_train,Y_train)

Output:

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=True)
# Learned parameters: coef_ holds the slope (2-D array, one row per target),
# intercept_ the bias (1-D array).
slope = lr_model.coef_
intercept = lr_model.intercept_
slope,intercept

Output:

(array([[0.00078001]]), array([0.99038226]))
# BUG FIX: the original called `model.score(...)` but no `model` was ever
# defined (NameError); the fitted estimator is `lr_model`.
lr_model.score(X_test, Y_test) # figuring out how good is the model (R^2 on the held-out split)

Output:

0.4855894020818986

or can find out using r2_score() of sklearn.metrics

from sklearn.metrics import r2_score
# R^2 on the test split via the metrics API. NOTE(review): this should equal
# lr_model.score(X_test, Y_test); the differing saved outputs suggest the two
# cells were run on different random splits — confirm.
r2_score(Y_test, lr_model.predict(X_test))

Output:

0.4886661642049145
# Regression line from sklearn drawn over the full raw (unstandardized) dataset.
plt.scatter(X,Y, marker = '*',s=100)
plt.plot(X, slope*X+intercept,'r',lw=3 )
plt.show()

Output