Source: View original notebook on GitHub
Category: Machine Learning / Learn ML
1. Data Generation for Supervised Learning (using NumPy, pandas, Matplotlib)
Regression Data
import numpy as np
# Draw 300 samples from the standard normal distribution.
# (standard_normal consumes the same legacy global RNG stream as randn.)
x = np.random.standard_normal(300)
x.shape
Output:
(300,)
def hypothesis(x, slope=6, intercept=5):
    """Return the noise-free regression target ``slope * x + intercept``.

    The defaults reproduce the original hard-coded line ``y = 6x + 5``;
    the parameters let the same helper generate any other line.  Works
    on scalars and numpy arrays alike (plain broadcasting).
    """
    return slope * x + intercept
# Generate the targets by applying the noise-free hypothesis to every sample.
y = hypothesis(x)
y.shape
Output:
(300,)
import matplotlib.pyplot as plt
# Scatter the noise-free data: every point lies exactly on the line.
plt.scatter(x,y)
Output:
<matplotlib.collections.PathCollection at 0xf94f510>
def addnoise(y, scale=2.0):
    """Return Gaussian noise, one value per sample of *y*.

    The noise is drawn from N(0, scale**2); the default ``scale=2``
    matches the original doubled standard-normal noise.  Only the
    length of *y* (its first axis) is used, not its values.
    """
    return scale * np.random.randn(y.shape[0])
# Plot the noisy samples (orange) on top of the true line for comparison.
plt.scatter(x,y+addnoise(y),color ='orange')
plt.plot(x,hypothesis(x))
plt.show()
Here is the regression data, scattered along the line y = 6x + 5.
Generating Classification data
# Generate multivariate classification data with two features, x1 and x2.
# Each class is sampled from its own 2-D Gaussian:
#   multivariate_normal(mean, cov[, size, check_valid, tol])
# Class 1: label 0, centered at (2, 4), 300 samples.
mean1 = np.array([2, 4])
cov1 = np.array([[0.5, 0.4],
                 [0.4, 0.7]])
class1_arr = np.random.multivariate_normal(mean1, cov1, 300)
# Class 2: label 1, centered at (-1, 2), 400 samples.
mean2 = np.array([-1, 2])
cov2 = np.array([[1, 0.4],
                 [0.4, 1]])
class2_arr = np.random.multivariate_normal(mean2, cov2, 400)
# Stack the two clouds row-wise and build the matching label vector.
x = np.concatenate((class1_arr, class2_arr), axis=0)
y = np.concatenate((np.zeros(300), np.ones(400)))
y.shape
Output:
(700,)
# Join features and labels into one (700, 3) array, then shuffle the
# rows in place so the two classes are interleaved.
data = np.hstack((x, y.reshape(-1, 1)))
np.random.shuffle(data)
data
# here is my classification data
Output:
array([[-1.20215667, 1.81619577, 1. ],
[-2.18227822, 1.0554847 , 1. ],
[ 0.30946201, 2.3828793 , 1. ],
...,
[-0.38716961, 2.23936831, 1. ],
[ 0.25692066, 2.31069276, 1. ],
[ 2.04968868, 4.48527163, 0. ]])
import pandas as pd
# Wrap the shuffled array in a DataFrame with named feature/label columns.
df = pd.DataFrame(data,columns = ['x1','x2','class'])
df.head()
Output:
x1 x2 class
0 -1.202157 1.816196 1.0
1 -2.182278 1.055485 1.0
2 0.309462 2.382879 1.0
3 1.876904 4.091469 0.0
4 -0.772710 2.171740 1.0
Plotting the classification data
Hard way
def colorplotting(x, y):
    """Scatter-plot the rows of *x* (shape (n, 2)) colored by label *y*.

    Points whose label equals 1 are drawn green, all others red.
    Uses two vectorized scatter calls via boolean masks instead of the
    original one-scatter-call-per-point loop (O(n) matplotlib calls).
    """
    mask = (y == 1)
    plt.scatter(x[mask, 0], x[mask, 1], color='green')
    plt.scatter(x[~mask, 0], x[~mask, 1], color='red')
colorplotting(x,y)
shortcut
plt.scatter(x[:,0],x[:,1], c= y )# giving y to color so that based on value colors are decided,works for catagorical data
Output:
<matplotlib.collections.PathCollection at 0x11e06b70>
2. Making data using scikit learn
(2a)Regression Data(make_regression)
from sklearn.datasets import make_regression
# try running to see the documentation
# make_regression?
# 400 samples with a single informative feature; noise=25.1 is the stddev
# of the Gaussian noise, random_state fixes reproducibility, and
# shuffle=False keeps the samples in generation order.
XR,YR = make_regression(n_samples=400, n_features = 1,n_informative=1,n_targets=1,noise=25.1,random_state=21,shuffle=False)
XR.shape
Output:
(400, 1)
YR.shape
Output:
(400,)
import matplotlib.pyplot as plt
# Visualize the generated regression data.
plt.scatter(XR,YR)
plt.show()
(2b)Classification Data(make_classification)
from sklearn.datasets import make_classification # Generate a random n-class classification problem
# 3 classes with one Gaussian cluster each; both features are informative
# and none redundant (needed since n_informative must not exceed n_features).
XC,YC = make_classification(n_samples=400,n_features=2,n_classes=3,n_clusters_per_class=1,n_informative=2,
n_redundant=0
)
print(XC.shape)
print(YC.shape)
Output:
(400, 2)
(400,)
plt.scatter(XC[:,0],XC[:,1],c=YC)
Output:
<matplotlib.collections.PathCollection at 0x14146870>
(2c)more Classification data(make_blobs)
from sklearn.datasets import make_blobs
# 5 isotropic Gaussian blobs in 2-D; YB holds the blob index of each sample.
XB,YB = make_blobs(n_samples=400,n_features=2,centers=5)
XB.shape
Output:
(400, 2)
YB.shape
Output:
(400,)
plt.scatter(XB[:,0], XB[:,1],c =YB)
Output:
<matplotlib.collections.PathCollection at 0x1410a910>
(2d) more (make_moons)
from sklearn.datasets import make_moons
# Two interleaving half-circles (a classic non-linearly-separable toy set).
XM,YM = make_moons(n_samples=50)
XM.shape
Output:
(50, 2)
YM.shape
Output:
(50,)
# Color each point by its half-moon label.
plt.scatter(XM[:,0],XM[:,1],c=YM)
plt.show()
3. Subplots showing all four of the datasets above
import matplotlib.pyplot as plt
import numpy as np
# Way 1 - object-oriented: subplots returns the figure plus a 2x2 array
# of Axes; draw on each Axes object directly.
canvas, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
panels = [
    (XR, YR, None, 'Regression DATA'),
    (XC[:, 0], XC[:, 1], YC, 'Classification DATA'),
    (XB[:, 0], XB[:, 1], YB, 'Blobs DATA'),
    (XM[:, 0], XM[:, 1], YM, 'Moon DATA'),
]
for ax, (px, py, colors, title) in zip(axes.ravel(), panels):
    ax.scatter(px, py, c=colors)
    ax.set_title(title)
Output:
Text(0.5, 1.0, 'Moon DATA')
# canvas  # all subplots are stored in canvas
# Way 2 - functional: pyplot tracks the "current" axes, so subplot()
# followed by plotting calls targets each panel in turn.
fig = plt.figure(figsize=(15, 10))
for code, (px, py, colors, title) in zip(
        (221, 222, 223, 224),
        [(XR, YR, None, 'Regression DATA'),
         (XC[:, 0], XC[:, 1], YC, 'Classification DATA'),
         (XB[:, 0], XB[:, 1], YB, 'Blobs DATA'),
         (XM[:, 0], XM[:, 1], YM, 'Moon DATA')]):
    plt.subplot(code)
    plt.scatter(px, py, c=colors)
    plt.title(title)
Output:
Text(0.5, 1.0, 'Moon DATA')
# fig
# Way 3 (optional): subplot2grid places each panel on a 10x10 layout
# grid, spanning 4 rows x 4 columns, leaving a gutter between panels.
fig = plt.figure(figsize=(15, 15))
for cell, (px, py, colors, title) in zip(
        [(0, 0), (0, 5), (5, 0), (5, 5)],
        [(XR, YR, None, 'Regression DATA'),
         (XC[:, 0], XC[:, 1], YC, 'Classification DATA'),
         (XB[:, 0], XB[:, 1], YB, 'Blobs DATA'),
         (XM[:, 0], XM[:, 1], YM, 'Moon DATA')]):
    plt.subplot2grid((10, 10), cell, rowspan=4, colspan=4)
    plt.scatter(px, py, c=colors)
    plt.title(title)
Output:
Text(0.5, 1.0, 'Moon DATA')
