Skip to main content

1. Data Generation for Supervised Learning (using the SciPy stack: NumPy, pandas, Matplotlib)

· 4 min read
Shaurya Singhal

Source: View original notebook on GitHub

Category: Machine Learning / Learn ML

1. Data Generation for Supervised Learning (using the SciPy stack: NumPy, pandas, Matplotlib)

Regression Data

import numpy as np
# Draw 300 feature values from the standard normal distribution N(0, 1).
x = np.random.randn(300)
x.shape  # -> (300,)

Output:

(300,)
def hypothesis(x):
    """Ground-truth linear target y = 6*x + 5 (vectorized over numpy arrays)."""
    slope, intercept = 6, 5
    return slope * x + intercept
# Apply the ground-truth line to every sample at once (vectorized).
y = hypothesis(x)
y.shape  # -> (300,), one target per sample

Output:

(300,)
import matplotlib.pyplot as plt
# Noise-free data: all points fall exactly on the line y = 6x + 5.
plt.scatter(x,y)

Output

Output:

<matplotlib.collections.PathCollection at 0xf94f510>
def addnoise(y, scale=2.0):
    """Return Gaussian noise matched to the length of `y`.

    Only y's leading dimension is used -- its values are ignored; the
    caller adds the returned noise to the clean targets.

    Parameters
    ----------
    y : np.ndarray
        Array whose first dimension sets the number of noise samples.
    scale : float, default 2.0
        Noise standard deviation (generalizes the original hard-coded 2,
        which doubled the unit-variance noise).

    Returns
    -------
    np.ndarray of shape (y.shape[0],) with N(0, scale**2) samples.
    """
    return scale * np.random.randn(y.shape[0])
# Noisy targets (orange points) against the noise-free line (default-color line).
plt.scatter(x,y+addnoise(y),color ='orange')
plt.plot(x,hypothesis(x))
plt.show()

Output

Here is the regression data scattered along the line y = 6x + 5.

Generating Classification data

# Generate multivariate (two-feature: x1, x2) Gaussian data for binary
# classification.
# Signature reminder: np.random.multivariate_normal(mean, cov[, size, ...])

# Class 1 (label 0): 300 samples centered at (2, 4).
mean1 = np.array([2, 4])
cov1 = np.array([[0.5, 0.4],
                 [0.4, 0.7]])
class1_arr = np.random.multivariate_normal(mean1, cov1, 300)

# Class 2 (label 1): 400 samples centered at (-1, 2).
mean2 = np.array([-1, 2])
cov2 = np.array([[1, 0.4],
                 [0.4, 1]])
class2_arr = np.random.multivariate_normal(mean2, cov2, 400)

# Stack features row-wise, then build matching labels:
# 300 zeros (class 1) followed by 400 ones (class 2).
x = np.concatenate((class1_arr, class2_arr), axis=0)
y = np.hstack((np.zeros(300), np.ones(400)))
y.shape  # -> (700,)

Output:

(700,)
# Fuse features and labels into one (700, 3) array so each row stays
# aligned with its label while shuffling; column 2 holds the class label.
data  = np.column_stack((x,y))
np.random.shuffle(data)  # in-place shuffle of the rows only
data
# here is my classification data

Output:

array([[-1.20215667,  1.81619577,  1.        ],
[-2.18227822, 1.0554847 , 1. ],
[ 0.30946201, 2.3828793 , 1. ],
...,
[-0.38716961, 2.23936831, 1. ],
[ 0.25692066, 2.31069276, 1. ],
[ 2.04968868, 4.48527163, 0. ]])
import pandas as pd
# Wrap the shuffled array in a DataFrame for readable, labeled inspection.
df = pd.DataFrame(data,columns = ['x1','x2','class'])
df.head()  # preview the first five rows

Output:

x1        x2  class
0 -1.202157 1.816196 1.0
1 -2.182278 1.055485 1.0
2 0.309462 2.382879 1.0
3 1.876904 4.091469 0.0
4 -0.772710 2.171740 1.0

Plotting classification data

Hard way:
def colorplotting(x, y):
    """Scatter-plot two-feature points colored by their binary label.

    Points with label 1 are drawn green; everything else red.
    Vectorized: two scatter calls via boolean masks instead of one
    scatter call per sample (the original looped over all 700 points,
    issuing a separate scatter call for each).

    Parameters
    ----------
    x : np.ndarray, shape (n, 2) -- feature columns x1, x2.
    y : np.ndarray, shape (n,)   -- labels; 1 -> green, otherwise red.
    """
    mask = (y == 1)
    plt.scatter(x[mask, 0], x[mask, 1], color='green')
    plt.scatter(x[~mask, 0], x[~mask, 1], color='red')
colorplotting(x,y)

Output

Shortcut:
plt.scatter(x[:,0],x[:,1], c= y )# passing y as `c` lets matplotlib choose a color per label value; works for categorical data

Output

Output:

<matplotlib.collections.PathCollection at 0x11e06b70>

2. Making data using scikit learn

(2a)Regression Data(make_regression)

from sklearn.datasets import make_regression
# try running to see the documentation 
# make_regression?
# 400 samples with a single (informative) feature; noise=25.1 is the
# standard deviation of Gaussian noise added to the targets, and
# random_state pins the RNG so the dataset is reproducible.
XR,YR = make_regression(n_samples=400, n_features = 1,n_informative=1,n_targets=1,noise=25.1,random_state=21,shuffle=False)
XR.shape  # -> (400, 1)

Output:

(400, 1)
YR.shape

Output:

(400,)
import matplotlib.pyplot as plt
# Visualize the generated regression data (feature vs. noisy target).
plt.scatter(XR,YR)
plt.show()

Output

(2b)Classification Data(make_classification)

from sklearn.datasets import make_classification # Generate a random n-class classification problem
# 3 classes with one Gaussian cluster each; both features are informative
# and none redundant (make_classification requires
# n_informative + n_redundant <= n_features).
XC,YC = make_classification(n_samples=400,n_features=2,n_classes=3,n_clusters_per_class=1,n_informative=2,
                            n_redundant=0
                            )
print(XC.shape)
print(YC.shape)

Output:

(400, 2)
(400,)
plt.scatter(XC[:,0],XC[:,1],c=YC)

Output

Output:

<matplotlib.collections.PathCollection at 0x14146870>

(2c)more Classification data(make_blobs)

from sklearn.datasets import make_blobs
# 400 two-feature points spread over 5 isotropic Gaussian blobs.
XB,YB = make_blobs(n_samples=400,n_features=2,centers=5)
XB.shape  # -> (400, 2)

Output:

(400, 2)
YB.shape

Output:

(400,)
plt.scatter(XB[:,0], XB[:,1],c =YB)

Output

Output:

<matplotlib.collections.PathCollection at 0x1410a910>

(2d) more (make_moons)

from sklearn.datasets import make_moons
# Two interleaving half-circles -- classic non-linearly-separable toy data.
XM,YM = make_moons(n_samples=50)
XM.shape  # -> (50, 2)

Output:

(50, 2)
YM.shape

Output:

(50,)
plt.scatter(XM[:,0],XM[:,1],c=YM)
plt.show()

Output

3. Subplots combining all four datasets above

import matplotlib.pyplot as plt
import numpy as np
# way 1 - object oriented way: plt.subplots() returns the figure
# ("canvas") plus a 2x2 array of Axes; draw on each Axes directly.
canvas, axes = plt.subplots(nrows=2,ncols=2,figsize=(15,10))

axes[0][0].scatter(XR,YR)
axes[0][0].set_title('Regression DATA')

axes[0][1].scatter(XC[:,0], XC[:,1],c=YC)
axes[0][1].set_title('Classification DATA')

axes[1][0].scatter(XB[:,0], XB[:,1],c=YB)
axes[1][0].set_title('Blobs DATA')

axes[1][1].scatter(XM[:,0], XM[:,1],c=YM)
axes[1][1].set_title('Moon DATA')

Output

Output:

Text(0.5, 1.0, 'Moon DATA')
# canvas # all subplots are stored in canvas
# way 2 - Functional WAY: plt.subplot(RCP) selects cell P of an R x C
# grid as the active axes; subsequent pyplot calls draw into it.
fig = plt.figure(figsize=(15,10))

plt.subplot(221)
plt.scatter(XR,YR)
plt.title('Regression DATA')

plt.subplot(222)
plt.scatter(XC[:,0], XC[:,1],c=YC)
plt.title('Classification DATA')

plt.subplot(223)
plt.scatter(XB[:,0], XB[:,1],c=YB)
plt.title('Blobs DATA')

plt.subplot(224)
plt.scatter(XM[:,0], XM[:,1],c=YM)
plt.title('Moon DATA')

Output

Output:

Text(0.5, 1.0, 'Moon DATA')
# fig
# way3 (optional): subplot2grid places each plot on a 10x10 grid cell
# with explicit row/column spans -- useful for unevenly sized panels.
fig = plt.figure(figsize=(15,15))

plt.subplot2grid((10,10),(0,0),rowspan=4,colspan=4)
plt.scatter(XR,YR)
plt.title('Regression DATA')

plt.subplot2grid((10,10),(0,5),rowspan=4,colspan=4)
plt.scatter(XC[:,0], XC[:,1],c=YC)
plt.title('Classification DATA')

plt.subplot2grid((10,10),(5,0),rowspan=4,colspan=4)
plt.scatter(XB[:,0], XB[:,1],c=YB)
plt.title('Blobs DATA')

plt.subplot2grid((10,10),(5,5),rowspan=4,colspan=4)
plt.scatter(XM[:,0], XM[:,1],c=YM)
plt.title('Moon DATA')

Output

Output:

Text(0.5, 1.0, 'Moon DATA')