import pandas as pd
from sklearn.datasets import load_iris
# Load scikit-learn's iris dataset and pick short names for its four feature columns.
iris = load_iris()
colnames = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']

# Independent variables as a DataFrame, target variable as a Series named 'class'.
x = pd.DataFrame(iris.data, columns=colnames)
y = pd.Series(iris.target, name='class')

# Place the target next to the features to obtain the complete dataset.
iris_data = pd.concat([x, y], axis='columns')
Let's look at the first 5 rows of the data
# Preview the top of the combined frame; 5 is head()'s default row count, spelled out here.
iris_data.head(n=5)
Let's check the shape of the data
# DataFrame.shape is a (rows, columns) tuple; the columns are the four features plus 'class'.
iris_data.shape
from sklearn.model_selection import train_test_split
train_test_split shuffles the rows before splitting the data.
Split the data into training and test sets in a 70:30 ratio.
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# shuffled split the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=9,
)
Printing the shapes of the training data below
# Report the training split: the feature frame's shape and the number of labels.
print('shapes of training data:')
for _label, _value in (
    ('shape of x_train : ', x_train.shape),
    ('shape of y_train : ', len(y_train)),
):
    print(_label, _value)
Printing the shapes of the test data below
# Report the test split: the feature frame's shape and the number of labels.
print('shapes of test data:')
for _label, _value in (
    ('shape of x_test : ', x_test.shape),
    ('shape of y_test : ', len(y_test)),
):
    print(_label, _value)