PCA2-1
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
df = pd.read_csv('/content/car_evaluation.csv', header=None)
df.shape
(1728, 7)
df.head()
0 1 2 3 4 5 6
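The head above shows unnamed columns 0–6, but df.info() below reports feature names, so a renaming cell is missing from the export; presumably something like:
col_names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']
df.columns = col_names  # assign descriptive names to the seven unnamed columns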
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1728 entries, 0 to 1727
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 buying 1728 non-null object
1 maint 1728 non-null object
2 doors 1728 non-null object
3 persons 1728 non-null object
4 lug_boot 1728 non-null object
5 safety 1728 non-null object
6 class 1728 non-null object
dtypes: object(7)
memory usage: 94.6+ KB
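The cell that prints the frequency counts below is not in the export; a simple loop over the columns would produce them:
for col in df.columns:
    print(df[col].value_counts())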
buying
vhigh 432
high 432
med 432
low 432
Name: count, dtype: int64
maint
vhigh 432
high 432
med 432
low 432
Name: count, dtype: int64
doors
2 432
3 432
4 432
5more 432
Name: count, dtype: int64
persons
2 576
4 576
more 576
Name: count, dtype: int64
lug_boot
small 576
med 576
big 576
Name: count, dtype: int64
safety
low 576
med 576
high 576
Name: count, dtype: int64
class
unacc 1210
acc 384
good 69
vgood 65
Name: count, dtype: int64
df['class'].value_counts()
class
unacc    1210
acc       384
good       69
vgood      65
Name: count, dtype: int64
df.isnull().sum()
buying 0
maint 0
doors 0
persons 0
lug_boot 0
safety 0
class 0
dtype: int64
X = df.drop(['class'], axis=1)
y = df['class']
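The train/test split cell is missing from the export; a split consistent with the shapes reported below (1157 train / 571 test rows) would be:
from sklearn.model_selection import train_test_split
# test_size=0.33 reproduces the 1157/571 split; the original random_state is unknown and assumed here
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)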
X_train.shape, X_test.shape
((1157, 6), (571, 6))
X_train.dtypes
buying object
maint object
doors object
persons object
lug_boot object
safety object
dtype: object
!pip install category_encoders
Successfully installed category_encoders-2.6.3
import category_encoders as ce
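The encoding cell is missing from the export; given the integer-coded frames below, an ordinal encoding over all six features fits:
# fit the encoder on the training set only, then apply the same mapping to the test set
encoder = ce.OrdinalEncoder(cols=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'])
X_train = encoder.fit_transform(X_train)
X_test = encoder.transform(X_test)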
X_train.head()
      buying  maint  doors  persons  lug_boot  safety
48         1      1      1        1         1       1
468        2      1      1        2         2       1
155        1      2      1        1         2       2
1721       3      3      2        1         2       2
1208       4      3      3        1         2       2
X_test.head()
      buying  maint  doors  persons  lug_boot  safety
599        2      2      4        3         1       2
1201       4      3      3        2         1       3
628        2      2      2        3         3       3
1498       3      2      2        2         1       3
1263       4      3      4        1         1       1
from sklearn.tree import DecisionTreeClassifier
clf_gini = DecisionTreeClassifier(criterion='gini', max_depth=3, random_state=0)
clf_gini.fit(X_train, y_train)
DecisionTreeClassifier(max_depth=3, random_state=0)
y_pred_gini = clf_gini.predict(X_test)
y_pred_train_gini = clf_gini.predict(X_train)
y_pred_train_gini
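No accuracy check is shown for the gini model; following the pattern of the entropy model below, it would be:
from sklearn.metrics import accuracy_score
print('Model accuracy score with criterion gini: {0:0.4f}'.format(accuracy_score(y_test, y_pred_gini)))
print('Training-set accuracy score: {0:0.4f}'.format(accuracy_score(y_train, y_pred_train_gini)))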
Visualizing the Decision Tree
plt.figure(figsize=(12,8))
from sklearn import tree
tree.plot_tree(clf_gini)  # the model is already fitted above; no need to refit
[Text(0.4, 0.875, 'x[5] <= 1.5\ngini = 0.455\nsamples = 1157\nvalue = [255, 49, 813, 40]'),
Text(0.2, 0.625, 'gini = 0.0\nsamples = 386\nvalue = [0, 0, 386, 0]'),
Text(0.6, 0.625, 'x[3] <= 2.5\ngini = 0.577\nsamples = 771\nvalue = [255, 49, 427, 40]'),
Text(0.4, 0.375, 'x[0] <= 2.5\ngini = 0.631\nsamples = 525\nvalue = [255, 49, 181, 40]'),
Text(0.2, 0.125, 'gini = 0.496\nsamples = 271\nvalue = [124, 0, 147, 0]'),
Text(0.6, 0.125, 'gini = 0.654\nsamples = 254\nvalue = [131, 49, 34, 40]'),
Text(0.8, 0.375, 'gini = 0.0\nsamples = 246\nvalue = [0, 0, 246, 0]')]
import graphviz
dot_data = tree.export_graphviz(clf_gini, out_file=None,
                                feature_names=X_train.columns,
                                class_names=clf_gini.classes_,  # the class labels, not the full y_train series
                                filled=True)
graph = graphviz.Source(dot_data)
graph
Using Entropy
clf_entropy = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
clf_entropy.fit(X_train, y_train)
DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
y_pred_entropy = clf_entropy.predict(X_test)
print('Model accuracy score with criterion entropy: {0:0.4f}'.format(accuracy_score(y_test, y_pred_entropy)))
Model accuracy score with criterion entropy: 0.8021
y_pred_train_entropy = clf_entropy.predict(X_train)
y_pred_train_entropy
accuracy_score(y_train, y_pred_train_entropy)
0.7865168539325843
print(clf_entropy.score(X_train, y_train))
print(clf_entropy.score(X_test, y_test))
0.7865168539325843
0.8021015761821366
plt.figure(figsize=(12,8))
from sklearn import tree
tree.plot_tree(clf_entropy)  # already fitted above
[Text(0.4, 0.875, 'x[5] <= 1.5\nentropy = 1.2\nsamples = 1157\nvalue = [255, 49, 813, 40]'),
Text(0.2, 0.625, 'entropy = 0.0\nsamples = 386\nvalue = [0, 0, 386, 0]'),
Text(0.6, 0.625, 'x[3] <= 2.5\nentropy = 1.474\nsamples = 771\nvalue = [255, 49, 427, 40]'),
Text(0.4, 0.375, 'x[0] <= 2.5\nentropy = 1.638\nsamples = 525\nvalue = [255, 49, 181, 40]'),
Text(0.2, 0.125, 'entropy = 0.995\nsamples = 271\nvalue = [124, 0, 147, 0]'),
Text(0.6, 0.125, 'entropy = 1.759\nsamples = 254\nvalue = [131, 49, 34, 40]'),
Text(0.8, 0.375, 'entropy = 0.0\nsamples = 246\nvalue = [0, 0, 246, 0]')]
import graphviz
dot_data = tree.export_graphviz(clf_entropy, out_file=None, feature_names=X_train.columns,
                                class_names=clf_entropy.classes_, filled=True)
graph = graphviz.Source(dot_data)
graph
Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred_entropy)
print('Confusion matrix\n\n',cm)
Confusion matrix
[[ 73 0 56 0]
[ 20 0 0 0]
[ 12 0 385 0]
[ 25 0 0 0]]
Classification Report
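The report cell itself is missing from the export; it would be:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_entropy))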
Random Forest
The same preprocessing as in the decision-tree section is repeated here: load car_evaluation.csv, rename the columns, split into train and test sets, and ordinal-encode the six features with category_encoders.
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier(n_estimators=10, random_state=54)
rfc.fit(X_train, y_train)
RandomForestClassifier(n_estimators=10, random_state=54)
y_pred = rfc.predict(X_test)
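No accuracy check appears for the 10-tree model; the usual check would be:
from sklearn.metrics import accuracy_score
print('Model accuracy score with 10 decision-trees: {0:0.4f}'.format(accuracy_score(y_test, y_pred)))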
rfc_100 = RandomForestClassifier(n_estimators=100,random_state=54)
rfc_100.fit(X_train, y_train)
RandomForestClassifier(random_state=54)
y_pred = rfc_100.predict(X_test)
clf= RandomForestClassifier(n_estimators=100,random_state=0)
clf.fit(X_train, y_train)
RandomForestClassifier(random_state=0)
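The cell that computes the feature importances shown below is not in the export; presumably:
# rank features by their impurity-based importance in the fitted forest
feature_scores = pd.Series(clf.feature_importances_, index=X_train.columns).sort_values(ascending=False)
feature_scores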
safety 0.295319
persons 0.233856
buying 0.151734
maint 0.146653
lug_boot 0.100048
doors 0.072389
dtype: float64
sns.barplot(x=feature_scores, y=feature_scores.index)
plt.xlabel('Feature Importance Score')
plt.ylabel('Features')
plt.title("Visualizing Important Features")
plt.show()
Dropping the least important feature
Re-splitting into train and test sets after dropping the least important feature ('doors'), as sketched below.
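The drop/re-split cell is missing from the export; based on the importance ranking above, 'doors' is the feature dropped, so a consistent sketch is:
X = df.drop(['class', 'doors'], axis=1)  # drop the target and the least important feature
y = df['class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)  # same assumed split as before
encoder = ce.OrdinalEncoder(cols=['buying', 'maint', 'persons', 'lug_boot', 'safety'])
X_train = encoder.fit_transform(X_train)
X_test = encoder.transform(X_test)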
rfc_100 = RandomForestClassifier(n_estimators=100,random_state=54)
rfc_100.fit(X_train, y_train)
RandomForestClassifier(random_state=54)
y_pred = rfc_100.predict(X_test)
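Checking accuracy on the reduced feature set (the output was not captured in the export):
print('Model accuracy score with doors variable removed: {0:0.4f}'.format(accuracy_score(y_test, y_pred)))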
Cross-Validation
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import KFold, cross_val_score
X, y = datasets.load_iris(return_X_y=True)
clf = DecisionTreeClassifier(random_state=42)
k_folds = KFold(n_splits = 5)
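The scoring cell is not in the export; the usual continuation of this snippet is:
scores = cross_val_score(clf, X, y, cv=k_folds)  # one accuracy score per fold
print("Cross-validation scores:", scores)
print("Average CV score:", scores.mean())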
K-Means Clustering
dataset = pd.read_csv('/content/Mall_Customers.csv')
dataset.head()
   CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)
0           1    Male   19                  15                      39
1           2    Male   21                  15                      81
2           3  Female   20                  16                       6
3           4  Female   23                  16                      77
4           5  Female   31                  17                      40
dataset.shape
(200, 5)
dataset.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 CustomerID 200 non-null int64
1 Gender 200 non-null object
2 Age 200 non-null int64
3 Annual Income (k$) 200 non-null int64
4 Spending Score (1-100) 200 non-null int64
dtypes: int64(4), object(1)
memory usage: 7.9+ KB
dataset.describe()
X = dataset.iloc[:, 3:]
X.head()
   Annual Income (k$)  Spending Score (1-100)
0                  15                      39
1                  15                      81
2                  16                       6
3                  16                      77
4                  17                      40
import scipy.cluster.hierarchy as hc
import matplotlib.pyplot as plt
from pylab import rcParams
rcParams['figure.figsize'] = 15, 10
df = pd.read_csv('/content/Mall_Customers.csv')
print("Shape of the data= ", df.shape)
df.head()
   CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)
0           1    Male   19                  15                      39
1           2    Male   21                  15                      81
2           3  Female   20                  16                       6
3           4  Female   23                  16                      77
4           5  Female   31                  17                      40
plt.figure(figsize=(10,6))
plt.scatter(df['Annual Income (k$)'],df['Spending Score (1-100)'])
plt.xlabel('Annual Income')
plt.ylabel('Spending Score')
plt.title('Unlabelled Mall Customer Data')
# Since we are going to use the Annual Income and Spending Score columns only, let's create a 2D array of these two columns for clustering
X = df.iloc[:, [3,4]].values
X[:5] # Show first 5 records only
array([[15, 39],
[15, 81],
[16, 6],
[16, 77],
[17, 40]])
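scipy.cluster.hierarchy is imported above but never used in the export, which suggests a dendrogram cell went missing; a sketch consistent with those imports:
# Ward linkage merges the pair of clusters that minimises the increase in within-cluster variance
dendrogram = hc.dendrogram(hc.linkage(X, method='ward'))
plt.title('Dendrogram')
plt.xlabel('Customers')
plt.ylabel('Euclidean distance')
plt.show()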
For each value of i from 1 to 10, a KMeans model is created with i clusters. init='random' uses random initialization of the cluster centres. inertia_ is the sum of squared distances of samples to their closest cluster centre; each value is appended to clustering_score.
from sklearn.cluster import KMeans  # import missing from the export
clustering_score = [] # creating an empty list to store inertia values
for i in range(1, 11):
kmeans = KMeans(n_clusters = i, init = 'random', random_state = 42)
kmeans.fit(X)
clustering_score.append(kmeans.inertia_)
plt.figure(figsize=(10,6))
plt.plot(range(1, 11), clustering_score)
plt.scatter(5,clustering_score[4], s = 200, c = 'red', marker='*')
plt.title('The Elbow Method')
plt.xlabel('No. of Clusters')
plt.ylabel('Clustering Score')
plt.show()
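The cell that fits the final 5-cluster model and produces the labels below is missing; given the elbow at 5, presumably:
kmeans = KMeans(n_clusters = 5, init = 'random', random_state = 42)  # same settings as the elbow loop
pred = kmeans.fit_predict(X)
pred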
array([4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2,
4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 0,
4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 0, 1, 3, 1, 3, 1,
0, 1, 3, 1, 3, 1, 3, 1, 3, 1, 0, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1,
3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1,
3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1,
3, 1], dtype=int32)
df['cluster'] = pred  # attach the predicted cluster label to each customer (assignment cell missing from the export)
df
     CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)  cluster
0             1    Male   19                  15                      39        4
1             2    Male   21                  15                      81        2
2             3  Female   20                  16                       6        4
3             4  Female   23                  16                      77        2
4             5  Female   31                  17                      40        4
...
195         196  Female   35                 120                      79        1
196         197  Female   45                 126                      28        3
197         198    Male   32                 126                      74        1
198         199    Male   32                 137                      18        3
199         200    Male   30                 137                      83        1
plt.figure(figsize=(10,6))
plt.scatter(X[pred == 0, 0], X[pred == 0, 1], c = 'brown', label = 'Cluster 0')
plt.scatter(X[pred == 1, 0], X[pred == 1, 1], c = 'green', label = 'Cluster 1')
plt.scatter(X[pred == 2, 0], X[pred == 2, 1], c = 'blue', label = 'Cluster 2')
plt.scatter(X[pred == 3, 0], X[pred == 3, 1], c = 'purple', label = 'Cluster 3')
plt.scatter(X[pred == 4, 0], X[pred == 4, 1], c = 'orange', label = 'Cluster 4')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 300, c = 'red', label = 'Centroid', marker='*')
plt.xlabel('Annual Income')
plt.ylabel('Spending Score')
plt.legend()
plt.title('Customer Clusters')
Text(0.5, 1.0, 'Customer Clusters')
Support Vector Machine (SVM)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
df = pd.read_csv('/content/UniversalBank.csv')
df.describe()
        ID           Age          Experience   Income       ZIP Code      Family       CCAvg        Education    Mortgage     Personal Loan  Securities Account  ...
count   5000.000000  5000.000000  5000.000000  5000.000000  5000.000000   5000.000000  5000.000000  5000.000000  5000.000000  5000.000000    5000.000000         ...
mean    2500.500000  45.338400    20.104600    73.774200    93152.503000  2.396400     1.937938     1.881000     56.498800    0.096000       0.104400            ...
std     1443.520003  11.463166    11.467954    46.033729    2121.852197   1.147663     1.747659     0.839869     101.713802   0.294621       0.305809            ...
min     1.000000     23.000000    -3.000000    8.000000     9307.000000   1.000000     0.000000     1.000000     0.000000     0.000000       0.000000            ...
25%     1250.750000  35.000000    10.000000    39.000000    91911.000000  1.000000     0.700000     1.000000     0.000000     0.000000       0.000000            ...
50%     2500.500000  45.000000    20.000000    64.000000    93437.000000  2.000000     1.500000     2.000000     0.000000     0.000000       0.000000            ...
75%     3750.250000  55.000000    30.000000    98.000000    94608.000000  3.000000     2.500000     3.000000     101.000000   0.000000       0.000000            ...
max     5000.000000  67.000000    43.000000    224.000000   96651.000000  4.000000     10.000000    3.000000     635.000000   1.000000       1.000000            ...
df.head()
ID Age Experience Income ZIP Code Family CCAvg Education Mortgage Personal Loan Securities Account CD Account Online CreditCard
0 1 25 1 49 91107 4 1.6 1 0 0 1 0 0 0
1 2 45 19 34 90089 3 1.5 1 0 0 1 0 0 0
2 3 39 15 11 94720 1 1.0 1 0 0 0 0 0 0
4 5 35 8 45 91330 4 1.0 2 0 0 0 0 0 1
df.isnull().sum()
ID 0
Age 0
Experience 0
Income 0
ZIP Code 0
Family 0
CCAvg 0
Education 0
Mortgage 0
Personal Loan 0
Securities Account 0
CD Account 0
Online 0
CreditCard 0
dtype: int64
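The ID and ZIP Code columns are dropped before modelling (df1 below has neither); the cell is missing from the export, presumably:
df1 = df.drop(['ID', 'ZIP Code'], axis=1)  # identifiers carry no predictive signal
df1.head()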
   Age  Experience  Income  Family  CCAvg  Education  Mortgage  Personal Loan  Securities Account  CD Account  Online  CreditCard
0   25           1      49       4    1.6          1         0              0                   1           0       0           0
1   45          19      34       3    1.5          1         0              0                   1           0       0           0
2   39          15      11       1    1.0          1         0              0                   0           0       0           0
3   35           9     100       1    2.7          2         0              0                   0           0       0           0
4   35           8      45       4    1.0          2         0              0                   0           0       0           1
plt.figure(figsize=(10,5))
plt.title('Heatmap showing correlation between all the features', fontsize=10)
sns.heatmap(df1.corr(),annot=True, cmap= 'mako')
zero_class = df1[df1['CreditCard']==0]
one_class = df1[df1['CreditCard']==1]
zero_class.head()
Age Experience Income Family CCAvg Education Mortgage Personal Loan Securities Account CD Account Online CreditCard
0 25 1 49 4 1.6 1 0 0 1 0 0 0
1 45 19 34 3 1.5 1 0 0 1 0 0 0
2 39 15 11 1 1.0 1 0 0 0 0 0 0
3 35 9 100 1 2.7 2 0 0 0 0 0 0
5 37 13 29 4 0.4 2 155 0 0 0 1 0
one_class.head()
Age Experience Income Family CCAvg Education Mortgage Personal Loan Securities Account CD Account Online CreditCard
4 35 8 45 4 1.0 2 0 0 0 0 0 1
7 50 24 22 1 0.3 3 0 0 0 0 0 1
15 60 30 22 1 1.5 3 0 0 0 0 1 1
19 55 28 21 1 0.5 2 0 0 1 0 0 1
Scatter Plot
plt.xlabel('CCAvg')
plt.ylabel('Family')
plt.scatter(zero_class['CCAvg'],zero_class['Family'],color='blue',marker='+')
plt.scatter(one_class['CCAvg'],one_class['Family'],color='red',marker='.')
<matplotlib.collections.PathCollection at 0x7edb67282950>
Standard Scaler
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled = scaler.fit_transform(df1.drop('CreditCard',axis=1))
df_scaled = pd.DataFrame(scaled,columns=df1.columns[:-1])
df_scaled.head()
Age Experience Income Family CCAvg Education Mortgage Personal Loan Securities Account CD Account Online
0 -1.774417 -1.666078 -0.538229 1.397414 -0.193385 -1.049078 -0.555524 -0.325875 2.928915 -0.25354 -1.216618
1 -0.029524 -0.096330 -0.864109 0.525991 -0.250611 -1.049078 -0.555524 -0.325875 2.928915 -0.25354 -1.216618
2 -0.552992 -0.445163 -1.363793 -1.216855 -0.536736 -1.049078 -0.555524 -0.325875 -0.341423 -0.25354 -1.216618
3 -0.901970 -0.968413 0.569765 -1.216855 0.436091 0.141703 -0.555524 -0.325875 -0.341423 -0.25354 -1.216618
4 -0.901970 -1.055621 -0.625130 1.397414 -0.536736 0.141703 -0.555524 -0.325875 -0.341423 -0.25354 -1.216618
x = df_scaled
y = df1['CreditCard']
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
# The split cell is missing from the export; an 80/20 split matches the 1000-row test confusion matrices below (random_state assumed)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
model = SVC()
model.fit(x_train, y_train)
SVC()
y_pred=model.predict(x_test)
print('Model accuracy : {0:0.3f}%'.format(accuracy_score(y_test,y_pred)*100))
# confusion-matrix cells missing from the export; reconstructed to match the later kernel sections
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm
array([[703,   5],
       [245,  47]])
cm_matrix = pd.DataFrame(data=cm, columns=['Predicted:0', 'Predicted:1'], index=['Actual:0', 'Actual:1'])
sns.heatmap(cm_matrix, annot=True, fmt='d', cmap='mako')
<Axes: >
#classification report
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))
linear_classifier = SVC(kernel='linear').fit(x_train,y_train)
y_pred = linear_classifier.predict(x_test)
print('Model accuracy : {0:0.3f}%'.format(accuracy_score(y_test,y_pred)*100))
cm = confusion_matrix(y_test,y_pred)
cm_matrix = pd.DataFrame(data=cm,columns=['Predicted:0','Predicted:1'],index=['Actual:0','Actual:1'])
sns.heatmap(cm_matrix,annot=True,fmt='d',cmap='mako')
<Axes: >
rbf_svc=SVC(kernel='rbf').fit(x_train,y_train)
y_pred=rbf_svc.predict(x_test)
print('Model accuracy : {0:0.3f}%'.format(accuracy_score(y_test,y_pred)*100))
cm = confusion_matrix(y_test,y_pred)
cm_matrix = pd.DataFrame(data=cm,columns=['Predicted:0','Predicted:1'],index=['Actual:0','Actual:1'])
sns.heatmap(cm_matrix,annot=True,fmt='d',cmap='mako')
<Axes: >
Poly_svc = SVC(kernel='poly').fit(x_train,y_train)
y_pred = Poly_svc.predict(x_test)
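The accuracy print for the poly kernel appears to have been dropped from the export; the pattern from the other kernels would be:
print('Model accuracy : {0:0.3f}%'.format(accuracy_score(y_test, y_pred)*100))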
cm = confusion_matrix(y_test,y_pred)
cm_matrix = pd.DataFrame(data=cm,columns=['Predicted:0','Predicted:1'],index=['Actual:0','Actual:1'])
sns.heatmap(cm_matrix,annot=True,fmt='d',cmap='mako')
<Axes: >
sigmoid_svc = SVC(kernel='sigmoid').fit(x_train,y_train)
y_pred = sigmoid_svc.predict(x_test)
print('Model accuracy : {0:0.3f}%'.format(accuracy_score(y_test,y_pred)*100))
cm = confusion_matrix(y_test,y_pred)
cm_matrix = pd.DataFrame(data=cm,columns=['Predicted:0','Predicted:1'],index=['Actual:0','Actual:1'])
sns.heatmap(cm_matrix,annot=True,fmt='d',cmap='mako')
<Axes: >