
Matu Ram Institute of Engineering & Management, Rohtak

Machine Learning Practical File

SUBMITTED TO:                         SUBMITTED BY:
Mrs. Jyoti                            Shristi Chauhan
A.P. in CSE Deptt.                    4419/CSE
MRIEM, Rohtak                         MRIEM, Rohtak


PROGRAM - 1

AIM: To implement basic Python libraries for machine
learning such as NumPy, pandas, SciPy, scikit-learn,
Matplotlib, etc.

CODE:
#numpy

import numpy as np

# Creating two arrays of rank 2

x = np.array([[1, 2], [3, 4]])

y = np.array([[5, 6], [7, 8]])

# Creating two arrays of rank 1

v = np.array([9, 10])

w = np.array([11, 12])

# Inner product of vectors

print(np.dot(v, w), "\n")

# Matrix and Vector product

print(np.dot(x, v), "\n")

# Matrix and matrix product

print(np.dot(x, y))
Output 1:

219

[29 67]

[[19 22]
[43 50]]
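As a quick check, the inner product on the first line is 9*11 + 10*12 = 99 + 120 = 219, matching the output.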

#SciPy

# Python script using Scipy

# for image manipulation

from scipy.misc import imread, imsave, imresize
# Note: these helpers require an older SciPy release; they were removed
# in later versions (see the imageio/Pillow sketch after the output below)

# Read a JPEG image into a numpy array
img = imread('D:/Programs/cat.jpg')  # path of the image
print(img.dtype, img.shape)

# Tinting the image
img_tint = img * [1, 0.45, 0.3]

# Saving the tinted image
imsave('D:/Programs/cat_tinted.jpg', img_tint)

# Resizing the tinted image to be 300 x 300 pixels
img_tint_resize = imresize(img_tint, (300, 300))

# Saving the resized tinted image
imsave('D:/Programs/cat_tinted_resized.jpg', img_tint_resize)

Output 2:

Original image, tinted image, and resized tinted image: [figures not reproduced].
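Since imread, imsave and imresize were removed from scipy.misc in newer SciPy releases, a minimal equivalent sketch using imageio and Pillow (an assumption: both packages are installed, imageio >= 2.16; the file paths are the same placeholders as above):

import imageio.v2 as imageio
import numpy as np
from PIL import Image

# Read a JPEG image into a numpy array
img = imageio.imread('D:/Programs/cat.jpg')
print(img.dtype, img.shape)

# Tint the image and cast back to 8-bit so it can be saved
img_tint = (img * [1, 0.45, 0.3]).astype(np.uint8)
imageio.imwrite('D:/Programs/cat_tinted.jpg', img_tint)

# Resize the tinted image to 300 x 300 pixels with Pillow
img_tint_resize = np.array(Image.fromarray(img_tint).resize((300, 300)))
imageio.imwrite('D:/Programs/cat_tinted_resized.jpg', img_tint_resize)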

#Scikit-learn

# Python script using Scikit-learn

# for Decision Tree Classifier

# Sample Decision Tree Classifier

from sklearn import datasets

from sklearn import metrics

from sklearn.tree import DecisionTreeClassifier

# load the iris datasets

dataset = datasets.load_iris()

# fit a CART model to the data


model = DecisionTreeClassifier()

model.fit(dataset.data, dataset.target)

print(model)

# make predictions

expected = dataset.target

predicted = model.predict(dataset.data)

# summarize the fit of the model

print(metrics.classification_report(expected, predicted))

print(metrics.confusion_matrix(expected, predicted))

Output 3:

DecisionTreeClassifier(class_weight=None,
criterion='gini', max_depth=None,
max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0,
min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort=False,
random_state=None,
splitter='best')
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       1.00      1.00      1.00        50
           2       1.00      1.00      1.00        50

   micro avg       1.00      1.00      1.00       150
   macro avg       1.00      1.00      1.00       150
weighted avg       1.00      1.00      1.00       150

[[50 0 0]
[ 0 50 0]
[ 0 0 50]]

#pandas

# Python program using Pandas for

# arranging a given set of data

# into a table

# importing pandas as pd

import pandas as pd

data = {"country": ["Brazil", "Russia", "India", "China", "South Africa"],

"capital": ["Brasilia", "Moscow", "New Delhi", "Beijing", "Pretoria"],

"area": [8.516, 17.10, 3.286, 9.597, 1.221],

"population": [200.4, 143.5, 1252, 1357, 52.98] }

data_table = pd.DataFrame(data)

print(data_table)
Output 4:
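The printed table (reconstructed here, assuming a recent pandas that preserves the dict's column order) looks approximately like:

        country    capital    area  population
0        Brazil   Brasilia   8.516      200.40
1        Russia     Moscow  17.100      143.50
2         India  New Delhi   3.286     1252.00
3         China    Beijing   9.597     1357.00
4  South Africa   Pretoria   1.221       52.98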

#Matplotlib

# Python program using Matplotlib

# for forming a linear plot

# importing the necessary packages and modules

import matplotlib.pyplot as plt

import numpy as np

# Prepare the data

x = np.linspace(0, 10, 100)

# Plot the data

plt.plot(x, x, label ='linear')


# Add a legend

plt.legend()

# Show the plot

plt.show()

Output 5: [linear plot figure not reproduced]
PROGRAM - 2

AIM: To implement FIND-S algorithm.

CODE:
import csv

with open('tennis.csv', 'r') as f:
    reader = csv.reader(f)
    your_list = list(reader)

# start with the most specific hypothesis
h = [['0', '0', '0', '0', '0', '0']]

for i in your_list:
    print(i)
    # generalize h only on positive examples
    if i[-1] == "True":
        j = 0
        for x in i:
            if x != "True":
                if x != h[0][j] and h[0][j] == '0':
                    h[0][j] = x
                elif x != h[0][j] and h[0][j] != '0':
                    h[0][j] = '?'
                else:
                    pass
                j = j + 1

print("Maximally Specific set")
print(h)

Output:
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'True']
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'True']
['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'False']
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'True']

Maximally Specific set
[['Sunny', 'Warm', '?', 'Strong', '?', '?']]
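For reference, a tennis.csv consistent with the rows printed above would contain (reconstructed from the output; the actual data file is not part of this listing):

Sunny,Warm,Normal,Strong,Warm,Same,True
Sunny,Warm,High,Strong,Warm,Same,True
Rainy,Cold,High,Strong,Warm,Change,False
Sunny,Warm,High,Strong,Cool,Change,True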
PROGRAM - 3

AIM: To implement the Gaussian Naïve Bayes classifier.

CODE:
# load the iris dataset

from sklearn.datasets import load_iris

iris = load_iris()

# store the feature matrix (X) and response vector (y)

X = iris.data

y = iris.target

# splitting X and y into training and testing sets

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)

# training the model on training set

from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()

gnb.fit(X_train, y_train)

# making predictions on the testing set

y_pred = gnb.predict(X_test)

# comparing actual response values (y_test) with predicted response values (y_pred)
from sklearn import metrics

print("Gaussian Naive Bayes model accuracy(in %):",


metrics.accuracy_score(y_test, y_pred)*100)

Output:

Gaussian Naive Bayes model accuracy(in %): 95.0
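Since test_size=0.4 leaves 60 of the 150 iris samples for testing, 95.0% accuracy corresponds to 57 of the 60 test samples being classified correctly.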


PROGRAM - 4

AIM: To analyse test data using the K-Means clustering algorithm.

CODE:
# importing required tools
import numpy as np
import cv2  # OpenCV, which provides the cv2.kmeans used below
from matplotlib import pyplot as plt

# creating two test data

X = np.random.randint(10,35,(25,2))

Y = np.random.randint(55,70,(25,2))

Z = np.vstack((X,Y))

Z = Z.reshape((50,2))

# convert to np.float32

Z = np.float32(Z)

plt.xlabel('Test Data')

plt.ylabel('Z samples')

plt.hist(Z,256,[0,256])

plt.show()

# define criteria and apply kmeans()
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
ret, label, center = cv2.kmeans(Z, 2, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

# Now separate the data

A = Z[label.ravel()==0]

B = Z[label.ravel()==1]

# Plot the data

plt.scatter(A[:,0],A[:,1])

plt.scatter(B[:,0],B[:,1],c = 'r')

plt.scatter(center[:,0],center[:,1],s = 80,c = 'y', marker = 's')

plt.xlabel('Test Data'),plt.ylabel('Z samples')

plt.show()
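The same two-cluster result can also be obtained without OpenCV; a minimal sketch using scikit-learn's KMeans (an alternative, not part of the original program):

from sklearn.cluster import KMeans

# fit two clusters on the same Z array built above
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0).fit(Z)
label = kmeans.labels_            # cluster index per sample
center = kmeans.cluster_centers_  # the two cluster centres

The plotting code above then works unchanged with these label and center arrays.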
PROGRAM - 5

AIM: To implement Principal Component Analysis.

CODE:
import numpy as np

import matplotlib.pyplot as plt

import pandas as pd

dataset = pd.read_csv('wine.csv')

X = dataset.iloc[:, 0:13].values

y = dataset.iloc[:, 13].values

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

X_train = sc.fit_transform(X_train)

X_test = sc.transform(X_test)

# Applying PCA function on training and testing set of X component

from sklearn.decomposition import PCA

pca = PCA(n_components = 2)

X_train = pca.fit_transform(X_train)

X_test = pca.transform(X_test)
explained_variance = pca.explained_variance_ratio_
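# explained_variance_ratio_ gives the fraction of variance kept by each
# principal component; printing it (a small addition, not in the original
# listing) shows how much information the two components retain
print(explained_variance)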

from sklearn.linear_model import LogisticRegression

classifier = LogisticRegression(random_state = 0)

classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)

# Predicting the training set and result through scatter plot

from matplotlib.colors import ListedColormap

X_set, y_set = X_train, y_train

X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))

plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('yellow', 'white', 'aquamarine')))

plt.xlim(X1.min(), X1.max())

plt.ylim(X2.min(), X2.max())

for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green', 'blue'))(i), label = j)

plt.title('Logistic Regression (Training set)')


plt.xlabel('PC1') # for Xlabel

plt.ylabel('PC2') # for Ylabel

plt.legend() # to show legend

# show scatter plot

plt.show()

# Visualising the Test set results through scatter plot

from matplotlib.colors import ListedColormap

X_set, y_set = X_test, y_test

X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))

plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('yellow', 'white', 'aquamarine')))

plt.xlim(X1.min(), X1.max())

plt.ylim(X2.min(), X2.max())

for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green', 'blue'))(i), label = j)

# title for scatter plot

plt.title('Logistic Regression (Test set)')

plt.xlabel('PC1') # for Xlabel


plt.ylabel('PC2') # for Ylabel

plt.legend()

# show scatter plot

plt.show()
PROGRAM - 6

AIM: To implement k-nearest neighbor algorithm.

CODE:
# Import necessary modules

from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import train_test_split

from sklearn.datasets import load_iris

import numpy as np

import matplotlib.pyplot as plt

irisData = load_iris()

X = irisData.data

y = irisData.target

# Split into training and test set

X_train, X_test, y_train, y_test = train_test_split(

X, y, test_size = 0.2, random_state=42)

neighbors = np.arange(1, 9)

train_accuracy = np.empty(len(neighbors))

test_accuracy = np.empty(len(neighbors))

# Loop over K values

for i, k in enumerate(neighbors):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)

    # Compute training and test data accuracy
    train_accuracy[i] = knn.score(X_train, y_train)
    test_accuracy[i] = knn.score(X_test, y_test)

# Generate plot

plt.plot(neighbors, test_accuracy, label = 'Testing dataset Accuracy')

plt.plot(neighbors, train_accuracy, label = 'Training dataset Accuracy')

plt.legend()

plt.xlabel('n_neighbors')

plt.ylabel('Accuracy')

plt.show()
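To read off the best k without inspecting the plot, a small extension (not in the original listing) could be appended:

# report the k with the highest test accuracy
best_k = neighbors[np.argmax(test_accuracy)]
print("Best k:", best_k, "with test accuracy:", test_accuracy.max())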
PROGRAM - 7

AIM: To implement linear regression.

CODE:
import numpy as np

import matplotlib.pyplot as plt

def estimate_coef(x, y):
    # number of observations/points
    n = np.size(x)

    # mean of x and y vector
    m_x = np.mean(x)
    m_y = np.mean(y)

    # calculating cross-deviation and deviation about x
    SS_xy = np.sum(y*x) - n*m_y*m_x
    SS_xx = np.sum(x*x) - n*m_x*m_x

    # calculating regression coefficients
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1*m_x

    return (b_0, b_1)

def plot_regression_line(x, y, b):
    # plotting the actual points as scatter plot
    plt.scatter(x, y, color = "m", marker = "o", s = 30)

    # predicted response vector
    y_pred = b[0] + b[1]*x

    # plotting the regression line
    plt.plot(x, y_pred, color = "g")

    # putting labels
    plt.xlabel('x')
    plt.ylabel('y')

    plt.show()

def main():
    # observations / data
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

    # estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {}\nb_1 = {}".format(b[0], b[1]))

    # plotting regression line
    plot_regression_line(x, y, b)

if __name__ == "__main__":
    main()

Output:

Estimated coefficients:
b_0 = -0.0586206896552
b_1 = 1.45747126437
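The coefficients printed above come from the ordinary least-squares closed form computed in estimate_coef:

$$ b_1 = \frac{SS_{xy}}{SS_{xx}} = \frac{\sum_i x_i y_i - n\,\bar{x}\,\bar{y}}{\sum_i x_i^2 - n\,\bar{x}^2}, \qquad b_0 = \bar{y} - b_1\,\bar{x} $$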
PROGRAM - 8

AIM: To implement logistic regression.

CODE:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

dataset = pd.read_csv("User_Data.csv")

x = dataset.iloc[:, [2, 3]].values

y = dataset.iloc[:, 4].values

# Splitting the dataset to train and test

xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size = 0.25, random_state = 0)

sc_x = StandardScaler()

xtrain = sc_x.fit_transform(xtrain)

xtest = sc_x.transform(xtest)

print (xtrain[0:10, :])

Output 1:
[[ 0.58164944 -0.88670699]
[-0.60673761 1.46173768]
[-0.01254409 -0.5677824 ]
[-0.60673761 1.89663484]
[ 1.37390747 -1.40858358]
[ 1.47293972 0.99784738]
[ 0.08648817 -0.79972756]
[-0.01254409 -0.24885782]
[-0.21060859 -0.5677824 ]
[-0.21060859 -0.19087153]]

# Train the model

from sklearn.linear_model import LogisticRegression

classifier = LogisticRegression(random_state = 0)

classifier.fit(xtrain, ytrain)

# prediction

y_pred = classifier.predict(xtest)

# Test the performance of our model

from sklearn.metrics import confusion_matrix

cm = confusion_matrix(ytest, y_pred)

print ("Confusion Matrix : \n", cm)

# Accuracy

from sklearn.metrics import accuracy_score

print ("Accuracy : ", accuracy_score(ytest, y_pred))


Output 2:

Confusion Matrix :
[[65 3]
[ 8 24]]

Out of 100 :
True Positive + True Negative = 65 + 24
False Positive + False Negative = 3 + 8
Performance measure – Accuracy

Accuracy: 0.89
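That is, accuracy = (TP + TN) / total = (65 + 24) / 100 = 0.89.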

# Visualizing the performance of our model

from matplotlib.colors import ListedColormap

X_set, y_set = xtest, ytest

X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))

plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))


plt.xlim(X1.min(), X1.max())

plt.ylim(X2.min(), X2.max())

for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)

plt.title('Classifier (Test set)')

plt.xlabel('Age')

plt.ylabel('Estimated Salary')

plt.legend()

plt.show()
PROGRAM - 9

AIM: To implement Naïve Bayes Algorithm.

CODE:
# Importing library

import math

import random

import csv

# the categorical class names are changed to numeric data
def encode_class(mydata):
    classes = []
    for i in range(len(mydata)):
        if mydata[i][-1] not in classes:
            classes.append(mydata[i][-1])
    for i in range(len(classes)):
        for j in range(len(mydata)):
            if mydata[j][-1] == classes[i]:
                mydata[j][-1] = i
    return mydata

# Splitting the data
def splitting(mydata, ratio):
    train_num = int(len(mydata) * ratio)
    train = []
    # initially the test set holds the whole dataset
    test = list(mydata)
    while len(train) < train_num:
        # index generated randomly from range 0 to length of test set
        index = random.randrange(len(test))
        # from the test set, pop data rows and put them in train
        train.append(test.pop(index))
    return train, test

# Group the data rows under each class, e.g. dict[yes] and dict[no]
def groupUnderClass(mydata):
    dict = {}
    for i in range(len(mydata)):
        if (mydata[i][-1] not in dict):
            dict[mydata[i][-1]] = []
        dict[mydata[i][-1]].append(mydata[i])
    return dict

# Calculating Mean
def mean(numbers):
    return sum(numbers) / float(len(numbers))

# Calculating Standard Deviation
def std_dev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def MeanAndStdDev(mydata):
    info = [(mean(attribute), std_dev(attribute)) for attribute in zip(*mydata)]
    # e.g. mydata = [[a, b, c], [m, n, o], [x, y, z]]:
    # mean of 1st attribute = (a + m + x)/3, mean of 2nd attribute = (b + n + y)/3
    # delete the summary of the last (class) attribute
    del info[-1]
    return info

# find Mean and Standard Deviation under each class
def MeanAndStdDevForClass(mydata):
    info = {}
    dict = groupUnderClass(mydata)
    for classValue, instances in dict.items():
        info[classValue] = MeanAndStdDev(instances)
    return info

# Calculate Gaussian Probability Density Function
def calculateGaussianProbability(x, mean, stdev):
    expo = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * expo
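# The function above evaluates the Gaussian (normal) density
#     P(x | class) = exp(-(x - mean)^2 / (2 * stdev^2)) / (sqrt(2 * pi) * stdev)
# using the per-class mean and standard deviation estimated from the training data.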

# Calculate Class Probabilities
def calculateClassProbabilities(info, test):
    probabilities = {}
    for classValue, classSummaries in info.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, std_dev = classSummaries[i]
            x = test[i]
            probabilities[classValue] *= calculateGaussianProbability(x, mean, std_dev)
    return probabilities

# Make prediction - the class with the highest probability is the prediction
def predict(info, test):
    probabilities = calculateClassProbabilities(info, test)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

# returns predictions for a set of examples
def getPredictions(info, test):
    predictions = []
    for i in range(len(test)):
        result = predict(info, test[i])
        predictions.append(result)
    return predictions

# Accuracy score
def accuracy_rate(test, predictions):
    correct = 0
    for i in range(len(test)):
        if test[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(test))) * 100.0

# driver code
# add the data path on your system
filename = r'E:\user\MACHINE LEARNING\machine learning algos\Naive bayes\filedata.csv'

# load the file and store it in the mydata list
mydata = csv.reader(open(filename, "rt"))
mydata = list(mydata)
mydata = encode_class(mydata)
for i in range(len(mydata)):
    mydata[i] = [float(x) for x in mydata[i]]

# split ratio = 0.7

# 70% of data is training data and 30% is test data used for testing

ratio = 0.7

train_data, test_data = splitting(mydata, ratio)

print('Total number of examples are: ', len(mydata))

print('Out of these, training examples are: ', len(train_data))

print("Test examples are: ", len(test_data))

# prepare model

info = MeanAndStdDevForClass(train_data)

# test model

predictions = getPredictions(info, test_data)

accuracy = accuracy_rate(test_data, predictions)

print("Accuracy of your model is: ", accuracy)

Output:

Total number of examples are: 200


Out of these, training examples are: 140
Test examples are: 60
Accuracy of your model is: 71.237678
PROGRAM - 10

AIM: To implement Decision Tree Algorithm.

CODE:
# Importing the required packages

import numpy as np

import pandas as pd

from sklearn.metrics import confusion_matrix

from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import accuracy_score

from sklearn.metrics import classification_report

# Function importing Dataset
def importdata():
    balance_data = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-' +
        'databases/balance-scale/balance-scale.data',
        sep=',', header=None)

    # Printing the dataset shape
    print("Dataset Length: ", len(balance_data))
    print("Dataset Shape: ", balance_data.shape)

    # Printing the dataset observations
    print("Dataset: ", balance_data.head())
    return balance_data

# Function to split the dataset
def splitdataset(balance_data):
    # Separating the target variable
    X = balance_data.values[:, 1:5]
    Y = balance_data.values[:, 0]

    # Splitting the dataset into train and test
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size = 0.3, random_state = 100)

    return X, Y, X_train, X_test, y_train, y_test

# Function to perform training with giniIndex.
def train_using_gini(X_train, X_test, y_train):
    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(criterion = "gini", random_state = 100,
                                      max_depth = 3, min_samples_leaf = 5)

    # Performing training
    clf_gini.fit(X_train, y_train)
    return clf_gini

# Function to perform training with entropy.
def train_using_entropy(X_train, X_test, y_train):
    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(criterion = "entropy", random_state = 100,
                                         max_depth = 3, min_samples_leaf = 5)

    # Performing training
    clf_entropy.fit(X_train, y_train)
    return clf_entropy

# Function to make predictions
def prediction(X_test, clf_object):
    # Prediction on the test set
    y_pred = clf_object.predict(X_test)
    print("Predicted values:")
    print(y_pred)
    return y_pred

# Function to calculate accuracy
def cal_accuracy(y_test, y_pred):
    print("Confusion Matrix: ", confusion_matrix(y_test, y_pred))
    print("Accuracy : ", accuracy_score(y_test, y_pred) * 100)
    print("Report : ", classification_report(y_test, y_pred))

# Driver code
def main():
    # Building Phase
    data = importdata()
    X, Y, X_train, X_test, y_train, y_test = splitdataset(data)
    clf_gini = train_using_gini(X_train, X_test, y_train)
    clf_entropy = train_using_entropy(X_train, X_test, y_train)

    # Operational Phase
    print("Results Using Gini Index:")
    # Prediction using gini
    y_pred_gini = prediction(X_test, clf_gini)
    cal_accuracy(y_test, y_pred_gini)

    print("Results Using Entropy:")
    # Prediction using entropy
    y_pred_entropy = prediction(X_test, clf_entropy)
    cal_accuracy(y_test, y_pred_entropy)

# Calling main function
if __name__ == "__main__":
    main()
Output:

Data Information:

Dataset Length: 625


Dataset Shape: (625, 5)
Dataset: 0 1 2 3 4
0 B 1 1 1 1
1 R 1 1 1 2
2 R 1 1 1 3
3 R 1 1 1 4
4 R 1 1 1 5

Results Using Gini Index:

Predicted values:
['R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L'
'R' 'L' 'R' 'L'
'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L'
'R' 'L' 'L' 'L'
'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L'
'R' 'R' 'L' 'R'
'R' 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'L' 'L' 'R'
'R' 'L' 'L' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R'
'R' 'L' 'R' 'L'
'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'R'
'R' 'R' 'R' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R'
'L' 'L' 'L' 'L'
'L' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R'
'L' 'R' 'L' 'R'
'L' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R'
'R' 'R' 'R' 'R'
'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L'
'L' 'L' 'R' 'R'
'L' 'R' 'R' 'L' 'L' 'R' 'R' 'R']

Confusion Matrix: [[ 0 6 7]
[ 0 67 18]
[ 0 19 71]]
Accuracy : 73.4042553191
Report :
              precision    recall  f1-score   support
           B       0.00      0.00      0.00        13
           L       0.73      0.79      0.76        85
           R       0.74      0.79      0.76        90
 avg / total       0.68      0.73      0.71       188

Results Using Entropy:

Predicted values:
['R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'L' 'L'
'R' 'L' 'R' 'L'
'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'L'
'R' 'L' 'L' 'L'
'L' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L'
'L' 'R' 'L' 'L'
'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'L' 'R'
'L' 'L' 'L' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R'
'R' 'L' 'R' 'L'
'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'R'
'R' 'R' 'R' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'L' 'R'
'R' 'R' 'L' 'L'
'L' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R'
'L' 'R' 'L' 'R'
'L' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R'
'R' 'R' 'R' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'L'
'L' 'L' 'L' 'R'
'R' 'R' 'L' 'L' 'L' 'R' 'R' 'R']

Confusion Matrix: [[ 0 6 7]
[ 0 63 22]
[ 0 20 70]]
Accuracy : 70.7446808511
Report :
              precision    recall  f1-score   support
           B       0.00      0.00      0.00        13
           L       0.71      0.74      0.72        85
           R       0.71      0.78      0.74        90
 avg / total       0.66      0.71      0.68       188
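Note that in both reports the minority class 'B' (balanced, only 13 test samples) is never predicted; with max_depth=3 and min_samples_leaf=5 the shallow trees cannot carve out a region for it, which is why its precision and recall are 0.00.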
PROGRAM - 11

AIM: To implement the Support Vector Machine algorithm.

CODE:
# import libraries

import pandas as pd

import numpy as np

import seaborn as sns

import matplotlib.pyplot as plt

%matplotlib inline

# Importing Data file

data = pd.read_csv('bc2.csv')

dataset = pd.DataFrame(data)

dataset.columns

Output 1:

Index(['ID', 'ClumpThickness', 'Cell Size', 'Cell Shape', 'Marginal Adhesion',
       'Single Epithelial Cell Size', 'Bare Nuclei', 'Normal Nucleoli',
       'Bland Chromatin', 'Mitoses', 'Class'],
      dtype='object')

dataset.info()

Output 2:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 699 entries, 0 to 698
Data columns (total 11 columns):
ID 699 non-null int64
ClumpThickness 699 non-null int64
Cell Size 699 non-null int64
Cell Shape 699 non-null int64
Marginal Adhesion 699 non-null int64
Single Epithelial Cell Size 699 non-null int64
Bare Nuclei 699 non-null object
Normal Nucleoli 699 non-null int64
Bland Chromatin 699 non-null int64
Mitoses 699 non-null int64
Class 699 non-null int64
dtypes: int64(10), object(1)
memory usage: 60.1+ KB

dataset.describe().transpose()

Output 3:
                             count          mean            std      min       25%        50%        75%         max
ID                           699.0  1.071704e+06  617095.729819  61634.0  870688.5  1171710.0  1238298.0  13454352.0
ClumpThickness               699.0  4.417740e+00       2.815741      1.0       2.0        4.0        6.0        10.0
Cell Size                    699.0  4.417740e+00       2.815741      1.0       1.0        1.0        5.0        10.0
Cell Shape                   699.0  3.134478e+00       3.051459      1.0       1.0        1.0        5.0        10.0
Marginal Adhesion            699.0  2.806867e+00       2.971913      1.0       1.0        1.0        4.0        10.0
Single Epithelial Cell Size  699.0  3.216023e+00       2.855379      1.0       2.0        2.0        4.0        10.0
Normal Nucleoli              699.0  3.437768e+00       2.214300      1.0       2.0        3.0        5.0        10.0
Bland Chromatin              699.0  2.866953e+00       2.438364      1.0       1.0        1.0        4.0        10.0
Mitoses                      699.0  1.589413e+00       3.053634      1.0       1.0        1.0        1.0        10.0
Class                        699.0  2.689557e+00       1.715078      2.0       2.0        2.0        4.0         4.0

dataset = dataset.replace('?', np.nan)


dataset = dataset.apply(lambda x: x.fillna(x.median()),axis=0)

# converting the 'Bare Nuclei' column from object (string) type to float

dataset['Bare Nuclei'] = dataset['Bare Nuclei'].astype('float64')

dataset.isnull().sum()

Output 4:

ID 0
ClumpThickness 0
Cell Size 0
Cell Shape 0
Marginal Adhesion 0
Single Epithelial Cell Size 0
Bare Nuclei 0
Normal Nucleoli 0
Bland Chromatin 0
Mitoses 0
Class 0
dtype: int64

from sklearn.model_selection import train_test_split

# To calculate the accuracy score of the model

from sklearn.metrics import accuracy_score, confusion_matrix

target = dataset["Class"]

features = dataset.drop(["ID","Class"], axis=1)


X_train, X_test, y_train, y_test = train_test_split(features, target, test_size = 0.2, random_state = 10)

from sklearn.svm import SVC

# Building a Support Vector Machine on train data

svc_model = SVC(C= .1, kernel='linear', gamma= 1)

svc_model.fit(X_train, y_train)

prediction = svc_model.predict(X_test)

# check the accuracy on the training set

print(svc_model.score(X_train, y_train))

print(svc_model.score(X_test, y_test))

Output 5:

0.9749552772808586
0.9642857142857143

print("Confusion Matrix:\n",confusion_matrix(prediction,y_test))

Output 6:

Confusion Matrix:
[[95 2]
[ 3 40]]

# Building a Support Vector Machine on train data


svc_model = SVC(kernel='rbf')

svc_model.fit(X_train, y_train)

Output 7:

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,


decision_function_shape='ovr', degree=3,
gamma='auto_deprecated',
kernel='rbf', max_iter=-1, probability=False,
random_state=None,
shrinking=True, tol=0.001, verbose=False)

print(svc_model.score(X_train, y_train))

print(svc_model.score(X_test, y_test))

Output 8:

0.998211091234347
0.9571428571428572

# Building a Support Vector Machine on train data (changing the kernel)

svc_model = SVC(kernel='poly')

svc_model.fit(X_train, y_train)
prediction = svc_model.predict(X_test)

print(svc_model.score(X_train, y_train))

print(svc_model.score(X_test, y_test))

Output 9:

1.0
0.9357142857142857

svc_model = SVC(kernel='sigmoid')

svc_model.fit(X_train, y_train)

prediction = svc_model.predict(X_test)

print(svc_model.score(X_train, y_train))

print(svc_model.score(X_test, y_test))

Output 10:

0.3434704830053667
0.32857142857142857
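On this dataset the linear kernel generalises best (test accuracy 0.964), followed by RBF (0.957) and polynomial (0.936), while the sigmoid kernel fails badly (0.329), illustrating how strongly the kernel choice affects an SVM.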
