PROGRAM - 1
CODE:
# numpy
import numpy as np
x = np.array([[1, 2], [3, 4]])
y = np.array([[5, 6], [7, 8]])
v = np.array([9, 10])
w = np.array([11, 12])
# inner product of vectors, matrix/vector product, matrix/matrix product
# (the x and y arrays were lost in extraction and are reconstructed from the output below)
print(np.dot(v, w))
print(np.dot(x, v))
print(np.dot(x, y))
Output 1:
219
[29 67]
[[19 22]
[43 50]]
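For 1-D inputs np.dot is the inner product and for 2-D inputs it is matrix multiplication; the @ operator gives the same results, using the arrays above:

print(v @ w)   # 219, same as np.dot(v, w)
print(x @ y)   # same as np.dot(x, y)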
# SciPy: read an image as a numpy array
# (scipy.misc.imread is deprecated in recent SciPy releases and needs Pillow;
# 'cat.jpg' is a placeholder input file)
from scipy import misc
img = misc.imread('cat.jpg')
print(img.dtype, img.shape)
Output 2:
(Figures: the original image, the tinted image, and the resized tinted image.)
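The tinting and resizing steps themselves did not survive extraction; a minimal sketch that reproduces them with imageio and Pillow (modern stand-ins for the removed scipy.misc helpers; 'cat.jpg' is again a placeholder):

import numpy as np
import imageio.v2 as imageio
from PIL import Image

img = imageio.imread('cat.jpg')            # uint8 array of shape (H, W, 3)
img_tinted = img * [1.0, 0.95, 0.9]        # scale down the green and blue channels
img_small = Image.fromarray(img_tinted.astype(np.uint8)).resize((300, 300))
imageio.imwrite('cat_tinted_resized.jpg', np.array(img_small))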
# Scikit-learn
from sklearn import datasets, metrics
from sklearn.tree import DecisionTreeClassifier

# fit a decision tree on the iris dataset
dataset = datasets.load_iris()
model = DecisionTreeClassifier()
model.fit(dataset.data, dataset.target)
print(model)
# make predictions on the training data
expected = dataset.target
predicted = model.predict(dataset.data)
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
Output 3:
DecisionTreeClassifier(class_weight=None,
criterion='gini', max_depth=None,
max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0,
min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort=False,
random_state=None,
splitter='best')
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       1.00      1.00      1.00        50
           2       1.00      1.00      1.00        50
[[50 0 0]
[ 0 50 0]
[ 0 0 50]]
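Because the model is evaluated on the same data it was trained on, the confusion matrix is perfectly diagonal; a one-line check using the expected/predicted arrays above:

print(metrics.accuracy_score(expected, predicted))   # 1.0 on the training data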
# pandas
# importing pandas as pd
import pandas as pd

# dictionary of lists (hypothetical sample data; the original dictionary
# was lost in extraction)
data = {'Name': ['Tom', 'Jack', 'Nick'], 'Marks': [99, 98, 95]}
# convert the dictionary into a table
data_table = pd.DataFrame(data)
print(data_table)
Output 4:
# Matplotlib
import numpy as np
import matplotlib.pyplot as plt
# sine and cosine curves (the plotted data was lost in extraction;
# this is the standard example the section appears to follow)
x = np.arange(0, 3 * np.pi, 0.1)
plt.plot(x, np.sin(x), label='Sine')
plt.plot(x, np.cos(x), label='Cosine')
plt.legend()
plt.show()
Output 5:
PROGRAM - 2
CODE:
import csv

with open('tennis.csv', 'r') as f:
    reader = csv.reader(f)
    your_list = list(reader)

# start from the most specific hypothesis, one '0' per attribute
# (this initialisation was lost in extraction and is reconstructed here)
h = [['0', '0', '0', '0', '0', '0']]

for i in your_list:
    print(i)
    # generalise only on positive training examples
    if i[-1] == "True":
        j = 0
        for x in i:
            if x != "True":
                if x != h[0][j] and h[0][j] == '0':
                    h[0][j] = x
                elif x != h[0][j] and h[0][j] != '0':
                    h[0][j] = '?'
                else:
                    pass
            j = j + 1

print("specific hypothesis is")
print(h)
Output:
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'True']
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'True']
['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'False']
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'True']
specific hypothesis is
[['Sunny', 'Warm', '?', 'Strong', '?', '?']]
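For reference, a tennis.csv that reproduces this run contains exactly the four rows printed above:

Sunny,Warm,Normal,Strong,Warm,Same,True
Sunny,Warm,High,Strong,Warm,Same,True
Rainy,Cold,High,Strong,Warm,Change,False
Sunny,Warm,High,Strong,Cool,Change,True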
PROGRAM - 3
CODE:
# load the iris dataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
iris = load_iris()
X = iris.data
y = iris.target
# train/test split (split parameters assumed; the original line was lost)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)
# train a Gaussian Naive Bayes classifier and predict on the test set
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)
print("GaussianNB accuracy (%):", metrics.accuracy_score(y_test, y_pred) * 100)
Output:
PROGRAM - 4
CODE:
# importing required tools
import numpy as np
import cv2
from matplotlib import pyplot as plt

# two well-separated clusters of random 2-D points
X = np.random.randint(10, 35, (25, 2))
Y = np.random.randint(55, 70, (25, 2))
Z = np.vstack((X, Y))
Z = Z.reshape((50, 2))
# convert to np.float32, as cv2.kmeans requires float32 input
Z = np.float32(Z)

plt.xlabel('Test Data')
plt.ylabel('Z samples')
plt.hist(Z, 256, [0, 256])
plt.show()

# define criteria and apply k-means with K = 2
# (these two lines were lost in extraction and are reconstructed here)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
ret, label, center = cv2.kmeans(Z, 2, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

# split the points by cluster label and plot each cluster
A = Z[label.ravel() == 0]
B = Z[label.ravel() == 1]
plt.scatter(A[:, 0], A[:, 1])
plt.scatter(B[:, 0], B[:, 1], c='r')
plt.show()
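The same clustering can be cross-checked with scikit-learn (a sketch, assuming the Z array built above):

# equivalent two-cluster fit with scikit-learn's KMeans
from sklearn.cluster import KMeans
sk_labels = KMeans(n_clusters=2, n_init=10).fit_predict(Z)
print(sk_labels[:10])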
PROGRAM - 5
CODE:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

dataset = pd.read_csv('wine.csv')
X = dataset.iloc[:, 0:13].values
y = dataset.iloc[:, 13].values

# train/test split (parameters assumed; the original line was lost)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# standardise the features
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# project onto the first two principal components
pca = PCA(n_components=2)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)
explained_variance = pca.explained_variance_ratio_

# logistic regression in the 2-D PCA space
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

# decision-region plots for the training and test sets
# (meshgrid boilerplate reconstructed around the surviving lines)
def plot_regions(X_set, y_set, title):
    X1, X2 = np.meshgrid(np.arange(X_set[:, 0].min() - 1, X_set[:, 0].max() + 1, 0.01),
                         np.arange(X_set[:, 1].min() - 1, X_set[:, 1].max() + 1, 0.01))
    plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T)
                 .reshape(X1.shape), alpha=0.75,
                 cmap=ListedColormap(('red', 'green', 'blue')))
    plt.xlim(X1.min(), X1.max())
    plt.ylim(X2.min(), X2.max())
    for i, j in enumerate(np.unique(y_set)):
        plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], label=j)
    plt.title(title)
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.legend()
    plt.show()

plot_regions(X_train, y_train, 'Logistic Regression (Training set)')
plot_regions(X_test, y_test, 'Logistic Regression (Test set)')
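The variance captured by the two components and the test-set confusion matrix can be inspected directly, using the variables defined above:

print(explained_variance)   # share of variance explained by PC1 and PC2
print(cm)                   # confusion matrix of the PCA + logistic regression pipeline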
PROGRAM - 6
CODE:
# Import necessary modules
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

irisData = load_iris()
X = irisData.data
y = irisData.target

# train/test split (parameters assumed; the original line was lost)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

neighbors = np.arange(1, 9)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))

# fit a KNN classifier for each value of k and record the accuracy
for i, k in enumerate(neighbors):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    train_accuracy[i] = knn.score(X_train, y_train)
    test_accuracy[i] = knn.score(X_test, y_test)

# Generate plot
plt.plot(neighbors, test_accuracy, label='Testing dataset accuracy')
plt.plot(neighbors, train_accuracy, label='Training dataset accuracy')
plt.legend()
plt.xlabel('n_neighbors')
plt.ylabel('Accuracy')
plt.show()
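The best value of k can also be read off directly rather than from the plot, using the arrays filled above:

best_k = neighbors[int(np.argmax(test_accuracy))]
print("Best n_neighbors:", best_k)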
PROGRAM - 7
CODE:
import numpy as np
import matplotlib.pyplot as plt

def estimate_coef(x, y):
    # number of observations/points
    n = np.size(x)
    # means of the x and y vectors
    m_x = np.mean(x)
    m_y = np.mean(y)
    # cross-deviation and deviation about x
    SS_xy = np.sum(y * x) - n * m_y * m_x
    SS_xx = np.sum(x * x) - n * m_x * m_x
    # regression coefficients
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1 * m_x
    return (b_0, b_1)

def plot_regression_line(x, y, b):
    # plot the observed points and the fitted line
    plt.scatter(x, y, color="m", marker="o", s=30)
    plt.plot(x, b[0] + b[1] * x, color="g")
    # putting labels
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()

def main():
    # observations / data
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    # y values assumed (the original array was lost in extraction)
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
    # estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {} \nb_1 = {}".format(b[0], b[1]))
    plot_regression_line(x, y, b)

if __name__ == "__main__":
    main()
Output:
Estimated coefficients:
b_0 = -0.0586206896552
b_1 = 1.45747126437
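The hand-computed fit can be verified independently with np.polyfit, which solves the same least-squares problem (note it returns the highest-degree coefficient first; x and y as in the code above):

import numpy as np
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
b_1, b_0 = np.polyfit(x, y, 1)   # same b_0, b_1 as estimate_coef(x, y)
print(b_0, b_1)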
PROGRAM - 8
CODE:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

dataset = pd.read_csv("User_Data.csv")
# input features: Age, Estimated Salary; target: Purchased
x = dataset.iloc[:, [2, 3]].values
y = dataset.iloc[:, 4].values
# 75/25 split (random_state assumed), matching the 100-row test set used below
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.25, random_state=0)
# feature scaling
sc_x = StandardScaler()
xtrain = sc_x.fit_transform(xtrain)
xtest = sc_x.transform(xtest)
print(xtrain[0:10, :])
Output 1:
[[ 0.58164944 -0.88670699]
[-0.60673761 1.46173768]
[-0.01254409 -0.5677824 ]
[-0.60673761 1.89663484]
[ 1.37390747 -1.40858358]
[ 1.47293972 0.99784738]
[ 0.08648817 -0.79972756]
[-0.01254409 -0.24885782]
[-0.21060859 -0.5677824 ]
[-0.21060859 -0.19087153]]
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

classifier = LogisticRegression(random_state = 0)
classifier.fit(xtrain, ytrain)
# prediction
y_pred = classifier.predict(xtest)
cm = confusion_matrix(ytest, y_pred)
print("Confusion Matrix :\n", cm)
# Accuracy
print("Accuracy :", accuracy_score(ytest, y_pred))
Output 2:
Confusion Matrix :
[[65 3]
[ 8 24]]
Out of 100 :
True Positive + True Negative = 65 + 24
False Positive + False Negative = 3 + 8
Performance measure – Accuracy
Accuracy: 0.89
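The same figure follows directly from the four cells of the confusion matrix:

# accuracy = (TP + TN) / total, using the matrix above
tn, fp, fn, tp = 65, 3, 8, 24
print((tp + tn) / (tn + fp + fn + tp))   # 0.89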
# visualising the test-set results
# (meshgrid/contour boilerplate reconstructed around the surviving lines)
from matplotlib.colors import ListedColormap
X_set, y_set = xtest, ytest
X1, X2 = np.meshgrid(np.arange(X_set[:, 0].min() - 1, X_set[:, 0].max() + 1, 0.01),
                     np.arange(X_set[:, 1].min() - 1, X_set[:, 1].max() + 1, 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], label=j)
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
PROGRAM - 9
CODE:
# Importing library
import math
import random
import csv

# encode the class names into numeric values
def encode_class(mydata):
    classes = []
    for i in range(len(mydata)):
        if mydata[i][-1] not in classes:
            classes.append(mydata[i][-1])
    for i in range(len(classes)):
        for j in range(len(mydata)):
            if mydata[j][-1] == classes[i]:
                mydata[j][-1] = i
    return mydata

# split the dataset into train and test with the given ratio
def splitting(mydata, ratio):
    train_num = int(len(mydata) * ratio)
    train = []
    # initially the test set holds the whole dataset
    test = list(mydata)
    while len(train) < train_num:
        # index generated randomly from range 0 to length of testset
        index = random.randrange(len(test))
        # pop data rows from test and put them in train
        train.append(test.pop(index))
    return train, test

# Group the data rows under each class yes or no in dictionary eg: dict[yes] and dict[no]
def groupUnderClass(mydata):
    dict = {}
    for i in range(len(mydata)):
        if mydata[i][-1] not in dict:
            dict[mydata[i][-1]] = []
        dict[mydata[i][-1]].append(mydata[i])
    return dict

# Calculating Mean
def mean(numbers):
    return sum(numbers) / float(len(numbers))

# Calculating Standard Deviation
def std_dev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

# mean and standard deviation of each attribute
# here mean of 1st attribute = (a + m + x)/3, mean of 2nd attribute = (b + n + y)/3
def MeanAndStdDev(mydata):
    info = [(mean(attribute), std_dev(attribute)) for attribute in zip(*mydata)]
    # the last column is the class label, not an attribute
    del info[-1]
    return info

# mean and standard deviation of the attributes, per class
def MeanAndStdDevForClass(mydata):
    info = {}
    dict = groupUnderClass(mydata)
    for classValue, instances in dict.items():
        info[classValue] = MeanAndStdDev(instances)
    return info

# Calculate Gaussian Probability Density Function
def calculateGaussianProbability(x, mean, stdev):
    expo = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * expo

# probability of a test row under each class
def calculateClassProbabilities(info, test):
    probabilities = {}
    for classValue, classSummaries in info.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean_, std_dev_ = classSummaries[i]
            x = test[i]
            probabilities[classValue] *= calculateGaussianProbability(x, mean_, std_dev_)
    return probabilities

# predict the class with the highest probability
def predict(info, test):
    probabilities = calculateClassProbabilities(info, test)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

def getPredictions(info, test):
    predictions = []
    for i in range(len(test)):
        result = predict(info, test[i])
        predictions.append(result)
    return predictions

# Accuracy score
def accuracy_rate(test, predictions):
    correct = 0
    for i in range(len(test)):
        if test[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(test))) * 100.0

# driver code
# (file name assumed; any numeric CSV with the class label in the last column works)
filename = 'pima-indians-diabetes.csv'
mydata = list(csv.reader(open(filename, 'rt')))
mydata = encode_class(mydata)
for i in range(len(mydata)):
    mydata[i] = [float(x) for x in mydata[i]]

# 70% of data is training data and 30% is test data used for testing
ratio = 0.7
train_data, test_data = splitting(mydata, ratio)

# prepare model
info = MeanAndStdDevForClass(train_data)

# test model
predictions = getPredictions(info, test_data)
print('Accuracy of the model:', accuracy_rate(test_data, predictions))
Output:
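As a sanity check, the same split can be fed to scikit-learn's GaussianNB, which implements the same Gaussian class-conditional model (a sketch, assuming the train_data/test_data lists built above; results may differ slightly because of variance smoothing):

import numpy as np
from sklearn.naive_bayes import GaussianNB
train_arr, test_arr = np.array(train_data), np.array(test_data)
gnb = GaussianNB().fit(train_arr[:, :-1], train_arr[:, -1])
print(gnb.score(test_arr[:, :-1], test_arr[:, -1]) * 100)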
PROGRAM - 10
CODE:
# Importing the required packages
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

def importdata():
    balance_data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-'
                               + 'databases/balance-scale/balance-scale.data',
                               sep=',', header=None)
    print("Data Information:")
    print(balance_data.head())
    return balance_data

def splitdataset(balance_data):
    X = balance_data.values[:, 1:5]
    Y = balance_data.values[:, 0]
    # 70/30 split, consistent with the 188-row test set in the output below
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=100)
    return X, Y, X_train, X_test, y_train, y_test

def train_using_gini(X_train, y_train):
    clf_gini = DecisionTreeClassifier(criterion="gini", random_state=100,
                                      max_depth=3, min_samples_leaf=5)
    # Performing training
    clf_gini.fit(X_train, y_train)
    return clf_gini

def train_using_entropy(X_train, y_train):
    clf_entropy = DecisionTreeClassifier(criterion="entropy", random_state=100,
                                         max_depth=3, min_samples_leaf=5)
    # Performing training
    clf_entropy.fit(X_train, y_train)
    return clf_entropy

def prediction(X_test, clf_object):
    y_pred = clf_object.predict(X_test)
    print("Predicted values:")
    print(y_pred)
    return y_pred

def cal_accuracy(y_test, y_pred):
    print("Confusion Matrix:", confusion_matrix(y_test, y_pred))
    print("Accuracy :", accuracy_score(y_test, y_pred) * 100)
    print("Report :", classification_report(y_test, y_pred))

# Driver code
def main():
    # Building Phase
    data = importdata()
    X, Y, X_train, X_test, y_train, y_test = splitdataset(data)
    clf_gini = train_using_gini(X_train, y_train)
    clf_entropy = train_using_entropy(X_train, y_train)
    # Operational Phase
    y_pred_gini = prediction(X_test, clf_gini)
    cal_accuracy(y_test, y_pred_gini)
    y_pred_entropy = prediction(X_test, clf_entropy)
    cal_accuracy(y_test, y_pred_entropy)

if __name__=="__main__":
    main()
Output:
Data Information:
Predicted values:
['R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L'
'R' 'L' 'R' 'L'
'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L'
'R' 'L' 'L' 'L'
'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L'
'R' 'R' 'L' 'R'
'R' 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'L' 'L' 'R'
'R' 'L' 'L' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R'
'R' 'L' 'R' 'L'
'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'R'
'R' 'R' 'R' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R'
'L' 'L' 'L' 'L'
'L' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R'
'L' 'R' 'L' 'R'
'L' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R'
'R' 'R' 'R' 'R'
'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L'
'L' 'L' 'R' 'R'
'L' 'R' 'R' 'L' 'L' 'R' 'R' 'R']
Confusion Matrix: [[ 0 6 7]
[ 0 67 18]
[ 0 19 71]]
Accuracy : 73.4042553191
Report :
precision recall f1-score support
B 0.00 0.00 0.00 13
L 0.73 0.79 0.76 85
R 0.74 0.79 0.76 90
avg/total 0.68 0.73 0.71 188
Predicted values:
['R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'L' 'L'
'R' 'L' 'R' 'L'
'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'L'
'R' 'L' 'L' 'L'
'L' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L'
'L' 'R' 'L' 'L'
'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'L' 'R'
'L' 'L' 'L' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R'
'R' 'L' 'R' 'L'
'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'R'
'R' 'R' 'R' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'L' 'R'
'R' 'R' 'L' 'L'
'L' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R'
'L' 'R' 'L' 'R'
'L' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R'
'R' 'R' 'R' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'L'
'L' 'L' 'L' 'R'
'R' 'R' 'L' 'L' 'L' 'R' 'R' 'R']
Confusion Matrix: [[ 0 6 7]
[ 0 63 22]
[ 0 20 70]]
Accuracy : 70.7446808511
Report :
precision recall f1-score support
B 0.00 0.00 0.00 13
L 0.71 0.74 0.72 85
R 0.71 0.78 0.74 90
avg / total 0.66 0.71 0.68 188
PROGRAM - 11
CODE:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

data = pd.read_csv('bc2.csv')
dataset = pd.DataFrame(data)
dataset.columns
Output 1:
dataset.info()
Output 2:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 699 entries, 0 to 698
Data columns (total 11 columns):
ID 699 non-null int64
ClumpThickness 699 non-null int64
Cell Size 699 non-null int64
Cell Shape 699 non-null int64
Marginal Adhesion 699 non-null int64
Single Epithelial Cell Size 699 non-null int64
Bare Nuclei 699 non-null object
Normal Nucleoli 699 non-null int64
Bland Chromatin 699 non-null int64
Mitoses 699 non-null int64
Class 699 non-null int64
dtypes: int64(10), object(1)
memory usage: 60.1+ KB
dataset.describe().transpose()
Output 3:
(summary table: count, mean, std, min, 25%, 50%, 75%, max for each column)
# converting the 'Bare Nuclei' column from object/string type to float
# (assumes the file holds no non-numeric placeholders in that column)
dataset['Bare Nuclei'] = pd.to_numeric(dataset['Bare Nuclei'])
# counting missing values per column
dataset.isnull().sum()
Output 4:
ID 0
ClumpThickness 0
Cell Size 0
Cell Shape 0
Marginal Adhesion 0
Single Epithelial Cell Size 0
Bare Nuclei 0
Normal Nucleoli 0
Bland Chromatin 0
Mitoses 0
Class 0
dtype: int64
target = dataset["Class"]
features = dataset.drop(["ID", "Class"], axis=1)   # assumed feature selection
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
# 80/20 split matches the 559/140 train/test sizes implied by the scores (random_state assumed)
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=10)
svc_model = SVC(kernel='linear')   # kernel for this first variant is assumed
svc_model.fit(X_train, y_train)
print(svc_model.score(X_train, y_train))
print(svc_model.score(X_test, y_test))
Output 5:
0.9749552772808586
0.9642857142857143
print("Confusion Matrix:\n",confusion_matrix(prediction,y_test))
Output 6:
Confusion Matrix:
[[95 2]
[ 3 40]]
# rbf-kernel SVM (the kernel for this variant is assumed; its defining line was lost)
svc_model = SVC(kernel='rbf')
svc_model.fit(X_train, y_train)
Output 7:
print(svc_model.score(X_train, y_train))
print(svc_model.score(X_test, y_test))
Output 8:
0.998211091234347
0.9571428571428572
svc_model = SVC(kernel='poly')
svc_model.fit(X_train, y_train)
prediction = svc_model.predict(X_test)
print(svc_model.score(X_train, y_train))
print(svc_model.score(X_test, y_test))
Output 9:
1.0
0.9357142857142857
svc_model = SVC(kernel='sigmoid')
svc_model.fit(X_train, y_train)
prediction = svc_model.predict(X_test)
print(svc_model.score(X_train, y_train))
print(svc_model.score(X_test, y_test))
Output 10:
0.3434704830053667
0.32857142857142857
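The four kernel experiments above can be condensed into one loop (a sketch, assuming the X_train/X_test/y_train/y_test split created earlier):

# compare train/test accuracy across all four kernels
for kernel in ('linear', 'rbf', 'poly', 'sigmoid'):
    model = SVC(kernel=kernel).fit(X_train, y_train)
    print(kernel, model.score(X_train, y_train), model.score(X_test, y_test))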