
CHAPTER 3: CLASSIFICATION

1. ID3 ALGORITHM:[pg.no:45-47]

PROGRAM:

# Load libraries

import pandas as pd

from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

from sklearn import tree

import matplotlib.pyplot as plt

# Load dataset (read without header inference; the first row of the file holds the column names)

df = pd.read_csv("diabetes.csv", header=None)

# Split dataset into features and target variable

X = df.values[1:, 0:8].astype(float) # Features (skip the header row and convert to numeric)

y = df.values[1:, 8] # Target variable

# Split dataset into training set and test set (70% training and 30% test)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Create Decision Tree classifier object (criterion='entropy' gives ID3-style information-gain splits; the other arguments shown here are non-defaults, and the deprecated presort argument is dropped)

clf_entropy = DecisionTreeClassifier(criterion='entropy', max_depth=3,
                                     min_samples_leaf=5, min_samples_split=2,
                                     random_state=100, splitter='best')

clf_entropy= clf_entropy.fit(X_train,y_train)

y_pred_en = clf_entropy.predict(X_test)

print(y_pred_en)

print("Accuracy is:", accuracy_score(y_test,y_pred_en)* 100)

tree.plot_tree(clf_entropy)
plt.show()
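Note: the program delegates the entropy computation to scikit-learn. As a supplementary sketch of what criterion='entropy' optimizes, the snippet below computes entropy and information gain by hand on a small hypothetical label array and split (y_demo and mask are illustrative, not the diabetes data):

import numpy as np

def entropy(labels):
    # H(S) = -sum(p * log2(p)) over the class proportions p
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()
    return -np.sum(p * np.log2(p))

def information_gain(labels, split_mask):
    # Gain = H(S) minus the size-weighted entropy of the two partitions
    left, right = labels[split_mask], labels[~split_mask]
    w = len(left) / len(labels)
    return entropy(labels) - (w * entropy(left) + (1 - w) * entropy(right))

y_demo = np.array([0, 0, 1, 1, 1, 0, 1, 1])  # hypothetical labels
mask = np.array([True, True, True, False, False, False, False, True])  # hypothetical split
print("Entropy:", entropy(y_demo))
print("Information gain:", information_gain(y_demo, mask))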

2. NAIVE BAYES CLASSIFICATION:[pg.no:51-53]

PROGRAM:

from sklearn import preprocessing

from sklearn.naive_bayes import GaussianNB

age = ['youth', 'youth', 'middle-aged', 'senior', 'senior', 'senior', 'middle-aged',
       'youth', 'youth', 'senior', 'youth', 'middle-aged', 'middle-aged', 'senior']

income = ['high', 'high', 'high', 'medium', 'low', 'low', 'low', 'medium', 'low',
          'medium', 'medium', 'medium', 'high', 'medium']

student = ['no', 'no', 'no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'yes', 'yes',
           'no', 'yes', 'no']

credit_rating = ['fair', 'excellent', 'fair', 'fair', 'fair', 'excellent', 'excellent',
                 'fair', 'fair', 'fair', 'excellent', 'excellent', 'fair', 'excellent']

buys_computer = ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes',
                 'yes', 'yes', 'yes', 'no']

# Create Label Encoder object

le = preprocessing.LabelEncoder()

# Converting string labels into numbers

age_encoded = le.fit_transform(age)

print(age_encoded)

income_encoded = le.fit_transform(income)

print(income_encoded)

student_encoded = le.fit_transform(student)

print(student_encoded)

credit_encoded = le.fit_transform(credit_rating)

print(credit_encoded)

# Converting string labels into numbers

label = le.fit_transform(buys_computer)
print(label)

# Combining age, income, student, and credit rating into a single list of tuples

features = list(zip(age_encoded, income_encoded, student_encoded, credit_encoded))

# Create a Gaussian Naive Bayes model

model = GaussianNB()

# Train the model using the training sets

model.fit(features, label)

# Predict output

predicted = model.predict([[2, 2, 1, 1]]) # 2: youth, 2: medium, 1: yes, 1: fair

print("Predicted Value:", predicted)

Output:
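Note: GaussianNB fits a Gaussian to the label-encoded values, which is a pragmatic shortcut for categorical data. As a sanity-check sketch, the snippet below reuses the age/income/student/credit_rating/buys_computer lists from the program above and scores the same query with plain frequency counts, which is the textbook categorical Naive Bayes calculation:

data = list(zip(age, income, student, credit_rating, buys_computer))
query = ('youth', 'medium', 'yes', 'fair')  # same query as the prediction above

for cls in ('yes', 'no'):
    rows = [r for r in data if r[-1] == cls]
    score = len(rows) / len(data)  # prior P(class)
    for i, value in enumerate(query):
        # conditional P(feature value | class) as a simple frequency estimate
        score *= sum(1 for r in rows if r[i] == value) / len(rows)
    print("P(%s | query) is proportional to %.4f" % (cls, score))

In the classic version of this example, 'yes' gets the larger score for this query.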

3. MULTINOMIAL NAIVE BAYES CLASSIFICATION:[pg.no:54-55]

PROGRAM:

# Import scikit-learn dataset library

from sklearn import datasets

# Load dataset

wine = datasets.load_wine()

# Print the names of the 13 features

print("Features:", wine.feature_names)

# Print the label type of wine (class 0, class 1, class 2)

print("Labels:", wine.target_names)

# Print data (feature) shape

print(wine.data.shape)

# Print the wine data features (top 5)

print(wine.data[:5])
print(wine.target)

# Import train test split function from sklearn.model_selection

from sklearn.model_selection import train_test_split

# Split dataset into training set and test set (70% training and 30% test)

X_train, X_test, y_train, y_test = train_test_split(wine.data, wine.target, test_size=0.3, random_state=109)

# Import Multinomial Naive Bayes model

from sklearn.naive_bayes import MultinomialNB

# Create a Multinomial Classifier (the wine features are non-negative, as MultinomialNB requires)

mnb = MultinomialNB()

# Train the model using the training sets

mnb.fit(X_train, y_train)

# Predict the response for the test dataset

y_pred = mnb.predict(X_test)

print("Predicted Labels:",y_pred)

# Import scikit-learn metrics module for accuracy calculation

from sklearn import metrics

# Model accuracy: how often the classifier is correct

print("Accuracy:", metrics.accuracy_score(y_test, y_pred) * 100)

Output:
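Note: MultinomialNB is designed for non-negative, count-like features, while GaussianNB models each feature with a per-class Gaussian and is often the more natural fit for continuous measurements like these. A small comparison sketch, assuming the same X_train/X_test split and the metrics module from the program above:

from sklearn.naive_bayes import GaussianNB, MultinomialNB

for name, model in [("MultinomialNB", MultinomialNB()), ("GaussianNB", GaussianNB())]:
    model.fit(X_train, y_train)
    print(name, "accuracy:", metrics.accuracy_score(y_test, model.predict(X_test)) * 100)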
4. LINEAR KERNEL:[pg.no:59-61]

PROGRAM:

# Load libraries

import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import matplotlib.pyplot as plt

import seaborn as sn

from sklearn.svm import SVC

# Assign column names to the dataset

column_names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']

# Load dataset

df = pd.read_csv("iris.csv", names=column_names)

# Split dataset into features and target

X = df.drop('Class', axis=1) # Features

y = df['Class'] # Target variable

# Split dataset into training set and test set

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Create a support vector classifier with a linear kernel

clf = SVC(kernel='linear')

# Fit the classifier to the training data

clf.fit(X_train, y_train)

# Predict the classes on test set

y_pred = clf.predict(X_test)

print(y_pred)

# Calculate accuracy

accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy * 100)

# Print classification report and confusion matrix

print(classification_report(y_test, y_pred))

# Generate and display confusion matrix heatmap

cm = pd.crosstab(y_test, y_pred, rownames=['Actual'], colnames=['Predicted'])

ax = sn.heatmap(cm, annot=True)

plt.show()

Output:
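Note: with kernel='linear' the classifier scores points using the plain dot product, K(x, z) = x . z. As a verification sketch, assuming the X_train/X_test/y_train/y_pred objects from the program above, training on a precomputed Gram matrix should reproduce the same predictions:

import numpy as np

gram_train = X_train.values @ X_train.values.T  # K(x_i, x_j) over training pairs
gram_test = X_test.values @ X_train.values.T    # K(test point, training point)

clf_pre = SVC(kernel='precomputed').fit(gram_train, y_train)
print("Predictions agree:", np.array_equal(clf_pre.predict(gram_test), y_pred))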
5. POLYNOMIAL KERNEL:[pg.no:61-63]

PROGRAM:

# Import libraries

import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import matplotlib.pyplot as plt

import seaborn as sn

# Assign column names to the dataset

colnames = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']

df = pd.read_csv("iris.csv", names=colnames)

# Split dataset into features and target variable


X = df.drop('Class', axis=1) # Features

y = df['Class'] # Target variable

# Split dataset into training set and test set

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Import support vector classifier from sklearn

from sklearn.svm import SVC

clf = SVC(kernel='poly', degree=8) # Polynomial kernel with degree 8

# Fit the classifier to the training data

clf.fit(X_train, y_train)

# Make predictions on the test data

y_pred = clf.predict(X_test)

# Print the predicted labels

print(y_pred)

# Calculate and print the accuracy score

accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy*100)

# Print the classification report

print(classification_report(y_test, y_pred))

# Generate and display the confusion matrix as a heatmap

cm = pd.crosstab(y_test, y_pred, rownames=['Actual'], colnames=['Predicted'])  # named cm to avoid shadowing the imported confusion_matrix

ax = sn.heatmap(cm, annot=True)

plt.show()

Output:
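Note: the polynomial kernel computes K(x, z) = (gamma * x . z + coef0) ** degree, and degree=8 is unusually high for a four-feature dataset. A quick sweep sketch, assuming the same split and imports as the program above, shows how the degree affects test accuracy:

for d in (2, 3, 5, 8):
    clf_d = SVC(kernel='poly', degree=d).fit(X_train, y_train)
    print("degree=%d accuracy: %.1f" % (d, accuracy_score(y_test, clf_d.predict(X_test)) * 100))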
6. RADIAL BASIS FUNCTION KERNEL:[pg.no:63-65]

PROGRAM:

# Load libraries

import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import matplotlib.pyplot as plt

import seaborn as sn

from sklearn.svm import SVC

# Assign column names to the dataset

col_names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']

# Load the dataset

dataset = pd.read_csv("iris.csv", names=col_names)

# Separate features and target variable

X = dataset.drop('Class', axis=1)

y = dataset['Class']

# Split dataset into training set and test set (70% training and 30% test)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Create a support vector classifier with an RBF kernel

clf = SVC(kernel='rbf', gamma=0.1)

# Fit the classifier to the training data

clf.fit(X_train, y_train)

# Predict the labels for the test set

y_pred = clf.predict(X_test)

# Print the predicted labels and accuracy

print("Predicted labels:", y_pred)

print("Accuracy:", accuracy_score(y_test, y_pred))

# Print the classification report

print(classification_report(y_test, y_pred))
# Generate and display confusion matrix as a heatmap

cm = pd.crosstab(y_test, y_pred, rownames=['Actual'], colnames=['Predicted'])  # named cm to avoid shadowing the imported confusion_matrix

sn.heatmap(cm, annot=True)

plt.show()
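Note: the RBF kernel computes K(x, z) = exp(-gamma * ||x - z||**2), so gamma controls how quickly similarity decays with distance; large values fit very local boundaries and risk overfitting. A quick sweep sketch, assuming the same split and imports as the program above:

for g in (0.01, 0.1, 1, 10):
    clf_g = SVC(kernel='rbf', gamma=g).fit(X_train, y_train)
    print("gamma=%s accuracy: %.1f" % (g, accuracy_score(y_test, clf_g.predict(X_test)) * 100))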
7. K-NEAREST NEIGHBOURS:[pg.no:66-68]

PROGRAM:

# Load libraries

import pandas as pd

import numpy as np

# Import module for splitting test data and training data

from sklearn.model_selection import train_test_split

# Import scikit-learn metrics module for accuracy calculation

from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt

# Load dataset (read without header inference; the first row of the file holds the column names)

df = pd.read_csv("diabetes.csv", header=None)

# Split dataset into features and target variable

X = df.values[1:, 0:8].astype(float) # Features (skip the header row and convert to numeric)

y = df.values[1:, 8] # Target variable


# Split dataset into training set and test set (70% training and 30% test)

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.3, random_state=1)

# Import KNN classifier from sklearn.neighbors

from sklearn.neighbors import KNeighborsClassifier

clf = KNeighborsClassifier(n_neighbors=5)

# Fitting the classifier to the training data

clf = clf.fit(X_train, y_train)

# Predicting the test set results

y_pred = clf.predict(X_test)

print(y_pred)

print("Accuracy:", accuracy_score(y_test, y_pred) * 100)

# Import module for classification report and confusion matrix from sklearn.metrics

from sklearn.metrics import classification_report, confusion_matrix

print("Confusion Matrix:")

print(confusion_matrix(y_test, y_pred))

print("Classification Report:")

print(classification_report(y_test, y_pred))

error = []

# Calculating error for K values between 1 and 40

for i in range(1, 40):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, y_train)
    pred_i = knn.predict(X_test)
    error.append(np.mean(pred_i != y_test))

plt.figure(figsize=(12, 6))

plt.plot(range(1, 40), error, color='red', linestyle='dashed', marker='o',
         markerfacecolor='blue', markersize=10)

plt.title('Error Rate vs. K Value')

plt.xlabel('K Value')

plt.ylabel('Mean Error')

plt.show()
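Note: the error curve above is the usual way to choose k. A small follow-up, assuming the error list computed in the loop above (the sweep starts at k=1, hence the +1):

best_k = int(np.argmin(error)) + 1
print("Lowest mean error %.3f at k=%d" % (min(error), best_k))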
8. RANDOM FOREST:[pg.no:70-71]

PROGRAM:

# Load libraries

import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import matplotlib.pyplot as plt

import seaborn as sn

# Assign column names to the dataset

colnames = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']

# Load dataset (pass the names so the first data row is not consumed as a header)

df = pd.read_csv("iris.csv", names=colnames)

# Split dataset into features and target

X = df.drop('Class', axis=1) # Features

y = df['Class'] # Target variable

# Split dataset into training set and test set

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Create the random forest classifier and fit the model

clf = RandomForestClassifier(n_estimators=100)

clf.fit(X_train, y_train)

# Make predictions on the test set

y_pred = clf.predict(X_test)

# Print predictions and accuracy score

print("Predictions:", y_pred)

print("Accuracy:", accuracy_score(y_test, y_pred) * 100)

# Print classification report and confusion matrix

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")

cm = confusion_matrix(y_test, y_pred)

print(cm)

# Generate heatmap and display it

plt.figure(figsize=(8, 6))

sn.heatmap(cm, annot=True, fmt='d')

plt.xlabel('Predicted')

plt.ylabel('Actual')

plt.show()
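Note: a fitted random forest also exposes feature_importances_ (the mean impurity decrease across the trees), which is often the next thing inspected. A short follow-up sketch, assuming the clf and X objects from the program above:

importances = pd.Series(clf.feature_importances_, index=X.columns)
print(importances.sort_values(ascending=False))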
