ML assignment
ML assignment
import pandas as pd
from sklearn.preprocessing import StandardScaler
# Print a summary of `fl`.
# NOTE(review): `fl` is not assigned in the visible lines — presumably a pandas
# DataFrame loaded earlier. If so, `.info()` already prints its report and returns
# None, so this call would also print "None" — confirm and drop the print() if so.
print(fl.info())
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score
# Elbow Method
# Fit K-Means once for every candidate cluster count and record the fitted
# model's inertia (sum of squared distances of samples to their closest
# centroid). The "elbow" of the resulting curve suggests a cluster count.
# NOTE(review): `sceled_data` is defined outside the visible lines — presumably
# the StandardScaler-transformed feature matrix; confirm.
cluster_range = range(1, 11)
Ac = []
for i in cluster_range:
    kmeans = KMeans(n_clusters=i, init='k-means++', n_init='auto', random_state=10)
    kmeans.fit(sceled_data)
    Ac.append(kmeans.inertia_)
# Plotting the results of the Elbow Method
plt.plot(cluster_range, Ac)
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('Ac')
plt.show()
# Silhouette Score
# Fit K-Means for each candidate cluster count and print the mean silhouette
# coefficient of the resulting labelling. The range starts at 2 because the
# silhouette score is only defined when there are at least two clusters.
# NOTE(review): `sceled_data` is defined outside the visible lines — confirm it
# is the same scaled matrix used for the elbow plot.
for n_clusters in range(2, 11):
    clusterer = KMeans(n_clusters=n_clusters, n_init='auto', random_state=10)
    cluster_labels = clusterer.fit_predict(sceled_data)
    silhouette_avg = silhouette_score(sceled_data, cluster_labels)
    print(f"For n_clusters = {n_clusters}, the average silhouette_score is : {silhouette_avg}")
# Cluster count chosen by hand after inspecting the elbow plot and the scores above.
optimal_clusters = 4
# Explanation: we import the pandas library to read the file, then normalize the data using
# the StandardScaler class.
# We then import K-Means from the scikit-learn library and fit it to our dataset.
# The elbow method and the silhouette score are calculated; both show the optimum value at 4 clusters.
# The elbow method and the silhouette score are not well suited to hierarchical clustering, so we
# compute them for k-means.
# Finally, hierarchical clustering is imported from scikit-learn and the graph is plotted.
Task 2:
import numpy as np
# For simplicity, inaccessible states are set to None and terminal states are given
# their rewards
# Grid of state utilities.
# NOTE(review): the array literal is empty in this listing — the grid rows appear
# to be missing and must be restored before this can run meaningfully.
grid_utilities = np.array([
])
# NOTE(review): presumably the per-step reward and the probability that the
# intended move succeeds — confirm against the task statement.
reward = -0.1
success_prob = 0.8
x, y = state
# Directions
directions = {
'RIGHT': (0, 1)
dx, dy = directions[action]
if 0 <= new_x < nrows and 0 <= new_y < ncols and grid[new_x, new_y] is
not None:
else:
primary_utility = grid[x, y]
# Calculate the utility of perpendicular moves
perp_utility = 0
dx, dy = directions[perp_action]
if 0 <= perp_x < nrows and 0 <= perp_y < ncols and grid[perp_x,
perp_y] is not None:
else:
return total_utility
green_states = [(1 ,0), (3, 2), (4, 1)] # Placeholder positions for green states
# Maps each state to the action chosen for it.
optimal_actions = {}
# NOTE(review): `state` and `optimal_action` are not assigned in the visible
# lines; this statement presumably belongs inside a loop over `green_states`
# whose header is missing from the listing — confirm.
optimal_actions[state] = optimal_action
# Bare expression: only displays the dict when run in a notebook/REPL.
optimal_actions
Running this gave the result {(1, 0): 'UP', (3, 2): 'DOWN', (4, 1): 'LEFT'}.
Task 3:
import pandas as pd #its used to load the dataset in this program
from sklearn.decomposition import FactorAnalysis  # FactorAnalysis is imported from the scikit-learn library
# Create the Factor Analysis model with 2 components, i.e. reduce the observed
# features to two latent factors.
# NOTE(review): `X` is defined outside the visible lines — presumably the
# DataFrame of house features (it must expose `.columns`); confirm.
fact_ana = FactorAnalysis(n_components=2, random_state=0)
fact_ana.fit(X)
# The fitted loadings: one row per factor, one column per input feature.
components = fact_ana.components_
# Format the loadings for better understanding: columns are the feature names
# and the two rows are labelled with the interpretation given to each factor.
components_d = pd.DataFrame(components, columns=X.columns, index=['Size', 'Quality'])
components_d
# Output table columns (values not preserved in this listing; column order uncertain):
# condition, grade, sqft_above, sqft_basement, sqft_living15
# The first component has positive loadings for sqft_above and the other square-footage variables,
# so it captures the size of the house; the second component reflects the quality of the house
# through the condition and the grade.
# Factor analysis helps to find the latent variables in the data and serves as a dimensionality
# reduction technique.
Task 4
import pandas as pd
# Display the first few rows of the dataset to understand its structure
# NOTE(review): `data` is not assigned in the visible lines (the loading step
# appears to be missing). As a bare expression this only displays output when
# run in a notebook/REPL.
data.head()
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt
# Data Preprocessing
# Encode categorical variables as integer codes, in place on `data`.
# Each column gets its own fitted encoder, kept in `label_encoders`, so the
# original labels can be recovered later with `inverse_transform`. A single
# shared encoder would only remember the last column it was fitted on.
# NOTE(review): `data` is defined outside the visible lines — presumably the
# transactions DataFrame; confirm it contains every column listed here.
categorical_cols = ['customer', 'age', 'gender', 'zipcodeOri', 'merchant', 'zipMerchant', 'category']
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le
# Build the confusion matrix for the test-set predictions and draw it, labelling
# the axes with the classes known to the fitted model.
cm = confusion_matrix(y_test, y_pred)
cm_display = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=model.classes_,
)
cm_display.plot()
plt.show()
# Bare expression: shows the accuracy and report when run in a notebook/REPL.
accuracy, report
accuracy: 0.996207821473316
classification report:
precision recall f1-score support