23BCE7092_ML_Lab_Assignment[1]
1. Decision Tree
Code:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, r2_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import plot_tree
# Read the CSV into a DataFrame and echo it in full.
# NOTE(review): `filename` is never assigned in this listing; it must hold the
# CSV path before this cell runs.
df = pd.read_csv(filename)
print(df)

# Remove the 'Day' column when it exists; errors='ignore' turns the drop into
# a no-op for files that do not have that column.
df = df.drop(columns='Day', errors='ignore')

# Display the first few rows of the dataset
print(df.head())
# Replace each non-numeric column with integer codes and keep the fitted
# encoder for that column, so its original labels stay available through
# `label_encoders[<column>].classes_`.
label_encoders = {}

# NOTE(review): the loop header was missing from this listing (`column` was
# never bound, so the cell raised NameError). Iterating over the non-numeric
# columns is a reconstruction; confirm it matches the original column set.
for column in df.select_dtypes(exclude=['number']).columns:
    le = LabelEncoder()  # fresh encoder per column, so entries never share state
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

print(df)
# Fit the estimator on the training split, then draw the fitted tree.
# NOTE(review): `model`, `X`, `X_train` and `y_train` are not created anywhere
# in this listing; they must exist before this cell runs.
model.fit(X_train, y_train)

# The last DataFrame column is used to look up class labels. When that column
# was label-encoded, its original labels are passed as class names; otherwise
# None is passed.
target_column = df.columns[-1]
class_names = (
    label_encoders[target_column].classes_
    if target_column in label_encoders
    else None
)

plt.figure(figsize=(10, 6))
plot_tree(
    model,
    feature_names=X.columns,
    class_names=class_names,
    filled=True,
    rounded=True,
    fontsize=10,
)  # the original call was never closed, which made the whole cell a SyntaxError
plt.show()
Output:
2. Linear Regression
Code:
# Import required libraries
import pandas as pd
import numpy as np
# Load dataset
# NOTE(review): the statement that creates `data` is absent from this listing;
# restore the original load call here before running.
print(data.head())
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() only added a stray "None" line to the output.
data.info()

# Discard rows that contain missing values.
data = data.dropna()

# Show the rows whose 'Production' cell holds the literal '=' placeholder,
# then remove them and cast the column to a numeric dtype. Left in place they
# keep 'Production' non-numeric, which is unusable as the regression target
# taken from this column further down.
print(data[data['Production'] == '='])
data = data[data['Production'] != '='].copy()  # copy: avoids writing into a slice
data['Production'] = pd.to_numeric(data['Production'])
# Label-encode the non-numeric feature columns, remembering one fitted encoder
# per column in `label_encoders`.
label_encoders = {}

# NOTE(review): the loop header and the encoder construction were missing from
# this listing (`col` and `le` were never bound here). The column selection is
# a reconstruction; confirm it against the original.
# 'Production' is left out because it is the regression target below and must
# keep its numeric values rather than category codes.
feature_columns = (
    data.drop(columns=['Production'])
    .select_dtypes(exclude=['number'])
    .columns
)
for col in feature_columns:
    le = LabelEncoder()  # fresh encoder per column
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Regression target.
y = data['Production']
# Standardise the features: the scaler is fitted on the training split only
# and the same fitted scaler is then applied to the test split.
# NOTE(review): X_train, X_test, y_train and y_test are not created in this
# listing; the train/test split must run before this cell.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fit ordinary least squares and predict the held-out split.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report the coefficient of determination on the test split.
r2 = r2_score(y_test, y_pred)
print(f"R-squared: {r2}")
Output:
3. Logistic Regression
Code:
import numpy as np
import pandas as pd
# Read the dataset using pandas (replace 'study_hours.csv' with your actual file path)
data = pd.read_csv('study_hours.csv')
print(data)

# Assuming the target column is 'status' and all other columns are features
X = data.drop(columns=['status'])
# The target was described in the comment above but never assigned, so no
# later step had a `y` to work with.
y = data['status']

# NOTE(review): the step that derives X_train / X_test / y_train / y_test from
# X and y is absent from this listing; restore it before fitting the model.
# Fit a logistic-regression classifier on the training split and predict the
# held-out split.
# NOTE(review): X_train, X_test, y_train and y_test are not created in this
# listing; the train/test split must run before this cell.
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate the model
# Both values were printed below without ever being computed (NameError).
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Print results
print("Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)
Output:
4. Titanic Dataset
Code:
import pandas as pd
# Load the dataset and preview it.
# NOTE(review): `file_path` is never assigned in this listing; it must hold
# the CSV path before this cell runs.
data = pd.read_csv(file_path)
print("Dataset Preview:")
print(data.head())

# Fill missing ages with the median age and missing embarkation values with
# the most frequent one.
# The results are assigned back to the columns instead of calling
# fillna(inplace=True) on `data[...]`: that form is chained assignment, which
# pandas deprecates and which does not modify `data` under Copy-on-Write.
data['Age'] = data['Age'].fillna(data['Age'].median())
data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])
# Label-encode the non-numeric columns, keeping one fitted encoder per column
# in `label_encoders`.
label_encoders = {}

# NOTE(review): the loop header was missing from this listing (`col` was never
# bound, so the cell raised NameError). Iterating over every non-numeric
# column is a reconstruction; the original may have named specific columns,
# so confirm before relying on it.
for col in data.select_dtypes(exclude=['number']).columns:
    le = LabelEncoder()  # fresh encoder per column
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Classification target.
y = data['Survived']
# Fit the classifier on the training split.
# NOTE(review): `model`, X_train, X_test, y_train and y_test are not created
# anywhere in this listing; they must exist before this cell runs.
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# The three values printed below were never computed in the listing
# (NameError); derive them from the held-out labels and the predictions.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Display results
print("\nModel Evaluation:")
print(f"Accuracy: {accuracy:.2f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)
Output:
5. Clustering
Code:
import numpy as np
import pandas as pd
# Standardise the marks before clustering.
# NOTE(review): `df` and `marks` are not created anywhere in this listing;
# presumably `marks` holds the subject-mark columns of `df`. Confirm and
# restore the load step before running.
scaler = StandardScaler()
marks_scaled = scaler.fit_transform(marks)

k = 2 # Number of clusters
# `kmeans` was used below without ever being constructed (NameError).
# NOTE(review): only n_clusters=k is implied by the listing; n_init and
# random_state are set here so that re-runs give the same labels. Confirm
# against the original arguments.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
df['Cluster'] = kmeans.fit_predict(marks_scaled)

# Get centroids (in the scaled feature space, since the model was fitted on
# marks_scaled)
centroids = kmeans.cluster_centers_

# Persist the frame together with its new 'Cluster' column.
df.to_csv('student_marks_clustered.csv', index=False)
# Performance Metrics
# NOTE(review): no metric is computed under this heading and nothing follows
# the "Cluster Information" banner in this listing; restore the original
# statements if they are still available.
print("\nCluster Information:")

plt.figure(figsize=(8, 6))
# The listing set axis labels and a legend but never plotted anything, leaving
# an empty figure and a legend with no entries. The two scatter calls are a
# reconstruction: the first two scaled columns match the axis labels, and
# `centroids` was computed earlier without being used. Marker styling is a
# free choice.
# NOTE(review): assumes marks_scaled has at least two columns; confirm.
plt.scatter(
    marks_scaled[:, 0],
    marks_scaled[:, 1],
    c=df['Cluster'],
    cmap='viridis',
    label='Students',
)
plt.scatter(
    centroids[:, 0],
    centroids[:, 1],
    c='red',
    marker='X',
    s=200,
    label='Centroids',
)
plt.xlabel('Subject 1 (Scaled)')
plt.ylabel('Subject 2 (Scaled)')
plt.legend()
plt.show()
Output: