# Slip

import pandas as pd

import numpy as np

from sklearn.model_selection import train_test_split

import statsmodels.api as sm

from sklearn.metrics import r2_score, mean_squared_error

import matplotlib.pyplot as plt  # used by the plotting snippets throughout

# Linear Regression
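# assumed setup (hypothetical file and target names; the slip itself only shows 'Salary' as the predictor):
df = pd.read_csv("salary.csv")  # hypothetical file
X = df[["Salary"]]
y = df["Expenditure"]  # hypothetical target column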

X = sm.add_constant(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

reg = sm.OLS(y_train, X_train).fit() #reg.summary()

y_pred = reg.predict(X_test)

params = reg.params

p_val = reg.pvalues['Salary']  # p-value of the Salary coefficient

print(f"Coefficients: b0: {params['const']}, b1: {params['Salary']}")

print("R2 :", r2_score(y_test, y_pred))

print("MSE :", mean_squared_error(y_test, y_pred))

residuals = reg.resid

influence = reg.get_influence()

std_residuals = influence.resid_studentized_internal

cook_distance = influence.cooks_distance[0]  # visualise with plt.stem(cook_distance)

leverage = influence.hat_matrix_diag

leverage_threshold = 3 * len(X_train.columns) / len(X_train)  # 3p/n; X_train's columns already include the constant

#cooks_threshold = 1

#cook_outliers = np.where(cook_distance > cooks_threshold)[0]

#MLR

from statsmodels.stats.outliers_influence import variance_inflation_factor

def get_vif_factors(input_df):
    vif = pd.DataFrame()
    vif["Features"] = input_df.columns
    vif["VIF"] = [variance_inflation_factor(input_df.values, i)
                  for i in range(input_df.shape[1])]
    return vif
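# example call (drop the intercept column first; the VIF of 'const' is not meaningful):
print(get_vif_factors(X_train.drop(columns=["const"])))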

#Residual plot between standardized model.fittedvalues and standardized model.resid

get_standardized_values = lambda x: (x - np.mean(x)) / np.std(x)
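# a minimal sketch of that residual plot, using the helper above:
plt.scatter(get_standardized_values(reg.fittedvalues),
            get_standardized_values(reg.resid))
plt.xlabel("Standardized fitted values")
plt.ylabel("Standardized residuals")
plt.show()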


#Logistic Regression

X = pd.get_dummies(X_features, drop_first=True)  # X_features: the predictor columns of the data

X = sm.add_constant(X)
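# assumed split (Y is the 0/1 target column, not shown on the slip; names match the usage below):
train_X, test_X, train_Y, test_Y = train_test_split(X, Y, train_size=0.8, random_state=42)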

model_1 = sm.Logit(train_Y, train_X).fit()

significant_features = model_1.pvalues[model_1.pvalues < 0.05].index

#make model with significant features
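# a minimal sketch of model_2 (keep only the significant columns; assumes 'const' is among them):
train_X = train_X[significant_features]
test_X = test_X[significant_features]
model_2 = sm.Logit(train_Y, train_X).fit()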

from sklearn import metrics

cutoff = np.arange(0.1, 0.91, 0.01)

youdens_index = []

cutoff_index = []

predicted_probs = model_2.predict(test_X)  # predicted probabilities; only the threshold changes per iteration

for i in cutoff:
    predicted_values = (predicted_probs > i).astype(int)
    confusion_matrix = metrics.confusion_matrix(test_Y, predicted_values)
    sensitivity = confusion_matrix[1][1]/(confusion_matrix[1][1] + confusion_matrix[1][0])
    specificity = confusion_matrix[0][0]/(confusion_matrix[0][0] + confusion_matrix[0][1])
    youden_index = sensitivity + specificity - 1
    youdens_index.append(youden_index)
    cutoff_index.append(i)

print("Youden index : ", youdens_index)

max_youden = -np.inf

optimal_cutoff = -1

for i in range(len(youdens_index)):
    if youdens_index[i] > max_youden:
        max_youden = youdens_index[i]
        optimal_cutoff = cutoff_index[i]

#print( metrics.classification_report( test_Y, predicted_values ) )

'''
For a cost-based cutoff, weight the two error types, e.g.

cost = 5*confusion_matrix[0][1] + confusion_matrix[1][0]

and then pick the cutoff that minimises cost instead of maximising the Youden index.
'''
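# a minimal sketch of the cost-based variant (the 5x false-positive weight above is an assumed number):
costs = []
for i in cutoff:
    predicted_values = (predicted_probs > i).astype(int)
    cm = metrics.confusion_matrix(test_Y, predicted_values)
    costs.append(5 * cm[0][1] + cm[1][0])
cost_optimal_cutoff = cutoff[np.argmin(costs)]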

#DecisionTree
from sklearn.tree import DecisionTreeClassifier

data = pd.get_dummies(data, columns=['famhist'])  # no drop_first: keep both dummy columns for the tree

model1 = DecisionTreeClassifier(criterion='gini', max_depth=6, random_state=42) #gini

model3 = DecisionTreeClassifier(criterion='entropy', max_depth=6, random_state=42) #info gain

model1.fit(X_train, y_train)

from sklearn.tree import plot_tree

plot_tree(model1, feature_names=list(X_features.columns), class_names=["No CHD", "CHD"], filled=True, rounded=True)
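# plot_tree draws on the current matplotlib figure; call plt.figure(figsize=(20, 10)) before it for a readable tree
plt.show()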

#KNN

from sklearn.neighbors import KNeighborsClassifier

from sklearn.utils import resample, shuffle
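# assumed setup: the two class subsets come from a 'Status' split (hypothetical frame name):
# joined = hr_df[hr_df.Status == 'Joined']
# not_joined = hr_df[hr_df.Status != 'Joined']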

upsampled_not_joined = resample(not_joined, replace = True, n_samples = 4000)

dfs = [joined, upsampled_not_joined]

new_df = pd.concat(dfs)

new_df = shuffle(new_df)

X = pd.get_dummies(X_features, drop_first=True)  # X_features: predictor columns of new_df

Y = new_df.Status.map(lambda x: int(x == 'Joined'))

train_X, test_X, train_Y, test_Y = train_test_split(X,Y,train_size=0.8, random_state=42)

knn_clf = KNeighborsClassifier()

knn_clf.fit( train_X, train_Y )

from sklearn.model_selection import GridSearchCV

tuned_parameters = [{ 'n_neighbors': range(5,10),'metric': ['canberra', 'euclidean','minkowski']}]

clf = GridSearchCV( KNeighborsClassifier(),tuned_parameters,cv=10,scoring='roc_auc')

clf.fit(train_X, train_Y)

print("Best score for KNN is : ",clf.best_score_)

print("Best parameter for KNN is : ", clf.best_params_)

#ensemble

from sklearn.ensemble import RandomForestClassifier

radm_clf = RandomForestClassifier(max_depth=10, n_estimators=10)

radm_clf.fit( train_X, train_Y )

from sklearn.ensemble import AdaBoostClassifier

from sklearn.linear_model import LogisticRegression

logreg_clf = LogisticRegression()
ada_clf = AdaBoostClassifier(logreg_clf, n_estimators=50)

ada_clf.fit(train_X, train_Y)

#SVM

from sklearn.svm import SVC

svm_clf = SVC(kernel='linear', C=1.0, probability=True)

svm_clf_poly = SVC(kernel='poly', degree=3, probability=True)

svm_clf_rbf = SVC(kernel='rbf', gamma=0.1, probability=True)

svm_clf_sigmoid = SVC(kernel='sigmoid', gamma='scale', coef0=0.0, probability=True)

svm_clf.fit(train_X, train_Y)
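# the other kernels are fitted the same way, e.g.
# svm_clf_rbf.fit(train_X, train_Y)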

#Clustering

# normalize data
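# a minimal sketch of that step (assumes df holds only the numeric clustering features):
from sklearn.preprocessing import StandardScaler
scaled_df = StandardScaler().fit_transform(df)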

from sklearn.cluster import KMeans

cluster_range = range(1, 10)

cluster_errors = []

#elbow method to find best no. of clusters

for num_clusters in cluster_range:
    clusters = KMeans(n_clusters=num_clusters, n_init="auto")
    clusters.fit(scaled_df)
    cluster_errors.append(clusters.inertia_)

plt.plot(cluster_range, cluster_errors, marker="o")
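# read the best k off the bend ("elbow") of this curve
plt.xlabel("Number of clusters")
plt.ylabel("Inertia")
plt.show()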

#got k from plot

k=3

clusters = KMeans(k, random_state = 42, n_init="auto")

clusters.fit(scaled_df)

df["clusterid"] = clusters.labels_

# clusters.cluster_centers_

# clusters.labels_

#ROC Curve

from sklearn import metrics

def draw_roc_curve(model, test_X, test_y):
    test_results_df = pd.DataFrame({'actual': test_y})
    test_results_df = test_results_df.reset_index()
    predict_proba_df = pd.DataFrame(model.predict_proba(test_X.values))
    test_results_df['chd_1'] = predict_proba_df.iloc[:, 1]  # probability of the positive class
    fpr, tpr, thresholds = metrics.roc_curve(test_results_df.actual,
                                             test_results_df.chd_1,
                                             drop_intermediate=False)
    auc_score = metrics.roc_auc_score(test_results_df.actual,
                                      test_results_df.chd_1)
    plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % auc_score)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
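# example call with any fitted classifier exposing predict_proba, e.g. the KNN model above:
draw_roc_curve(knn_clf, test_X, test_Y)
plt.show()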

#PCA

from sklearn.preprocessing import StandardScaler

from numpy.linalg import eig

X_norm = StandardScaler().fit_transform(X)

X_norm = pd.DataFrame(X_norm, columns=feat)  # feat: the list of feature names

X_mean_adj = X_norm - X_norm.mean()

cov_mat = X_mean_adj.cov()

val, vec = eig(cov_mat)  # cov_mat is symmetric, so np.linalg.eigh would work as well

sorted_idx = np.argsort(-val)  # order eigenpairs by decreasing eigenvalue

val = val[sorted_idx]

vec = vec[:, sorted_idx]

pc1 = np.dot(vec[:, 0], X_mean_adj.T)  # scores on the first principal component

from sklearn.decomposition import PCA

pca = PCA(n_components=2)

principal_components = pca.fit_transform(X_norm)

principal_components
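# sanity check: the manual PC1 should match sklearn's up to sign (eigenvectors are sign-ambiguous)
print(np.allclose(np.abs(pc1), np.abs(principal_components[:, 0])))
print(pca.explained_variance_ratio_)  # share of variance captured by each component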
