# Slip
# Slip
import numpy as np
import statsmodels.api as sm
# Linear Regression
# NOTE(review): fragmented snippet — `X`, `X_test`, `y_test`, the fitted OLS
# result `reg`, and r2_score / mean_squared_error are defined elsewhere
# (not visible in this chunk).
X = sm.add_constant(X)  # prepend an intercept column for statsmodels OLS
y_pred = reg.predict(X_test)  # predictions from the fitted model
params = reg.params  # estimated coefficients (including the intercept)
p_val = reg.pvalues[1]  # p-value of the first non-intercept coefficient
print(r2_score(y_test, y_pred))
print(mean_squared_error(y_test, y_pred))
residuals = reg.resid  # raw residuals of the fit
std_residuals = reg.get_influence().resid_studentized_internal  # internally studentized residuals
influence = reg.get_influence()  # influence-diagnostics object
leverage = influence.hat_matrix_diag  # leverage = diagonal of the hat matrix
#cooks_threshold = 1
#MLR
def get_vif_factors(input_df):
    """Return a DataFrame of variance inflation factors for each column.

    Fix: the original returned only a "Features" column and never computed
    any VIF values. VIF_i = 1 / (1 - R_i^2), which equals the i-th diagonal
    element of the inverse of the feature correlation matrix.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Numeric feature matrix (one column per predictor).

    Returns
    -------
    pandas.DataFrame
        Columns "Features" (the input column names) and "VIF".
    """
    # atleast_2d keeps the single-column case working (corrcoef collapses it).
    corr = np.atleast_2d(np.corrcoef(input_df.values, rowvar=False))
    vif_values = np.diag(np.linalg.inv(corr))
    vif = pd.DataFrame()
    vif["Features"] = input_df.columns
    vif["VIF"] = vif_values
    return vif
X = pd.get_dummies(X_features, drop_first=True)  # one-hot encode categoricals; drop one level to avoid the dummy-variable trap
X = sm.add_constant(X)  # add the intercept column for statsmodels OLS
# Find the probability cutoff maximizing Youden's index (J = sensitivity + specificity - 1).
# NOTE(review): `cutoff`, `model_2`, `test_X` and the per-cutoff `youden_index`
# computation are defined outside this fragment — confirm against the caller.
youdens_index = []
cutoff_index = []
for i in cutoff:
    predicted_values = model_2.predict(test_X)
    # `youden_index` should be derived from predicted_values at cutoff `i`
    # (J = TPR - FPR); that computation was elided in the original fragment.
    youdens_index.append(youden_index)
    cutoff_index.append(i)
# Fix: the original assigned max_youden/optimal_cutoff from a bare `i` with
# no loop or comparison; scan the recorded values for the maximum properly.
max_youden = -40
optimal_cutoff = -1
for j, c in zip(youdens_index, cutoff_index):
    if j > max_youden:
        max_youden = j
        optimal_cutoff = c
'''
'''
#DecisionTree
from sklearn.tree import DecisionTreeClassifier
# NOTE(review): `model1` is created elsewhere (presumably a
# DecisionTreeClassifier instance) — confirm; only the fit call is shown here.
model1.fit(X_train, y_train)
#KNN
# NOTE(review): `dfs`, `shuffle`, `X_features`, `train_X`, `train_Y` and
# KNeighborsClassifier come from outside this fragment.
# Combine the per-class frames and shuffle so later splits are unbiased.
new_df = pd.concat(dfs)
new_df = shuffle(new_df)
X = pd.get_dummies(X_features, drop_first=True)  # one-hot encode, drop one level
knn_clf = KNeighborsClassifier()
# Fix: the classifier was instantiated as `knn_clf` but fitted via an
# unrelated name `clf`; fit the instance created above.
knn_clf.fit(train_X, train_Y)
#ensemble
# AdaBoost over a logistic-regression base learner.
logreg_clf = LogisticRegression()
# NOTE(review): the positional base-learner argument was renamed
# base_estimator -> estimator in recent scikit-learn; passing it
# positionally works either way, but verify against the pinned version.
ada_clf = AdaBoostClassifier(logreg_clf, n_estimators=50)
ada_clf.fit(train_X, train_Y)  # train_X / train_Y defined elsewhere
#SVM
# NOTE(review): `svm_clf` (presumably sklearn.svm.SVC) and the training
# arrays are constructed outside this fragment.
svm_clf.fit(train_X, train_Y)
#Clustering
# normalize data
# NOTE(review): the next three lines look like the interior of an
# elbow-method loop over k; the `for` header, the KMeans construction of
# `clusters`, and `scaled_df` are outside this fragment — confirm.
cluster_errors = []
clusters.fit(scaled_df)
cluster_errors.append(clusters.inertia_)  # within-cluster SSE for the elbow plot
k=3
clusters.fit(scaled_df)  # final fit at the chosen k
df["clusterid"] = clusters.labels_  # attach cluster assignments to the frame
# clusters.cluster_centers_
# clusters.labels_
#ROC Curve
test_results_df = test_results_df.reset_index()
# probability of the positive class (column 1 of predict_proba output)
test_results_df['chd_1'] = predict_proba_df.iloc[:,1:2]
# NOTE(review): the next line is a dangling fragment — the opening of the
# call (likely metrics.roc_curve(test_results_df.actual, ...)) was lost in
# this chunk, leaving a stray closing paren; restore the full call.
test_results_df.chd_1 )
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
#PCA
# Standardize features to zero mean / unit variance before PCA.
X_norm = StandardScaler().fit_transform(X)
# NOTE(review): `X_mean_adj` and the eigen-decomposition producing
# `val` / `vec` (e.g. np.linalg.eig(cov_mat)) live outside this fragment.
cov_mat = X_mean_adj.cov()
val = np.array(val)
vec = np.array(vec)
# Sort eigenvalues in descending order of explained variance.
sorted_idx = np.argsort(-val)
val = val[sorted_idx]
# Fix: reorder the eigenvector columns with their eigenvalues; the original
# sorted `val` alone, silently mispairing eigenvalue/eigenvector pairs.
vec = vec[:, sorted_idx]
pca = PCA(n_components=2)
principle_components = pca.fit_transform(X_norm)
principle_components