# Heart Disease Prediction.py
from google.colab import drive
# Attach the user's Google Drive to the Colab filesystem so that files stored
# under MyDrive can be opened with ordinary paths.
mount = '/content/drive'
drive.mount(mountpoint=mount)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.optimize as opt
import statsmodels.api as sm
from sklearn import preprocessing
# Read the dataset from the mounted Drive and discard every row that has a
# missing value in any column.
file_path = '/content/drive/MyDrive/dataset.csv'
disease_df = pd.read_csv(file_path).dropna(axis=0)

# Preview the first rows together with the cleaned frame's dimensions, then
# show how many records fall into each TenYearCHD class.
print(disease_df.head(), disease_df.shape)
print(disease_df['TenYearCHD'].value_counts())
# Select the six predictor columns and the binary TenYearCHD target as NumPy
# arrays.
#
# The features are intentionally left unscaled at this point. Fitting a
# StandardScaler on the full dataset before the train/test split would let
# test rows influence the scaling statistics, and it would also be redundant:
# the modelling Pipeline defined later in this script has its own
# StandardScaler step, which is fitted on the training split only.
X = np.asarray(disease_df[['age', 'male', 'cigsPerDay',
                           'totChol', 'sysBP', 'glucose']])
y = np.asarray(disease_df['TenYearCHD'])
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for testing; the fixed random_state keeps the
# partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=4,
)

# Report the dimensions of the feature matrix and target vector of each split.
for split_label, split_X, split_y in (
    ('Train set:', X_train, y_train),
    ('Test set:', X_test, y_test),
):
    print(split_label, split_X.shape, split_y.shape)
# Bar chart: number of records in each TenYearCHD class.
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(x='TenYearCHD', data=disease_df, ax=ax)
ax.set_title('Count of TenYearCHD')
ax.set_xlabel('Data')
ax.set_ylabel('Count')
plt.show()

# Line plot of the TenYearCHD values against the DataFrame index.
# pyplot.show() renders all open figures and accepts only the keyword
# argument `block`; it must not be handed the Axes returned by .plot().
p = disease_df['TenYearCHD'].plot()
plt.show()
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
# Model definition: standardise the features, then classify with logistic
# regression. Both steps live in one Pipeline so they are fitted together.
scaling_step = ('scaler', StandardScaler())
classifier_step = ('lr', LogisticRegression())
pipeline = Pipeline(steps=[scaling_step, classifier_step])

# fit() returns the fitted pipeline itself, so training on the training split
# and predicting labels for the held-out split can be chained.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)
from sklearn.metrics import confusion_matrix, classification_report
# Cross-tabulate true labels (rows) against predicted labels (columns) and
# wrap the counts in a labelled DataFrame for plotting.
cm = confusion_matrix(y_test, y_pred)
conf_matrix = pd.DataFrame(
    cm,
    index=['Actual:0', 'Actual:1'],
    columns=['Predicted:0', 'Predicted:1'],
)

# Draw the confusion matrix as an annotated heatmap with integer cell labels.
plt.figure(figsize=(8, 5))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap="Greens")
plt.show()

# Per-class precision, recall, F1 score and support.
print(classification_report(y_test, y_pred))
# Overall accuracy: the share of test samples whose predicted label equals
# the true label.
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_pred)
print('Accuracy of the model is =', accuracy)