Classification of Dry Bean
In [1]: import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
df = pd.read_csv('Dry_Bean.csv')
In [2]: df.head()
In [3]: df.shape
Out[3]: (13611, 17)
In [4]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13611 entries, 0 to 13610
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   Area             13611 non-null  int64
 1   Perimeter        13611 non-null  float64
 2   MajorAxisLength  13611 non-null  float64
 3   MinorAxisLength  13611 non-null  float64
 4   AspectRation     13611 non-null  float64
 5   Eccentricity     13611 non-null  float64
 6   ConvexArea       13611 non-null  int64
 7   EquivDiameter    13611 non-null  float64
 8   Extent           13611 non-null  float64
 9   Solidity         13611 non-null  float64
 10  roundness        13611 non-null  float64
 11  Compactness      13611 non-null  float64
 12  ShapeFactor1     13611 non-null  float64
 13  ShapeFactor2     13611 non-null  float64
 14  ShapeFactor3     13611 non-null  float64
 15  ShapeFactor4     13611 non-null  float64
 16  Class            13611 non-null  object
dtypes: float64(14), int64(2), object(1)
memory usage: 1.8+ MB
In [5]: df.isnull().sum()
Out[5]:
Area 0
Perimeter 0
MajorAxisLength 0
MinorAxisLength 0
AspectRation 0
Eccentricity 0
ConvexArea 0
EquivDiameter 0
Extent 0
Solidity 0
roundness 0
Compactness 0
ShapeFactor1 0
ShapeFactor2 0
ShapeFactor3 0
ShapeFactor4 0
Class 0
dtype: int64
In [6]: df.describe()
In [7]: print(df['Class'].value_counts())
DERMASON 3546
SIRA 2636
SEKER 2027
HOROZ 1928
CALI 1630
BARBUNYA 1322
BOMBAY 522
Name: Class, dtype: int64
In [8]: sns.histplot(x='Class', data=df)
Out[8]: <Axes: ylabel='Class'>
In [10]: df['Class'].unique()
In [11]: df['Class'].value_counts()
Out[11]:
DERMASON 3546
SIRA 2636
SEKER 2027
HOROZ 1928
CALI 1630
BARBUNYA 1322
BOMBAY 522
Name: Class, dtype: int64
In [12]: sns.countplot(x='Class',data=df)
In [13]: #plt.figure(figsize=(20,15))
heatmap = sns.heatmap(df.corr(), vmin=-1, vmax=1, annot=True, fmt=".2f", linewidths=0.5)
heatmap.set_title('Correlation Heatmap', fontdict={'fontsize':12}, pad=12)
Out[13]: <Axes: >
In [15]: df.hist(bins=30, figsize=(15,15))
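The preprocessing and model-fitting cells that produce Out[20] are not shown. Below is a minimal sketch consistent with the later cells: the feature/target separation, a 75/25 train/test split (the confusion-matrix supports sum to 3403, about 25% of the 13611 rows), and the logistic-regression fit. The names x, y, x_train, x_test, y_train, y_test, and regressor are taken from the cells that follow; the exact cell numbers and the split's random_state are assumptions.

In [16]: from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn import metrics

In [17]: x = df.drop('Class', axis=1)  # 16 numeric shape features
y = df['Class']                        # 7 bean varieties

In [18]: # 75/25 split; random_state=0 here is an assumption
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)

In [20]: from sklearn.linear_model import LogisticRegression
regressor = LogisticRegression(random_state=0)
regressor.fit(x_train, y_train)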
Out[20]: LogisticRegression(random_state=0)
In [21]: y_pred=regressor.predict(x_test)
In [22]: regressor.score(x_test,y_test)
Out[22]: 0.7064354980899207
In [23]: cm=confusion_matrix(y_test,y_pred)
print(cm)
[[169 0 91 0 53 1 5]
[ 0 110 1 0 0 0 0]
[ 88 0 311 0 15 2 3]
[ 0 0 0 811 12 28 54]
[ 9 0 16 20 290 5 151]
[ 0 0 0 104 10 279 77]
[ 0 0 0 56 111 87 434]]
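scikit-learn's confusion_matrix sorts the class labels, so in this and all the matrices below the rows and columns both run alphabetically: BARBUNYA, BOMBAY, CALI, DERMASON, HOROZ, SEKER, SIRA.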
In [24]: report=classification_report(y_test,y_pred)
print(report)
In [25]: Evaluation=pd.DataFrame(['LR'],columns=['Algorithm'])
Evaluation.loc[0,'Algorithm']='LR'
Evaluation.loc[0,'Precision']=metrics.precision_score(y_test, y_pred, average='micro')
Evaluation.loc[0,'Recall']=metrics.recall_score(y_test, y_pred, average='micro')
Evaluation.loc[0,'F1 Score']=metrics.f1_score(y_test, y_pred, average='micro')
Evaluation.loc[0,'Accuracy']=metrics.accuracy_score(y_test,y_pred)
Evaluation
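Note that with average='micro' on a multiclass problem, precision, recall, and F1 all reduce to overall accuracy, so the four metric columns of Evaluation come out identical for each model. The cell that fits the KNN model is not shown; a minimal sketch matching the Out[26] repr and the classifier name used below, assuming the default k=5:

In [26]: from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier()
classifier.fit(x_train, y_train)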
Out[26]: KNeighborsClassifier()
In [27]: y_pred=classifier.predict(x_test)
In [28]: report=classification_report(y_test,y_pred)
print(report)
In [29]: classifier.score(x_test,y_test)
Out[29]: 0.7272994416691155
In [30]: cm=confusion_matrix(y_test,y_pred)
print(cm)
[[152 0 113 0 45 0 9]
[ 0 110 1 0 0 0 0]
[116 0 273 0 28 1 1]
[ 0 0 0 816 2 54 33]
[ 44 0 21 15 322 0 89]
[ 0 0 0 94 5 289 82]
[ 4 0 1 84 54 32 513]]
In [31]: Evaluation.loc[1,'Algorithm']='KNN'
Evaluation.loc[1,'Precision']=metrics.precision_score(y_test, y_pred, average='micro')
Evaluation.loc[1,'Recall']=metrics.recall_score(y_test, y_pred, average='micro')
Evaluation.loc[1,'F1 Score']=metrics.f1_score(y_test, y_pred, average='micro')
Evaluation.loc[1,'Accuracy']=metrics.accuracy_score(y_test,y_pred)
Evaluation
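The decision-tree fitting cell is not shown either; a minimal sketch matching Out[32] and the dtc_model name used below, assuming default hyperparameters:

In [32]: from sklearn.tree import DecisionTreeClassifier
dtc_model = DecisionTreeClassifier()
dtc_model.fit(x_train, y_train)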
Out[32]: DecisionTreeClassifier()
In [33]: y_pred=dtc_model.predict(x_test)
In [34]: report=classification_report(y_test,y_pred)
print(report)
In [35]: dtc_model.score(x_test,y_test)
Out[35]: 0.8959741404642962
In [36]: cm=confusion_matrix(y_test,y_pred)
print(cm)
[[286 0 13 0 3 7 10]
[ 1 110 0 0 0 0 0]
[ 19 0 387 0 10 1 2]
[ 0 0 0 805 8 21 71]
[ 4 0 12 3 453 0 19]
[ 3 0 0 13 0 434 20]
[ 6 0 3 76 17 12 574]]
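Two cells are not shown here: the Evaluation row for the decision tree (row 2, following the pattern of the other rows; the 'DT' label is an assumption) and the Gaussian Naive Bayes fit that produces Out[38]. A sketch of both:

In [37]: Evaluation.loc[2,'Algorithm']='DT'
Evaluation.loc[2,'Precision']=metrics.precision_score(y_test, y_pred, average='micro')
Evaluation.loc[2,'Recall']=metrics.recall_score(y_test, y_pred, average='micro')
Evaluation.loc[2,'F1 Score']=metrics.f1_score(y_test, y_pred, average='micro')
Evaluation.loc[2,'Accuracy']=metrics.accuracy_score(y_test,y_pred)
Evaluation

In [38]: from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(x_train, y_train)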
Out[38]: GaussianNB()
In [39]: y_pred=classifier.predict(x_test)
In [40]: report=classification_report(y_test,y_pred)
print(report)
In [41]: classifier.score(x_test,y_test)
Out[41]: 0.7637378783426388
In [42]: cm=confusion_matrix(y_test,y_pred)
print(cm)
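Similarly, the Naive Bayes Evaluation row (row 3; the 'NB' label is an assumption) and the linear-kernel SVC fit that produces Out[44] are not shown. A sketch of both:

In [43]: Evaluation.loc[3,'Algorithm']='NB'
Evaluation.loc[3,'Precision']=metrics.precision_score(y_test, y_pred, average='micro')
Evaluation.loc[3,'Recall']=metrics.recall_score(y_test, y_pred, average='micro')
Evaluation.loc[3,'F1 Score']=metrics.f1_score(y_test, y_pred, average='micro')
Evaluation.loc[3,'Accuracy']=metrics.accuracy_score(y_test,y_pred)
Evaluation

In [44]: from sklearn.svm import SVC
classifier = SVC(kernel='linear', random_state=0)
classifier.fit(x_train, y_train)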
Out[44]: SVC(kernel='linear', random_state=0)
In [45]: y_pred=classifier.predict(x_test)
In [46]: report=classification_report(y_test,y_pred)
print(report)
In [47]: classifier.score(x_test,y_test)
Out[47]: 0.9138995004407875
In [48]: cm=confusion_matrix(y_test,y_pred)
print(cm)
[[275 0 26 0 3 4 11]
[ 0 110 1 0 0 0 0]
[ 13 0 393 0 7 1 5]
[ 0 0 0 840 2 12 51]
[ 1 0 12 5 464 0 9]
[ 1 0 0 2 0 445 22]
[ 1 0 2 76 17 9 583]]
In [49]: Evaluation.loc[4,'Algorithm']='SVM_linear'
Evaluation.loc[4,'Precision']=metrics.precision_score(y_test, y_pred, average='micro')
Evaluation.loc[4,'Recall']=metrics.recall_score(y_test, y_pred, average='micro')
Evaluation.loc[4,'F1 Score']=metrics.f1_score(y_test, y_pred, average='micro')
Evaluation.loc[4,'Accuracy']=metrics.accuracy_score(y_test,y_pred)
Evaluation
Out[50]: 0.6394357919482809
In [51]: y_pred=classifier.predict(x_test)
In [52]: report=classification_report(y_test,y_pred)
print(report)
In [53]: cm=confusion_matrix(y_test,y_pred)
print(cm)
[[ 23 0 209 0 77 0 10]
[ 1 110 0 0 0 0 0]
[ 23 0 357 0 37 0 2]
[ 0 0 0 787 0 90 28]
[ 16 0 23 9 280 19 144]
[ 0 0 0 174 9 123 164]
[ 0 0 0 39 58 95 496]]
In [55]: #plt.figure(figsize=(10,5))
sns.barplot(x='Algorithm',y='Precision',data=Evaluation)
In [57]: sns.barplot(x='Algorithm',y='Recall',data=Evaluation)
In [59]: Evaluation[Evaluation.Recall==Evaluation.Recall.min()]
In [60]: Evaluation[Evaluation.Recall==Evaluation.Recall.max()]
In [61]: Evaluation[Evaluation.Precision==Evaluation.Precision.min()]
In [62]: Evaluation[Evaluation.Precision==Evaluation.Precision.max()]
In [63]: Evaluation[Evaluation.Accuracy==Evaluation.Accuracy.min()]
In [64]: Evaluation[Evaluation.Accuracy==Evaluation.Accuracy.max()]
In [65]: Evaluation