Virat Kohli Analysis
Virat Kohli Analysis
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
0 116 Out Australia 6 2 NaN Adelaide Oval Adelaide Away 24-01-2012 Lost Test No No NaN
1 103 Out New Zealand 5 2 NaN M. Chinnaswamy Stadium Bangalore Home 31-08-2012 Won Test Yes No NaN
3 107 Out Australia 5 2 NaN M. A. Chidambaram Stadium Chennai Home 22-02-2013 Won Test No No NaN
4 119 Out South Africa 4 1 NaN Wanderers Stadium Johannesburg Away 18-12-2013 Drawn Test No No NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
67 123 Out Australia 3 2 129.47 JSCA International Stadium Ranchi Home 08-03-2019 Lost ODI No Yes NaN
68 120 Out West Indies 3 1 96.00 Queen's Park Oval Port of Spain Away 11-08-2019 Won ODI Yes Yes NaN
69 114 Not Out West Indies 3 2 115.15 Queen's Park Oval Port of Spain Away 14-08-2019 Won ODI Yes Yes NaN
71 rows × 15 columns
In [7]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71 entries, 0 to 70
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Score 71 non-null int64
1 Out/Not Out 71 non-null object
2 Against 71 non-null object
3 Batting Order 71 non-null int64
4 Inn. 71 non-null int64
5 Strike Rate 44 non-null float64
6 Venue 71 non-null object
7 Column1 71 non-null object
8 HA 71 non-null category
9 Date 71 non-null object
10 Result 71 non-null object
11 Format 71 non-null object
12 Man of the Match 71 non-null object
13 Captain 71 non-null object
14 Unnamed: 14 0 non-null float64
dtypes: category(1), float64(2), int64(3), object(9)
memory usage: 8.1+ KB
In [9]: df.shape
Out[9]: (71, 15)
In [10]: df.columns
In [12]: df.describe()
In [17]: # Replace null values in the 'Strike Rate' column with the mean of that column
mean = df['Strike Rate'].mean()
mean
Out[17]: 114.01954545454545
0 116 Out Australia 6 2 114.019545 Adelaide Oval Adelaide Away 24-01-2012 Lost Test No No NaN
1 103 Out New Zealand 5 2 114.019545 M. Chinnaswamy Stadium Bangalore Home 31-08-2012 Won Test Yes No NaN
3 107 Out Australia 5 2 114.019545 M. A. Chidambaram Stadium Chennai Home 22-02-2013 Won Test No No NaN
4 119 Out South Africa 4 1 114.019545 Wanderers Stadium Johannesburg Away 18-12-2013 Drawn Test No No NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
67 123 Out Australia 3 2 129.470000 JSCA International Stadium Ranchi Home 08-03-2019 Lost ODI No Yes NaN
68 120 Out West Indies 3 1 96.000000 Queen's Park Oval Port of Spain Away 11-08-2019 Won ODI Yes Yes NaN
69 114 Not Out West Indies 3 2 115.150000 Queen's Park Oval Port of Spain Away 14-08-2019 Won ODI Yes Yes NaN
71 rows × 15 columns
In [20]: sns.histplot(df['Score'])
In [22]: sns.countplot(x=df['Format'])
plt.show()
In [23]: plt.figure(figsize=(12,6))
sns.countplot(x=df['Against'])
plt.show()
In [24]: df.head(10)
0 116 Out Australia 6 2 114.019545 Adelaide Oval Adelaide Away 24-01-2012 Lost Test No No NaN
1 103 Out New Zealand 5 2 114.019545 M. Chinnaswamy Stadium Bangalore Home 31-08-2012 Won Test Yes No NaN
3 107 Out Australia 5 2 114.019545 M. A. Chidambaram Stadium Chennai Home 22-02-2013 Won Test No No NaN
4 119 Out South Africa 4 1 114.019545 Wanderers Stadium Johannesburg Away 18-12-2013 Drawn Test No No NaN
5 105 Not Out New Zealand 4 4 114.019545 Basin Reserve Wellington Away 14-02-2014 Drawn Test No No NaN
6 115 Out Australia 4 2 114.019545 Adelaide Oval Adelaide Away 09-12-2014 Lost Test No Yes NaN
7 141 Out Australia 4 4 114.019545 Adelaide Oval Adelaide Away 09-12-2014 Lost Test No Yes NaN
8 169 Out Australia 4 2 114.019545 Melbourne Cricket Ground Melbourne Away 26-12-2014 Drawn Test No No NaN
9 147 Out Australia 4 2 114.019545 Sydney Cricket Ground Sydney Away 06-01-2015 Drawn Test No Yes NaN
In [25]: ## Kohli has scored the most centuries at the Adelaide Oval, which is in Australia
plt.figure(figsize=(12,6))
sns.countplot(x=df['Venue'])
plt.xticks(rotation="vertical")
plt.show()
In [28]: new=df.groupby('HA').size().reset_index().rename(columns={0:"Centuries_count"})
new
Out[28]: HA Centuries_count
0 Away 39
1 Home 32
In [30]: ## Kohli has a good overseas record: he has scored more centuries away (39) than at home (32)
plt.pie('Centuries_count',labels='HA',data=new,startangle=90,autopct="%1.0f%%")
plt.show()
In [31]: ## Kohli has scored more centuries in Tests since he became captain
sns.countplot(x=df['Format'],hue=df['Captain'])
plt.show()
In [32]: win=df.groupby(df['Result']).size().reset_index().rename(columns={0:"Counts"})
win
Out[32]: Result Counts
0 Drawn 7
1 Lost 13
2 Lost (D/L) 1
3 Tied 1
4 Won 48
5 Won (D/L) 1
In [37]: plt.pie('Counts',labels='Result',data=win,autopct="%1.0f%%")
plt.show()
In [ ]: