-
Notifications
You must be signed in to change notification settings - Fork 0
/
Code
121 lines (96 loc) · 4.08 KB
/
Code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#--> Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as mlt
import seaborn as sns
#--> read Matches file
# NOTE(review): both CSV paths below are absolute, machine-specific Windows
# paths; they must exist on the machine running this script.
matches = pd.read_csv('C:/Users/vrajm/Desktop/IPL/matches.csv')
#--> print only the first 5 rows
# A bare `matches.head(5)` is only displayed by a notebook; wrap it in print()
# so the preview also shows when the file is run as a plain script.
print(matches.head(5))
#--> read Delivery file
delivery = pd.read_csv('C:/Users/vrajm/Desktop/IPL/deliveries.csv')
#--> print only the first 5 rows
print(delivery.head(5))
#--> finding the number of matches played in total
match_count = len(matches)
print("Total number of matches played:", match_count)
#--> Distinct values found in the city, team1 and umpire1 columns
cities = matches['city'].unique()
teams = matches['team1'].unique()
umpires = matches['umpire1'].unique()
print(
    ' \n Location for all matches: \n', cities,
    ' \n \n Teams :', teams,
    '\n \nTotal umpires ', umpires,
)
#--> Cleaning:remove the column with no data or consists of NaN
# Drop the umpire3 column in place. errors='ignore' makes this a no-op when
# the column is already gone (e.g. on a re-run), where `del` raised KeyError.
matches.drop(columns=['umpire3'], inplace=True, errors='ignore')
#--> Matches file after removing umpire3 column
# print() so the preview is shown when run as a script, not only in a notebook.
print(matches.head(4))
#-->
# Most frequent value in the player_of_match column.
top_award_winner = matches['player_of_match'].value_counts().idxmax()
print(' Who has the highest man of the match awards?\n', top_award_winner)
# Most frequent value in the winner column.
most_successful_team = matches['winner'].value_counts().idxmax()
print(' Which team has won the most?\n', most_successful_team)
#-->
# Report the match with the largest win_by_runs value.
# idxmax() returns an index *label*, so the row is selected with the
# label-based .loc; positional .iloc only gave the same row while the frame
# kept its default 0..n-1 index.
biggest_win = matches.loc[[matches['win_by_runs'].idxmax()]]
print(
    'It was in', biggest_win['season'].item(), ',',
    'when the biggest score difference was', biggest_win['win_by_runs'].item(), ',',
    'between', biggest_win['team1'].item(), '&', biggest_win['team2'].item(), '.',
    ' ', 'The match was won by:', biggest_win['winner'].item(),
)
#--> The graph will show the decision of fielding or batting if a team wins the toss.
# Horizontal count plot: seasons on the y axis, bars split by toss_decision.
sns.countplot(
    data=matches,
    y='season',
    hue='toss_decision',
)
mlt.show()
#--> The graph shows the team that won the most tosses
# Bar chart of how many times each team appears in the toss_winner column.
toss_axes = matches['toss_winner'].value_counts().plot.bar()
# Write each bar's height just above it; the loop body must be indented
# for the annotate call to run once per bar.
for bar in toss_axes.patches:
    toss_axes.annotate(
        format(bar.get_height()),
        (bar.get_x() + 0.15, bar.get_height() + 1),
    )
mlt.show()
#--> The graph shows the team that won the most number of matches
# Count appearances of every team in either the team1 or team2 column.
team_appearances = pd.concat([matches['team1'], matches['team2']])
matches_played_byteams = team_appearances.value_counts().reset_index()
matches_played_byteams.columns = ['Team', 'Total Matches']
# Look wins up by team name. Assigning the reset-index win counts directly
# paired rows by position, and the two value_counts orderings differ, so a
# team could be given another team's win total.
wins_by_team = matches['winner'].value_counts()
matches_played_byteams['wins'] = (
    matches_played_byteams['Team'].map(wins_by_team).fillna(0).astype(int)
)
# inplace must be a real bool; pandas rejects other values such as 2.
matches_played_byteams.set_index('Team', inplace=True)
matches_played_byteams.plot.bar()
mlt.show()
#--> Probability that the toss winner will win the match
# Rows in which toss_winner and winner hold the same value.
toss_winner_won = matches.loc[matches['toss_winner'] == matches['winner']]
yes_count = len(toss_winner_won)
no_count = len(matches) - yes_count
# Two-wedge pie: 'Yes' = toss winner also won, 'No' = every other row.
mlt.pie(
    [yes_count, no_count],
    labels=['Yes', 'No'],
    autopct='%1.2f%%',
)
fig = mlt.gcf()
mlt.show()
#--> Most popular grounds to play
# Bar chart of how many rows each venue has in the matches table.
venue_counts = matches['venue'].value_counts()
venue_axes = venue_counts.plot.bar(width=.8)
venue_axes.set_ylabel('count')
venue_axes.set_xlabel('Grounds')
mlt.show()
#--> The Graph shows the top 5 most Man of the match winners
# Bar chart of the five most frequent player_of_match values.
# Single-letter colour codes must be lowercase ('r'); current matplotlib
# rejects the uppercase 'R' as an invalid colour.
mom_axes = matches['player_of_match'].value_counts().head(5).plot.bar(color='r')
mom_axes.set_xlabel('MOM')
mom_axes.set_ylabel('Total')
# Write each bar's height just above it (loop body indented so it runs per bar).
for bar in mom_axes.patches:
    mom_axes.annotate(
        format(bar.get_height()),
        (bar.get_x() + 0.15, bar.get_height() + 0.25),
    )
mlt.show()
#--> Team that won the most
# Count plot of the winner column, one bar per distinct value.
sns.countplot(
    data=matches,
    x='winner',
)
# Vertical tick labels on the x axis.
mlt.xticks(rotation='vertical')
mlt.show()
#--> Highest run scoring batsman (Top 5)
# Sum batsman_runs per batsman, then chart the five largest totals.
max_runs = delivery.groupby(['batsman'])['batsman_runs'].sum()
run_axes = max_runs.sort_values(ascending=False).head(5).plot.bar()
# Write each bar's height just above it (loop body indented so it runs per bar).
for bar in run_axes.patches:
    run_axes.annotate(
        format(bar.get_height()),
        (bar.get_x() + 0.1, bar.get_height() + 1),
    )
mlt.show()
#--> Player that has scored the most number of sixes
# Keep only deliveries where batsman_runs equals 6, count them per batsman,
# and chart the ten batsmen with the most such deliveries.
sixes = delivery[delivery['batsman_runs'] == 6]
six_axes = sixes['batsman'].value_counts().head(10).plot.bar()
# Write each bar's height above it (loop body indented so it runs per bar).
for bar in six_axes.patches:
    six_axes.annotate(
        format(bar.get_height()),
        (bar.get_x(), bar.get_height() + 10),
    )
mlt.show()
#--> Orange cap winner each year with their total runs
# Attach each delivery to its season by joining matches.id to
# delivery.match_id, then total batsman_runs per (season, batsman).
orange = matches[['id', 'season']]
orange = orange.merge(delivery, left_on='id', right_on='match_id', how='left')
orange = orange.groupby(['season', 'batsman'])['batsman_runs'].sum().reset_index()
# Highest totals first, so drop_duplicates keeps each season's top scorer.
orange = orange.sort_values('batsman_runs', ascending=False)
orange = orange.drop_duplicates(subset=["season"])
# sort_values returns a new frame; keep it and print it, otherwise the
# season-ordered table is silently discarded when run as a script.
orange = orange.sort_values(by='season')
print(orange)
#--> Top 5 bowlers with most number of wickets
# Only deliveries whose dismissal_kind is in this list are counted.
dismissal_kinds = ["bowled", "caught", "lbw", "stumped", "caught and bowled", "hit wicket"]
wicket_balls = delivery[delivery["dismissal_kind"].isin(dismissal_kinds)]
# Chart the five bowlers with the most such deliveries.
wicket_axes = wicket_balls['bowler'].value_counts().head(5).plot.bar()
# The original loop body was a bare expression that did nothing; annotate
# each bar with its height, as the other charts in this script do.
for bar in wicket_axes.patches:
    wicket_axes.annotate(
        format(bar.get_height()),
        (bar.get_x() + 0.1, bar.get_height() + 1),
    )
mlt.show()