-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathQue_Ans_IPL_Analysis_Case_Study.sql
More file actions
287 lines (225 loc) · 7.34 KB
/
Copy pathQue_Ans_IPL_Analysis_Case_Study.sql
File metadata and controls
287 lines (225 loc) · 7.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
-- IPL analysis Case Study Questions and Answers
-- Case study on @ iThinkData By Vaibhav Chavan
-- Subscribe to @ iThinkData For more SQL case study and Tutorials
-- Let's get started...
-- A. Basic Level Questions
-- Q1. Find out each seasons winner team.
SELECT season,
winner
FROM matches
WHERE match_type ='Final'
GROUP BY season
ORDER BY season ;
-- Q2. How many matches were played in each season from 2008 to 2024?
SELECT season,
COUNT(season) AS 'Total Matches in Season'
FROM matches
GROUP BY season
ORDER BY season;
-- Q3. In which cities were the most IPL matches hosted?
SELECT city,
COUNT(id) AS 'Total Matches in City'
FROM matches
GROUP BY city
ORDER BY COUNT(id) DESC ;
-- Q4. Which player has won the most "Player of the Match" awards in IPL history?
SELECT player_of_match,
COUNT(player_of_match) AS ' Highest POM Awards'
FROM matches
GROUP BY player_of_match
ORDER BY COUNT(player_of_match) DESC
LIMIT 1;
-- Q5. Which top 3 teams won the most matches across all seasons?
SELECT winner,
COUNT(winner) AS 'Most Matches winner Team'
FROM matches
GROUP BY winner
ORDER BY COUNT(winner) DESC limit 3;
-- Q6. Identify the top scorer(runs) in IPL history.
SELECT batter,
SUM(batsman_runs) AS total_runs
FROM deliveries
GROUP BY batter
ORDER BY total_runs DESC
LIMIT 1;
-- B. Intermediate Level Questions
-- Q1. Which team hit the most 4s and 6s during the powerplay (1st-6th over) in the IPL?
SELECT d.batting_team,
SUM(CASE WHEN d.batsman_runs = 4 THEN 1 ELSE 0 END) AS total_4s,
SUM(CASE WHEN d.batsman_runs = 6 THEN 1 ELSE 0 END) AS total_6s
FROM deliveries d
JOIN matches m ON d.match_id = m.id
WHERE d.overs BETWEEN 0 AND 5
GROUP BY d.batting_team
ORDER BY (total_4s + total_6s) DESC;
-- Q2. Find the top 3 bowlers with the best economy rate
-- (minimum 30 overs bowled) in 2024 IPL season.
SELECT b.bowler,
ROUND(SUM(b.total_runs) / (COUNT(b.bowler) / 6.0),2) AS economy_rate
FROM deliveries b
JOIN matches m ON b.match_id = m.id
WHERE m.season = 2024
GROUP BY b.bowler
HAVING COUNT(b.bowler) >= 180 -- At least 30 overs (30*6=total balls)
ORDER BY economy_rate DESC
LIMIT 3;
-- Q3. Identify the batsmen with the best strike rate (minimum 100 balls faced)
-- in death overs (16th-20th) across all IPL seasons.
SELECT d.batter,
ROUND(SUM(d.batsman_runs) * 100.0 / COUNT(*),2) AS strike_rate,
COUNT(*) AS balls_faced
FROM deliveries d
JOIN matches m ON d.match_id = m.id
WHERE d.overs BETWEEN 15 AND 19
GROUP BY d.batter
HAVING COUNT(*) >= 100 -- Minimum 100 balls faced
ORDER BY strike_rate DESC;
-- Q4. Identify the fastest century in IPL (minimum of 50 balls faced).
-- Find the batsman who scored it, the number of balls faced, and the strike rate.
SELECT d.batter,
COUNT(*) AS balls_faced,
SUM(d.batsman_runs) AS total_runs,
SUM(d.batsman_runs) * 100.0 / COUNT(*) AS strike_rate
FROM deliveries d
JOIN matches m ON d.match_id = m.id
GROUP BY d.batter
HAVING total_runs >= 100 AND COUNT(*) >= 50
ORDER BY balls_faced ASC
LIMIT 1;
-- Q5. Identify the most consistent batsmen by calculating the average runs per match
-- for batsmen with at least 500 total runs across all matches.
WITH TotalBatsmanRuns AS (
SELECT b.batter,
COUNT(DISTINCT b.match_id) AS total_matches,
SUM(b.batsman_runs) AS total_runs
FROM deliveries b
JOIN matches m ON b.match_id = m.id
GROUP BY b.batter
HAVING SUM(b.batsman_runs) >= 500 -- Minimum 500 total runs
)
SELECT batter,
total_runs,
total_matches,
(total_runs * 1.0 / total_matches) AS avg_runs_per_match
FROM TotalBatsmanRuns
ORDER BY avg_runs_per_match DESC;
-- C. High Level Questions
-- Q1. Identify the top 3 finishers (batsmen) in each IPL season final match based on
-- total runs scored in the last 5 overs (overs 16-20) of a match.
-- The query should return the top 3 batsmen for each seasons ranked
-- by their total runs in the death overs in final match.
WITH DeathOverRuns AS (
SELECT d.batter,
m.season,
SUM(d.batsman_runs) AS runs_in_death_overs
FROM deliveries d
JOIN matches m ON d.match_id = m.id
WHERE d.overs BETWEEN 15 AND 19 -- Death overs
AND m.match_type = 'final'
GROUP BY d.batter, m.season
),
RankedFinishers AS (
SELECT batter,
season,
runs_in_death_overs,
ROW_NUMBER() OVER (PARTITION BY season ORDER BY runs_in_death_overs DESC) AS ranks
FROM DeathOverRuns
)
SELECT batter,
season,
runs_in_death_overs
FROM RankedFinishers
WHERE ranks <= 3
ORDER BY season,
ranks;
-- Q2. Identify all the players who got out in the 90s (between 90 and 99 runs) and how they were dismissed.
WITH batsman_totals AS (
SELECT match_id,
batter,
SUM(batsman_runs) AS total_runs
FROM deliveries
GROUP BY match_id, batter
)
SELECT d.match_id,
d.batter,
d.dismissal_kind,
bt.total_runs
FROM deliveries d
JOIN batsman_totals bt ON d.batter = bt.batter AND d.match_id = bt.match_id
WHERE d.is_wicket = 1
AND bt.total_runs BETWEEN 90 AND 99
ORDER BY bt.total_runs DESC;
-- Q3. Identify bowlers who successfully defended 6 or fewer runs in the last over
-- of a match (over 20) and the matches where they achieved this feat.
WITH final_over_defenses AS (
SELECT m.season,
d.match_id,
m.team1,
m.team2,
d.bowling_team,
d.bowler,
SUM(d.total_runs) AS runs_conceded
FROM deliveries d
JOIN matches m ON d.match_id = m.id
WHERE d.overs = 19
GROUP BY d.match_id,
d.bowling_team,
d.bowler
)
SELECT season,
match_id,
team1,
team2,
bowling_team,
bowler,
runs_conceded
FROM final_over_defenses
WHERE runs_conceded <= 6
ORDER BY runs_conceded ASC;
-- Q4. Find the bowlers with the most 4 or 5-wicket hauls in IPL history.
WITH WicketHauls AS (
SELECT b.bowler,
b.match_id,
COUNT(*) AS wickets_in_match
FROM deliveries b
JOIN matches m ON b.match_id = m.id
WHERE b.is_wicket = 1
AND b.dismissal_kind NOT IN ('run out', 'retired hurt', 'obstructing the field') -- Only count proper wickets
GROUP BY b.bowler, b.match_id
HAVING COUNT(*) >= 4 -- 4 or more wickets in a match
)
SELECT bowler,
COUNT(*) AS four_five_wicket_hauls
FROM WicketHauls
GROUP BY bowler
ORDER BY four_five_wicket_hauls DESC;
-- Q5. Identify the bowlers with the most wickets in the death overs (overs 16-20) for each season.
WITH DeathOverWickets AS (
SELECT b.bowler,
m.season,
COUNT(b.is_wicket) AS total_wickets
FROM deliveries b
JOIN matches m ON b.match_id = m.id
WHERE b.overs BETWEEN 15 AND 19
AND b.is_wicket = 1
GROUP BY b.bowler, m.season
),
RankedDeathOverBowlers AS (
SELECT bowler,
season,
total_wickets,
ROW_NUMBER() OVER (PARTITION BY season ORDER BY total_wickets DESC) AS ranks
FROM DeathOverWickets
)
SELECT season,bowler, total_wickets
FROM RankedDeathOverBowlers
WHERE ranks <= 5
ORDER BY season, ranks;
-- ------------------------------------
/*
For more such SQL Case Study and Data Analysis related stuff,
Subscribe my youtube channel www.youtube.com/@iThinkData
Github: www.github.com/vaibhavchavan20
LinkedIn: www.linkedin.com/in/vaibhav-chavan
WhatsApp: www.bit.ly/WhatsAppiThinkData
*/