-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSQL Code Part 2.sql
195 lines (125 loc) · 5.71 KB
/
SQL Code Part 2.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
/* Continued.....
Bike Share Data Analysis
Skills Used: Union, Joins, Temp Tables, Aggregate Functions, Converting Data Types, Extracting Date and Time,
Update, Alter Table, Create Table, Insert Into
*/
-- Adding a new column holding the ride length in minutes, computed from started_at and ended_at
ALTER TABLE [dbo].[all_data_202004_202104]
ADD ride_length int;
GO
-- The ALTER above has to run in its own batch: SQL Server compiles a whole batch
-- before executing it, so an UPDATE that references the new column in the same
-- batch fails with "Invalid column name".
-- Note: DATEDIFF(MINUTE, ...) counts the minute boundaries crossed between the two
-- timestamps, not the number of fully elapsed minutes.
-- No WHERE clause on purpose: every row gets a ride length.
UPDATE [dbo].[all_data_202004_202104]
SET ride_length = DATEDIFF(MINUTE, started_at, ended_at);
GO
-- Extracting the weekday name, month name and year from started_at and adding them as new columns.
-- year_y is declared int: that is what YEAR() returns, and it matches the int year_y
-- columns of the #member_table / #casual_table temp tables this script fills from it.
ALTER TABLE [dbo].[all_data_202004_202104]
ADD day_of_week nvarchar(50),
    month_m nvarchar(50),
    year_y int;
GO
-- Separate batch: the new columns cannot be referenced in the batch that adds them.
-- DATENAME returns names in the session language (SET LANGUAGE), so the stored
-- weekday/month names follow the language of the session running this script.
-- No WHERE clause on purpose: every row is populated.
UPDATE [dbo].[all_data_202004_202104]
SET day_of_week = DATENAME(WEEKDAY, started_at),
    month_m = DATENAME(MONTH, started_at),
    year_y = YEAR(started_at);
GO
-- Adding the month number (1-12) of started_at, used to sort months chronologically
ALTER TABLE [dbo].[all_data_202004_202104]
ADD month_int int;
GO
-- Separate batch: the new column cannot be referenced in the batch that adds it.
-- Extracting month num from datetime2 format; no WHERE clause on purpose (all rows).
UPDATE [dbo].[all_data_202004_202104]
SET month_int = DATEPART(MONTH, started_at);
GO
-- Adding the calendar date (time part removed) of started_at
ALTER TABLE [dbo].[all_data_202004_202104]
ADD date_yyyy_mm_dd date;
GO
-- Separate batch: the new column cannot be referenced in the batch that adds it.
-- Casting datetime2 format to date; no WHERE clause on purpose (all rows).
UPDATE [dbo].[all_data_202004_202104]
SET date_yyyy_mm_dd = CAST(started_at AS date);
GO
-- Removing rows that would distort the analysis:
--   * missing ride_id, start_station_name or ride_length
--   * ride_length of zero or below
--   * ride_length above 1440 minutes
DELETE FROM [dbo].[all_data_202004_202104]
WHERE ride_id IS NULL
   OR start_station_name IS NULL
   OR ride_length IS NULL
   OR ride_length <= 0
   OR ride_length > 1440;
-- Duplicate check: if uniq and total differ, some ride_id values occur more than once
SELECT
    COUNT(DISTINCT ride_id) AS uniq,
    COUNT(ride_id) AS total
FROM [dbo].[all_data_202004_202104];
-- Calculating the number of rides per day of the week by user type, and creating a view
-- to store the data for further visualization.
-- CREATE VIEW must be the only statement in its batch, hence the GO before and after it.
GO
CREATE VIEW users_per_day AS
SELECT
    COUNT(CASE WHEN member_casual = 'member' THEN 1 END) AS num_of_members,
    COUNT(CASE WHEN member_casual = 'casual' THEN 1 END) AS num_of_casual,
    COUNT(*) AS num_of_users,
    day_of_week
FROM [dbo].[all_data_202004_202104]
GROUP BY day_of_week;
GO
-- Calculating the average ride length (minutes) for each user type, and creating a view
-- to store the data for further visualization.
-- ride_length is an int column and AVG over int returns a truncated int, so the value is
-- cast to decimal first to keep the fractional part of the average.
-- CREATE VIEW must be the only statement in its batch, hence the GO before and after it.
GO
CREATE VIEW avg_ride_length AS
SELECT
    member_casual AS user_type,
    AVG(CAST(ride_length AS decimal(10, 2))) AS avg_ride_length
FROM [dbo].[all_data_202004_202104]
GROUP BY member_casual;
GO
-- Creating temporary tables exclusively for casual users and members (members first).
-- A leftover #member_table from an earlier run in the same session is dropped first,
-- otherwise CREATE TABLE fails because the object already exists.
IF OBJECT_ID('tempdb..#member_table') IS NOT NULL
    DROP TABLE #member_table;

CREATE TABLE #member_table (
    ride_id nvarchar(50),
    rideable_type nvarchar(50),
    member_casual nvarchar(50),
    ride_length int,
    day_of_week nvarchar(50),
    month_m nvarchar(50),
    year_y int
);

-- Copy only the rides whose user type is 'member'
INSERT INTO #member_table (ride_id, rideable_type, member_casual, ride_length, day_of_week, month_m, year_y)
SELECT ride_id, rideable_type, member_casual, ride_length, day_of_week, month_m, year_y
FROM [dbo].[all_data_202004_202104]
WHERE member_casual = 'member';
-- Temporary table holding only the rides made by casual users.
-- A leftover #casual_table from an earlier run in the same session is dropped first,
-- otherwise CREATE TABLE fails because the object already exists.
IF OBJECT_ID('tempdb..#casual_table') IS NOT NULL
    DROP TABLE #casual_table;

CREATE TABLE #casual_table (
    ride_id nvarchar(50),
    rideable_type nvarchar(50),
    member_casual nvarchar(50),
    ride_length int,
    day_of_week nvarchar(50),
    month_m nvarchar(50),
    year_y int
);

-- Copy only the rides whose user type is 'casual'
INSERT INTO #casual_table (ride_id, rideable_type, member_casual, ride_length, day_of_week, month_m, year_y)
SELECT ride_id, rideable_type, member_casual, ride_length, day_of_week, month_m, year_y
FROM [dbo].[all_data_202004_202104]
WHERE member_casual = 'casual';
-- Inspecting the contents of both temp tables (all columns, in definition order)
SELECT
    ride_id,
    rideable_type,
    member_casual,
    ride_length,
    day_of_week,
    month_m,
    year_y
FROM #casual_table;

SELECT
    ride_id,
    rideable_type,
    member_casual,
    ride_length,
    day_of_week,
    month_m,
    year_y
FROM #member_table;
-- Monthly user traffic since startup, split by user type.
-- month_int is grouped and sorted alongside month_m so months appear in calendar order.
SELECT
    month_int AS Month_Num,
    month_m AS Month_Name,
    year_y AS Year_Y,
    COUNT(CASE WHEN member_casual = 'member' THEN 1 END) AS num_of_member,
    COUNT(CASE WHEN member_casual = 'casual' THEN 1 END) AS num_of_casual,
    COUNT(member_casual) AS total_num_of_users
FROM [dbo].[all_data_202004_202104]
GROUP BY year_y, month_int, month_m
ORDER BY year_y, month_int, month_m;
-- Daily traffic since startup: one row per calendar date, split by user type
SELECT
    COUNT(CASE WHEN member_casual = 'member' THEN 1 END) AS num_of_members,
    COUNT(CASE WHEN member_casual = 'casual' THEN 1 END) AS num_of_casual,
    COUNT(*) AS num_of_users,
    date_yyyy_mm_dd AS date_d
FROM [dbo].[all_data_202004_202104]
GROUP BY date_yyyy_mm_dd
ORDER BY date_yyyy_mm_dd;
-- Calculating user traffic hour wise.
-- Step 1: add a column for the hour (0-23) in which each ride started.
ALTER TABLE [dbo].[all_data_202004_202104]
ADD hour_of_day int;
GO
-- Step 2 (separate batch: the new column cannot be referenced in the batch that adds it).
-- No WHERE clause on purpose: every row is populated.
UPDATE [dbo].[all_data_202004_202104]
SET hour_of_day = DATEPART(HOUR, started_at);
GO
-- Step 3: rides per start hour, split by user type.
-- GROUP BY / ORDER BY use the column name in its exact case so the query also
-- resolves on a case-sensitive collation.
SELECT
    hour_of_day AS Hour_of_day,
    COUNT(CASE WHEN member_casual = 'member' THEN 1 END) AS num_of_members,
    COUNT(CASE WHEN member_casual = 'casual' THEN 1 END) AS num_of_casual,
    COUNT(*) AS num_of_users
FROM [dbo].[all_data_202004_202104]
GROUP BY hour_of_day
ORDER BY hour_of_day;
-- Calculating the most popular start stations for casual users (top 20 stations).
-- The station name is a secondary sort key so that stations tied on the count are
-- returned in a deterministic order; without it TOP may pick any of the tied rows.
SELECT TOP (20)
    start_station_name AS Station_name,
    COUNT(CASE WHEN member_casual = 'casual' THEN 1 END) AS num_of_casual
FROM [dbo].[all_data_202004_202104]
GROUP BY start_station_name
ORDER BY num_of_casual DESC, start_station_name ASC;
/*
Select *
From [dbo].[all_data_202004_202104]
*/