-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathbike_sharing_inital_analysis.R
149 lines (110 loc) · 3.73 KB
/
bike_sharing_inital_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
## R Shiny Dashboard of “Bike Sharing Dataset” - Analysis ##

# Import the dataset ----
# The first column of hour.csv is dropped as soon as the file is read.
data <- read.csv("hour.csv")[, -1]
str(data)

# Data preparation ----
# Recode the coded columns as labelled factors.

# Year: 0 becomes "2011", every other value becomes "2012".
data$yr <- as.factor(ifelse(data$yr == 0, "2011", "2012"))

# Month and weekday labels are derived from the date column. The names come
# from fixed English vectors instead of months()/weekdays(), whose output
# follows the session locale: the single-prediction step later in this script
# hard-codes "Jan" and "Saturday" and would not match localized labels.
date_parts <- as.POSIXlt(as.Date(data$dteday))
weekday_names <- c("Sunday", "Monday", "Tuesday", "Wednesday",
                   "Thursday", "Friday", "Saturday")
# POSIXlt counts months from 0 and weekdays from 0 (Sunday), hence the + 1L.
data$mnth <- as.factor(month.abb[date_parts$mon + 1L])
data$weekday <- as.factor(weekday_names[date_parts$wday + 1L])

# Hour becomes a categorical variable.
data$hr <- factor(data$hr)

# Season: 1 = Spring, 2 = Summer, 3 = Fall, anything else = Winter.
data$season <- as.factor(
  ifelse(data$season == 1, "Spring",
    ifelse(data$season == 2, "Summer",
      ifelse(data$season == 3, "Fall", "Winter")
    )
  )
)

# Weather situation: 1 = Good, 2 = Fair, 3 = Bad, anything else = Very Bad.
data$weathersit <- as.factor(
  ifelse(data$weathersit == 1, "Good",
    ifelse(data$weathersit == 2, "Fair",
      ifelse(data$weathersit == 3, "Bad", "Very Bad")
    )
  )
)

# Binary flags: 0 = No, anything else = Yes.
data$holiday <- as.factor(ifelse(data$holiday == 0, "No", "Yes"))
data$workingday <- as.factor(ifelse(data$workingday == 0, "No", "Yes"))

# Changing column names ----
names(data)[names(data) == "registered"] <- "new"
names(data)[names(data) == "cnt"] <- "total"
# Denormalizing the values ----
# Every step is a single whole-column operation, so it costs one pass over
# the data and is a no-op on an empty data frame.

# Temperature: undo the scaling with t = tn * (t_max - t_min) + t_min,
# using t_min = -8 and t_max = 39.
data$temp <- (data$temp * (39 - (-8))) + (-8)

# Feeling temperature: same formula with t_min = -16 and t_max = 50.
data$atemp <- (data$atemp * (50 - (-16))) + (-16)

# Humidity: rescale by a factor of 100.
data$hum <- data$hum * 100

# Wind speed: rescale by a factor of 67.
data$windspeed <- data$windspeed * 67
# Write the new file ----
# Discard the first remaining column, then export the prepared data without
# row names.
data <- data[, -1, drop = FALSE]
write.csv(x = data, file = "bike_sharing.csv", row.names = FALSE)
# Modeling ----

# Keep only the predictors and the response used by the models. Columns are
# selected by name so a change in the upstream column layout cannot silently
# pick the wrong variables. The order keeps `total` in 10th position, which
# the metric calls and the single-prediction step below rely on.
model_cols <- c("mnth", "hr", "holiday", "weekday", "weathersit",
                "temp", "atemp", "hum", "windspeed", "total")
data <- data[model_cols]

# Splitting data ----
# 80 % of the rows go to the training set; the seed makes the split
# reproducible.
library(caTools)
set.seed(123)
split <- sample.split(data$total, SplitRatio = 0.8)
train_set <- subset(data, split == TRUE)
test_set <- subset(data, split == FALSE)

# Write new files for the train and test sets.
write.csv(train_set, "bike_train.csv", row.names = FALSE)
write.csv(test_set, "bike_test.csv", row.names = FALSE)
# Multilinear regression ----
# Regress `total` on every other column of the training set.
multi <- lm(formula = total ~ ., data = train_set)

# Predict the held-out rows.
y_pred_m <- predict(object = multi, newdata = test_set)

# Performance metrics: mean absolute error and root mean squared error,
# comparing column 10 of the test set with the predictions.
# Needs the Metrics package: install.packages("Metrics")
library(Metrics)
mae_m <- mae(actual = test_set[[10]], predicted = y_pred_m)
rmse_m <- rmse(actual = test_set[[10]], predicted = y_pred_m)

# Show both errors when the script is run interactively.
mae_m
rmse_m
# Decision tree ----
# Regression tree for `total` on all other columns, with minsplit set to 3.
library(rpart)
dt <- rpart(
  formula = total ~ .,
  data = train_set,
  control = rpart.control(minsplit = 3)
)

# Predict the held-out rows.
y_pred_dt <- predict(object = dt, newdata = test_set)

# Performance metrics, comparing column 10 of the test set with the
# predictions.
mae_dt <- mae(actual = test_set[[10]], predicted = y_pred_dt)
rmse_dt <- rmse(actual = test_set[[10]], predicted = y_pred_dt)

# Show both errors when the script is run interactively.
mae_dt
rmse_dt
# Random forest ----
# Forest of 100 trees for `total` on all other columns; the seed is reset
# right before fitting so the model is reproducible.
library(randomForest)
set.seed(123)
rf <- randomForest(
  formula = total ~ .,
  data = train_set,
  ntree = 100
)

# Predict the held-out rows.
y_pred_rf <- predict(object = rf, newdata = test_set)

# Performance metrics, comparing column 10 of the test set with the
# predictions.
mae_rf <- mae(actual = test_set[[10]], predicted = y_pred_rf)
rmse_rf <- rmse(actual = test_set[[10]], predicted = y_pred_rf)

# Show both errors when the script is run interactively.
mae_rf
rmse_rf

# Saving the model ----
# NOTE(review): the file is written with saveRDS(), so it must be read back
# with readRDS() (not load()) despite the ".rda" suffix. The path is left
# unchanged because other code may open this exact file name.
saveRDS(object = rf, file = "./rf.rda")
# Single prediction value ----
# One hand-built observation; `total` is unknown and therefore NA.
values <- data.frame(
  mnth = "Jan",
  hr = "0",
  holiday = "No",
  weekday = "Saturday",
  weathersit = "Good",
  temp = 3.28,
  atemp = 3.0014,
  hum = 81,
  windspeed = 0,
  total = NA
)

# Append the observation to the test set so its columns take the types of
# the test-set columns (presumably the factors the forest was trained on).
test_pred <- rbind(test_set, values)

# Score only the appended (last) row, leaving out column 10.
prediction <- predict(rf, newdata = tail(test_pred, n = 1L)[, -10])
prediction