-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path4. Bus-Stop-Finding.Rmd
843 lines (597 loc) · 30.8 KB
/
4. Bus-Stop-Finding.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
---
title: "Bus Stop Finding"
output: html_document
date: "2023-11-16"
---
```{r setup, include=FALSE}
library('GISTools')
library('raster')
library('tmap')
library('rgdal')
library('maptools')
library('raster')
library(raster)
library(sp)
library(sf)
library(tidyverse)
library(rgeos)
library(ggplot2)
library(psych)
library(spatstat)
library(spdep)
library(jsonlite)
library(gstat)
library(magick)
```
# Finding optimal bus stops
## Creating base layer map
Here we also set the crs to WGS84 as it causes calculation later to be simple as other data is left as lat lon coordinated.
```{r base}
#If we do not want the subzone lines
subzones <- st_read(dsn = "dataset/basemap", layer = "MP14_SUBZONE_NO_SEA_PL")
base_map_no_border <- st_read(dsn = "dataset/basemap", layer = "SGP_adm0")
base_map_no_border <- base_map_no_border %>% select(ISO)
base_map_no_border <- st_transform(base_map_no_border, crs=4326)
#If we want the subzone lines
base_map <- st_read(dsn = "dataset/basemap", layer = "MP14_SUBZONE_NO_SEA_PL")
base_map <- st_transform(base_map, crs=4326)
base_map <- st_make_valid(base_map)
```
## Loading data
In this file, we will be loading 4 main datasets:
- Youth density
- HDB locations
- Bus Stop locations
- Nightlife Locations
```{r load}
#Youth density
yth_data <- read.csv("dataset/filteredSG_population_density.csv") %>% select(longitude, latitude, youth)
yth_spatial <- st_as_sf(yth_data, coords = c("longitude", "latitude"), crs = 4326)
#HDB Locations
hdb <- read.csv("dataset/hdb.csv") %>% mutate(addr = paste(blk_no,street)) %>% select(addr,lat,lng)
hdb_spatial <- st_as_sf(hdb, coords = c("lng", "lat"), crs = 4326)
#Read & Load Bus Stop Data
bus_data <- data.frame()
for(i in 0:10) {
file_name <- paste0("dataset/busstops/response(", i, ").json")
# Handle the case for response.json (without the index)
if(i == 0) {
file_name <- "dataset/busstops/response.json"
}
json_read <- fromJSON(file_name, flatten = TRUE)
bus_data <- rbind(bus_data, json_read$value)
}
bus_spatial <- st_as_sf(bus_data, coords = c("Longitude", "Latitude"), crs = 4326)
#Nightlife DataFrame
nightlife <- st_read('dataset/processed_datasets/nightlife_locations.geojson', crs = st_crs(subzones))
nightlife <- st_transform(nightlife, crs = st_crs(4326))
```
# HDBs Bus Stop Analysis
## Visualizing Youth Density
So the first step is to see how youth density looks like. Now what we notice is that youth density is a point plot, so let's see how that looks.
```{r yth pts}
#Plot the points to visualise how the values look #plot takes a while to load
yth_spatial_map <- tm_shape(base_map_no_border) + tm_borders() +
tm_shape(yth_spatial) + tm_dots(size = 0.05, col = 'youth', title = 'Youth Density(%)') +
tm_compass(type="8star", size = 2) +
tm_scale_bar(width = 0.15) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="Youth Densities(%) \nin Singapore",
title.position = c('left', 'top'))
yth_spatial_map
tmap_save(yth_spatial_map, filename = "plots/optimal_stops/yth_spatial_map.png")
```
No what we realise is that the youth points are clustered around a certain value, so we can actually represent them as polygons instead.
```{r yth polys}
#We notice that a lot of the same values are congregated together, so lets make them into polygon
#Group points with the same value and create convex hulls
grouped_points <- yth_spatial %>%
group_by(youth) %>%
summarize()
yth_poly <- grouped_points %>%
st_convex_hull()
yth_poly <- yth_poly[-14,] #remove the individual point which is out of place
#Lets see how this looks like
yth_poly_plot <- tm_shape(base_map_no_border) + tm_borders() +
tm_shape(yth_poly) + tm_polygons(col = 'youth', title = 'Youth Density') +
tm_compass(type="8star", size = 2) +
tm_scale_bar(width = 0.15) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="HDB locations \nwithin Singapore",
title.position = c('left', 'top'))
yth_poly_plot
tmap_save(yth_poly_plot, filename = "plots/optimal_stops/yth_poly_map.png")
```
Computation is MUCH faster now as we have reduced from 217054 data points to 32 polygons, great!
Now lets visualise the HDBs.
```{r hdb_yth poly}
joined <- st_join(hdb_spatial,yth_poly) #combine the HDB data with the youth density to assign youth density values to each HDB
#HDB point graph
hdb_point_plot <- tm_shape(base_map) + tm_borders() +
tm_shape(hdb_spatial) + tm_dots(size = 0.1, scale = 0.5, col = 'black') +
tm_compass(type="8star", size = 2) +
tm_scale_bar(width = 0.15) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "bottom"),
legend.text.size = 0.75,
legend.title.size = 1.5,
title="HDB locations \nwithin Singapore",
title.position = c('left', 'top'))
hdb_point_plot
tmap_save(hdb_point_plot, filename = "plots/optimal_stops/hdb_point_plot.png")
```
Lastly, lets see the combination of both youth density and HDBs
```{r yth and hdbs}
hdb_yth_plot <- tm_shape(base_map) + tm_borders() +
tm_shape(yth_poly) + tm_polygons(col = 'youth', title = 'Youth Density') +
tm_shape(hdb_spatial) + tm_dots(size = 0.1, scale = 0.5, col = 'black') +
tm_compass(type="8star", size = 2) +
tm_scale_bar(width = 0.15) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="HDB locations \nwithin Singapore",
title.position = c('left', 'top'))
hdb_yth_plot
tmap_save(hdb_yth_plot, filename = "plots/optimal_stops/hdb_yth_plot.png")
```
## Fixing data
So now our objective is to assing a youth density value to each HDB. However, what we noticed is that some HDBs (especially in the punggol area) are missing values and we also have some locations with multiple values. Now in order to fill in the data, rather than imputing with the mean, we decided that we can interpolate the data.
In terms of deciding which interpolation method to use, we decided to use IDW as the appropriate method to fill in the data. This is becaus we wanted a deterministic model in order to fill in the missing data **consistently**. Furthermore, kringing would not perform well for this form of missing data which only has data from one side but not the other.
So our first step is to create a simple dataframe which contains both the youth density data and HDB locations.
```{r joined}
joined <- st_join(hdb_spatial,yth_poly) #combine the HDB data with the youth density to assign youth density values to each HDB
#First, we need to seperate out data with NA values and only interpolate on coordinates with points
joined_noNA <- joined %>% filter(!is.na(youth))
joined_spatial <- as(joined_noNA, "Spatial")
```
We then create our raster grid for the IDW process.
```{r sg raster}
base_spatial <- as(base_map, 'Spatial')
#Creating a raster layer for interpolation
grd <- as.data.frame(spsample(base_spatial, "regular", n=10000))
names(grd) <- c("lat", "lng")
coordinates(grd) <- c("lat", "lng")
gridded(grd) <- TRUE
fullgrid(grd) <- TRUE
crs(grd) <- crs(base_spatial)
```
Now onto creating the IDW, determining a good idp value and plotting it out.
```{r idw}
yth.idw <- gstat::idw(youth ~ 1, joined_spatial, newdata=grd, idp=3.0) #idp of 3 chosen as we want closer values to mean more
#Find out what is a good idp value to use #10min runtime
IDW.out <- vector(length = length(joined_spatial))
for (i in 1:length(joined_spatial)) {
IDW.out[i] <- gstat::idw(youth ~ 1, joined_spatial[-i,], joined_spatial[i,], idp=3.0)$var1.pred
}
#Plot the differences
OP <- par(pty="s", mar=c(4,3,0,0))
plot(IDW.out ~ joined_spatial$youth, asp=1, xlab="Observed", ylab="Predicted", pch=16,
col=rgb(0,0,0,0.5))
abline(lm(IDW.out ~ joined_spatial$youth), col="red", lw=2,lty=2)
abline(0,1)
par(OP)
#Looks like the IDW looks valid
r_hdb_idw <- raster(yth.idw)
r_hdb_idw.m <- mask(r_hdb_idw, base_spatial)
idw_full <- tm_shape(r_hdb_idw.m) +
tm_raster(n=10,palette = "Blues", stretch.palette = FALSE,title="Predicted Youth Density (%)") +
tm_shape(base_map) + tm_borders() + tm_fill(alpha = 0) +
tm_shape(joined_spatial) + tm_dots(size=0.005) +
tm_legend(legend.outside=TRUE) +
tm_compass(type="8star", size = 2) +
tm_scale_bar(width = 0.15) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="IDW of HDB locations",
title.position = c('left', 'top'))
idw_full
tmap_save(idw_full, filename = "plots/optimal_stops/idw_full.png")
```
## Filtering
Now that we have assigned each HDB a youth density value, the next step would be to filter out HDBs with lower youth density values.
The first step to doing that is by visualising the IDW values in a histogram. We hace also calculated some key stats for the youth density values.
```{r hist}
#EDA on youth density, making a histogram of youth densities
yth_hist <- r_hdb_idw@data@values
hist <- ggplot(data.frame(values = yth_hist), aes(x = values)) +
geom_histogram(binwidth = 0.1, fill = "lightblue", color = "black", aes(y = ..count..)) +
ggtitle("Histogram of Youth Densities (%)") +
xlab("Youth Density (%)") +
ylab("Frequency") + theme_minimal()
hist
ggsave("plots/optimal_stops/hist.png",hist)
#A couple of statistics for youth density
mean(yth_hist)
median(yth_hist)
quantile(yth_hist,0.25)
quantile(yth_hist,0.75)
max(yth_hist)
```
The next step would be to determine a good threshold for youth density to filter out the HDB locations. Ideally, we would want to split the HDBs into 2 regions for the ideal bus stops to run well.
Over a number of iterations, we set this filter to take the HDBs in the top 50% of youth densitys.
Unfortunately, because assigning each HDB to the value from the raster grid is computationally intensive, we decided to reclassify the raster layer into 25 layers from 0 to 12.5 with a 0.5 interval. This can be seen in the chunk below.
```{r assigning yth}
#Before we move on, we realise that with spatial interpolation, the assigning of values will take a really long time
#As such, we reclassify the values into groups before converting them nto polygons for analysis
reclass_value_idw <- c(0,0.25,0) #to reclass values into 25 different bins
for (i in seq(0,24,1)) {
reclass_value_idw[3*i + 4] = 0.25 + 0.5*i
reclass_value_idw[3*i + 5] = 0.75 + 0.5*i
reclass_value_idw[3*i + 6] = 0.5 + 0.5*i
}
reclass_r_hdb_idw <- reclassify(as(r_hdb_idw, "RasterLayer"), reclass_value_idw) #reclassify kde values to groups
filtered_r_hdb_idw <- reclass_r_hdb_idw
filtered_r_hdb_idw[filtered_r_hdb_idw <= quantile(yth_hist,0.5)] <- NA
#overlaid IDW polygon plot with hdb plot
filtered_r_hdb_idw.m <- mask(filtered_r_hdb_idw, base_spatial)
idw_filter <- tm_shape(filtered_r_hdb_idw.m) +
tm_raster(n=10,palette = "Blues", stretch.palette = FALSE,title="Predicted Youth Density (%)") +
tm_shape(base_map) + tm_borders() + tm_fill(alpha = 0) +
tm_shape(joined_spatial) + tm_dots(size=0.005) +
tm_legend(legend.outside=TRUE) +
tm_compass(type="8star", size = 2) +
tm_scale_bar(width = 0.15) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="Filtered IDW of HDB locations",
title.position = c('left', 'top'))
idw_filter
tmap_save(idw_filter, filename = "plots/optimal_stops/idw_filter.png")
```
Now what we are left is to remove the HDBs which lie outside the new raster layer. In order to accomplish this, we convert the raster values into polygons of equal value. Then an intersection is taken to fitler out the remaining HDBs. The resultant plot is as such.
```{r hdb filter}
#Converting raster layer into polygons to view overlay over Singapore
threshold <- quantile(yth_hist,0.5) #define a threshold to determine what values of youth density we drop
hdb_idw <- rasterToPolygons(reclass_r_hdb_idw, fun = function(x) {x > threshold},dissolve = TRUE) #
hdb_idw <- st_as_sf(hdb_idw)
hdb_idw <- hdb_idw %>% rename(youth = var1.pred)
hdb_idw <- st_cast(hdb_idw,'MULTIPOLYGON')
st_crs(hdb_idw) <- st_crs("EPSG:4326") #set crs for new polygons
hdb_idw <- st_intersection(hdb_idw, base_map) #to ensure new polygons stay within SG boundary
#assigning each hdb a youth value by intersecting the multipolygons to with all hdbs
hdb_yth_pts <- st_intersection(hdb_spatial, hdb_idw) #takes some time
idw_final <- tm_shape(base_map) + tm_borders() +
tm_shape(hdb_idw) + tm_polygons(col = 'youth', title = 'Youth Density', palette = 'Blues') +
tm_shape(hdb_yth_pts) + tm_dots(size = 0.1, scale = 0.5, col = 'black') +
tm_compass(type="8star", size = 2) +
tm_scale_bar(width = 0.15) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="Filtered HDB locations",
title.position = c('left', 'top'))
idw_final
tmap_save(idw_final, filename = "plots/optimal_stops/idw_final.png")
```
## Determining HDB Centroids
Now we have the fitered HDBs, we want to find out the centroids of the filtered HDB areas. This is so that we can then find out busstops which are most relevant to these HDBs. In order to accomplish this, we ran a KDE with a bandwidth of 1.4km on the filtered HDBs to view their clustering.
```{r kde start}
hdb_points <- as(hdb_yth_pts, "Spatial")
hdb_centers <- kde.points(hdb_points, h = 0.013) #1.4km bandwidth
hdb_centers_sf <- st_as_sf(hdb_centers)
r_hdb_centers <- raster(hdb_centers)
r_hdb_centers.m <- mask(r_hdb_centers, base_spatial)
kde_first <- tm_shape(r_hdb_centers.m) +
tm_raster(n=10, stretch.palette = FALSE,title="KDE Values") +
tm_shape(base_map) + tm_borders() + tm_fill(alpha = 0) +
tm_shape(hdb_yth_pts) + tm_dots(size=0.005) +
tm_legend(legend.outside=TRUE) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="KDE of HDB locations",
title.position = c('left', 'top'))
kde_first
tmap_save(kde_first, filename = "plots/optimal_stops/kde_first.png")
```
We then reclassified the kde values into 5 equal groups from 0 to 500 and removed the first 2 layers. This results in the following.
```{r kde second}
#Reclassify values in raster to make contour lines as seen in KDE plot.
reclass_values <- c(0,100,1, #reclassify kde values from 0-50 in group 1 and so on
100,200,2,
200,300,3,
300,400,4,
400,500,5)
reclass_hdb_centers <- reclassify(as(hdb_centers, "RasterLayer"), reclass_values) #reclassify kde values to groups
hdb_centers_poly <- rasterToPolygons(reclass_hdb_centers, dissolve = T, n = 16, digits = 3) #to make a polygon layer
hdb_centers_poly <- st_as_sf(hdb_centers_poly) #to make an SF object
hdb_centers_poly <- hdb_centers_poly[-c(1,2),] #remove polys with low kde values
hdb_centers_poly <- st_cast(hdb_centers_poly,'POLYGON') #to split multipolygon to polygon to obtain centers
hdb_centers_poly$kde <- factor(hdb_centers_poly$kde) #to tell R to recognise the colours as categories
kde_second <- tm_shape(base_map) + tm_borders() +
tm_shape(hdb_centers_poly) + tm_polygons(col = 'kde', title = 'KDE Group', palette = 'YlOrRd') +
tm_shape(hdb_yth_pts) + tm_dots(size = 0.01, scale = 0.5, col = 'black') +
tm_compass(type="8star", size = 2) +
tm_scale_bar(width = 0.15) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="Filtered KDEs with HDBs",
title.position = c('left', 'top'))
kde_second
tmap_save(kde_second, filename = "plots/optimal_stops/kde_second.png")
```
Now it is hard to see the KDE polygons on that plot. To make the polygons easier to view, we plot the centroids of the individual layers to determine the central point of the HDB locations.
```{r KDE centroids}
#get the centroids of each polygon
hdb_centers_points <- st_centroid(hdb_centers_poly)
#HDB Centroids
kde_centroid <- tm_shape(base_map) + tm_borders() +
tm_shape(hdb_centers_poly) + tm_polygons(col = 'kde', title = 'KDE Group', palette = 'YlOrRd') +
tm_shape(hdb_centers_points) + tm_dots(size = 0.1, scale = 0.5, col = 'black') +
tm_compass(type="8star", size = 2) +
tm_scale_bar(width = 0.15) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="Filtered KDEs with HDBs",
title.position = c('left', 'top'))
kde_centroid
tmap_save(kde_centroid, filename = "plots/optimal_stops/kde_centroid.png")
```
## Bus Stop Mapping
The next set is to find out how the Bus Stops fare with our HDBs. First lets take a look at how the bus stops are distributed around Singapore.
```{r all bus}
bus_all <- tm_shape(base_map) + tm_borders() +
tm_shape(bus_spatial) + tm_bubbles(size = 0.15, scale = 0.5, col = 'black') +
tm_compass(type="8star", size = 2) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="Bus Stops in Singapore",
title.size = 1,
title.position = c('left', 'top'))
bus_all
tmap_save(bus_all, filename = "plots/optimal_stops/bus_all.png")
```
We overlay these with our KDEs earlier.
```{r all bus kde}
#Map to show how bustops fare with kdes
bus_all_kde <- tm_shape(base_map) + tm_borders() +
tm_shape(hdb_centers_poly) + tm_polygons(col = 'kde', title = 'KDE Group', palette = 'YlOrRd') +
tm_shape(bus_spatial) + tm_bubbles(size = 0.15, scale = 0.5, col = 'black') +
tm_compass(type="8star", size = 2) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="Bus Stops with KDEs",
title.size = 1,
title.position = c('left', 'top'))
bus_all_kde
tmap_save(bus_all_kde, filename = "plots/optimal_stops/bus_all_kde.png")
```
Next filter out only bus stops within these kdes. This step is not really necessary, but is doen to reduce computational intensity when calculating the best bus stop for each.
```{r filtered bus kde}
#remove busstops which are not inside the polygons
bus_inside_spatial <- st_intersection(bus_spatial, hdb_centers_poly)
#Show busstops which are inside the KDEs
bus_filtered_kde <- tm_shape(base_map) + tm_borders() +
tm_shape(hdb_centers_poly) + tm_polygons(col = 'kde', title = 'KDE Group', palette = 'YlOrRd') +
tm_shape(bus_inside_spatial) + tm_bubbles(size = 0.15, scale = 0.5, col = 'black') +
tm_compass(type="8star", size = 2) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="Filtered Bus Stops with KDEs",
title.size = 1,
title.position = c('left', 'top'))
bus_filtered_kde
tmap_save(bus_filtered_kde, filename = "plots/optimal_stops/bus_filtered_kde.png")
```
Lastly, we find the closest bus stop to each centroid found earlier and assign each polygon a bus stop!
```{r filtered bus}
#find closest busstop to each centroid
closest_busstops_index <- st_nearest_feature(hdb_centers_points, bus_inside_spatial)
closest_busstops <- bus_inside_spatial[closest_busstops_index,]
#busstops
bus_final <- tm_shape(base_map) + tm_borders() +
tm_shape(hdb_centers_poly) + tm_polygons(col = 'kde', title = 'KDE Group', palette = 'YlOrRd') +
tm_shape(closest_busstops) + tm_bubbles(size = 0.25, scale = 0.5, col = 'black') +
tm_compass(type="8star", size = 2) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="Final Bus Stops",
title.size = 1,
title.position = c('left', 'top'))
bus_final
tmap_save(bus_final, filename = "plots/optimal_stops/bus_final.png")
```
Now to help out with plotting optimal rouths, we want to tag each bus stop with an additional attribute stating what subzone it is in. We then can export this as a geojson file to be saved later.
```{r subzone tagging}
#Now we want to add the region attribute to each datapoint for each bus stop for later
#First extract out the polygons with the regions only
combined_base_map <- subzones %>%
group_by(REGION_N) %>%
summarise(geometry = st_combine(geometry))
combined_base_map <- st_transform(combined_base_map, crs = 4326)
combined_base_map <- st_make_valid(combined_base_map)
final_closest_busstops <- st_intersection(closest_busstops,combined_base_map) #get the regions for each busstop
final_closest_busstops <- distinct(final_closest_busstops) #remove duplicated rows
final_closest_busstops <- final_closest_busstops %>% rename(Region = REGION_N)
#Export as a geojson such for bus line analysis
#st_write(final_closest_busstops, "dataset/processed_datasets/bus_stops.geojson", driver = 'geoJSON',append = T) # DO NOT RUN AGAIN, IT WILL KEEP ADDING LAYERS
```
We lastly want to evaluate how well our bus stops capture the HDBs in Singapore.
We do 2 forms of evaluation, one for all HDBS and one for our filtered HDBs.To then capture covered, we created a 1.5km buffer for each bus stop. The union of all busstops then determine our effectiveness in capturing the HDBs in Singapore.
```{r bus stop eval}
#create buffers for each centroid
closest_busstops_buffer <- st_buffer(final_closest_busstops, dist = 1500) #1.5km buffer
closest_busstops_buffer <- st_union(closest_busstops_buffer) #join the buffers together
closest_busstops_buffer <- st_cast(closest_busstops_buffer,'POLYGON') #join the buffers together
closest_busstops_buffer <- st_make_valid(closest_busstops_buffer)
num_hdb_captured <- nrow(st_intersection(hdb_spatial, closest_busstops_buffer))
percentage_captured <- num_hdb_captured / nrow(hdb_spatial)
print(percentage_captured)
#1.5km radius: 47.1% HDB captured for all HDBs
num_hdb_captured <- nrow(st_intersection(hdb_yth_pts, closest_busstops_buffer))
percentage_captured <- num_hdb_captured / nrow(hdb_yth_pts)
print(percentage_captured)
#1.5km radius: 97.7% HDB captured for filtered HDBs
```
We can then visualise the evaluatation below.
```{r eval plots}
#Busstop buffers with ALL HDBs
eval_all <- tm_shape(base_map) + tm_borders() +
tm_shape(closest_busstops_buffer) + tm_polygons(col = 'yellow') +
tm_shape(hdb_spatial) + tm_bubbles(size = 0.05, scale = 0.5, col = 'black') +
tm_compass(type="8star", size = 2) +
tm_scale_bar(width = 0.15) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="Bus Stop Buffers \nwith All HDBs",
title.size = 1,
title.position = c('left', 'top'))
eval_all
#Busstop buffers with Filtered HDBs
eval_filter <-tm_shape(base_map) + tm_borders() +
tm_shape(closest_busstops_buffer) + tm_polygons(col = 'yellow') +
tm_shape(hdb_yth_pts) + tm_bubbles(size = 0.05, scale = 0.5, col = 'black') +
tm_compass(type="8star", size = 2) +
tm_scale_bar(width = 0.15) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "bottom"),
legend.text.size = 0.5,
legend.title.size = 1,
title="Bus Stop Buffers \nwith Filtered HDBs",
title.size = 1,
title.position = c('left', 'top'))
eval_filter
tmap_save(eval_all, filename = "plots/optimal_stops/eval_all.png")
tmap_save(eval_filter, filename = "plots/optimal_stops/eval_filter.png")
```
# Nightlife Bus Stop Analysis
We then repeated the same process with nightlife locations, except no IDW filtering was used here. A summarised version of the process is written below.
A bandwidth of 750m was used and a buffer of 500m was used.
```{r nightlife}
#Onto finding ideal busstops for nightlife
#Central area of SG
central_area <- base_map %>% filter(PLN_AREA_N %in% c('DOWNTOWN CORE', 'SINGAPORE RIVER', 'MUSEUM','ROCHOR', 'KALLANG','OUTRAM')) %>% filter(!SUBZONE_N %in% c('TANJONG RHU', 'BENDEMEER','KALLANG BAHRU','KAMPONG BUGIS','BOON KENG','GEYLANG BAHRU'))
central_area <- st_make_valid(central_area)
nightlife_points <- as(nightlife, "Spatial")
nightlife_centers <- kde.points(nightlife_points, h = 0.00675) #750mm smoothing
nightlife_centers_sf <- st_as_sf(nightlife_centers)
r_nightlife_centers <- raster(nightlife_centers)
r_nightlife_centers.m <- mask(r_nightlife_centers, base_spatial)
kde_n_first <- tm_shape(r_nightlife_centers.m) +
tm_raster(n=10, stretch.palette = FALSE,title="KDE Values") +
tm_shape(base_map) + tm_borders() + tm_fill(alpha = 0) +
tm_shape(nightlife) + tm_dots(size=0.005) +
tm_legend(legend.outside=TRUE) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="KDE of nightlife locations",
title.position = c('left', 'top'))
kde_n_first
tmap_save(kde_first, filename = "plots/optimal_stops/kde_n_first.png")
#reclassification for polygons
reclass_values_2 <- c(0,max(nightlife_centers$kde) / 6,1, #reclassify kde values from 0-1000 in group 1 and so on
max(nightlife_centers$kde) / 6,2 * max(nightlife_centers$kde) / 6,2,
2* max(nightlife_centers$kde) / 6,3 * max(nightlife_centers$kde) / 6,3,
3* max(nightlife_centers$kde) / 6,4 * max(nightlife_centers$kde) / 6,4,
4* max(nightlife_centers$kde) / 6,5 * max(nightlife_centers$kde) / 6,5,
5* max(nightlife_centers$kde) / 6,6 * max(nightlife_centers$kde) / 6,6)
#6* max(nightlife_centers$kde) / 7, max(nightlife_centers$kde),7)
reclass_nightlife_centers <- reclassify(as(nightlife_centers, "RasterLayer"), reclass_values_2) #reclassify kde values to groups
nightlife_centers_poly <- rasterToPolygons(reclass_nightlife_centers, dissolve = T, digits = 6) #to make a polygon layer
nightlife_centers_poly <- st_as_sf(nightlife_centers_poly) #to make an SF object
nightlife_centers_poly <- nightlife_centers_poly[-c(1,2),] #remove polys with low kde values
nightlife_centers_poly <- st_cast(nightlife_centers_poly,'POLYGON') #to split multipolygon to polygon to obtain centers
nightlife_centers_poly$kde <- factor(nightlife_centers_poly$kde)
kde_n_second <- tm_shape(central_area) + tm_borders() +
tm_shape(nightlife_centers_poly) + tm_polygons(col = 'kde', title = 'KDE Group', palette = 'YlOrRd') +
tm_shape(nightlife) + tm_dots(size = 0.2, scale = 0.5, col = 'black') +
tm_compass(type="8star", size = 2) +
tm_scale_bar(width = 0.15) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="Filtered KDEs with nightlifes",
title.position = c('left', 'top'))
kde_n_second
tmap_save(kde_second, filename = "plots/optimal_stops/kde_second.png")
#get the centroids of each polygon
nightlife_centers_points <- st_centroid(nightlife_centers_poly)
#find closest busstop to each centroid
nightlife_centers_points <- st_transform(nightlife_centers_points, crs = 4326)
closest_nightlife_busstops_index <- st_nearest_feature(nightlife_centers_points, bus_spatial)
closest_nightlife_busstops <- bus_spatial[closest_nightlife_busstops_index,]
#bustops closest to each centroid
nightlife_final <- tm_shape(central_area) + tm_borders() +
tm_shape(nightlife_centers_poly) + tm_polygons(col = 'kde', title = 'KDE Group', palette = 'YlOrRd') +
tm_shape(closest_nightlife_busstops) + tm_bubbles(size = 0.25, scale = 0.5, col = 'black') +
tm_compass(type="8star", size = 2) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "top"),
legend.text.size = 0.5,
legend.title.size = 1,
title="Final Bus Stops \n(Nightlife)",
title.size = 1,
title.position = c('left', 'top'))
nightlife_final
tmap_save(nightlife_final, filename = "plots/optimal_stops/nightlife_final.png")
#assigning location attribute to each busstop
#not needed as all are Central, but for completeness sake
combined_base_map_nightlife <- central_area %>%
group_by(REGION_N) %>%
summarise(geometry = st_combine(geometry))
combined_base_map_nightlife <- st_transform(combined_base_map_nightlife, crs = 4326)
combined_base_map_nightlife <- st_make_valid(combined_base_map_nightlife)
final_closest_busstops_nightlife <- st_intersection(closest_nightlife_busstops,combined_base_map_nightlife) #get the regions for each busstop
final_closest_busstops_nightlife <- distinct(final_closest_busstops_nightlife) #remove duplicated rows
final_closest_busstops_nightlife <- final_closest_busstops_nightlife %>% rename(Region = REGION_N)
#st_write(final_closest_busstops_nightlife, "dataset/processed_datasets/bus_stops_nightlife.geojson")
#Evaluation
#creating buffer
closest_nightlife_busstops_buffer <- st_buffer(final_closest_busstops_nightlife, dist = 500) #500m buffer
closest_nightlife_busstops_buffer <- st_union(closest_nightlife_busstops_buffer) #join the buffers together
closest_nightlife_busstops_buffer <- st_cast(closest_nightlife_busstops_buffer,'POLYGON') #join the buffers together
closest_nightlife_busstops_buffer <- st_make_valid(closest_nightlife_busstops_buffer)
#percentage covered
num_nightlife_captured <- nrow(st_intersection(nightlife, closest_nightlife_busstops_buffer))
percentage_captured <- num_nightlife_captured / nrow(nightlife)
print(percentage_captured)
#500m radius: 71.0% nightlife captured
#plotting the coverage
eval_nightlife <-tm_shape(central_area) + tm_borders() +
tm_shape(closest_nightlife_busstops_buffer) + tm_polygons(col = 'yellow') +
tm_shape(nightlife) + tm_bubbles(size = 0.5, scale = 0.5, col = 'black') +
tm_compass(type="8star", size = 2) +
tm_scale_bar(width = 0.15) +
tm_layout(legend.format = list(digits = 0),
legend.position = c("left", "bottom"),
legend.text.size = 0.5,
legend.title.size = 1,
title="Bus Stop Buffers \nwith Nightlife",
title.size = 1,
title.position = c('left', 'top'))
eval_nightlife
tmap_save(eval_nightlife, filename = "plots/optimal_stops/eval_nightlife.png")
```
The geojson files are then sent to the python code to determine optimal bus routes.
DONE!