-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvelib.py
173 lines (138 loc) · 6.97 KB
/
velib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
"""
Generate all sorts of static and animated charts based
on the available data.
"""
import json
import glob
import re
import tempfile
from bisect import bisect_left
from datetime import datetime, timezone
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
from os.path import join as path_join
from PIL import Image
from matplotlib.gridspec import GridSpec
# Bounding box around Paris, passed to ``set_extent`` together with ``data_crs``.
# TODO zoom just a little bit more
paris_extent = [
    2.14, 2.55,    # longitude bounds
    48.75, 48.96,  # latitude bounds
]

# Tile server; the map axes use its CRS as projection and transform data accordingly.
tiles = cimgt.GoogleTiles()

# The station records are expressed in lat/lon coordinates.
data_crs = ccrs.PlateCarree()
def load_data(files=None, start="2019-09-16T15:00", end="2019-09-16T17:00"):
    """Load the station records and derive per-station bike metrics.

    Every CSV file is read without a header row; its first column is parsed
    as a date and becomes the index of the resulting frame.

    Args:
        files: Iterable of CSV paths to read. When ``None``, the daily record
            files for 2019-09-05 through 2019-09-23 are read from the
            original hard-coded records directory.
        start: Lower label bound of the time slice to keep, as accepted by
            ``DataFrame.loc`` on a datetime index. ``None`` means unbounded.
        end: Upper label bound of the time slice to keep. ``None`` means
            unbounded.

    Returns:
        A DataFrame indexed by timestamp holding the raw columns plus
        ``total_bikes`` (mechanical + ebike) and ``occupation_ratio``
        (total_bikes / capacity), restricted to the requested time slice.

    Raises:
        ValueError: propagated from ``pandas.concat`` when ``files`` is empty.
    """
    if files is None:
        files = [
            '/home/jean/git/velib/records/201909%02d-velib-records.csv' % day
            for day in range(5, 24)
        ]
    csvheaders = ["timestamp", "station_name", "lon", "lat",
                  "mechanical", "ebike", "capacity", "numdocksavailable"]
    df = pd.concat(
        [pd.read_csv(path, parse_dates=[0], index_col=[0], names=csvheaders)
         for path in files]
    )

    # add columns with total of bikes in station and ratio of bikes available
    df['total_bikes'] = df['mechanical'] + df['ebike']
    # NOTE(review): a capacity of 0 yields inf/NaN here; this is left as-is
    # so downstream charts keep seeing the same values.
    df['occupation_ratio'] = df['total_bikes'] / df['capacity']

    # Label slicing needs a sorted index; only sort when required, and do it
    # stably so rows sharing a timestamp keep their relative order.
    if not df.index.is_monotonic_increasing:
        df = df.sort_index(kind="mergesort")

    # allow selection of a specific slice of time
    df = df.loc[start:end]

    # TODO allow selection of a specific slice of coordinates (lat / lon box)
    return df
def _percentile(q):
    """Build a named aggregation function returning the *q*-th percentile."""
    def aggregate(values):
        return np.percentile(values, q=q)

    # A distinct name per aggregator keeps the resulting column labels unique.
    aggregate.__name__ = "p%d" % q
    return aggregate


def bikes_distribution_over_time(df):
    """Plot how the per-station bike count is distributed at each timestamp.

    Records are grouped by index value (timestamp); for every group the
    min, median, 75th/90th/99th percentiles and max of ``total_bikes`` are
    drawn as one line each. The chart is written to
    ``bikes_per_station_over_time.png`` in the current directory.

    Args:
        df: Records indexed by timestamp with a ``total_bikes`` column.
    """
    groups = df.groupby(by=df.index)

    fig = plt.figure(figsize=(8, 6), dpi=100)
    try:
        fig.suptitle("Distribution of available bikes per station over time")

        ax = fig.gca()
        locator = mdates.AutoDateLocator()
        ax.xaxis.set_major_formatter(mdates.AutoDateFormatter(locator))
        ax.xaxis.set_major_locator(locator)
        ax.set_xlabel("time")
        ax.set_ylabel("bikes")

        agg = groups['total_bikes'].agg([
            'min',
            'median',
            _percentile(75),
            _percentile(90),
            _percentile(99),
            'max',
        ])

        # One line per aggregated column, in the same order as the labels.
        lines = ax.plot(agg)
        labels = ["min", "med", "p75", "p90", "p99", "max"]
        # Reverse so the legend reads from the highest line to the lowest.
        fig.legend(lines[::-1], labels[::-1], loc="upper right")

        fig.savefig("bikes_per_station_over_time.png")
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
def number_of_docks(df):
    """Chart the number of docks per station as a histogram and on a map.

    The capacity of a station is taken as the maximum ``capacity`` seen for
    its ``(lon, lat)`` pair. The upper panel is a histogram with 5-dock wide
    bins; the lower panel is a map where each station is coloured by its
    capacity. The figure is written to ``total_docks.png`` in the current
    directory. When no station has a positive capacity, only the empty
    panels are saved.

    Args:
        df: Records with ``lon``, ``lat`` and ``capacity`` columns.
    """
    fig = plt.figure(figsize=(6, 6), dpi=100)
    try:
        fig.suptitle("Number of docks per station and spatial repartition")

        gs = GridSpec(2, 1, height_ratios=[1, 2])
        hist_axes = fig.add_subplot(gs[0])
        hist_axes.set_xlabel("number of docks")
        hist_axes.set_ylabel("stations")
        map_axes = fig.add_subplot(gs[1], projection=tiles.crs)
        map_axes.set_extent(paris_extent, crs=data_crs)

        capacity = df.groupby(by=['lon', 'lat'])['capacity'].max()
        # some stations report a capacity of 0, but also available bikes... remove them
        capacity = capacity[capacity > 0]

        if not capacity.empty:
            # int() keeps range() happy even if the column was parsed as float.
            upper_edge = int(capacity.max()) + 5
            hist_axes.hist(capacity.values, bins=range(0, upper_edge, 5))

            # TODO surround with histogram lat/lon (see gridspec examples)
            # https://matplotlib.org/3.2.1/gallery/lines_bars_and_markers/scatter_hist.html#sphx-glr-gallery-lines-bars-and-markers-scatter-hist-py
            map_axes.scatter(
                capacity.index.get_level_values('lon'),
                capacity.index.get_level_values('lat'),
                s=1,
                c=capacity,
                cmap=plt.get_cmap('Reds'),
                transform=data_crs,
            )  # TODO normalize data for cmap between 0,1 ?

        fig.savefig("total_docks.png")
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
def top_busy_stations(df):
    """Placeholder for the "top 10 busiest stations" chart.

    The goal is to rank the most used stations, i.e. those whose number of
    available bikes changes the most often. The ranking is not implemented
    yet: the records are only grouped per station and nothing is returned.

    Args:
        df: Records with ``station_name`` and ``total_bikes`` columns.
    """
    bike_counts = df[['station_name', 'total_bikes']]
    station_key = df['station_name']
    # The grouping is built but not used yet.
    bike_counts.groupby(by=station_key)
    return None
def _reset_histogram_axes(occupation_axes, bike_count_axes, station_count, max_total_bikes):
    """Clear both histogram axes and re-apply their fixed titles, labels and limits."""
    occupation_axes.cla()
    occupation_axes.set_xlim(xmin=0, xmax=1)
    occupation_axes.set_ylim(ymin=0, ymax=station_count)
    occupation_axes.set_title("Occupation")
    occupation_axes.set_xlabel("occupation %")
    occupation_axes.set_ylabel("stations")

    bike_count_axes.cla()
    bike_count_axes.set_title("Total bikes in station")
    bike_count_axes.set_xlabel("bikes")
    bike_count_axes.set_xlim(xmin=0, xmax=max_total_bikes)
    # The y axis is shared with the occupation histogram: hide the duplicate labels.
    plt.setp(bike_count_axes.get_yticklabels(), visible=False)


def carto(df, output='velib.gif', frame_duration_ms=1000 / 60):
    """Render an animated GIF of station occupation over time.

    One frame is produced per distinct index value (timestamp). Each frame
    shows a map of the stations coloured by ``occupation_ratio``, a
    histogram of the occupation ratios and a histogram of ``total_bikes``.
    Frames are saved as PNG files in a temporary directory, reloaded with
    Pillow and assembled into a looping GIF.

    Args:
        df: Records indexed by timestamp with ``station_name``, ``lon``,
            ``lat``, ``total_bikes`` and ``occupation_ratio`` columns.
        output: Path of the GIF file to write.
        frame_duration_ms: Value passed as Pillow's ``duration`` when saving
            the GIF (display time of each frame).

    Raises:
        ValueError: if ``df`` is empty, since there is nothing to animate.
    """
    if df.empty:
        raise ValueError("cannot build an animation from an empty data set")

    station_count = df['station_name'].unique().size
    max_total_bikes = df['total_bikes'].max()
    groups = df.groupby(by=df.index)

    # Layout: the map spans the top row, both histograms share the bottom row.
    gs = GridSpec(2, 2, height_ratios=[3, 1])
    fig = plt.figure(figsize=(6, 7), dpi=100, clear=True)
    frames = []
    try:
        map_axes = fig.add_subplot(gs[0, :], projection=tiles.crs)
        map_axes.set_extent(paris_extent, crs=data_crs)
        # NOTE: background tile imagery is currently not added to the map.

        occupation_histo_axes = fig.add_subplot(gs[1, 0])
        bike_count_histo_axes = fig.add_subplot(gs[1, 1], sharey=occupation_histo_axes)

        with tempfile.TemporaryDirectory() as tmpdirname:
            for ts, events in groups:
                # start every frame from clean, identically styled histograms
                _reset_histogram_axes(occupation_histo_axes, bike_count_histo_axes,
                                      station_count, max_total_bikes)

                # plot data
                map_points = map_axes.scatter(events['lon'], events['lat'], s=1,
                                              c=events['occupation_ratio'],
                                              cmap=plt.get_cmap('viridis_r'),
                                              transform=data_crs)
                occupation_histo_axes.hist(events['occupation_ratio'], bins=10, range=[0, 1])
                bike_count_histo_axes.hist(events['total_bikes'], range=[0, max_total_bikes])

                # set current date as the title of the figure
                fig.suptitle(ts.strftime('%Y-%m-%d %H:%M%z'))

                # save figure
                figfilepath = path_join(tmpdirname, ts.strftime('%Y%m%d%H%M%S.png'))
                fig.savefig(figfilepath)

                # load figure as GIF frame; copy() detaches it from the temporary file
                with Image.open(figfilepath) as frame:
                    frames.append(frame.copy())

                # just remove points and not map background
                map_points.remove()
    finally:
        # Always release the figure, even if rendering a frame failed.
        plt.close(fig)

    frames[0].save(output, save_all=True, append_images=frames[1:],
                   duration=frame_duration_ms, loop=0)
if __name__ == "__main__":
    # Script entry point: load the records and render the animated map.
    # The other charts are disabled; call them on the loaded records to enable:
    #   top_busy_stations(...)
    #   bikes_distribution_over_time(...)
    #   number_of_docks(...)
    carto(load_data())