-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathgeofence.py
180 lines (165 loc) · 7.81 KB
/
geofence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import geopandas as gpd
import pandas as pd
import os
import osmnx as ox
from download_shapeData import SHAPES_PATH,SHAPE_LEVELS
def check_shapes(year_version = '2019',shapesPath=SHAPES_PATH):
'''
Check if shapes have been downloaded
'''
for shape_level in SHAPE_LEVELS:
if not os.path.isdir(os.path.join(shapesPath,year_version+'_'+shape_level.lower())):
raise NameError('Missing shapefiles for:',shape_level)
def getZips(bounds=None,cbsaCode=None,shapesPath=SHAPES_PATH,zipShapes=None,asList=True,quietly=True):
'''
Get the zipcodes for the given bounds or for the given cbsaCode
Parameters
----------
bounds : shapely.Polygon
Polygon of bounds in WGS84 (EPSG:4326).
cbsaCode : str
CBSA Code of metro area to select
shapesPath : str
Path to shapes: SHAPES_PATH.
> SHAPES_PATH = 'tables/shapes/'
zipShapes : geopandas.GeoDataFrame (optional)
If provided, it will not loaded.
Providing the shapes is preferred when running this function multiple times.
> zipShapes = gpd.read_file(os.path.join(SHAPES_PATH,'2019_zcta5','tl_2019_us_zcta510','tl_2019_us_zcta510.shp'),dtype={'ZCTA5CE10':str})
> getZips(bounds=bounds,zipShapes=zipShapes)
asList : boolean (default=True)
If True, it will return a list of zip codes.
If False, it will return all the columns of the filtered GeoDataFrame.
quietly : boolean (default=True)
Used for debugging.
If true, it will print status.
'''
if not quietly:
print('Checking shapes')
check_shapes()
if (bounds is None)&(cbsaCode is not None):
if not quietly:
print('Loading msa shapes')
msas = gpd.read_file(os.path.join(SHAPES_PATH,'2019_cbsa','tl_2019_us_cbsa','tl_2019_us_cbsa.shp'),dtype={'ZCTA5CE10':str})
bounds = msas[msas['GEOID']==cbsaCode].geometry.values[0]
elif (bounds is None)&(cbsaCode is None):
raise NameError('Must provide either cbsaCode or bounds')
if zipShapes is None:
if not quietly:
print('Loading ZIP shapes')
zipShapes = gpd.read_file(os.path.join(SHAPES_PATH,'2019_zcta5','tl_2019_us_zcta510','tl_2019_us_zcta510.shp'),dtype={'ZCTA5CE10':str})
if not quietly:
print('Filtering zips')
zips = zipShapes[zipShapes.geometry.intersection(bounds).area!=0]
if asList:
zips = zips['ZCTA5CE10'].values.tolist()
return zips
def match_polygons(left_df,right_df,left_id,right_id,quietly=True):
'''
Matches two GeoDataFrames with polygons enforcing a one to many match.
Parameters
----------
left_df : geopandas.GeoDataFrame
Shapefile with polygons to be used as reference. Each polygon has a unique match in right_df.
right_df : geopandas.GeoDataFrame
Shapefile with polygons to be used as groups. Each polygon can have multiple matches in left_df.
left_id : str
Column in left_df to be considered as id.
right_id : str
Column in right_df to be considered as id.
quietly : boolean (default=True)
Used for debugging.
If true, it will print status.
Returns
-------
matched : pandas.DataFrame
Table with left_id,right_id columns.
'''
matched = gpd.sjoin(left_df, right_df)
matched = matched[[left_id,right_id]].drop_duplicates()
count = matched.groupby(left_id).count().reset_index()
repeatedSet = set(count[count[right_id]>1][left_id])
if len(repeatedSet)!=0:
if not quietly:
print('Solving repeated matches')
repeated = matched[matched[left_id].isin(repeatedSet)][[left_id,right_id]]
repeated = pd.merge(repeated,left_df[[left_id,'geometry']].rename(columns={'geometry':'geometry_left'}))
repeated = pd.merge(repeated,right_df[[right_id,'geometry']].rename(columns={'geometry':'geometry_right'}))
repeated['area'] = [gb.intersection(gp).area for gb,gp in repeated[['geometry_left','geometry_right']].values]
repeated = repeated.sort_values(by='area',ascending=False).groupby([left_id,right_id]).first()[[]].reset_index()
matched = pd.concat([matched[~matched[left_id].isin(repeatedSet)],repeated])
return matched
def getOSMWeights(bounds,patches,patchIdCol,buildingsRaw=None,quietly=True):
'''
Gets the weight for each polygon in patches to calculate variables within bounds.
It uses building shapes and heights from Open Street Map to infer the weights.
Parameters
----------
bounds : shapely.Polygon
Bounds of area to calcualte indicators for.
patches : geopandas.GeoDataFrame
Table with shapes of patches for which data is available (e.g. census blocks, zip codes, etc)
patchIdCol : str
Name of column to be considered as the ID of each polygon in patches.
E.g. the zipcode.
buildingsRaw : geopandas.GeoDataFrame (optional)
Raw buildings file downloaded from OSM using osmnx.
If not provided, it will be downloaded, which can take a while.
> buildingsRaw = ox.footprints_from_polygon(limit.buffer(0.001),footprint_type='building')
quietly : boolean (default=True)
Used for debugging.
If true, it will print status.
Returns
-------
weights : pandas.DataFrame
Table with patchIdCol,volume,intersected_volume, and weight.
'''
limit = patches.geometry.unary_union
if buildingsRaw is None:
if not quietly:
print('Downloading OSM data')
buildingsRaw = ox.footprints_from_polygon(limit.buffer(0.001),footprint_type='building')
if not quietly:
print('Downloading OSM data')
buildings = buildingsRaw[buildingsRaw.geometry.within(limit)]
buildings = buildings.reset_index().rename(columns={'index':'OSM_id'})
buildings['area'] = buildings.geometry.area
buildings['building:levels'] = buildings['building:levels'].fillna(1).astype(float)
buildings['volume'] = buildings['area']*buildings['building:levels']
buildings['intersected_area'] = buildings.geometry.intersection(bounds).area
buildings['intersected_volume'] = buildings['volume']*buildings['intersected_area']/buildings['area']
buildings = gpd.GeoDataFrame(buildings[['OSM_id','building:levels','area','volume','intersected_area','intersected_volume']],crs={'init':'epsg:4269'},geometry=buildings['geometry'])
if not quietly:
print('Matching buildings')
matched = match_polygons(buildings, patches,'OSM_id',patchIdCol,quietly=quietly)
matched = pd.merge(matched,buildings)
weights = matched.groupby(patchIdCol).sum()[['volume','intersected_volume']].reset_index()
weights['weight'] = weights['intersected_volume']/weights['volume']
return weights
def getAreaWeights(bounds,patches,patchIdCol,quietly=True):
'''
Gets the weight for each polygon in patches to calculate variables within bounds.
It uses the fraction of the area overlapped by the bounds.
This method is much faster than using OSM buildings, but much less precise.
Parameters
----------
bounds : shapely.Polygon
Bounds of area to calcualte indicators for.
patches : geopandas.GeoDataFrame
Table with shapes of patches for which data is available (e.g. census blocks, zip codes, etc)
patchIdCol : str
Name of column to be considered as the ID of each polygon in patches.
E.g. the zipcode.
quietly : boolean (default=True)
Used for debugging.
If true, it will print status.
Returns
-------
weights : pandas.DataFrame
Table with patchIdCol,arrea,intersected_area, and weight.
'''
patches['area'] = patches.geometry.area
patches['intersected_area'] = patches.geometry.intersection(bounds).area
weights = patches[[patchIdCol,'area','intersected_area']]
weights['weight'] = weights['intersected_area']/weights['area']
return weights