Skip to content

Commit

Permalink
Started making Indicator base class more compatible with traditional …
Browse files Browse the repository at this point in the history
…data science development paths (sklearn models and pipelines in particular). This is the first commit towards this end #13.
  • Loading branch information
crisjf committed May 11, 2020
1 parent e0e7e91 commit 820de0a
Showing 1 changed file with 38 additions and 13 deletions.
51 changes: 38 additions & 13 deletions toolbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import webbrowser
import json
import Geohash
import joblib
from warnings import warn
from time import sleep
from collections import defaultdict
from shapely.geometry import shape

def is_number(s):
try:
Expand Down Expand Up @@ -407,17 +409,24 @@ def _get_url(self,url,params=None):
warn('FAILED TO RETRIEVE URL: '+url)
return r

def geogrid_data(self, include_geometries=False, as_df=False):
    '''
    Returns the geogrid data from:
    http://cityio.media.mit.edu/api/table/table_name/GEOGRIDDATA

    Parameters
    ----------
    include_geometries : boolean (default=False)
        If True it will also add the geometry information for each grid unit.
    as_df : boolean (default=False)
        If True, it will return data as a DataFrame
        (a GeoDataFrame when include_geometries is also True).

    Returns
    -------
    The object produced by `self._get_grid_data` when as_df is False,
    otherwise a pandas DataFrame / geopandas GeoDataFrame built from it.
    '''
    geogrid_data = self._get_grid_data(include_geometries=include_geometries)
    if not as_df:
        return geogrid_data

    geogrid_df = pd.DataFrame(geogrid_data)
    if include_geometries:
        # Each 'geometry' entry is handed to shapely's `shape` to build a geometry object.
        geometry = geogrid_df['geometry'].apply(shape)
        # Use the keyword form of drop: the positional axis argument
        # (`drop('geometry', 1)`) was removed in pandas 2.0.
        geogrid_df = gpd.GeoDataFrame(geogrid_df.drop(columns='geometry'), geometry=geometry)
    return geogrid_df

def perform_update(self,grid_hash_id=None,append=True):
'''
Expand Down Expand Up @@ -490,24 +499,43 @@ def listen(self,showFront=True,append=False):
self.perform_update(grid_hash_id=grid_hash_id,append=append)

class Indicator:
def __init__(self, *args, model_path=None, requires_geometry=False, indicator_type='numeric', viz_type='radar', **kwargs):
    '''
    Initialize the indicator, run the user-defined setup and load a model if one was given.

    Parameters
    ----------
    *args, **kwargs :
        Forwarded untouched to `self.setup`.
    model_path : str or None (default=None)
        Stored on the instance and read by `self.load_module`.
    requires_geometry : boolean (default=False)
        Stored on the instance as `self.requires_geometry`.
    indicator_type : str (default='numeric')
        Stored as `self.indicator_type`; for 'heatmap' and 'access' the viz_type is cleared.
    viz_type : str (default='radar')
        Stored as `self.viz_type` unless cleared by the indicator_type rule above.
    '''
    self.name = None
    self.indicator_type = indicator_type
    self.viz_type = viz_type
    self.requires_geometry = requires_geometry
    self.model_path = model_path
    self.pickled_model = None

    # setup runs before load_module, so a name assigned in setup is visible to it.
    self.setup(*args, **kwargs)
    self.load_module()

    # Applied last, so it overrides any viz_type assigned during setup.
    if self.indicator_type in ('heatmap', 'access'):
        self.viz_type = None

def _transform_geogrid_data_to_df(self, geogrid_data):
    '''
    Transform the geogrid_data to a DataFrame to be used by a pickled model.

    If the resulting frame has a 'geometry' column, each entry is passed to
    shapely's `shape` and the result is returned as a GeoDataFrame; otherwise
    a plain pandas DataFrame is returned.
    '''
    geogrid_df = pd.DataFrame(geogrid_data)
    if 'geometry' not in geogrid_df.columns:
        return geogrid_df
    geometry = geogrid_df['geometry'].apply(shape)
    # Use the keyword form of drop: the positional axis argument
    # (`drop('geometry', 1)`) was removed in pandas 2.0.
    return gpd.GeoDataFrame(geogrid_df.drop(columns='geometry'), geometry=geometry)

def restructure(self, geogrid_data):
    '''
    Convert geogrid_data into the tabular form handed to the model.
    Delegates entirely to `_transform_geogrid_data_to_df`.
    '''
    return self._transform_geogrid_data_to_df(geogrid_data)

def return_indicator(self, geogrid_data):
    '''
    Function must return either a dictionary, a list, or a number.
    When returning a dict follow the format:
    {'name': 'Sea-Shell','value': 1.00}

    Default behavior: when a model has been loaded into `self.pickled_model`,
    geogrid_data is passed through `self.restructure` and the first element of
    the model's `predict` output is reported under `self.name`.
    Without a model an empty dict is returned.
    '''
    if self.pickled_model is None:
        return {}
    # Restructure the incoming geogrid_data. The previous code passed
    # `geogrid_data_df` here before it was ever assigned (UnboundLocalError).
    geogrid_data_df = self.restructure(geogrid_data)
    prediction = self.pickled_model.predict(geogrid_data_df)
    return {'name': self.name, 'value': prediction[0]}

def return_baseline(self,geogrid_data):
'''
Expand All @@ -521,10 +549,7 @@ def setup(self):
pass

def load_module(self):
    '''
    Load the model stored at `self.model_path` into `self.pickled_model`.

    Does nothing when `self.model_path` is None. If `self.name` is still None
    after loading, it is set to the file name up to its first dot
    (e.g. 'models/density.pkl' -> 'density').
    '''
    import os  # local import: the file's import block is not fully visible here

    if self.model_path is None:
        return
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only point model_path at trusted files.
    self.pickled_model = joblib.load(self.model_path)
    if self.name is None:
        # basename handles the platform's separators and os.PathLike paths,
        # unlike a hard-coded split on '/'.
        file_name = os.path.basename(os.fspath(self.model_path))
        self.name = file_name.split('.')[0]

0 comments on commit 820de0a

Please sign in to comment.