Skip to content

Commit

Permalink
Created a base class for both EconomicIndicator and InnoIndicator. Th…
Browse files Browse the repository at this point in the history
…is base class now contains functions that are common to both, such as industries_to_occupations and grid_to_industries. Changed listen accordingly.
  • Loading branch information
crisjf committed May 11, 2020
1 parent 9ea86a7 commit e0e7e91
Show file tree
Hide file tree
Showing 4 changed files with 706 additions and 528 deletions.
122 changes: 60 additions & 62 deletions economic_indicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,77 +9,78 @@
import json

from toolbox import Handler, Indicator
from innovation_indicator import InnoIndicator
from indicator_tools import EconomicIndicatorBase

def load_output_per_employee():
    """Return output per employee keyed by NAICS code.

    Reads ``./tables/innovation_data/USA_industry_ouput.csv`` (skipping its
    first line) and, for every row, divides the
    'Sales, value of shipments, or revenue ($1,000)' column by the
    'Number of employees' column.

    A row whose '2017 NAICS code' is a range such as ``'31-33'`` is expanded
    so that every code in the inclusive range maps to that row's value.
    A trailing parenthesised marker (e.g. ``'48-49(104)'``) is dropped from
    the label before it is interpreted, for ranged and single codes alike.

    Returns:
        dict: NAICS code (str) -> output per employee, in the units of the
        sales column ($1,000 per employee).
    """
    code_col = '2017 NAICS code'
    industry_ouput = pd.read_csv(
        './tables/innovation_data/USA_industry_ouput.csv',
        skiprows=1,
        # Keep codes as text so labels like '31-33' and '51' are handled
        # uniformly by the string operations below.
        dtype={code_col: str},
    )
    industry_ouput = industry_ouput.set_index(code_col)

    # One column-wise division instead of a division per iterated row.
    output_per_emp = (
        industry_ouput['Sales, value of shipments, or revenue ($1,000)']
        / industry_ouput['Number of employees']
    )

    output_per_employee_by_naics = {}
    for naics_label, value in output_per_emp.items():
        # Strip the parenthesised marker first, so it can neither leak into
        # a key nor confuse the range split.
        code = naics_label.split('(')[0]
        if '-' in code:
            from_code, to_code = code.split('-')
            for sector in range(int(from_code), int(to_code) + 1):
                output_per_employee_by_naics[str(sector)] = value
        else:
            output_per_employee_by_naics[code] = value
    return output_per_employee_by_naics
# def load_output_per_employee():
# industry_ouput=pd.read_csv('./tables/innovation_data/USA_industry_ouput.csv', skiprows=1)
# industry_ouput=industry_ouput.set_index('2017 NAICS code')
# output_per_employee_by_naics={}
# for ind_row, row in industry_ouput.iterrows():
# output_per_emp=row['Sales, value of shipments, or revenue ($1,000)']/row['Number of employees']
# if '-' in ind_row:
# from_code, to_code=ind_row.split('-')
# if '(' in to_code:
# to_code=to_code.split('(')[0]
# for code in range(int(from_code), int(to_code)+1):
# output_per_employee_by_naics[str(code)]=output_per_emp
# else:
# output_per_employee_by_naics[ind_row]=output_per_emp
# # if '(' in ind_row:
# # ind_row=ind_row.split('(')[0]
# # output_per_employee_by_naics[ind_row]=output_per_emp
# return output_per_employee_by_naics

def get_baseline_employees_by_naics(table_name, table_geoids):
    """Sum the per-industry worker counts over the given block groups.

    Reads ``./tables/<table_name>/mi_wac_S000_JT00_2017.csv.gz``, keeps the
    rows whose ``w_geocode`` (truncated to its first 12 characters) is in
    ``table_geoids``, and totals every column whose name contains 'CNS'.

    Args:
        table_name: Name of the sub-directory of ``./tables`` holding the
            file. Only the directory varies; the file name itself is fixed.
        table_geoids: Collection of 12-character block-group ids to keep.

    Returns:
        dict: NAICS code (str) -> summed value of the matching CNS column.
        For ranged codes in ``wac_cns_to_naics`` (e.g. '31-33') only the
        first code of the range is used as the key.
    """
    wac = pd.read_csv('./tables/{}/mi_wac_S000_JT00_2017.csv.gz'.format(table_name))

    # Vectorized prefix extraction; avoids a Python-level call per row.
    block_groups = wac['w_geocode'].astype(str).str[:12]
    wac_in_table = wac.loc[block_groups.isin(table_geoids)]

    employees_by_naics = {}
    for col in wac.columns:
        if 'CNS' in col:
            # Only the lower bound of a ranged code is used as the key.
            naics = wac_cns_to_naics[col].split('-')[0]
            # Sum just the columns that are read, not the whole table.
            employees_by_naics[naics] = wac_in_table[col].sum()
    return employees_by_naics
# def get_baseline_employees_by_naics(table_name, table_geoids):
# employees_by_naics={}
# wac=pd.read_csv('./tables/{}/mi_wac_S000_JT00_2017.csv.gz'.format(table_name))
# wac['block_group']=wac.apply(lambda row: str(row['w_geocode'])[:12], axis=1)
# wac=wac.loc[wac['block_group'].isin(table_geoids)]
# wac_data_full_table=wac.sum(axis=0)
# for col in wac:
# if 'CNS' in col:
# naics=wac_cns_to_naics[col]
# if '-' in naics:
# naics=naics.split('-')[0]
# employees_by_naics[naics]=wac_data_full_table[col]
# return employees_by_naics


# Lookup from the 'CNSxx' column names to NAICS code strings.
# Codes spanning several sectors are written as 'from-to' ranges.
wac_cns_to_naics = dict(
    CNS01='11',
    CNS02='21',
    CNS03='22',
    CNS04='23',
    CNS05='31-33',
    CNS06='42',
    CNS07='44-45',
    CNS08='48-49',
    CNS09='51',
    CNS10='52',
    CNS11='53',
    CNS12='54',
    CNS13='55',
    CNS14='56',
    CNS15='61',
    CNS16='62',
    CNS17='71',
    CNS18='72',
    CNS19='81',
    CNS20='92',
)
# wac_cns_to_naics={
# 'CNS01' : '11',
# 'CNS02' : '21',
# 'CNS03' : '22',
# 'CNS04' : '23',
# 'CNS05' : '31-33',
# 'CNS06' : '42',
# 'CNS07' : '44-45',
# 'CNS08' : '48-49',
# 'CNS09' : '51',
# 'CNS10' : '52',
# 'CNS11' : '53',
# 'CNS12' : '54',
# 'CNS13' : '55',
# 'CNS14' : '56' ,
# 'CNS15' : '61',
# 'CNS16' : '62',
# 'CNS17' : '71',
# 'CNS18' : '72',
# 'CNS19' : '81',
# 'CNS20' : '92' }

class EconomicIndicator(Indicator):
class EconomicIndicator(EconomicIndicatorBase):
def setup(self,*args,**kwargs):
self.table_name= kwargs['table_name']
self.grid_to_industries=kwargs['grid_to_industries']
self.industries_to_occupations=kwargs['industries_to_occupations']
# self.grid_to_industries=kwargs['grid_to_industries']
# self.industries_to_occupations=kwargs['industries_to_occupations']
self.name=kwargs['name']
sim_zones=json.load(open('./tables/{}/sim_zones.json'.format(self.table_name)))
table_geoids=[z.split('US')[1] for z in sim_zones]
# get the baseline num workers in district by industry NAICS code
self.base_industry_composition=get_baseline_employees_by_naics(self.table_name, table_geoids)
self.base_industry_composition=self.get_baseline_employees_by_naics(self.table_name, table_geoids,return_data=True)
self.base_worker_composition=self.industries_to_occupations(self.base_industry_composition)

self.output_per_employee_by_naics=load_output_per_employee()
# self.output_per_employee_by_naics=self.load_output_per_employee(return_data=True)
self.load_output_per_employee() #This function should load the df without the need of returning it
salary_data=pd.read_excel('./tables/innovation_data/national_M2019_dl.xlsx')
# salary_data=salary_data.set_index('occ_code')
self.code_to_salary={salary_data.iloc[i]['occ_code']: salary_data.iloc[i]['a_mean']
Expand Down Expand Up @@ -140,10 +141,7 @@ def get_total_output(self, industry_composition):
return 1000*total_ouput

def main():
I = InnoIndicator()
E = EconomicIndicator(grid_to_industries=I.grid_to_industries,
industries_to_occupations=I.industries_to_occupations,
table_name='corktown',
E = EconomicIndicator(table_name='corktown',
name='Economic')

H = Handler('corktown', quietly=False)
Expand Down
Loading

0 comments on commit e0e7e91

Please sign in to comment.