Skip to content

Commit

Permalink
Script and results of calculating marginal effects (derivative) for kn…
Browse files Browse the repository at this point in the history
…owledge and skills indicator for all 3-digit NAICS categories. Useful for building scenarios #5
  • Loading branch information
crisjf committed May 22, 2020
1 parent b7cd339 commit e7a1719
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 0 deletions.
80 changes: 80 additions & 0 deletions marginal_effect_innovation_indicator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import pandas as pd
import numpy as np
import joblib
import os
from innovation_indicator import InnoIndicator
from indicator_tools import DataLoader
from APICalls import CBPCall

def industry_to_skills_knowledge(Xdiff,I):
    '''
    Convert every row of Xdiff into a skill composition and a knowledge composition.

    Each row is turned into a {column: value} record and passed to I.industries_to_occupations;
    the resulting worker composition is then given to I.occupations_to_skills and
    I.occupations_to_knowledge (in that order).

    Returns a tuple (skill_compositions, knowledge_compositions): two lists with one entry
    per row of Xdiff, in row order.
    '''
    records = Xdiff.to_dict('records')
    # map() is lazy, so for every record the three translations still run back to back.
    workers_per_row = map(I.industries_to_occupations,records)
    pairs = [
        (I.occupations_to_skills(workers),I.occupations_to_knowledge(workers))
        for workers in workers_per_row
    ]
    skills = [skill for skill,_ in pairs]
    knowledge = [know for _,know in pairs]
    return skills,knowledge

def _scaled_mean_effect(I,model,compositions,bounds,dx,step):
    '''
    Average finite-difference effect of one indicator, rescaled to a change of `step`.

    `compositions` must alternate (original row, perturbed row); predictions are taken from
    model.predict, optionally normalized with I.normalize_value(., bounds) when I.normalize
    is truthy, and every other consecutive difference is therefore perturbed - original.
    '''
    Ypred = model.predict(pd.DataFrame(compositions))
    if I.normalize:
        Ypred = I.normalize_value(Ypred,bounds)
    dy = np.diff(Ypred)[::2]
    return np.mean((dy/dx)*step)

def AME_industry(I,col,X,step=0.1):
    '''
    Average marginal effect of column `col` on the skills and knowledge indicators.

    Numbers should be interpreted as the change in the average indicator (over all X) as a
    result of a `step` (default 0.1) increase in the given NAICS code.

    The derivative is approximated by a finite difference: every row of X is duplicated with
    `col` increased by dx, where dx is the median gap between consecutive sorted non-zero
    values of X[col].

    Parameters:
        I    : indicator object; this function uses I.sks_model, I.kno_model, I.normalize,
               I.normalize_value, I.sks_bounds and I.kno_bounds.
        col  : column of X to perturb.
        X    : DataFrame with one row per observation and one column per industry.
        step : size of the increase the reported effect is scaled to.

    Returns a tuple (skills_effect, knowledge_effect). Both are NaN when no usable dx exists
    (fewer than two non-zero values in X[col], or a zero/non-finite median gap).
    '''
    nonzero = X.loc[X[col]!=0,col]
    dx = np.median(np.diff(np.sort(nonzero.values))) if len(nonzero)>1 else np.nan
    # A zero or undefined step can only yield NaN (0/0 or x/NaN), so skip the expensive pipeline.
    if not np.isfinite(dx) or dx==0:
        return np.nan,np.nan

    # Work on a positional index throughout so the perturbed column cannot be misaligned
    # when X carries a non-default index.
    base = X.reset_index(drop=True)
    bumped = base.copy()
    bumped[col] = base[col]+dx

    # A stable sort on the duplicated index interleaves the rows as
    # (original, perturbed) pairs; the default quicksort does not guarantee that order.
    Xdiff = pd.concat([base,bumped]).sort_index(kind='mergesort')

    skill_compositions,knowledge_compositions = industry_to_skills_knowledge(Xdiff,I)

    sks_effect = _scaled_mean_effect(I,I.sks_model,skill_compositions,I.sks_bounds,dx,step)
    kno_effect = _scaled_mean_effect(I,I.kno_model,knowledge_compositions,I.kno_bounds,dx,step)
    return sks_effect,kno_effect

def main():
    '''
    Compute the average marginal effect of every NAICS column and store the result as CSV.

    Nothing is recomputed when the output file already exists. Otherwise the employment
    table loaded by DataLoader is pivoted to one row per MSA and one column per NAICS2017
    code, each row is converted to shares of its total, and AME_industry is evaluated for
    every column. The output has columns NAICS, SKS_AME and KNO_AME.
    '''
    outPath = 'tables/innovation_data'
    outfpath = os.path.join(outPath,'innovation_marginal_effect.csv')
    if os.path.isfile(outfpath):
        print('Marginal effects already stored. To recompute, delete the current results located at: {}'.format(outfpath))
        return

    print('Loading indicator and employment by industry for each MSA')
    I = InnoIndicator()

    data = DataLoader()
    data.load_MSA_emp_byInd()

    X = pd.pivot_table(data.emp_msa_ind,values='EMP',index='MSA',columns='NAICS2017').fillna(0)
    # Row-wise shares: divide every industry column by the row total in one vectorized step,
    # which only touches columns that actually exist in the pivot.
    X = X.div(X.sum(axis=1),axis=0)
    X = X.reset_index(drop=True)

    rows = []
    for col in X.columns:
        print('\tDerivating with respect to NAICS: {}'.format(col))
        sks_ame,kno_ame = AME_industry(I,col,X)
        rows.append((col,sks_ame,kno_ame))

    print('Combining and saving results')
    ames = pd.DataFrame(rows,columns=['NAICS','SKS_AME','KNO_AME'])
    # Make sure the target folder exists so a long computation is not lost at write time.
    os.makedirs(outPath,exist_ok=True)
    ames.to_csv(outfpath,index=False)

# Run the marginal-effect computation only when this file is executed as a script.
if __name__ == "__main__":
    main()
87 changes: 87 additions & 0 deletions tables/innovation_data/innovation_marginal_effect.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
NAICS,SKS_AME,KNO_AME
113,-0.0010081329749168442,0.0008215616681947015
114,0.0,0.0
115,0.005331352440146175,0.011326441277018682
211,-0.0015025807533984355,0.0008727175479176082
212,-0.00035925773436701986,-0.0007390407141070343
213,0.0004994676691068528,-0.0012624333154087786
221,0.0012717248107538392,0.0021578503881758003
236,-0.0009218064507108101,0.004056471430585033
237,-0.0036318999212354964,-0.004087177913570991
238,0.00012192234318997239,0.0014465750123501494
311,-0.0007920641135756543,0.00048727517217120444
312,0.0006912557924184535,0.000378877323935994
313,-0.0008681383864413026,-0.0007415169340151791
314,0.002605409338321021,0.00029021347961229687
315,0.0008351872401136031,-0.0004995439730953404
316,-0.00037784841909284795,0.0006462865831488685
321,0.0007712254688466172,0.0013748053406052701
322,-0.0005742015711983488,0.0001301004731152185
323,-0.0021770199500289887,-0.0014374384276624195
324,-0.0009020907222388108,0.0021950026169347612
325,0.0005900995245826787,5.6282859756566593e-05
326,-0.004050443400593441,0.0022943663727317213
327,-0.0011019174308332397,0.0002722640345279
331,0.0021785895005027142,-0.003180449721115953
332,0.0006309533853245033,-0.0012011461747499944
333,-0.0006700880509807163,0.00033729078076171696
334,0.009457699998955588,0.0026595338159675014
335,-6.812186507010933e-05,0.0001505735932133951
336,0.002684985253620547,-0.0004346476796265838
337,-0.0022194235070368733,0.0007283928577279558
339,0.00024657115478763796,0.0004552130679293148
423,0.000980348626829013,-0.00012240419634919995
424,0.0028585031860447244,-0.0013640729501855245
425,-0.0006581477579017459,0.0029901973101106594
441,-0.005120459202580126,-0.00017373417425580956
442,-0.005579793646584501,-0.0007661136789298016
443,0.002831436438755288,0.0005804020987567821
444,-0.002325733263611379,-0.0004236978234214252
445,0.0007450184549293255,-8.560687701874633e-05
446,0.001976017961614613,0.0008976078209224188
447,-0.0027217517949322165,-3.31348299158954e-06
448,-0.0002201055404436805,-0.0018945687511926143
451,0.0034184030449191725,0.0005028724194009636
452,0.0010993939752942585,0.0005838967574243104
453,-0.00021441928545249086,-0.0008171131682508174
454,-0.001509811042408386,0.000667948223966807
481,1.0731470969637641e-05,0.0008036746906909888
483,-0.0010180284170084053,-0.00029025819656784273
484,-0.0009932937403081285,-0.00021835355195150565
485,-0.005140368085304926,0.0028584966190532885
486,0.00017445928210434923,-0.0017348955172126709
487,0.00034084211476076643,-0.0001860268444278296
488,7.136969839214075e-06,-1.3012943242055966e-05
492,0.002940692225247223,0.0009131003130846011
493,-0.0017003402156584688,-0.0004919760811171008
511,0.006545675562925853,0.005209444915301953
512,0.0018400022425876976,0.00022118277593453534
515,-0.0007572938394203913,0.0001905167884248805
517,0.003780290065944719,0.0018535698429968611
518,0.007066267626448401,0.0017205707794635619
519,0.006340530962522851,0.004909676217487644
521,,
522,0.002989881333536439,-0.0006263415090439653
523,0.010117050315383371,0.005179954710735036
524,0.005970379078332785,0.005942656964698942
525,-0.0024746912944443164,-0.002492728085617204
531,-0.0026761308198427246,0.0011065176571430641
532,0.003057936791174244,-0.0002578914690973307
533,-0.0014302442826843874,-0.001626236364408166
541,0.004129255635239373,0.004002397453786828
551,0.0015038042996112022,0.0019835075020518487
561,0.0007125317435620928,0.00027453783165628734
562,0.00046792847602607305,-5.6425724486557e-06
611,-0.004514909849051968,0.00417515804167604
621,-0.003462907993533807,0.0004542386953435541
622,0.0017909137203057617,-0.00041915996973275146
623,-0.0006887317918564533,-8.012582147660481e-05
624,0.0034739200696393283,-0.0005142657021886784
711,0.0030895218866598744,-0.0002975022669487763
712,0.0017882313496743,0.0008387966154144958
713,1.3569027570167881e-05,0.0001124705443953574
721,5.3942584788079756e-05,-0.0008892446534328178
722,-0.006038700331290567,-0.00035115379473005774
811,0.0009701240128918857,0.0006104933376387998
812,0.0021436587241087197,0.0023389056259735786
813,-0.002415677049105057,-0.0036722262871510857

0 comments on commit e7a1719

Please sign in to comment.