-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathStateGraph.py
192 lines (163 loc) · 6.8 KB
/
StateGraph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#########ASYNCIO version of StateGraph
# StateGraph - used to pull data from pubmed through an api
from aiohttp import ClientSession
from aiohttp import TCPConnector
import asyncio
import urllib
from urllib.parse import quote
#import concurrent.futures
#import requests
import json
from bokeh.sampledata import us_states as usstat
from bokeh.plotting import figure, show, output_notebook, ColumnDataSource
from bokeh.models import HoverTool, CustomJS, OpenURL, TapTool, Range1d
from bokeh.models.widgets import Panel, Tabs, Div
from bokeh.layouts import layout
import pickle
# Module-level configuration and map geometry shared by the functions below.
dataDir = "./static/"  # directory prefix used when opening the funding pickle
moneyFile = "FundingPerState2016.pkl"  # file name appended to dataDir
searchField = "[AD]"  # PubMed field tag appended to each state name (AD = affiliation)

# Work on a copy of bokeh's state table with the "HI" and "AK" entries removed.
# NOTE(review): if .copy() is shallow here, the per-state dicts stay shared
# with bokeh's sample data -- confirm before relying on isolation.
us_states = usstat.data.copy()
del us_states["HI"]
del us_states["AK"]

# Outline coordinates, one list per remaining state, in us_states order.
state_xs = [shape["lons"] for shape in us_states.values()]
state_ys = [shape["lats"] for shape in us_states.values()]
async def fetchStates(url, session):
    """Issue a GET for ``url`` on ``session`` and return the body as text.

    Args:
        url: address to request.
        session: object whose ``get(url)`` returns an async context manager
            yielding a response with an awaitable ``text()`` method.

    Returns:
        Whatever ``response.text()`` resolves to.
    """
    async with session.get(url) as reply:
        body = await reply.text()
    return body
###add states correlating with responses############
async def runStates(states, ss, sd, ed):
    """Fetch one esearch response body per entry of ``states``.

    Args:
        states: iterable of state-name strings; each one is placed directly
            in front of the module-level ``searchField`` tag in the query.
        ss: search string, concatenated into the URL exactly as given.
        sd: value used for the ``mindate`` query parameter.
        ed: value used for the ``maxdate`` query parameter.

    Returns:
        The results of ``asyncio.gather`` over all requests, i.e. one body
        per state in the same order as ``states``.
    """
    def build_url(name):
        # Same concatenation order as the query has always used.
        return ("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term="
                + name + searchField + "+AND+" + ss
                + "&mindate=" + sd + "&maxdate=" + ed
                + "&usehistory=y&retmode=json")

    # A single client session (connector created with limit=10) serves every
    # request, and stays open until all of them have been gathered.
    async with ClientSession(connector=TCPConnector(limit=10)) as session:
        pending = [
            asyncio.ensure_future(fetchStates(build_url(name), session))
            for name in states
        ]
        bodies = await asyncio.gather(*pending)
    # Bodies are raw strings; callers decode them (e.g. with json.loads).
    return bodies
# Fetches the publication count for every state; ss is the (already URL-quoted) search string.
def getStates(ss, sd, ed):
    """Query PubMed once per state and store each hit count in ``us_states``.

    The state names are read from the module-level ``us_states`` table, the
    requests are run through ``runStates``, and the ``count`` field of every
    JSON reply is written back as ``us_states[code]["count"]`` (an int).

    Args:
        ss: search string, passed through to ``runStates`` unchanged.
        sd: start date, passed through to ``runStates`` unchanged.
        ed: end date, passed through to ``runStates`` unchanged.

    Returns:
        None; the results are stored in ``us_states``.

    Raises:
        KeyError: if a reply has no ``esearchresult``/``count`` entry.
        ValueError: if a reply is not valid JSON or the count is not numeric.
    """
    states = [us_states[state]["name"] for state in us_states]

    # asyncio.get_event_loop() raises RuntimeError when the calling thread has
    # no current loop (any non-main thread, e.g. a web-server worker). In that
    # case run on a private loop and dispose of it afterwards; otherwise keep
    # using the thread's existing loop as before.
    try:
        loop = asyncio.get_event_loop()
        owns_loop = False
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        owns_loop = True

    try:
        res = loop.run_until_complete(runStates(states, ss, sd, ed))
    finally:
        if owns_loop:
            asyncio.set_event_loop(None)
            loop.close()

    # Responses come back in request order, which is us_states iteration order.
    for state, body in zip(us_states, res):
        search_data = json.loads(body)
        us_states[state]["count"] = int(search_data["esearchresult"]["count"])
def _scaleToMax(values):
    """Return ``values`` divided by their maximum, or a plain copy if the maximum is not positive."""
    peak = max(values)
    if peak > 0:
        return [v / peak for v in values]
    return list(values)


def stateGraph(si, sd, ed):
    """Build a two-tab bokeh layout of per-state publication counts.

    The search term is URL-quoted and handed to ``getStates``, which fills
    ``us_states[code]["count"]``. Two state maps are then drawn whose fill
    opacity encodes (1) the count scaled by the largest count and (2) the
    count divided by the per-state value loaded from the funding pickle,
    again scaled by the largest such ratio.

    Args:
        si: search term as typed by the user; shown in the plot titles and
            (HTML-escaped) in the page heading.
        sd: start date, forwarded to ``getStates``.
        ed: end date, forwarded to ``getStates``.

    Returns:
        A bokeh layout holding the heading ``Div`` above the ``Tabs`` widget.

    Raises:
        KeyError: if a state name is missing from the funding table.
        ZeroDivisionError: if a funding value is zero.
    """
    from html import escape  # local import: only needed for the heading below

    # The search term goes into a URL, so spaces and special chars are quoted.
    ss = quote(si)
    getStates(ss, sd, ed)

    state_names = [us_states[code]["name"] for code in us_states]
    state_raw_counts = [us_states[code]["count"] for code in us_states]

    # Opacity for the plain map: counts relative to the largest count.
    state_counts = _scaleToMax(state_raw_counts)

    # Opacity for the normalized map: count / funding, relative to the largest.
    # NOTE: pickle.load can execute arbitrary code; dataDir must only ever
    # contain trusted files.
    # presumably fbs maps state name -> funding amount; verify against the file.
    with open(dataDir + moneyFile, "rb") as fundingFile:
        fbs = pickle.load(fundingFile)
    state_counts_norm = _scaleToMax(
        [count / fbs[name] for name, count in zip(state_names, state_raw_counts)]
    )

    stateSource = ColumnDataSource(
        data=dict(
            x=state_xs,
            y=state_ys,
            state_names=state_names,
            state_raw_counts=state_raw_counts,
            alphas=state_counts,
        )
    )
    stateNormSource = ColumnDataSource(
        data=dict(
            x=state_xs,
            y=state_ys,
            state_names=state_names,
            state_raw_counts=state_raw_counts,
            alphas=state_counts_norm,
        )
    )

    # Tooltip templates; every opened <div> is closed.
    hoverState = HoverTool(
        tooltips="""
        <div>
            <div style="max-width: 400px;">
                <span style="font-size: 12px; font-weight: bold;">@state_names</span>
            </div>
            <div style="max-width: 400px;">
                <span style="font-size: 12px; color: #966;">Total number of articles:</span>
                <span style="font-size: 12px; color: #966;">@state_raw_counts</span>
            </div>
        </div>
        """
    )
    hoverStateNorm = HoverTool(
        tooltips="""
        <div>
            <div style="max-width: 400px;">
                <span style="font-size: 12px; font-weight: bold;">@state_names</span>
            </div>
            <div style="max-width: 400px;">
                <span style="font-size: 12px; color: #966;">Total Number of Articles:</span>
                <span style="font-size: 12px; color: #966;">@state_raw_counts</span>
            </div>
            <div style="max-width: 400px;">
                <span style="font-size: 12px; color: #966;">Fractional publication rate (norm. by funding):</span>
                <span style="font-size: 12px; color: #966;">@alphas</span>
            </div>
        </div>
        """
    )

    TOOLS = 'pan,wheel_zoom,tap,reset'
    p = figure(title="Publications containing: " + si,
               toolbar_location="left", plot_width=800, plot_height=510,
               tools=[TOOLS, hoverState], active_scroll='wheel_zoom')
    p2 = figure(title="Publications containing: " + si + " (Normalized by NIH funding)",
                toolbar_location="left", plot_width=800, plot_height=510,
                tools=[TOOLS, hoverStateNorm], active_scroll='wheel_zoom')

    # Hide axes and grid lines on both maps and draw the state outlines.
    for plot, source in ((p, stateSource), (p2, stateNormSource)):
        plot.xaxis.visible = False
        plot.xgrid.visible = False
        plot.yaxis.visible = False
        plot.ygrid.visible = False
        plot.patches('x', 'y', fill_color="#377BA8", fill_alpha='alphas',
                     line_color="#884444", line_width=1.5, source=source)

    # One tab per map.
    t1 = Panel(child=p, title="Publication Count")
    t2 = Panel(child=p2, title="Publication Count (Normalized)*")
    ctabs = Tabs(tabs=[t1, t2], width=800)

    # Div renders its text as HTML, so the user-supplied term is escaped.
    tit1 = Div(text="<h1>Geography of " + escape(si) + " publications</h1>", width=930)
    lt = layout([[tit1], [ctabs]])
    return lt
# p, p2 = stateGraph("prc2")
# show(p)
# show(p2)
# stateGraph("lung")
# stateGraph("breast")