-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetCovidData.sh
executable file
·66 lines (59 loc) · 3.15 KB
/
getCovidData.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/bin/bash
#
# This script pulls COVID-19 data for Connecticut (and New York) from
# various sources and saves each data set in its own file.
#
# Data sources:
# - The COVID Tracking Project (testing data) using their API
# https://covidtracking.com
# (- JHU CSSE COVID-19 data set, from local clone of their GitHub repository
# https://github.com/CSSEGISandData/COVID-19.git
# no longer being used as format keeps changing)
#
# This script can be adapted for other regions by changing the commands
# specific for Connecticut
#
# Written by Pedro Mendes <[email protected]>
# Copyright 2020 Pedro Mendes and University of Connecticut
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Let a pipeline report failure when any of its stages fails, not only the
# last one; otherwise a failed download feeding awk would go unnoticed.
set -o pipefail

# filenames for the per-state testing data (Connecticut and New York)
TARGET_CT=CT-COVID19.tsv
TARGET_NY=NY-COVID19.tsv

# location of the local JHU data folder
#JHUDIR=COVID-19/csse_covid_19_data/csse_covid_19_daily_reports
# filter Connecticut data from JHU data files, removing duplicate lines
#grep -h Connecticut $JHUDIR/*.csv | awk -F, 'OFS="," {print $3,$4,$5,$6}' | uniq > jhudata.csv

#######################################
# Run a command and store its standard output in a file.
# The output is first written to "<destination>.part" and only renamed over
# the destination when the command succeeded and produced at least one byte,
# so a failed download never clobbers data fetched by an earlier run.
# Arguments: $1    - destination file
#            $2... - command (with its arguments) whose stdout is stored
# Outputs:   a diagnostic on stderr when the destination is not updated
# Returns:   0 if the destination was replaced, non-zero otherwise
#######################################
save_output() {
  local dest=$1
  shift
  local part="${dest}.part"

  if "$@" > "$part" && [[ -s "$part" ]]; then
    mv -f -- "$part" "$dest"
  else
    rm -f -- "$part"
    printf '%s: could not update %s (download failed or returned no data)\n' \
      "${0##*/}" "$dest" >&2
    return 1
  fi
}

#######################################
# Print the covidtracking.com daily testing data of one state as a
# tab-separated table, oldest row first.
# Stage 1 (awk) keeps CSV columns 1,3,4,5,6,16,15,17 (in that order), passes
# the header line through untouched and sorts the remaining rows; the header
# is flushed explicitly so that it always precedes the output of "sort".
# Stage 2 (awk) switches to tabs, moves the first column to the end and
# prepends a "day" column numbered NR-3, i.e. the first data row is day -1.
# NOTE(review): the column numbers depend on the CSV layout served by the
# API, and the -1 start of the day counter is presumably tied to a reference
# date used by whatever consumes the file -- confirm both before changing.
# Arguments: $1 - two-letter state code used in the API query (e.g. CT)
# Outputs:   the table on stdout; curl reports errors on stderr
# Returns:   0 on success, non-zero if any stage of the pipeline failed
#######################################
state_daily_tsv() {
  local state=$1

  # -f: treat HTTP errors as failures instead of piping the error page on;
  # -sS: no progress meter, but still print error messages.
  # The URL is quoted because "?" is a shell glob character.
  curl -fsS "https://covidtracking.com/api/states/daily.csv?state=${state}" \
    | awk -F, '
        BEGIN { OFS = "," }
        NR == 1 { print $1, $3, $4, $5, $6, $16, $15, $17; fflush() }
        NR > 1 { print $1, $3, $4, $5, $6, $16, $15, $17 | "sort" }
      ' \
    | awk -F, '
        BEGIN { OFS = "\t" }
        NR == 1 { print "day", $2, $3, $4, $5, $6, $7, $8, $1 }
        NR > 1 { print NR - 3, $2, $3, $4, $5, $6, $7, $8, $1 }
      '
}

#######################################
# Fetch every data set. Each download is attempted even if an earlier one
# failed, so one unavailable source does not block the others.
# Globals:   TARGET_CT, TARGET_NY (read)
# Returns:   0 if every file was updated, 1 if at least one was not
#######################################
main() {
  local rc=0

  # get data from covidtracking.com just for Connecticut
  save_output "$TARGET_CT" state_daily_tsv CT || rc=1

  # get data from covidtracking.com just for NY
  save_output "$TARGET_NY" state_daily_tsv NY || rc=1

  # NOTE(review): Socrata endpoints such as these may page their results (a
  # default row limit applies unless $limit is given) -- confirm that the
  # downloaded files are not truncated.

  # get statewide data for Connecticut from data.ct.gov
  save_output CTstate.csv \
    curl -fsS 'https://data.ct.gov/resource/rf3k-f8fg.csv' || rc=1

  # get county level data for Connecticut from data.ct.gov
  save_output CTcounty.csv \
    curl -fsS 'https://data.ct.gov/resource/bfnu-rgqt.csv' || rc=1

  # get age group level data for Connecticut from data.ct.gov
  save_output CTage.csv \
    curl -fsS 'https://data.ct.gov/resource/ypz6-8qyf.csv' || rc=1

  return "$rc"
}

main "$@"
# we're done, isn't unix cute?