Skip to content

Commit f73a587

Browse files
committed
update document according to feedback from @brookslogan
1 parent 6d31320 commit f73a587

File tree

3 files changed

+134
-107
lines changed

3 files changed

+134
-107
lines changed

R/epi_df.R

+22-13
Original file line numberDiff line numberDiff line change
@@ -113,44 +113,53 @@ NULL
113113
#' @export
114114
#' @examples
115115
#' # Convert a `tsibble` that has county code as an extra key
116-
#' ex1 <- tsibble::tibble(
116+
#' # Notice that county code should be a character string to preserve any leading zeroes
117+
#'
118+
#' # `other_keys` are specified in the `key` parameter
119+
#' # in the `as_tsibble()` function, along with the primary key
120+
#' ex1_input <- tibble::tibble(
117121
#' geo_value = rep(c("ca", "fl", "pa"), each = 3),
118-
#' county_code = c(06059,06061,06067,
119-
#' 12111,12113,12117,
120-
#' 42101, 42103,42105),
122+
#' county_code = c("06059","06061","06067",
123+
#' "12111","12113","12117",
124+
#' "42101", "42103","42105"),
121125
#' time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"),
122126
#' by = "day"), length.out = length(geo_value)),
123127
#' value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value))
124128
#' ) %>%
125129
#' tsibble::as_tsibble(index = time_value, key = c(geo_value, county_code))
126130
#'
127-
#' ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03")
131+
#' ex1 <- as_epi_df(x = ex1_input, geo_type = "state", time_type = "day", as_of = "2020-06-03")
128132
#' attr(ex1,"metadata")
129133
#'
130-
#' # Dealing with misspecified column names
131-
#' ex2 <- tsibble::tibble(
134+
#' # Dealing with misspecified column names:
135+
#' # Geographical and temporal information must be provided in columns named
136+
#' # `geo_value` and `time_value`; if we start from a data frame with a
137+
#' # different format, it must be converted to use `geo_value` and `time_value`
138+
#' # before calling `as_epi_df`.
139+
#'
140+
#' ex2_input <- tibble::tibble(
132141
#' state = rep(c("ca", "fl", "pa"), each = 3), # misnamed
133142
#' pol = rep(c("blue", "swing", "swing"), each = 3), # extra key
134143
#' reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"),
135144
#' by = "day"), length.out = length(state)), # misnamed
136145
#' value = 1:length(state) + 0.01 * rnorm(length(state))
137-
#' ) %>% data.frame()
146+
#' )
138147
#'
139-
#' head(ex2)
148+
#' print(ex2_input)
140149
#'
141-
#' ex2 <- ex2 %>% dplyr::rename(geo_value = state, time_value = reported_date) %>%
150+
#' ex2 <- ex2_input %>% dplyr::rename(geo_value = state, time_value = reported_date) %>%
142151
#' as_epi_df(geo_type = "state", as_of = "2020-06-03",
143152
#' additional_metadata = c(other_keys = "pol"))
144153
#'
145154
#' attr(ex2,"metadata")
146155
#'
147156
#' # Adding additional keys to an `epi_df` object
148157
#'
149-
#' ex3 <- jhu_csse_county_level_subset %>%
150-
#' filter(time_value > "2021-12-01", state_name == "Massachusetts") %>%
158+
#' ex3_input <- jhu_csse_county_level_subset %>%
159+
#' dplyr::filter(time_value > "2021-12-01", state_name == "Massachusetts") %>%
151160
#' dplyr::slice_tail(n = 6)
152161
#'
153-
#' ex3 <- ex3 %>%
162+
#' ex3 <- ex3_input %>%
154163
#' tsibble::as_tsibble() %>% # needed to add the additional metadata
155164
#' dplyr::mutate(state = rep("MA",6)) %>%
156165
#' as_epi_df(additional_metadata = c(other_keys = "state"))

man/as_epi_df.Rd

+22-13
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vignettes/epiprocess.Rmd

+90-81
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ API](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html).
5858
library(delphi.epidata)
5959
library(epiprocess)
6060
library(dplyr)
61+
library(withr)
6162
6263
cases <- covidcast(
6364
data_source = "jhu-csse",
@@ -127,6 +128,91 @@ x <- as_epi_df(cases) %>%
127128
attributes(x)$metadata
128129
```
129130

131+
## Using additional key columns in `epi_df`
132+
In the following examples we will show how to create an `epi_df` with additional keys.
133+
134+
### Converting a `tsibble` that has county code as an extra key
135+
```{r}
136+
ex1 <- tibble(
137+
geo_value = rep(c("ca", "fl", "pa"), each = 3),
138+
county_code = c("06059","06061","06067",
139+
"12111","12113","12117",
140+
"42101","42103","42105"),
141+
time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"),
142+
by = "day"), length.out = length(geo_value)),
143+
value = 1:length(geo_value) + 0.01 * withr::with_rng_version("3.0.0", withr::with_seed(42, length(geo_value)))
144+
) %>%
145+
as_tsibble(index = time_value, key = c(geo_value, county_code))
146+
147+
ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03")
148+
```
149+
150+
The metadata now includes `county_code` as an extra key.
151+
```{r}
152+
attr(ex1,"metadata")
153+
```
154+
155+
156+
### Dealing with misspecified column names
157+
158+
`epi_df` requires columns named `geo_value` and `time_value`; if they do not exist, `as_epi_df()` throws an error.
159+
```{r, error = TRUE}
160+
data.frame(
161+
state = rep(c("ca", "fl", "pa"), each = 3), # misnamed
162+
pol = rep(c("blue", "swing", "swing"), each = 3), # extra key
163+
reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"),
164+
by = "day"), length.out = length(geo_value)), # misnamed
165+
value = 1:length(geo_value) + 0.01 * withr::with_rng_version("3.0.0", withr::with_seed(42, length(geo_value)))
166+
) %>% as_epi_df()
167+
```
168+
169+
The columns can be renamed to match `epi_df` format. In the example below, notice there is also an additional key `pol`.
170+
```{r}
171+
ex2 <- tibble(
172+
state = rep(c("ca", "fl", "pa"), each = 3), # misnamed
173+
pol = rep(c("blue", "swing", "swing"), each = 3), # extra key
174+
reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"),
175+
by = "day"), length.out = length(state)), # misnamed
176+
value = 1:length(state) + 0.01 * withr::with_rng_version("3.0.0", withr::with_seed(42, length(state)))
177+
) %>% data.frame()
178+
179+
head(ex2)
180+
181+
ex2 <- ex2 %>% rename(geo_value = state, time_value = reported_date) %>%
182+
as_epi_df(geo_type = "state", as_of = "2020-06-03",
183+
additional_metadata = c(other_keys = "pol"))
184+
185+
attr(ex2,"metadata")
186+
```
187+
188+
189+
### Adding additional keys to an `epi_df` object
190+
191+
In the above examples, all the keys are added to objects that are not `epi_df` objects. We now illustrate how to add keys to an existing `epi_df` object.
192+
193+
We use a toy data set included in `epiprocess`, prepared using the `covidcast` library, and filter it to a single state for simplicity.
194+
195+
```{r}
196+
ex3 <- jhu_csse_county_level_subset %>%
197+
filter(time_value > "2021-12-01", state_name == "Massachusetts") %>%
198+
slice_tail(n = 6)
199+
200+
attr(ex3,"metadata") # geo_type is county currently
201+
```
202+
203+
Now we add state (MA) as a new column and as a key in the metadata. As a reminder, lowercase state abbreviations are what we would expect if this were a `geo_value` column.
204+
```{r}
205+
206+
ex3 <- ex3 %>%
207+
as_tibble() %>% # needed to add the additional metadata
208+
mutate(state = rep(tolower("MA"),6)) %>%
209+
as_epi_df(additional_metadata = c(other_keys = "state"))
210+
211+
attr(ex3,"metadata")
212+
```
213+
214+
Setting these other keys affects the default `epi_slide` behavior, since the grouping is
215+
130216
## Working with `epi_df` objects downstream
131217

132218
Data in `epi_df` format should be easy to work with downstream, since it is a
@@ -199,88 +285,11 @@ ggplot(x, aes(x = time_value, y = cases)) +
199285
labs(x = "Date", y = "Confirmed cases of Ebola in Sierra Leone")
200286
```
201287

202-
## Examples on Additional Keys in epi_df
203-
In the following examples we will show how to create an `epi_df` with additional keys.
204-
205-
### Convert a `tsibble` that has county code as an extra key
206-
```{r}
207-
ex1 <- tibble(
208-
geo_value = rep(c("ca", "fl", "pa"), each = 3),
209-
county_code = c(06059,06061,06067,
210-
12111,12113,12117,
211-
42101, 42103,42105),
212-
time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"),
213-
by = "day"), length.out = length(geo_value)),
214-
value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value))
215-
) %>%
216-
as_tsibble(index = time_value, key = c(geo_value, county_code))
217-
218-
ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03")
219-
```
220-
221-
The metadata now includes `county_code` as an extra key.
222-
```{r}
223-
attr(ex1,"metadata")
224-
```
225-
226-
227-
### Dealing with misspecified column names
228-
229-
`epi_df` requires there to be columns `geo_value` and `time_value`, if they do not exist then `as_epi_df()` throws an error.
230-
```{r, error = TRUE}
231-
data.frame(
232-
state = rep(c("ca", "fl", "pa"), each = 3), # misnamed
233-
pol = rep(c("blue", "swing", "swing"), each = 3), # extra key
234-
reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"),
235-
by = "day"), length.out = length(geo_value)), # misnamed
236-
value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value))
237-
) %>% as_epi_df()
238-
```
239-
240-
The columns can be renamed to match `epi_df` format. In the example below, notice there is also an additional key `pol`.
241-
```{r}
242-
ex2 <- tibble(
243-
state = rep(c("ca", "fl", "pa"), each = 3), # misnamed
244-
pol = rep(c("blue", "swing", "swing"), each = 3), # extra key
245-
reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"),
246-
by = "day"), length.out = length(state)), # misnamed
247-
value = 1:length(state) + 0.01 * rnorm(length(state))
248-
) %>% data.frame()
249-
250-
head(ex2)
251-
252-
ex2 <- ex2 %>% rename(geo_value = state, time_value = reported_date) %>%
253-
as_epi_df(geo_type = "state", as_of = "2020-06-03",
254-
additional_metadata = c(other_keys = "pol"))
255-
256-
attr(ex2,"metadata")
257-
```
258-
259-
260-
### Adding additional keys to an `epi_df` object
261288

262-
In the above examples, all the keys are added to objects that are not `epi_df` objects. We illustrate how to add keys to an `epi_df` object.
263-
264-
We use a subset dataset from the the `covidcast` library.
265-
266-
```{r}
267-
ex3 <- jhu_csse_county_level_subset %>%
268-
filter(time_value > "2021-12-01", state_name == "Massachusetts") %>%
269-
slice_tail(n = 6)
270-
271-
attr(ex3,"metadata") # geo_type is county currently
272-
```
273-
274-
Now we add state (MA) as a new column and a key to the metadata.
275-
```{r}
276-
277-
ex3 <- ex3 %>%
278-
as_tsibble() %>% # needed to add the additional metadata
279-
mutate(state = rep("MA",6)) %>%
280-
as_epi_df(additional_metadata = c(other_keys = "state"))
281-
282-
attr(ex3,"metadata")
283-
```
284289

290+
## Attribution
291+
This document contains a dataset that is a modified part of the [COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University](https://github.com/CSSEGISandData/COVID-19) as [republished in the COVIDcast Epidata API](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html). This data set is licensed under the terms of the [Creative Commons Attribution 4.0 International license](https://creativecommons.org/licenses/by/4.0/) by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering. Copyright Johns Hopkins University 2020.
285292

293+
[From the COVIDcast Epidata API](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html):
294+
These signals are taken directly from the JHU CSSE [COVID-19 GitHub repository](https://github.com/CSSEGISandData/COVID-19) without changes.
286295

0 commit comments

Comments
 (0)