Skip to content

Commit b145eb5

Browse files
author
skaltman
committed
update
1 parent 41d94c5 commit b145eb5

31 files changed

+512
-6
lines changed

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# Book
1+
# Data Wrangling
22

3-
This repository contains the source for the book [Book](https://dcl-book.stanford.edu/).
3+
This repository contains the source for the book [Data Wrangling](https://dcl-wrangle.stanford.edu/).
44

55
We welcome suggestions for improvements.
66

_bookdown.yml

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ delete_merged_file: true
33

44
rmd_files: [
55
"index.Rmd",
6+
"tidy.Rmd",
7+
"pivot.Rmd",
68
"references.Rmd"
79
]
810

_output.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ bookdown::gitbook:
44
toc:
55
collapse: section
66
before: |
7-
<li><strong><a href="./">Modeling</a></strong></li>
7+
<li><strong><a href="./">Data Wrangling</a></strong></li>
88
edit:
9-
link: https://github.com/dcl-docs/model/edit/master/%s
9+
link: https://github.com/dcl-docs/wrangle/edit/master/%s
1010
download: null
1111
sharing: null

data/acs/acs.R

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# State-level ACS estimates (income, rent, n_poverty, n_renters) for 2012 and 2017
2+
3+
# Author: Sara Altman
4+
# Version: 2019-11-14
5+
6+
# Libraries
7+
library(tidyverse)
8+
library(tidycensus)
9+
10+
# Parameters
11+
vars <-
12+
c(
13+
"income" = "B06011_001",
14+
"rent" = "B25064_001",
15+
"n_poverty" = "B05010_001",
16+
"n_renters" = "B25008_003"
17+
)
18+
file_out <- here::here("data/acs/acs_2012_2017.rds")
19+
#===============================================================================
20+
21+
# Retrieve estimates for `variables` in the given `year` by calling get_acs()
# with geography = "state". Returns whatever get_acs() returns.
get_acs_vars <- function(variables, year) {
22+
get_acs(
23+
geography = "state",
24+
variables = variables,
25+
year = year
26+
)
27+
}
28+
# B25008_003 - renters for v17
29+
# B25008_003
30+
# Retrieve the data
31+
# Fetch `vars` for 2012 and 2017, stack the two results with a `year` id column
# ("2012" / "2017"), keep GEOID, NAME, year, variable, and estimate, lowercase
# the column names, and write the result to `file_out`.
# `.` is passed to bind_rows() as an explicit argument, so %>% does not also
# insert the left-hand side as the first argument.
v <-
32+
get_acs_vars(vars, 2012) %>%
33+
bind_rows("2012" = ., "2017" = get_acs_vars(vars, 2017), .id = "year") %>%
34+
select(GEOID, NAME, year, variable, estimate) %>%
35+
rename_all(str_to_lower) %>%
36+
write_rds(file_out)

data/acs/acs_2012_2017.rds

25.3 KB
Binary file not shown.

data/eagles/eagle_nests.rds

344 Bytes
Binary file not shown.

data/eagles/eagle_nests_tidy.rds

487 Bytes
Binary file not shown.

data/eagles/eagle_nests_tidy_ci.rds

621 Bytes
Binary file not shown.

data/eagles/eagle_pairs.rds

3.53 KB
Binary file not shown.

data/eagles/nests.R

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Eagle nests data from US Fish and Wildlife
2+
3+
# Source: https://www.fws.gov/migratorybirds/pdf/management/EagleRuleRevisions-StatusReport.pdf#page=47&zoom=100,0,700
4+
5+
# Author: Sara Altman
6+
# Version: 2019-11-13
7+
8+
# Libraries
9+
library(tidyverse)
10+
11+
# Parameters
12+
file_eagle_nests <- here::here("data/eagles/eagle_nests.rds")
13+
file_eagle_nests_tidy <- here::here("data/eagles/eagle_nests_tidy.rds")
14+
file_eagle_nests_tidy_ci <- here::here("data/eagles/eagle_nests_tidy_ci.rds")
15+
#===============================================================================
16+
17+
# Named numeric vector keyed by region name; looked up by `region` below to
# fill the `se` column for 2009 rows.
# NOTE(review): presumably standard errors of the 2009 estimates -- confirm
# against the source report.
se_2009 <-
18+
c(Pacific = 514, Southwest = 57, `Rocky Mountains and Plains` = 57)
19+
20+
# Wide table: one row per region, one column per year (`2007`, `2009`).
# The tibble is written to `file_eagle_nests`; write_rds() returns its input,
# so `eagle_nests` still holds the tibble for the steps below.
eagle_nests <-
21+
tribble(
22+
~region, ~`2007`, ~`2009`,
23+
"Pacific", 1039, 2587,
24+
"Southwest", 51, 176,
25+
"Rocky Mountains and Plains", 200, 338
26+
) %>%
27+
write_rds(file_eagle_nests)
28+
29+
# Long version of `eagle_nests`: every column except `region` is pivoted so
# that the former column names go to `year` and the cell values go to
# `num_nests`. The result is written to `file_eagle_nests_tidy`.
eagle_nests_tidy <-
30+
eagle_nests %>%
31+
pivot_longer(cols = -region, names_to = "year", values_to = "num_nests") %>%
32+
write_rds(file_eagle_nests_tidy)
33+
34+
# Add an `se` column: rows with year 2009 take the value of `se_2009` for their
# region; all other rows get NA. The result is written to
# `file_eagle_nests_tidy_ci`.
# `year` comes from pivoted column names, so `year == 2009` relies on R
# coercing both sides to a common type for the comparison.
eagle_nests_tidy %>%
35+
mutate(se = if_else(year == 2009, se_2009[region], NA_real_)) %>%
36+
write_rds(file_eagle_nests_tidy_ci)

data/eagles/pairs.R

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Eagle pairs data from US Fish and Wildlife
2+
3+
# Author: Sara Altman
4+
# Version: 2019-11-14
5+
6+
# Libraries
7+
library(tidyverse)
8+
library(rvest)
9+
10+
# Parameters
11+
url_data <- "https://www.fws.gov/midwest/eagle/NestingData/nos_state_tbl.html"
12+
css_selector <- "#rightCol > table > tr:nth-child(2) > td > table"
13+
file_out <- here::here("data/eagles/eagle_pairs.rds")
14+
years <- vars(`1997`:`2006`)
15+
#===============================================================================
16+
# Built-in state abbreviations (state.abb) with "AK" and "HI" removed.
state_abbreviations <-
17+
state.abb %>%
18+
discard(~ . %in% c("AK", "HI"))
19+
20+
# Scrape the table selected by `css_selector` from `url_data` and clean it:
#   1. Promote the first row to column names and rename `nn` to `state`.
#   2. Drop rows with an empty `state` and sort by `state`.
#   3. Attach `state_abb` from `state_abbreviations` by position.
#      NOTE(review): this assumes the sorted rows line up one-to-one, in the
#      same order, with `state_abbreviations` -- verify against the scraped
#      table.
#   4. In every column not starting with "state": turn "" into NA, replace any
#      value matching the pattern with NA, then convert to integer.
#   5. Keep `state`, `state_abb`, and the columns selected by `years`, and
#      write the result to `file_out`.
v <-
21+
url_data %>%
22+
read_html() %>%
23+
html_node(css = css_selector) %>%
24+
html_table() %>%
25+
as_tibble() %>%
26+
janitor::row_to_names(row_number = 1) %>%
27+
rename(state = nn) %>%
28+
filter(state != "") %>%
29+
arrange(state) %>%
30+
mutate(state_abb = state_abbreviations) %>%
31+
mutate_at(
32+
vars(-starts_with("state")),
33+
~ na_if(., "") %>%
34+
str_replace(pattern = '(".*")|(b.*)', replacement = NA_character_) %>%
35+
as.integer()
36+
) %>%
37+
select(state, state_abb, !!!years) %>%
38+
write_rds(file_out)

data/migration/migration.R

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Migration data
2+
3+
# Author: Sara Altman
4+
# Version: 2019-11-14
5+
6+
# Libraries
7+
library(tidyverse)
8+
library(readxl)
9+
10+
# Parameters
11+
file_migration_answers <-
12+
fs::path(
13+
admin::dir_github(),
14+
"stanford-datalab",
15+
"data",
16+
"migration",
17+
"answers.rds"
18+
)
19+
20+
YEAR <- 2017
21+
22+
destinations <- c("Albania", "Bulgaria", "Romania")
23+
origins <- vars("Afghanistan", "Canada", "India", "Japan", "South Africa")
24+
file_out <- here::here("data/migration/migration_2017.rds")
25+
#===============================================================================
26+
27+
migration <- read_rds(file_migration_answers)$q1
28+
29+
# Build the subset of `migration` that is written to `file_out`:
# replace ".." with NA in every column, keep rows whose `dest` is in
# `destinations` and whose `year` equals `YEAR`, keep `dest` plus the columns
# named in `origins`, and sort by `dest`.
v <-
30+
migration %>%
31+
mutate_all(na_if, "..") %>%
32+
filter(dest %in% destinations, year == YEAR) %>%
33+
select(dest, !!!origins) %>%
34+
arrange(dest) %>%
35+
write_rds(file_out)

data/migration/migration.rds

494 Bytes
Binary file not shown.

data/migration/migration_2017.rds

494 Bytes
Binary file not shown.
494 Bytes
Binary file not shown.

images/pivot/Untitled.png

29.7 KB
Loading

images/pivot/longer_1.png

147 KB
Loading

images/pivot/longer_2.png

166 KB
Loading

images/pivot/longer_3.png

65.5 KB
Loading

images/pivot/longer_names.png

71.6 KB
Loading

images/pivot/longer_names_values.png

65.8 KB
Loading

images/pivot/longer_values.png

71.8 KB
Loading

images/pivot/missing_values.png

110 KB
Loading
70.5 KB
Loading

images/pivot/tidy_elements.png

123 KB
Loading

images/pivot/wider_1.png

167 KB
Loading

images/pivot/wider_names.png

152 KB
Loading

images/pivot/wider_values.png

165 KB
Loading

index.Rmd

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
---
22
knitr: bookdown::render_book
3-
title: "Book"
3+
title: "Data Wrangling"
44
author: "Sara Altman, Bill Behrman"
55
date: "`r Sys.Date()`"
66
description: "Description."
7-
url: 'https\://dcl-book.stanford.edu'
7+
url: 'https\://dcl-wrangle.stanford.edu'
88
github-repo: dcl-docs/wrangle
99
site: bookdown::bookdown_site
1010
documentclass: book

0 commit comments

Comments
 (0)