-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcreate_plots_and_stats.R
153 lines (135 loc) · 6.24 KB
/
create_plots_and_stats.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
library(here)
library(tidyverse)
library(zoo)
library(ggpubr)
library(MetBrewer)
# Read the classification table into `d`. The path is built from the project
# root with here::here(), so the working directory is left untouched.
d <- readr::read_csv(
  here::here("data", "ecology_statistical_ecology_classification_v01.csv")
)
# Figure 1: share of retained records (omit == "keep") flagged
# statistical == "yes" in each year, drawn as a dashed annual line with a
# 10-year rolling mean on top.
d |>
  dplyr::filter(omit == "keep") |>
  dplyr::group_by(year) |>
  # One value per year; group_by() + summarise() returns rows sorted by year,
  # which is the order the rolling mean below is computed in. Returning a
  # single row per group replaces the deprecated multi-row summarise() that
  # previously had to be de-duplicated with distinct().
  dplyr::summarise(
    prop_statistical = sum(statistical == "yes") / dplyr::n(),
    .groups = "drop"
  ) |>
  dplyr::filter(year < 2023) |>
  # Rolling mean over a window of 10 rows; `fill = NA` pads the ends with NA
  # (replaces the deprecated `na.pad = TRUE`).
  dplyr::mutate(
    prop_ra10 = zoo::rollmean(prop_statistical, k = 10, fill = NA)
  ) |>
  ggplot2::ggplot(ggplot2::aes(x = year)) +
  # Raw yearly proportion
  ggplot2::geom_line(
    ggplot2::aes(y = prop_statistical),
    linewidth = 0.5,
    color = "gray40",
    linetype = "dashed"
  ) +
  # Smoothed proportion
  ggplot2::geom_line(
    ggplot2::aes(y = prop_ra10),
    linewidth = 1.5,
    color = "darkred"
  ) +
  ggplot2::theme_classic() +
  ggplot2::labs(
    x = "Year",
    y = expression(
      paste("Proportion of statistical ecology papers in", italic(" Ecology"))
    )
  ) +
  # element_line() takes `linewidth` (its `size` argument is deprecated),
  # matching the geom_line() calls above.
  ggplot2::theme(
    axis.text = ggplot2::element_text(size = 10, color = "black"),
    axis.title = ggplot2::element_text(size = 11, color = "black"),
    axis.line = ggplot2::element_line(linewidth = 0.2, color = "gray30"),
    axis.ticks = ggplot2::element_line(linewidth = 0.2, color = "gray30")
  )
# Write the most recently created plot (ggsave()'s default) to
# figures/figure_01.png. The full path is passed explicitly instead of
# changing the working directory.
ggplot2::ggsave(
  filename = here::here("figures", "figure_01.png"),
  width = 4.75,
  height = 4,
  units = "in",
  dpi = 300
)
# Figure 2: stacked bar chart of retained, statistical records with
# citations >= q75_citations, counted per topic category and coloured by
# publication period.

# Four fill colours taken from MetBrewer's "Greek" palette entry, one per
# time period.
pal <- MetBrewer::MetPalettes$Greek[[1]][1:4]

d |>
  dplyr::filter(
    omit == "keep",
    statistical == "yes",
    citations >= q75_citations
  ) |>
  # One row per record x category column; rows with value == 1 are kept, so a
  # record contributes once for every category it is flagged in.
  tidyr::pivot_longer(
    individual:tools_practices,
    names_to = "category",
    values_to = "value"
  ) |>
  dplyr::filter(value == 1) |>
  dplyr::mutate(
    # Readable labels for the two multi-word column names; the remaining
    # column names only need sentence-casing.
    category = dplyr::case_when(
      category == "validation_selection" ~ "Model validation & selection",
      category == "tools_practices" ~ "Tools & best practices",
      TRUE ~ category
    ),
    category = stringr::str_to_sentence(category),
    # Factor levels fix the left-to-right order of the bars
    category = factor(
      category,
      levels = c(
        "Individual",
        "Population",
        "Community",
        "Ecosystem",
        "Spatial",
        "Model validation & selection",
        "Tools & best practices"
      )
    ),
    # Conditions are tested in order; the explicit final condition keeps a
    # missing year as NA rather than assigning it to the last period.
    time_period = dplyr::case_when(
      year <= 1950 ~ "1920-1950",
      year <= 1975 ~ "1951-1975",
      year <= 2000 ~ "1976-2000",
      year > 2000 ~ "2001-2023"
    ),
    # Most recent period first in the legend and stacking order
    time_period = factor(
      time_period,
      levels = c(
        "2001-2023",
        "1976-2000",
        "1951-1975",
        "1920-1950"
      )
    )
  ) |>
  ggplot2::ggplot(ggplot2::aes(x = category, fill = time_period)) +
  ggplot2::geom_bar() +
  ggplot2::theme_classic() +
  ggplot2::scale_fill_manual("Time period", values = pal) +
  ggplot2::ylab("Number of papers") +
  ggpubr::rotate_x_text(angle = 45) +
  # element_line() takes `linewidth` (its `size` argument is deprecated).
  # NOTE(review): a numeric `legend.position` is deprecated in newer ggplot2
  # releases in favour of `legend.position.inside`; kept as-is because the
  # installed ggplot2 version is not visible from this script.
  ggplot2::theme(
    strip.background = ggplot2::element_blank(),
    axis.title.x = ggplot2::element_blank(),
    legend.title = ggplot2::element_text(size = 11, color = "black"),
    legend.position = c(0.85, 0.85),
    axis.text = ggplot2::element_text(size = 10, color = "black"),
    axis.title = ggplot2::element_text(size = 11, color = "black"),
    axis.line = ggplot2::element_line(linewidth = 0.2, color = "gray30"),
    axis.ticks = ggplot2::element_line(linewidth = 0.2, color = "gray30"),
    legend.text = ggplot2::element_text(size = 10, color = "black")
  )
# Write the most recently created plot (ggsave()'s default) to
# figures/figure_02.png. The full path is passed explicitly instead of
# changing the working directory.
ggplot2::ggsave(
  filename = here::here("figures", "figure_02.png"),
  width = 4.25,
  height = 5,
  units = "in",
  dpi = 300
)
# Review stats ----
# Each expression below is evaluated at top level so its value is printed;
# assignments are wrapped in parentheses for the same reason.

# Total number of records in the classification table
nrow(d)

# Number of records retained for analysis (omit == "keep")
(n_total <- nrow(dplyr::filter(d, omit == "keep")))

# Number of statistical ecology papers among the retained records
(n_statistical <- nrow(
  dplyr::filter(d, omit == "keep", statistical == "yes")
))

# Percentage of retained records that are statistical ecology papers
round(n_statistical / n_total * 100, 1)

# Number of statistical ecology papers whose citation count is at or above
# the 75th percentile of citations for that year (q75_citations)
(n_statistical_q75 <- nrow(
  dplyr::filter(
    d,
    omit == "keep",
    statistical == "yes",
    citations >= q75_citations
  )
))

# Same citation threshold applied to all retained records, statistical or not
(n_q75 <- nrow(
  dplyr::filter(d, omit == "keep", citations >= q75_citations)
))

# Percentage of statistical ecology papers that are highly cited
round(n_statistical_q75 / n_statistical * 100, 1)

# Percentage of all highly cited retained records that are statistical
# ecology papers
round(n_statistical_q75 / n_q75 * 100, 1)
# Breakdown: for retained, statistical records with
# citations >= q75_citations, the number flagged in each category and that
# number expressed as a percentage of 407.
d |>
  dplyr::filter(
    omit == "keep",
    statistical == "yes",
    citations >= q75_citations
  ) |>
  # One row per record x category column; keep only the flagged ones
  tidyr::pivot_longer(
    individual:tools_practices,
    names_to = "category",
    values_to = "value"
  ) |>
  dplyr::filter(value == 1) |>
  dplyr::mutate(
    # Readable labels for the two multi-word column names, then sentence case
    category = dplyr::case_when(
      category == "validation_selection" ~ "Model validation & selection",
      category == "tools_practices" ~ "Tools & best practices",
      TRUE ~ category
    ),
    category = stringr::str_to_sentence(category),
    # Factor levels fix the row order of the resulting table
    category = factor(
      category,
      levels = c(
        "Individual",
        "Population",
        "Community",
        "Ecosystem",
        "Spatial",
        "Model validation & selection",
        "Tools & best practices"
      )
    )
  ) |>
  dplyr::count(category) |>
  # NOTE(review): 407 is hard-coded; presumably it is the number of highly
  # cited statistical papers (n_statistical_q75) -- confirm, and consider
  # using the computed value so the table tracks the data.
  dplyr::mutate(prop = sprintf("%.1f", round(n / 407 * 100, 1)))