-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathair_temp.R
77 lines (56 loc) · 2.32 KB
/
air_temp.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# AIR TEMPERATURE
# Load packages ----------------------------------------------------------------
if (!require(readxl, quietly = TRUE)) {
install.packages("readxl")
library(readxl)}
if (!require(tidyverse, quietly = TRUE)) {
install.packages("tidyverse")
library(tidyverse)}
## Import data -----------------------------------------------------------------
# Read Excel file and combine data sheets
CSM_file_path <- "./data/air_temp/CSM_raw_air_temp.xlsx" # define file path
CSM_sheet_names <- excel_sheets(CSM_file_path) # define sheet names
CSM_list <- lapply(CSM_sheet_names, function(CSM_sheet_names) { # read each sheet and store in a list
read_excel(CSM_file_path, sheet = CSM_sheet_names)
})
CSM_combined_data <- bind_rows(CSM_list) # combine all data frames
View(CSM_combined_data)
## Data prep -------------------------------------------------------------------
# Adjust precision so all values have 2 decimal places
CSM_combined_data <- CSM_combined_data %>%
mutate(Value = round(Value, 2))
# Check for missing values
sum(is.na(CSM_combined_data))
# Identify which years are missing, from 1996-2023
year_range <- as.character(1996:2023)
years_CSM <- unique(stringr::str_extract(CSM_sheet_names, "\\d{4}"))
missing_CSM <- (setdiff(year_range, years_CSM))
print(missing_CSM)
# Convert timestamp and extract year and month
CSM_combined_data <- CSM_combined_data %>%
mutate(Timestamp = ymd_hms(Timestamp),
Year = year(Timestamp),
Month = month(Timestamp))
# Aggregate data by year and month, and get metrics
CSM_results <- CSM_combined_data %>%
group_by(Year, Month) %>%
summarise(Minimum = min(Value, na.rm = TRUE),
Maximum = max(Value, na.rm = TRUE),
Average = mean(Value, na.rm = TRUE),
Num_Samples = n(),
.groups = 'drop')
# Export results as csv files into the R project
write.csv(CSM_results, "./data/air_temp/CSM_summarized_air_temp.csv")
# Most extreme values ----------------------------------------------------------
# Identify 5 highest average temp months
hot5_CSM <- CSM_results %>%
arrange(desc(Average)) %>%
slice_head(n = 5) %>%
select(Year, Month, Average)
print(hot5_CSM)
# Identify 5 lowest average temp months
cold5_CSM <- CSM_results %>%
arrange(Average) %>%
slice_head(n = 5) %>%
select(Year, Month, Average)
print(cold5_CSM)