-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy path00_1_prep_fao.R
103 lines (82 loc) · 5.74 KB
/
00_1_prep_fao.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# FAO data ----------------------------------------------------------------
library("data.table")
source("R/00_prep_functions.R")
path_fao <- "input/fao/"

# Settings ----------------------------------------------------------------

# ZIP archives to process, keyed by the short name of each dataset.
# Commented-out alternatives carried over from earlier versions of the script:
#   prod      : "Production_Crops_E_All_Data_(Normalized).zip"
#   crop_proc : "Production_CropsProcessed_E_All_Data_(Normalized).zip"
#   live_prod : "Production_Livestock_E_All_Data_(Normalized).zip"
#   live_prim : "Production_LivestockPrimary_E_All_Data_(Normalized).zip"
#   live_proc : "Production_LivestockProcessed_E_All_Data_(Normalized).zip"
#   trade_1   : "Trade_Crops_Livestock_E_All_Data_(Normalized).zip"
#   trad      : "Trade_LiveAnimals_E_All_Data_(Normalized).zip"
#   cbs_crop  : "CommodityBalances_Crops_E_All_Data_(Normalized).zip"
#   cbs_live  : "CommodityBalances_LivestockFish_E_All_Data_(Normalized).zip"
#   fore_prod : "Forestry_E_All_Data_(Normalized).zip"
#   fore_trad : "Forestry_Trade_Flows_E_All_Data_(Normalized).zip"
#   fish_prod : "GlobalProduction_2019.1.0.zip"
files <- c(
  prod = "Production_Crops_Livestock_E_All_Data_(Normalized).zip",
  trad = "Trade_CropsLivestock_E_All_Data_(Normalized).zip",
  btd_prod = "Trade_DetailedTradeMatrix_E_All_Data_(Normalized).zip",
  cbs_food_new = "FoodBalanceSheets_E_All_Data_(Normalized).zip",
  cbs_food_old = "FoodBalanceSheetsHistoric_E_All_Data_(Normalized).zip",
  cbs_nonfood_old = "CommodityBalances_(non-food)_(-2013_old_methodology)_E_All_Data_(Normalized).zip",
  cbs_nonfood_new = "CommodityBalances_(non-food)_(2010-)_E_All_Data_(Normalized).zip",
  sua = "SUA_Crops_Livestock_E_All_Data_(Normalized).zip",
  prices = "Prices_E_All_Data_(Normalized).zip",
  fish_prod = "GlobalProduction_2022.1.1.zip"
)

# File to extract from each ZIP archive: NA for every archive except the last
# entry of `files` (the fishery archive), which names a specific CSV.
# The previous fishery release used "TS_FI_PRODUCTION.csv" here.
extr <- rep(NA_character_, length(files))
extr[length(extr)] <- "Global_production_Quantity.csv"

name <- names(files)

# Base link per archive: the FAOSTAT bulk-download address for all entries
# except the last one, which points at the fishery data address.
links <- rep(
  "http://fenixservices.fao.org/faostat/static/bulkdownloads/",
  length(files)
)
links[length(links)] <- "http://www.fao.org/fishery/static/Data/"

# Column types per dataset (also allows skipping columns, see the "NULL"
# entry of `fish_prod`). Built inside local() so the helper names below do
# not end up in the calling environment.
col_types <- local({
  num <- "numeric"
  chr <- "character"
  # Thirteen-column layout shared by several of the datasets.
  common <- c(num, chr, chr, num, chr, chr, num, chr, num, num, chr, num, chr)
  list(
    prod = c(common, chr),
    trad = common,
    btd_prod = c(
      num, chr, chr, num, chr, chr, num, chr, chr,
      num, chr, num, num, chr, num, chr
    ),
    cbs_food_new = c(common, "logical"),
    cbs_food_old = common,
    cbs_nonfood_old = common,
    cbs_nonfood_new = c(common, chr),
    sua = c(common, chr),
    prices = c(
      num, chr, chr, num, chr, chr, num,
      chr, num, num, num, chr, chr, num, chr
    ),
    fish_prod = c("integer", chr, "integer", chr, chr, "integer", num, "NULL")
  )
})

# Reader per dataset. Update: there are some issues in the trad csv file
# (probably a missing quote somewhere) that fread cannot deal with, but
# readr::read_csv can, so "read_csv" is selected for `trad` and `btd_prod`.
read_method <- setNames(rep("fread", length(files)), names(files))
read_method[c("trad", "btd_prod")] <- "read_csv"
# Execute -----------------------------------------------------------------
# Both helpers come from R/00_prep_functions.R; judging by their names they
# download the archives and then extract/convert them (confirm in that file).
fa_dl(
  file = files,
  link = links,
  path = path_fao
)
# Input and output directory are the same; `rm = FALSE` is passed through
# unchanged.
fa_extract(
  path_in = path_fao,
  path_out = path_fao,
  files = files,
  name = name,
  extr = extr,
  col_types = col_types,
  read_method = read_method,
  rm = FALSE
)
# Add primary crop production ---------------------------------------------
# This file is no longer downloadable from the FAO and needs to be requested.
# Abort early with an actionable message if it has not been placed in
# `path_fao`.
if (!file.exists(paste0(path_fao, "Production_Crops_Primary.zip"))) {
  # stop() concatenates its arguments without a separator, so the first
  # fragment must end with a space.
  stop(
    "The file `Production_Crops_Primary.zip` is no longer available ",
    "online. Please request the file and provide it to continue.",
    call. = FALSE
  )
}
# Extract next to the archive. The trailing slash(es) of `path_fao` are
# stripped for `exdir`; the pattern is anchored at the end so a path without
# a trailing slash is left untouched.
x <- unzip(paste0(path_fao, "Production_Crops_Primary.zip"),
  exdir = sub("/+$", "", path_fao))
# Read the extracted file with explicit types for its 13 columns.
y <- fread(x,
  colClasses = c("character", "character", "numeric", "character", "numeric", "character",
    "numeric", "character", "numeric", "numeric", "character", "character", "character"))
# The extracted file is only needed temporarily; keep just the RDS copy.
file.remove(x)
saveRDS(y, paste0(path_fao, "crop_prim_14.rds"))
# Fodder crop production 2015-2019 ------------------------------------------
# Non-public data (requested from FAO by Finn). The workbook is read from
# `path_fao`; only its first nine columns are kept.
file <- "Production Fodder Crops 2015_2019 internal working system official data only.xlsx"
fodder19 <- openxlsx::read.xlsx(
  xlsxFile = paste0(path_fao, file),
  cols = seq_len(9)
)
fodder19 <- as.data.table(fodder19)
saveRDS(object = fodder19, file = paste0(path_fao, "crop_prim_19.rds"))
# download fbs-sua concordance into inst
#fa_dl(file = "", link = "https://fenixservices.fao.org/faostat/static/documents/SCL/FBS%20and%20SUA%20list.xlsx", path = "inst/" )