Skip to content

Commit

Permalink
bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
martinbruckner committed Jun 27, 2024
1 parent 325b76b commit 6672ce3
Show file tree
Hide file tree
Showing 11 changed files with 181 additions and 180 deletions.
4 changes: 2 additions & 2 deletions R/01_tidy_fao.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ cbs_food_new[Unit == "1000 tonnes", `:=`(Value = ifelse(is.na(Value), 0, Value*1
# NOTE: this is inconsistently defined (sometimes correct, sometimes wrong), so it is corrected further below in the balancing section

# nonfood: remove items contained in food balances
cbs_nonfood_old <- readRDS("input/fao/cbs_nonfood_old.rds")[Year <= 2013,]
cbs_nonfood_new <- readRDS("input/fao/cbs_nonfood_new.rds")[Year > 2013,]
cbs_nonfood_old <- readRDS("input/fao/cbs_nonfood_old.rds")[Year < 2010,]
cbs_nonfood_new <- readRDS("input/fao/cbs_nonfood_new.rds")[Year >= 2010,]
cbs_nonfood <- rbind(cbs_nonfood_old, cbs_nonfood_new[, 1:(ncol(cbs_nonfood_new)-1)])
cbs_nonfood[Element == "Food supply quantity (tonnes)", Element := "Food"]
cbs_nonfood <- merge(cbs_nonfood, cbs_food_old[, .SD, .SDcols = c("Area Code", "Item Code", "Element", "Year Code", "Value")],
Expand Down
7 changes: 2 additions & 5 deletions R/03_build_cbs.R
Original file line number Diff line number Diff line change
Expand Up @@ -622,10 +622,8 @@ cbs <- dt_replace(cbs, function(x) {`<`(x, 0)}, value = 0,
cbs[, balancing := na_sum(production, imports, stock_withdrawal,
-exports, -food, -feed, -seed, -losses, -processing, -other, -unspecified, -tourist, -residuals)]

# TODO: For now, I move negative residuals to balancing and then neutralize negative balancing via other elements
# in the future, we might consider to keep them as completely separate categories, or merge them completely
# in any case, the sum of balancing and residuals should never be <0 in the end
# as long as we do not introduce an "unknown source" region, we need to adapt the other cbs items accordingly
# Note: the sum of balancing and residuals should never be <0 in the end
# as long as we do not introduce an "unknown source" region, we need to adapt the other cbs use elements accordingly

# # neutralize negative balancing via other items
# cat("\nAdjust 'residuals' for ", cbs[balancing < 0 &
Expand Down Expand Up @@ -696,7 +694,6 @@ cbs[na_sum(balancing, residuals) < 0 & !is.na(stock_addition) & stock_addition >

cat("\nAdjust uses proportionally for ", cbs[na_sum(balancing, residuals) < 0, .N],
" observations, where `na_sum(balancing, residuals) < 0`", sep = "")
# TODO: include residuals here?
cbs[, divisor := na_sum(exports, other, processing, seed, food, feed, stock_addition, tourist)]
cbs[na_sum(balancing, residuals) < 0 & divisor >= -na_sum(balancing, residuals),
`:=`(stock_addition = round(na_sum(stock_addition, (na_sum(balancing, residuals) / divisor * stock_addition))),
Expand Down
2 changes: 1 addition & 1 deletion R/06_re-exports.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ btd <- readRDS("data/btd_bal.rds")
cbs <- readRDS("data/cbs_full.rds")

areas <- sort(unique(cbs$area_code))
items <- sort(unique(btd$item_code))
items <- fread("inst/items_full.csv")$item_code


# Prepare reallocation of re-exports --------------------------------------
Expand Down
14 changes: 13 additions & 1 deletion R/08_use.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ library("parallel")
source("R/01_tidy_functions.R")

# should the feedstock optimization be run or should previously stored results be used?
run_optim <- TRUE
run_optim <- FALSE
regions <- fread("inst/regions_full.csv")
items <- fread("inst/items_full.csv")

Expand All @@ -32,6 +32,18 @@ use[, use := NA_real_]
# correct comm_codes
use$comm_code <- items$comm_code[match(use$item_code, items$item_code)]

# correct milk processing
## butter production has a TCF of 3.5-6%, i.e. 100 t of milk are processed into 3.5-6 t of butter
## expressed the other way around, 1 t of butter needs between 16.67 and 28.57 t of milk
## if more than 28.57 t of milk are reported under 'processing' per tonne of butter produced,
## the excess quantity is move from processing to food
butter <- cbs[item=="Butter, Ghee", .(area_code,area,item_code,item,year,production)]
butter <- merge(butter, cbs[item=="Milk - Excluding Butter", .(area_code,area,year,processing)])
butter[, `:=`(max = production / 0.035,
min = production / 0.06)]
cbs <- merge(cbs, butter[, .(area_code, area, year, item = "Milk - Excluding Butter", max)], all.x = TRUE)
cbs[!is.na(max) & max < processing, `:=`(food = food + processing - max, processing = max)]

# 100% processes
cat("Allocating crops going directly to a process. Applies to items:\n\t",
paste0(unique(use[type == "100%", item]), collapse = "; "),
Expand Down
6 changes: 3 additions & 3 deletions R/09_mrsut.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ use <- readRDS("data/use_final.rds")
use_fd <- readRDS("data/use_fd_final.rds")

years <- seq(1986, 2021)
areas <- unique(cbs$area_code)
processes <- unique(use$proc_code)
commodities <- unique(use$comm_code)
areas <- sort(unique(cbs$area_code))
processes <- sort(unique(use$proc_code))
commodities <- sort(unique(use$comm_code))


# Supply ---
Expand Down
182 changes: 139 additions & 43 deletions R/10_mrio.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ library(parallel)
library(data.table)
library(readr)

agg <- function(x) { as.matrix(x) %*% sapply(unique(colnames(x)),"==",colnames(x)) }


# MRIO Table ---

mr_sup_m <- readRDS("/mnt/nfs_fineprint/tmp/fabio/v1.2/mr_sup_mass.rds")
Expand Down Expand Up @@ -60,7 +63,7 @@ for(i in seq_along(Z_m)){

for(j in which(X < 0)){
reg <- j %/% nrcom + 1
print(paste0(regions[reg, name], " / ", X[j]))
# print(paste0(regions[reg, name], " / ", X[j]))
Y[[i]][j, paste0(regions[reg, code], "_balancing")] <-
Y[[i]][j, paste0(regions[reg, code], "_balancing")] - X[j]
}
Expand Down Expand Up @@ -95,6 +98,50 @@ X <- mapply(function(x, y) {
}, x = Z_m, y = Y)




# PROBLEM: There are some products with only zeros in the rows, except of the main diagonal
# i.e. the value on the main diagonal equals total output
# this is mainly due to reporting issues in FAOSTAT, where some countries report seed = production
# SOLUTION: We move 80% of the value to final demand, equally spreading over all fd-categories
io_codes <- read_csv("/mnt/nfs_fineprint/tmp/fabio/v1.2/io_codes.csv")

years <- seq(1986, 2021)

# year <- 2019
for(year in years){

print(year)

Zmi <- Z_m[[as.character(year)]]
Zvi <- Z_v[[as.character(year)]]
Yi <- Y[[as.character(year)]]
Xi <- X[,as.character(year)]

colnames(Yi) <- fd_codes$fd
Y_global <- t(agg(t(agg(Yi))))

for(i in 1:nrow(io_codes)){
if(Xi[i]!=0 & Zmi[i,i] >= Xi[i]) {
# print(paste0(io_codes[i,]))
# print(as.numeric(mean(Zmi[i,i], Zvi[i,i])))
temp <- Yi[i, fd_codes$area_code==io_codes$area_code[i]]
if(sum(temp)==0){ temp <- Y_global[rownames(Y_global)==io_codes$comm_code[i],] }
Yi[i, fd_codes$area_code==io_codes$area_code[i]] <- temp + mean(Zmi[i,i], Zvi[i,i]) * 0.8 / sum(temp) * temp
Zmi[i,i] <- Zvi[i,i] <- mean(Zmi[i,i], Zvi[i,i]) * 0.2
Xi[i] <- sum(Zmi[i,]) + sum(Yi[i,])
}
}

Z_m[[as.character(year)]] <- Zmi
Z_v[[as.character(year)]] <- Zvi
Y[[as.character(year)]] <- Yi
X[,as.character(year)] <- Xi

}



# Store X, Y, Z variables
saveRDS(Z_m, "/mnt/nfs_fineprint/tmp/fabio/v1.2/Z_mass.rds")
saveRDS(Z_v, "/mnt/nfs_fineprint/tmp/fabio/v1.2/Z_value.rds")
Expand Down Expand Up @@ -147,55 +194,103 @@ for(year in years){
# remove losses from Y
Yi <- Y[[as.character(year)]]
losses <- as.matrix(Yi[, grepl("losses", colnames(Yi))])
Yi[, grepl("losses", colnames(Yi))] <- 0
Yi <- Yi[, !grepl("losses", colnames(Yi))]

# remove balancing from Y
balancing <- as.matrix(Yi[, grepl("balancing", colnames(Yi))])
Yi <- Yi[, !grepl("balancing", colnames(Yi))]

Y[[as.character(year)]] <- Yi

# remove this and instead subtract losses from output X:
# # reshape losses for adding them later to the main diagonals of each submatrix of Z
# ## Get the number of rows and columns in the data matrix
# num_rows <- nrow(losses)
# num_cols <- nrow(losses) / ncol(losses)
#
# ## Define a function for reshaping
# reshape_column <- function(v) {
# m <- matrix(0, ncol = num_cols, nrow = num_rows)
# indices <- ((seq_len(length(v)) - 1) %% num_cols) + 1
# m[cbind(seq_len(length(v)), indices)] <- v
# return(m)
# }
#
# ## Apply the reshape_column function to each column using lapply
# matrix_list <- lapply(1:ncol(losses), function(i) {
# v <- losses[, i]
# reshape_column(v)
# })
#
# ## Combine the matrices in the list using cbind()
# combined_matrix <- do.call(cbind, matrix_list)
# combined_matrix <- as(combined_matrix, "dgCMatrix")
#
#
# # add losses to the main diagonals of each submatrix of Z_m
# Zi <- Z_m[[as.character(year)]]
# Zi <- Zi + combined_matrix
# Z_m[[as.character(year)]] <- Zi
#
# # add losses to the main diagonals of each submatrix of Z_v
# Zi <- Z_v[[as.character(year)]]
# Zi <- Zi + combined_matrix
# Z_v[[as.character(year)]] <- Zi

X[,as.character(year)] <- X[,as.character(year)] - rowSums(losses)
# reshape losses + balancing for adding them later to the main diagonals of each submatrix of Z
## Get the number of rows and columns in the data matrix
num_rows <- nrow(losses)
num_cols <- nrow(losses) / ncol(losses)

## Define a function for reshaping
reshape_column <- function(v) {
m <- matrix(0, ncol = num_cols, nrow = num_rows)
indices <- ((seq_len(length(v)) - 1) %% num_cols) + 1
m[cbind(seq_len(length(v)), indices)] <- v
return(m)
}

## Apply the reshape_column function to each column using lapply
matrix_list <- lapply(1:ncol(losses), function(i) {
v <- losses[, i] + balancing[, i]
reshape_column(v)
})

## Combine the matrices in the list using cbind()
combined_matrix <- do.call(cbind, matrix_list)
combined_matrix <- as(combined_matrix, "dgCMatrix")


# add losses to the main diagonals of each submatrix of Z_m
Zi <- Z_m[[as.character(year)]]
Zi <- Zi + combined_matrix
Z_m[[as.character(year)]] <- Zi

# add losses to the main diagonals of each submatrix of Z_v
Zi <- Z_v[[as.character(year)]]
Zi <- Zi + combined_matrix
Z_v[[as.character(year)]] <- Zi

# X[,as.character(year)] <- X[,as.character(year)] - rowSums(losses) - rowSums(balancing)

}

saveRDS(X, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/X.rds")
saveRDS(Y, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Y.rds")
# saveRDS(Z_m, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Z_mass.rds")
# saveRDS(Z_v, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Z_value.rds")

fd_codes <- fread("/mnt/nfs_fineprint/tmp/fabio/v1.2/fd_codes.csv")
fd_codes <- fd_codes[!fd %in% c("losses", "balancing")]
fwrite(fd_codes, file="/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/fd_codes.csv")



# PROBLEM: There are some products with only zeros in the rows, except of the main diagonal
# i.e. the value on the main diagonal equals total output
# this is mainly due to reporting issues in FAOSTAT, where some countries report seed = production
# SOLUTION: We move 80% of the value to final demand, equally spreading over all fd-categories

# year <- 2019
for(year in years){

print(year)

Zmi <- Z_m[[as.character(year)]]
Zvi <- Z_v[[as.character(year)]]
Yi <- Y[[as.character(year)]]
Xi <- X[,as.character(year)]

colnames(Yi) <- fd_codes$fd
Y_global <- t(agg(t(agg(Yi))))

for(i in 1:nrow(io_codes)){
if(Xi[i]!=0 & Zmi[i,i] >= Xi[i]) {
# print(paste0(io_codes[i,]))
# print(as.numeric(mean(Zmi[i,i], Zvi[i,i])))
temp <- Yi[i, fd_codes$area_code==io_codes$area_code[i]]
if(sum(temp)==0){ temp <- Y_global[rownames(Y_global)==io_codes$comm_code[i],] }
Yi[i, fd_codes$area_code==io_codes$area_code[i]] <- temp + mean(Zmi[i,i], Zvi[i,i]) * 0.8 / sum(temp) * temp
Zmi[i,i] <- Zvi[i,i] <- mean(Zmi[i,i], Zvi[i,i]) * 0.2
Xi[i] <- sum(Zmi[i,]) + sum(Yi[i,])
}
}

Z_m[[as.character(year)]] <- Zmi
Z_v[[as.character(year)]] <- Zvi
Y[[as.character(year)]] <- Yi
X[,as.character(year)] <- Xi

}



saveRDS(X, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/X.rds")
saveRDS(Y, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Y.rds")
saveRDS(Z_m, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Z_mass.rds")
saveRDS(Z_v, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Z_value.rds")



# allocate ghg emissions to products --------------------------------------------------------------
Expand All @@ -211,7 +306,8 @@ write_csv(data.frame(ghg_names), "/mnt/nfs_fineprint/tmp/fabio/v1.2/ghg_names.cs
write_csv(data.frame(gwp_names), "/mnt/nfs_fineprint/tmp/fabio/v1.2/gwp_names.csv")
write_csv(data.frame(luh_names), "/mnt/nfs_fineprint/tmp/fabio/v1.2/luh_names.csv")

range <- rep(c(1:97,99:116,118:120),192)+rep(((0:191)*121), each=118)
# range <- rep(c(1:97,99:116,118:120),192)+rep(((0:191)*121), each=118)
range <- rep(c(1:97,99:116,118:121),192)+rep(((0:191)*121), each=119) # adding butter production

ghg_m <- mapply(function(x, y) { as.matrix(x[,-1][,range]) %*% y }, x = ghg, y = trans_m[1:28])
gwp_m <- mapply(function(x, y) { as.matrix(x[,-1][,range]) %*% y }, x = gwp, y = trans_m[1:28])
Expand Down
4 changes: 2 additions & 2 deletions R/11_leontief_inverse.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ prep_solve <- function(year, Z, X,


years <- seq(1986, 2021)
years_singular <- c(1990, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2006, 2007, 2011) # 2013 #c(1994,2002,2009)
years_singular_losses <- c(1989, 1990, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2006, 2007, 2010, 2011, 2013, 2018, 2019) # c(2013,2019) #c(1990,2010,2019) #c(1994,2002,2009)
years_singular <- 0 #c(1990, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2006, 2007, 2011) # 2013 #c(1994,2002,2009)
years_singular_losses <- 0 #c(1989, 1990, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2006, 2007, 2010, 2011, 2013, 2018, 2019) # c(2013,2019) #c(1990,2010,2019) #c(1994,2002,2009)

Z_m <- readRDS("/mnt/nfs_fineprint/tmp/fabio/v1.2/Z_mass.rds")
Z_v <- readRDS("/mnt/nfs_fineprint/tmp/fabio/v1.2/Z_value.rds")
Expand Down
29 changes: 17 additions & 12 deletions R/13_ex_post_corrections.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,18 @@ io <- fread("/mnt/nfs_fineprint/tmp/fabio/v1.2/io_codes.csv")
su <- fread("/mnt/nfs_fineprint/tmp/fabio/v1.2/su_codes.csv")
fd <- fread("/mnt/nfs_fineprint/tmp/fabio/v1.2/fd_codes.csv")
Y <- readRDS("/mnt/nfs_fineprint/tmp/fabio/v1.2/Y.rds")
fd_l <- fread("/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/fd_codes.csv")
Y_l <- readRDS("/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Y.rds")

# Chinese edible oil statistics
# Source: China National Grain & Oils Information Center, Comprehensive balance analysis of China's edible oil market, http://www.grainoil.com.cn/, accessed on 01/03/2023
oil <- fread("inst/oils_china.csv")
# Sources:
# - China National Grain & Oils Information Center, Comprehensive balance analysis of China's edible oil market, http://www.grainoil.com.cn/, accessed on 01/03/2023
# - USDA, Oilseeds and Products Update, Attaché Report (GAIN), https://fas.usda.gov/data/search?keyword=Oilseeds%20and%20Products%20Annual&reports%5B0%5D=report_commodities%3A27&reports%5B1%5D=report_commodities%3A28&reports%5B2%5D=report_regions%3A420&reports%5B3%5D=report_type%3A10251&page=0
oil <- fread("input/oils_china.csv")

years <- names(Y)
Y_new <- Y
Y_l_new <- Y_l

# correct food and other use of veg. oils for China
i = 1
Expand All @@ -31,11 +36,17 @@ for(i in seq_along(Y)){

data <- cbind(data, as.matrix(Y[[i]][,fd$area=="China, mainland"]))
# data[, food_share_fao := `41_food` / (`41_food` + `41_other`)]
data[, `:=`(food = `41_food`, other = `41_other`)]
data[!is.na(food_share), `:=`(food = round((`41_food` + `41_other`) * food_share),
other = round((`41_food` + `41_other`) * (1-food_share)))]
# data[, `:=`(food = `41_food`, other = `41_other`)]
data[!is.na(food_share), `:=`(food = round((food + other) * food_share),
other = round((food + other) * (1-food_share)))]
Y_new[[i]][, fd$area_code==41 & fd$fd=="food"] <- data$food
Y_new[[i]][, fd$area_code==41 & fd$fd=="other"] <- data$other

data_l <- cbind(data, as.matrix(Y_l[[i]][,fd_l$area=="China, mainland"]))
data_l[!is.na(food_share), `:=`(food = round((food + other) * food_share),
other = round((food + other) * (1-food_share)))]
Y_l_new[[i]][, fd_l$area_code==41 & fd_l$fd=="food"] <- data_l$food
Y_l_new[[i]][, fd_l$area_code==41 & fd_l$fd=="other"] <- data_l$other
}

# compare old and new values
Expand All @@ -51,12 +62,6 @@ for(i in seq_along(Y)){
}

saveRDS(Y_new, "/mnt/nfs_fineprint/tmp/fabio/v1.2/Y.rds")
saveRDS(Y_l_new, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Y.rds")



# create the losses version of fabio ---

for(i in seq_along(Y_new)){
Y_new[[i]][, grepl("losses", colnames(Y_new[[i]]))] <- 0
}
saveRDS(Y_new, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Y.rds")
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# FABIO v1.2

### The Food and Agriculture Biomass Input-Output database
FABIO provides a set of multi-regional physical supply-use and input-output tables covering global agriculture and forestry. The work is based on mostly freely available data from FAOSTAT, IEA, EIA, and UN Comtrade/BACI. FABIO currently covers 191 countries + RoW, 117 processes and 123 commodities for 1986-2020.
FABIO provides a set of multi-regional physical supply-use and input-output tables covering global agriculture and forestry. The work is based on mostly freely available data from FAOSTAT, IEA, EIA, and UN Comtrade/BACI. FABIO currently covers 191 countries + RoW, 119 processes and 123 commodities for 1986-2020.

This repository provides all codes used to generate the FABIO database.

Expand Down
6 changes: 1 addition & 5 deletions inst/items_supply-shares.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@ proc_code,proc,comm_code,item_code,item,source,base_code,base
p077,Ricebran Oil extraction,c090,2598,"Oilseed Cakes, Other",supply,2581,Ricebran Oil
p078,Maize Germ Oil extraction,c090,2598,"Oilseed Cakes, Other",supply,2582,Maize Germ Oil
p079,"Oilcrops Oil extraction, Other",c090,2598,"Oilseed Cakes, Other",supply,2586,"Oilcrops Oil, Other"
p099,Dairy cattle husbandry,c111,2740,"Butter, Ghee",live,882,"Milk, whole fresh cow"
p100,Dairy buffaloes husbandry,c111,2740,"Butter, Ghee",live,951,"Milk, whole fresh buffalo"
p101,Dairy sheep husbandry,c111,2740,"Butter, Ghee",live,982,"Milk, whole fresh sheep"
p102,Dairy goats husbandry,c111,2740,"Butter, Ghee",live,1020,"Milk, whole fresh goat"
p103,Dairy camels husbandry,c111,2740,"Butter, Ghee",live,1130,"Milk, whole fresh camel"
p121,Butter production,c111,2740,"Butter, Ghee",live,2740,"Butter, Ghee"
p099,Dairy cattle husbandry,c110,2848,Milk - Excluding Butter,live,882,"Milk, whole fresh cow"
p100,Dairy buffaloes husbandry,c110,2848,Milk - Excluding Butter,live,951,"Milk, whole fresh buffalo"
p101,Dairy sheep husbandry,c110,2848,Milk - Excluding Butter,live,982,"Milk, whole fresh sheep"
Expand Down
Loading

0 comments on commit 6672ce3

Please sign in to comment.