bug fixes

fineprint-global · Jun 27, 2024 · 6672ce3 · 6672ce3
1 parent 325b76b
commit 6672ce3
Show file tree

Hide file tree

Showing 11 changed files with 181 additions and 180 deletions.
diff --git a/R/01_tidy_fao.R b/R/01_tidy_fao.R
@@ -87,8 +87,8 @@ cbs_food_new[Unit == "1000 tonnes", `:=`(Value = ifelse(is.na(Value), 0, Value*1
 # NOTE: this is inconsistently defined (sometimes correct, sometimes wrong), so it is corrected further below in the balancing section
 
 # nonfood: remove items contained in food balances
-cbs_nonfood_old <- readRDS("input/fao/cbs_nonfood_old.rds")[Year <= 2013,]
-cbs_nonfood_new <- readRDS("input/fao/cbs_nonfood_new.rds")[Year > 2013,]
+cbs_nonfood_old <- readRDS("input/fao/cbs_nonfood_old.rds")[Year < 2010,]
+cbs_nonfood_new <- readRDS("input/fao/cbs_nonfood_new.rds")[Year >= 2010,]
 cbs_nonfood <- rbind(cbs_nonfood_old, cbs_nonfood_new[, 1:(ncol(cbs_nonfood_new)-1)])
 cbs_nonfood[Element == "Food supply quantity (tonnes)", Element := "Food"]
 cbs_nonfood <- merge(cbs_nonfood, cbs_food_old[, .SD, .SDcols = c("Area Code", "Item Code", "Element", "Year Code", "Value")],

diff --git a/R/03_build_cbs.R b/R/03_build_cbs.R
@@ -622,10 +622,8 @@ cbs <- dt_replace(cbs, function(x) {`<`(x, 0)}, value = 0,
 cbs[, balancing := na_sum(production, imports, stock_withdrawal,
         -exports, -food, -feed, -seed, -losses, -processing, -other, -unspecified, -tourist, -residuals)]
 
-# TODO: For now, I move negative residuals to balancing and then neutralize negative balancing via other elements
-# in the future, we might consider to keep them as completely separate categories, or merge them completely
-# in any case, the sum of balancing and residuals should never be <0 in the end
-# as long as we do not introduce an "unknown source" region, we need to adapt the other cbs items accordingly
+# Note: the sum of balancing and residuals should never be <0 in the end
+# as long as we do not introduce an "unknown source" region, we need to adapt the other cbs use elements accordingly
 
 # # neutralize negative balancing via other items
 # cat("\nAdjust 'residuals' for ", cbs[balancing < 0 &
@@ -696,7 +694,6 @@ cbs[na_sum(balancing, residuals) < 0 & !is.na(stock_addition) & stock_addition >
 
 cat("\nAdjust uses proportionally for ", cbs[na_sum(balancing, residuals) < 0, .N],
     " observations, where `na_sum(balancing, residuals) < 0`", sep = "")
-# TODO: include residuals here?
 cbs[, divisor := na_sum(exports, other, processing, seed, food, feed, stock_addition, tourist)]
 cbs[na_sum(balancing, residuals) < 0 & divisor >= -na_sum(balancing, residuals),
     `:=`(stock_addition = round(na_sum(stock_addition, (na_sum(balancing, residuals) / divisor * stock_addition))),

diff --git a/R/06_re-exports.R b/R/06_re-exports.R
@@ -14,7 +14,7 @@ btd <- readRDS("data/btd_bal.rds")
 cbs <- readRDS("data/cbs_full.rds")
 
 areas <- sort(unique(cbs$area_code))
-items <- sort(unique(btd$item_code))
+items <- fread("inst/items_full.csv")$item_code
 
 
 # Prepare reallocation of re-exports --------------------------------------

diff --git a/R/08_use.R b/R/08_use.R
@@ -5,7 +5,7 @@ library("parallel")
 source("R/01_tidy_functions.R")
 
 # should the feedstock optimization be run or should previously stored results be used?
-run_optim <- TRUE
+run_optim <- FALSE
 regions <- fread("inst/regions_full.csv")
 items <- fread("inst/items_full.csv")
 
@@ -32,6 +32,18 @@ use[, use := NA_real_]
 # correct comm_codes
 use$comm_code <- items$comm_code[match(use$item_code, items$item_code)]
 
+# correct milk processing
+## butter production has a TCF of 3.5-6%, i.e. 100 t of milk are processed into 3.5-6 t of butter
+## expressed the other way around, 1 t of butter needs between 16.67 and 28.57 t of milk
+## if more than 28.57 t of milk are reported under 'processing' per tonne of butter produced,
+## the excess quantity is move from processing to food
+butter <- cbs[item=="Butter, Ghee", .(area_code,area,item_code,item,year,production)]
+butter <- merge(butter, cbs[item=="Milk - Excluding Butter", .(area_code,area,year,processing)])
+butter[, `:=`(max = production / 0.035,
+              min = production / 0.06)]
+cbs <- merge(cbs, butter[, .(area_code, area, year, item = "Milk - Excluding Butter", max)], all.x = TRUE)
+cbs[!is.na(max) & max < processing, `:=`(food = food + processing - max, processing = max)]
+
 # 100% processes
 cat("Allocating crops going directly to a process. Applies to items:\n\t",
   paste0(unique(use[type == "100%", item]), collapse = "; "),

diff --git a/R/09_mrsut.R b/R/09_mrsut.R
@@ -17,9 +17,9 @@ use <- readRDS("data/use_final.rds")
 use_fd <- readRDS("data/use_fd_final.rds")
 
 years <- seq(1986, 2021)
-areas <- unique(cbs$area_code)
-processes <- unique(use$proc_code)
-commodities <- unique(use$comm_code)
+areas <- sort(unique(cbs$area_code))
+processes <- sort(unique(use$proc_code))
+commodities <- sort(unique(use$comm_code))
 
 
 # Supply ---

diff --git a/R/10_mrio.R b/R/10_mrio.R
@@ -4,6 +4,9 @@ library(parallel)
 library(data.table)
 library(readr)
 
+agg <- function(x) { as.matrix(x) %*% sapply(unique(colnames(x)),"==",colnames(x)) }
+
+
 # MRIO Table ---
 
 mr_sup_m <- readRDS("/mnt/nfs_fineprint/tmp/fabio/v1.2/mr_sup_mass.rds")
@@ -60,7 +63,7 @@ for(i in seq_along(Z_m)){
 
   for(j in which(X < 0)){
     reg <- j %/% nrcom + 1
-    print(paste0(regions[reg, name], " / ", X[j]))
+    # print(paste0(regions[reg, name], " / ", X[j]))
     Y[[i]][j, paste0(regions[reg, code], "_balancing")] <-
       Y[[i]][j, paste0(regions[reg, code], "_balancing")] - X[j]
   }
@@ -95,6 +98,50 @@ X <- mapply(function(x, y) {
 }, x = Z_m, y = Y)
 
 
+
+
+# PROBLEM: There are some products with only zeros in the rows, except of the main diagonal
+# i.e. the value on the main diagonal equals total output
+# this is mainly due to reporting issues in FAOSTAT, where some countries report seed = production
+# SOLUTION: We move 80% of the value to final demand, equally spreading over all fd-categories
+io_codes <- read_csv("/mnt/nfs_fineprint/tmp/fabio/v1.2/io_codes.csv")
+
+years <- seq(1986, 2021)
+
+# year <- 2019
+for(year in years){
+
+  print(year)
+
+  Zmi <- Z_m[[as.character(year)]]
+  Zvi <- Z_v[[as.character(year)]]
+  Yi <- Y[[as.character(year)]]
+  Xi <- X[,as.character(year)]
+
+  colnames(Yi) <- fd_codes$fd
+  Y_global <- t(agg(t(agg(Yi))))
+
+  for(i in 1:nrow(io_codes)){
+    if(Xi[i]!=0 & Zmi[i,i] >= Xi[i]) { 
+      # print(paste0(io_codes[i,]))
+      # print(as.numeric(mean(Zmi[i,i], Zvi[i,i])))
+      temp <- Yi[i, fd_codes$area_code==io_codes$area_code[i]]
+      if(sum(temp)==0){ temp <- Y_global[rownames(Y_global)==io_codes$comm_code[i],] }
+      Yi[i, fd_codes$area_code==io_codes$area_code[i]] <- temp + mean(Zmi[i,i], Zvi[i,i]) * 0.8 / sum(temp) * temp
+      Zmi[i,i] <- Zvi[i,i] <- mean(Zmi[i,i], Zvi[i,i]) * 0.2
+      Xi[i] <- sum(Zmi[i,]) + sum(Yi[i,])
+    }
+  }
+
+  Z_m[[as.character(year)]] <- Zmi
+  Z_v[[as.character(year)]] <- Zvi
+  Y[[as.character(year)]] <- Yi
+  X[,as.character(year)] <- Xi
+
+}
+
+
+
 # Store X, Y, Z variables
 saveRDS(Z_m, "/mnt/nfs_fineprint/tmp/fabio/v1.2/Z_mass.rds")
 saveRDS(Z_v, "/mnt/nfs_fineprint/tmp/fabio/v1.2/Z_value.rds")
@@ -147,55 +194,103 @@ for(year in years){
   # remove losses from Y
   Yi <- Y[[as.character(year)]]
   losses <- as.matrix(Yi[, grepl("losses", colnames(Yi))])
-  Yi[, grepl("losses", colnames(Yi))] <- 0
+  Yi <- Yi[, !grepl("losses", colnames(Yi))]
+
+  # remove balancing from Y
+  balancing <- as.matrix(Yi[, grepl("balancing", colnames(Yi))])
+  Yi <- Yi[, !grepl("balancing", colnames(Yi))]
+
   Y[[as.character(year)]] <- Yi
 
-  # remove this and instead subtract losses from output X:
-  # # reshape losses for adding them later to the main diagonals of each submatrix of Z
-  # ## Get the number of rows and columns in the data matrix
-  # num_rows <- nrow(losses)
-  # num_cols <- nrow(losses) / ncol(losses)
-  # 
-  # ## Define a function for reshaping
-  # reshape_column <- function(v) {
-  #   m <- matrix(0, ncol = num_cols, nrow = num_rows)
-  #   indices <- ((seq_len(length(v)) - 1) %% num_cols) + 1
-  #   m[cbind(seq_len(length(v)), indices)] <- v
-  #   return(m)
-  # }
-  # 
-  # ## Apply the reshape_column function to each column using lapply
-  # matrix_list <- lapply(1:ncol(losses), function(i) {
-  #   v <- losses[, i]
-  #   reshape_column(v)
-  # })
-  # 
-  # ## Combine the matrices in the list using cbind()
-  # combined_matrix <- do.call(cbind, matrix_list)
-  # combined_matrix <- as(combined_matrix, "dgCMatrix")
-  # 
-  # 
-  # # add losses to the main diagonals of each submatrix of Z_m
-  # Zi <- Z_m[[as.character(year)]]
-  # Zi <- Zi + combined_matrix
-  # Z_m[[as.character(year)]] <- Zi
-  # 
-  # # add losses to the main diagonals of each submatrix of Z_v
-  # Zi <- Z_v[[as.character(year)]]
-  # Zi <- Zi + combined_matrix
-  # Z_v[[as.character(year)]] <- Zi
-
-  X[,as.character(year)] <- X[,as.character(year)] - rowSums(losses)
+  # reshape losses + balancing for adding them later to the main diagonals of each submatrix of Z
+  ## Get the number of rows and columns in the data matrix
+  num_rows <- nrow(losses)
+  num_cols <- nrow(losses) / ncol(losses)
+
+  ## Define a function for reshaping
+  reshape_column <- function(v) {
+    m <- matrix(0, ncol = num_cols, nrow = num_rows)
+    indices <- ((seq_len(length(v)) - 1) %% num_cols) + 1
+    m[cbind(seq_len(length(v)), indices)] <- v
+    return(m)
+  }
+
+  ## Apply the reshape_column function to each column using lapply
+  matrix_list <- lapply(1:ncol(losses), function(i) {
+    v <- losses[, i] + balancing[, i]
+    reshape_column(v)
+  })
+
+  ## Combine the matrices in the list using cbind()
+  combined_matrix <- do.call(cbind, matrix_list)
+  combined_matrix <- as(combined_matrix, "dgCMatrix")
+
+
+  # add losses to the main diagonals of each submatrix of Z_m
+  Zi <- Z_m[[as.character(year)]]
+  Zi <- Zi + combined_matrix
+  Z_m[[as.character(year)]] <- Zi
+
+  # add losses to the main diagonals of each submatrix of Z_v
+  Zi <- Z_v[[as.character(year)]]
+  Zi <- Zi + combined_matrix
+  Z_v[[as.character(year)]] <- Zi
+
+  # X[,as.character(year)] <- X[,as.character(year)] - rowSums(losses) - rowSums(balancing)
 
 }
 
-saveRDS(X, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/X.rds")
-saveRDS(Y, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Y.rds")
-# saveRDS(Z_m, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Z_mass.rds")
-# saveRDS(Z_v, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Z_value.rds")
+
+fd_codes <- fread("/mnt/nfs_fineprint/tmp/fabio/v1.2/fd_codes.csv")
+fd_codes <- fd_codes[!fd %in% c("losses", "balancing")]
+fwrite(fd_codes, file="/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/fd_codes.csv")
+
+
+
+# PROBLEM: There are some products with only zeros in the rows, except of the main diagonal
+# i.e. the value on the main diagonal equals total output
+# this is mainly due to reporting issues in FAOSTAT, where some countries report seed = production
+# SOLUTION: We move 80% of the value to final demand, equally spreading over all fd-categories
+
+# year <- 2019
+for(year in years){
+
+  print(year)
+
+  Zmi <- Z_m[[as.character(year)]]
+  Zvi <- Z_v[[as.character(year)]]
+  Yi <- Y[[as.character(year)]]
+  Xi <- X[,as.character(year)]
+
+  colnames(Yi) <- fd_codes$fd
+  Y_global <- t(agg(t(agg(Yi))))
+
+  for(i in 1:nrow(io_codes)){
+    if(Xi[i]!=0 & Zmi[i,i] >= Xi[i]) { 
+      # print(paste0(io_codes[i,]))
+      # print(as.numeric(mean(Zmi[i,i], Zvi[i,i])))
+      temp <- Yi[i, fd_codes$area_code==io_codes$area_code[i]]
+      if(sum(temp)==0){ temp <- Y_global[rownames(Y_global)==io_codes$comm_code[i],] }
+      Yi[i, fd_codes$area_code==io_codes$area_code[i]] <- temp + mean(Zmi[i,i], Zvi[i,i]) * 0.8 / sum(temp) * temp
+      Zmi[i,i] <- Zvi[i,i] <- mean(Zmi[i,i], Zvi[i,i]) * 0.2
+      Xi[i] <- sum(Zmi[i,]) + sum(Yi[i,])
+    }
+  }
+
+  Z_m[[as.character(year)]] <- Zmi
+  Z_v[[as.character(year)]] <- Zvi
+  Y[[as.character(year)]] <- Yi
+  X[,as.character(year)] <- Xi
+
+}
 
 
 
+saveRDS(X, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/X.rds")
+saveRDS(Y, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Y.rds")
+saveRDS(Z_m, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Z_mass.rds")
+saveRDS(Z_v, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Z_value.rds")
+
 
 
 # allocate ghg emissions to products --------------------------------------------------------------
@@ -211,7 +306,8 @@ write_csv(data.frame(ghg_names), "/mnt/nfs_fineprint/tmp/fabio/v1.2/ghg_names.cs
 write_csv(data.frame(gwp_names), "/mnt/nfs_fineprint/tmp/fabio/v1.2/gwp_names.csv")
 write_csv(data.frame(luh_names), "/mnt/nfs_fineprint/tmp/fabio/v1.2/luh_names.csv")
 
-range <- rep(c(1:97,99:116,118:120),192)+rep(((0:191)*121), each=118)
+# range <- rep(c(1:97,99:116,118:120),192)+rep(((0:191)*121), each=118)
+range <- rep(c(1:97,99:116,118:121),192)+rep(((0:191)*121), each=119) # adding butter production
 
 ghg_m <- mapply(function(x, y) { as.matrix(x[,-1][,range]) %*% y }, x = ghg, y = trans_m[1:28])
 gwp_m <- mapply(function(x, y) { as.matrix(x[,-1][,range]) %*% y }, x = gwp, y = trans_m[1:28])

diff --git a/R/11_leontief_inverse.R b/R/11_leontief_inverse.R
@@ -34,8 +34,8 @@ prep_solve <- function(year, Z, X,
 
 
 years <- seq(1986, 2021)
-years_singular <- c(1990, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2006, 2007, 2011) # 2013 #c(1994,2002,2009)
-years_singular_losses <- c(1989, 1990, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2006, 2007, 2010, 2011, 2013, 2018, 2019) #  c(2013,2019) #c(1990,2010,2019) #c(1994,2002,2009)
+years_singular <- 0 #c(1990, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2006, 2007, 2011) # 2013 #c(1994,2002,2009)
+years_singular_losses <- 0 #c(1989, 1990, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2006, 2007, 2010, 2011, 2013, 2018, 2019) #  c(2013,2019) #c(1990,2010,2019) #c(1994,2002,2009)
 
 Z_m <- readRDS("/mnt/nfs_fineprint/tmp/fabio/v1.2/Z_mass.rds")
 Z_v <- readRDS("/mnt/nfs_fineprint/tmp/fabio/v1.2/Z_value.rds")

diff --git a/R/13_ex_post_corrections.R b/R/13_ex_post_corrections.R
@@ -9,13 +9,18 @@ io <- fread("/mnt/nfs_fineprint/tmp/fabio/v1.2/io_codes.csv")
 su <- fread("/mnt/nfs_fineprint/tmp/fabio/v1.2/su_codes.csv")
 fd <- fread("/mnt/nfs_fineprint/tmp/fabio/v1.2/fd_codes.csv")
 Y <- readRDS("/mnt/nfs_fineprint/tmp/fabio/v1.2/Y.rds")
+fd_l <- fread("/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/fd_codes.csv")
+Y_l <- readRDS("/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Y.rds")
 
 # Chinese edible oil statistics
-# Source: China National Grain & Oils Information Center, Comprehensive balance analysis of China's edible oil market, http://www.grainoil.com.cn/, accessed on 01/03/2023
-oil <- fread("inst/oils_china.csv")
+# Sources: 
+# - China National Grain & Oils Information Center, Comprehensive balance analysis of China's edible oil market, http://www.grainoil.com.cn/, accessed on 01/03/2023
+# - USDA, Oilseeds and Products Update, Attaché Report (GAIN), https://fas.usda.gov/data/search?keyword=Oilseeds%20and%20Products%20Annual&reports%5B0%5D=report_commodities%3A27&reports%5B1%5D=report_commodities%3A28&reports%5B2%5D=report_regions%3A420&reports%5B3%5D=report_type%3A10251&page=0
+oil <- fread("input/oils_china.csv")
 
 years <- names(Y)
 Y_new <- Y
+Y_l_new <- Y_l
 
 # correct food and other use of veg. oils for China
 i = 1
@@ -31,11 +36,17 @@ for(i in seq_along(Y)){
 
   data <- cbind(data, as.matrix(Y[[i]][,fd$area=="China, mainland"]))
   # data[, food_share_fao := `41_food` / (`41_food` + `41_other`)]
-  data[, `:=`(food = `41_food`, other = `41_other`)]
-  data[!is.na(food_share), `:=`(food = round((`41_food` + `41_other`) * food_share),
-              other = round((`41_food` + `41_other`) * (1-food_share)))]
+  # data[, `:=`(food = `41_food`, other = `41_other`)]
+  data[!is.na(food_share), `:=`(food = round((food + other) * food_share),
+              other = round((food + other) * (1-food_share)))]
   Y_new[[i]][, fd$area_code==41 & fd$fd=="food"] <- data$food
   Y_new[[i]][, fd$area_code==41 & fd$fd=="other"] <- data$other
+
+  data_l <- cbind(data, as.matrix(Y_l[[i]][,fd_l$area=="China, mainland"]))
+  data_l[!is.na(food_share), `:=`(food = round((food + other) * food_share),
+                                other = round((food + other) * (1-food_share)))]
+  Y_l_new[[i]][, fd_l$area_code==41 & fd_l$fd=="food"] <- data_l$food
+  Y_l_new[[i]][, fd_l$area_code==41 & fd_l$fd=="other"] <- data_l$other
 }
 
 # compare old and new values
@@ -51,12 +62,6 @@ for(i in seq_along(Y)){
 }
 
 saveRDS(Y_new, "/mnt/nfs_fineprint/tmp/fabio/v1.2/Y.rds")
+saveRDS(Y_l_new, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Y.rds")
 
 
-
-# create the losses version of fabio ---
-
-for(i in seq_along(Y_new)){
-  Y_new[[i]][, grepl("losses", colnames(Y_new[[i]]))] <- 0
-}
-saveRDS(Y_new, "/mnt/nfs_fineprint/tmp/fabio/v1.2/losses/Y.rds")
diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 # FABIO v1.2
 
 ### The Food and Agriculture Biomass Input-Output database
-FABIO provides a set of multi-regional physical supply-use and input-output tables covering global agriculture and forestry. The work is based on mostly freely available data from FAOSTAT, IEA, EIA, and UN Comtrade/BACI. FABIO currently covers 191 countries + RoW, 117 processes and 123 commodities for 1986-2020.
+FABIO provides a set of multi-regional physical supply-use and input-output tables covering global agriculture and forestry. The work is based on mostly freely available data from FAOSTAT, IEA, EIA, and UN Comtrade/BACI. FABIO currently covers 191 countries + RoW, 119 processes and 123 commodities for 1986-2020.
 
 This repository provides all codes used to generate the FABIO database.
 

diff --git a/inst/items_supply-shares.csv b/inst/items_supply-shares.csv
@@ -2,11 +2,7 @@ proc_code,proc,comm_code,item_code,item,source,base_code,base
 p077,Ricebran Oil extraction,c090,2598,"Oilseed Cakes, Other",supply,2581,Ricebran Oil
 p078,Maize Germ Oil extraction,c090,2598,"Oilseed Cakes, Other",supply,2582,Maize Germ Oil
 p079,"Oilcrops Oil extraction, Other",c090,2598,"Oilseed Cakes, Other",supply,2586,"Oilcrops Oil, Other"
-p099,Dairy cattle husbandry,c111,2740,"Butter, Ghee",live,882,"Milk, whole fresh cow"
-p100,Dairy buffaloes husbandry,c111,2740,"Butter, Ghee",live,951,"Milk, whole fresh buffalo"
-p101,Dairy sheep husbandry,c111,2740,"Butter, Ghee",live,982,"Milk, whole fresh sheep"
-p102,Dairy goats husbandry,c111,2740,"Butter, Ghee",live,1020,"Milk, whole fresh goat"
-p103,Dairy camels husbandry,c111,2740,"Butter, Ghee",live,1130,"Milk, whole fresh camel"
+p121,Butter production,c111,2740,"Butter, Ghee",live,2740,"Butter, Ghee"
 p099,Dairy cattle husbandry,c110,2848,Milk - Excluding Butter,live,882,"Milk, whole fresh cow"
 p100,Dairy buffaloes husbandry,c110,2848,Milk - Excluding Butter,live,951,"Milk, whole fresh buffalo"
 p101,Dairy sheep husbandry,c110,2848,Milk - Excluding Butter,live,982,"Milk, whole fresh sheep"