Still a bug in the extraction data feed regarding aggregated mother regions. As a next step, testing different options with aggregation vectors.
fineprint-global · Mar 6, 2020 · a025664 · a025664
1 parent ad170c5
commit a025664
Show file tree

Hide file tree

Showing 13 changed files with 513 additions and 83 deletions.
diff --git a/ConcordanceLibrary/BACI_Sec_Concordance.csv b/ConcordanceLibrary/BACI_Sec_Concordance.csv
diff --git a/ConcordanceLibrary/EoLPauliuk_Sec_Concordance.csv b/ConcordanceLibrary/EoLPauliuk_Sec_Concordance.csv
@@ -0,0 +1,2 @@
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1
diff --git a/ConcordanceLibrary/IRPextraction_Sec_Concordance.csv b/ConcordanceLibrary/IRPextraction_Sec_Concordance.csv
@@ -0,0 +1,2 @@
+1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
diff --git a/ConcordanceLibrary/Sector Aggregators/20IndV1_SectorAggregatorIndustries.csv b/ConcordanceLibrary/Sector Aggregators/20IndV1_SectorAggregatorIndustries.csv
@@ -2,7 +2,7 @@
 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0

diff --git a/Rscripts/datafeeds_code/datafeed_PIOLab_BACI.R b/Rscripts/datafeeds_code/datafeed_PIOLab_BACI.R
@@ -27,45 +27,87 @@ data <- select(data,-value)
 
 # Set variables
 reg_max <- nrow(root$region)
-n_yea <- "1"
+n_yea <- as.character(year-2007)
 n_she <- "1"
+n_pro <- nrow(root$product)
+n_ind <- nrow(root$industry)
 
 # Create empty ALANG table with header
 source(paste0(path$Subroutines,"/makeALANGheadline.R"))
 
-reg <- list("From" = list("Name" = root$region$Name[data$From], 
-                          "Num" = as.character(data$From)),
-            "To" = list("Name" = root$region$Name[data$To],
-                        "Num" = as.character(data$To)))
-
-prod <- list("Name" = root$product$Name[data$Product],
-             "Num" = as.character(data$Product))
+# Recreate the processed-data folder: delete it if it already exists, then create it empty
+path_set <- paste0(path$root,"ProcessedData/",datafeed_name)
+if(dir.exists(path_set)) unlink(path_set,recursive = TRUE) 
+dir.create(path_set)
 
-value <- list("Quantity" = as.character(round(data$Quantity,digits = 2)),
-              "SE" = as.character(round(data$SE,digits = 2)))
+mat <- matrix(0,nrow = n_pro,ncol = 1) # Empty matrix to put numbers in
 
-# Set length of ALANG file
-ALANG_new <- as.data.frame(matrix(0,nrow = nrow(data),ncol = ncol(ALANG)))
-colnames(ALANG_new) <- colnames(ALANG)
-ALANG <- ALANG_new
-remove(ALANG_new)
+a <- 1 # Set starting value for alang line index
+for(i in unique(data$From)) # Loop over the exporting regions
+{
+  data_sel <- filter(data,From == i)  # Filter exporting region
+
+  for(j in unique(data_sel$To)) # Loop over trade partners
+  {
+    # Add empty line with tag
+    ALANG <- add_row(ALANG,'1' = paste0("DataFeed BACI from ",root$region$Name[i]," to ",root$region$Name[j]))
+
+    trader_sel <- filter(data_sel,To == j) %>% select(-From,-To) # select data for trade partners
+
+    values <- mat # Create empty column vector 
+    values[trader_sel$Product,1] <- trader_sel$Quantity  # Write values
+
+    filename_value <- paste0("BACI_",year,"_Values_",root$region$Name[i],"-",
+                       root$region$Name[j],".csv")      # Set name of the file
+
+    write.table(values,row.names = FALSE,col.names = FALSE, sep = ",",
+                file = paste0(path_set,"/",filename_value))   # Write array to folder
+
+    SE <- mat # Create empty column vector and write SE
+
+    SE[trader_sel$Product,1] <- trader_sel$SE # Write SE numbers into array
+
+    filename_SE <- paste0("BACI_",year,"_SE_",root$region$Name[i],"-",
+                       root$region$Name[j],".csv")      # Set name of the file
+
+    write.table(SE,row.names = FALSE,col.names = FALSE, sep = ",",
+                file = paste0(path_set,"/",filename_SE))   # Write array to folder
+
+    ALANG$Value[a] <- paste0("DATAPATH/",filename_value)
+    ALANG$S.E.[a] <- paste0("DATAPATH/",filename_SE)
+
+    ALANG$`Row parent`[a] <- as.character(i)
+    ALANG$`Column parent`[a] <- as.character(j)
+
+    a <- a + 1  # Increase ALANG line index
+  }
+}
 
 # Add commands
-ALANG$`1` <- paste0("DataFeed BACI product ",prod$Num," from ",reg$From$Name," to ",reg$To$Name)
-ALANG$Value <- value$Quantity
-ALANG$S.E. <- value$SE
+ALANG$`Pre-map` <- ""
+ALANG$`Post-map` <- ""
+ALANG$`Pre-Map` <- ""
+ALANG$`Post-Map` <- ""
 ALANG$Coef1 <- "1"
 ALANG$Incl <- "Y"
 ALANG$`#` <- as.character(1:nrow(ALANG))
 ALANG$Parts <- "1"
 ALANG$Years <- n_yea
 ALANG$Margin <- n_she
-
-ALANG$`Row parent` <- reg$From$Num
 ALANG$`Row child` <- "2"
-ALANG$`Row grandchild` <- prod$Num
-ALANG$`Column parent` <- reg$To$Num
+ALANG$`Row grandchild` <- "1:e"
 ALANG$`Column child` <- "1"
+
+# # Create and write sector concordance to file
+# Concord <- matrix(1,nrow = n_pro,ncol = n_ind) # block matrix required for aggregation
+# 
+# # Set name and path to concordance and write to folder
+# Concorda_name <- "BACI_Sec_Concordance"
+# Concord_path <- paste0(path$Concordance,"/",Concorda_name,".csv")
+# write.table(Concord,file = Concord_path,row.names = FALSE,col.names = FALSE,sep = ",")
+
+# Add path to concordance to ALANG commands
+#ALANG$`Column grandchild` <- paste0("1-e t2 CONCPATH/",Concorda_name,".csv")
 ALANG$`Column grandchild` <- "1-e"
 
 # Call script that writes the ALANG file to the repsective folder in the root

diff --git a/Rscripts/datafeeds_code/datafeed_PIOLab_IRPexports.R b/Rscripts/datafeeds_code/datafeed_PIOLab_IRPexports.R
@@ -1,7 +1,5 @@
 ################################################################################
-# datafeed_PIOLab_IRPexports
-#
-#
+
 datafeed_name <- "IRPexports"
 print(paste0("datafeed_PIOLab_",datafeed_name," initiated."))
 

diff --git a/Rscripts/datafeeds_code/datafeed_PIOLab_IRPextraction.R b/Rscripts/datafeeds_code/datafeed_PIOLab_IRPextraction.R
@@ -22,12 +22,17 @@ source(paste0(path$Subroutines,"/SE_LogRegression.R"))
 RSE <- filter(read.xlsx(path$RSE_settings),Item == datafeed_name)
 data <- SE_LogRegression(data,RSE$Minimum,RSE$Maximum)
 
-# Create identity matrix to be used as concordance when doing t command 
-Concord <- diag(length(root$region$Code))
-
 # Create empty ALANG table with header
 source(paste0(path$Subroutines,"/makeALANGheadline.R"))
-# Extend table with additional columns
+
+# Recreate the processed-data folder: delete it if it already exists, then create it empty
+path_set <- paste0(path$root,"ProcessedData/",datafeed_name)
+if(dir.exists(path_set)) unlink(path_set,recursive = TRUE) 
+dir.create(path_set)
+
+
+################################################################################
+# Add lines for regions with non-zero data
 
 for(i in 1:nrow(data))
 { 
@@ -36,48 +41,74 @@ for(i in 1:nrow(data))
   reg_name <- as.character(root$region$Name[reg_num])
   reg_num <- as.character(reg_num)
 
+  # Write data to processed folder
+  # First, value RHS
+  export_value <- matrix(c(data$Quantity[i],0),nrow = 2,ncol = 1)
+  filename_value <- paste0(datafeed_name,"_Value_",year,"_",reg_name,".csv")
+  write.table(export_value,row.names = FALSE,col.names = FALSE,sep = ",",
+              file = paste0(path_set,"/",filename_value))  
+
+  # Second, standard errors
+  export_SE <- matrix(c(data$SE[i],0),nrow = 2,ncol = 1)
+  filename_SE <- paste0(datafeed_name,"_SE_",year,"_",reg_name,".csv")
+  write.table(export_SE,row.names = FALSE,col.names = FALSE,sep = ",",
+              file = paste0(path_set,"/",filename_SE))  
+
   # Read extraction value
-  value <- paste0("[",data$Quantity[i],";","NaN","]")
+  value <- paste0("DATAPATH/",datafeed_name,"/",filename_value)
   # Set SE
-  SE <-  paste0("[",as.character(data$SE[i]),";","NaN","]") 
+  SE <-  paste0("DATAPATH/",datafeed_name,"/",filename_SE) 
 
   # Add command for domestic Use table
   ALANG <- add_row(ALANG,'1' = paste0("DataFeed IRP Extraction ",reg_name),
                    Value = value,'Row parent' = reg_num,'Column parent' = reg_num,S.E. = SE)
 }
 
-# Add NaN to all other regions
-for(i in setdiff(root$region$Code,data$Code))
+################################################################################
+# Add zero for all other regions
+
+AllOtherRegions <- setdiff(root$region$Code,data$Code)
+
+export <- matrix(0,nrow = 2,ncol = 1) # set values
+filename <- paste0(datafeed_name,"_Value&SE_",year,"_AllOtherRegions.csv")
+SE <- value <- paste0("DATAPATH/",datafeed_name,"/",filename)
+
+# Write values/SE to file
+write.table(export,row.names = FALSE,col.names = FALSE,sep = ",",
+            file = paste0(path_set,"/",filename))
+
+for(i in AllOtherRegions)
 { 
   # Get root_code of region 
-  reg_num <- root$region$Code[i]
+  reg_num <- i
   reg_name <- as.character(root$region$Name[reg_num])
   reg_num <- as.character(reg_num)
 
-  # Read extraction value
-  value <- SE <- paste0("[NaN;NaN]")
-
   # Add command for domestic Use table
   ALANG <- add_row(ALANG,'1' = paste0("DataFeed IRP Extraction ",reg_name),
                    Value = value,'Row parent' = reg_num,'Column parent' = reg_num,S.E. = SE)
 }
 
-# Add other variables
+################################################################################
 
 # Create industry concordance 
 max_ind <- length(root$industry$Code)
 Concord <-  matrix(0,nrow = 2, ncol = max_ind)
 Concord[1,1:4] <- 1
 Concord[2,5:max_ind] <- 1
 
-# Write to folder
-Concord_path <- paste0(path$Concordance,"/IRPextraction_Sec_Concordance.csv")
+# Set name and path to concordance and write to folder
+Concorda_name <- "IRPextraction_Sec_Concordance"
+Concord_path <- paste0(path$Concordance,"/",Concorda_name,".csv")
 write.table(Concord,file = Concord_path,row.names = FALSE,col.names = FALSE,sep = ",")
 
+# Add path to concordance to ALANG commands
+ALANG$`Column grandchild` <- paste0("1:e a CONCPATH/",Concorda_name,".csv")
+
+# Add other variables for all ALANG rows (regions with data and all other regions)
 ALANG$`Row child` <- "3"
 ALANG$`Row grandchild` <- "1"
 ALANG$`Column child` <- "1"
-ALANG$`Column grandchild` <- "1:e t1 CONCPATH/IRPextraction_Sec_Concordance.csv"
 ALANG$`#` <- as.character(1:nrow(ALANG))
 ALANG$Incl <- "Y"
 ALANG$Parts <- "1"

diff --git a/Rscripts/datafeeds_code/datafeed_PIOLab_IRPextractionHARD.R b/Rscripts/datafeeds_code/datafeed_PIOLab_IRPextractionHARD.R
@@ -17,33 +17,91 @@ source(paste0(root_folder,"Rscripts/Subroutines/InitializationR.R"))
 # Loading raw data
 source(paste0(path$Subroutines,"/Read_ExtractionIRP.R"))
 
-# Loading function for estimating SE with linear regression
-source(paste0(path$Subroutines,"/SE_LogRegression.R"))
-
 # Create empty ALANG table with header
 source(paste0(path$Subroutines,"/makeALANGheadline.R"))
 # Extend table with additional columns
 
+# Recreate the processed-data folder: delete it if it already exists, then create it empty
+path_set <- paste0(path$root,"ProcessedData/",datafeed_name)
+if(dir.exists(path_set)) unlink(path_set,recursive = TRUE) 
+dir.create(path_set)
+
+################################################################################
+# Add lines for regions with known data
+
+# First, set and write standard errors to file
+export_SE <- matrix(c(0,0),nrow = 2,ncol = 1)
+filename_SE <- paste0(datafeed_name,"_SE_",year,".csv")
+write.table(export_SE,row.names = FALSE,col.names = FALSE,sep = ",",
+            file = paste0(path_set,"/",filename_SE))  
+# Set SE path
+SE <-  paste0("DATAPATH/",datafeed_name,"/",filename_SE) 
+
 for(i in 1:nrow(data))
 { 
-  # Get root_code of region 
   reg_num <- data$Code[i]
   reg_name <- as.character(root$region$Name[reg_num])
   reg_num <- as.character(reg_num)
+
+  # Write value to processed folder
+  export_value <- matrix(c(data$Quantity[i],0),nrow = 2,ncol = 1)
+  filename_value <- paste0(datafeed_name,"_Value_",year,"_",reg_name,".csv")
+  write.table(export_value,row.names = FALSE,col.names = FALSE,sep = ",",
+              file = paste0(path_set,"/",filename_value))  
+
   # Read extraction value
-  value <- as.character(data$Quantity[i])
+  value <- paste0("DATAPATH/",datafeed_name,"/",filename_value)
 
   # Add command for domestic Use table
   ALANG <- add_row(ALANG,'1' = paste0("DataFeed IRP Extraction ",reg_name),
-                   Value = value,'Row parent' = reg_num,'Column parent' = reg_num)
+                   Value = value,'Row parent' = reg_num,'Column parent' = reg_num,S.E. = SE)
 }
-# Add other variables
 
-ALANG$S.E. <- "0"
-ALANG$`Column child` <- "1"
-ALANG$`Column grandchild` <- "1-5"
+################################################################################
+# Add NaN for all other regions
+
+AllOtherRegions <- setdiff(root$region$Code,data$Code)
+
+export <- matrix("NaN",nrow = 2,ncol = 1) # set values
+filename <- paste0(datafeed_name,"_Value&SE_",year,"_AllOtherRegions.csv")
+SE <- value <- paste0("DATAPATH/",datafeed_name,"/",filename)
+
+# Write values/SE to file
+write.table(export,row.names = FALSE,col.names = FALSE,sep = ",",
+            file = paste0(path_set,"/",filename))
+
+for(i in AllOtherRegions)
+{ 
+  # Get root_code of region 
+  reg_num <- i
+  reg_name <- as.character(root$region$Name[reg_num])
+  reg_num <- as.character(reg_num)
+
+  # Add command for domestic Use table
+  ALANG <- add_row(ALANG,'1' = paste0("DataFeed IRP Extraction ",reg_name),
+                   Value = value,'Row parent' = reg_num,'Column parent' = reg_num,S.E. = SE)
+}
+
+################################################################################
+
+# Create industry concordance 
+max_ind <- length(root$industry$Code)
+Concord <-  matrix(0,nrow = 2, ncol = max_ind)
+Concord[1,1:4] <- 1
+Concord[2,5:max_ind] <- 1
+
+# Set name and path to concordance and write to folder
+Concorda_name <- "IRPextraction_Sec_Concordance"
+Concord_path <- paste0(path$Concordance,"/",Concorda_name,".csv")
+write.table(Concord,file = Concord_path,row.names = FALSE,col.names = FALSE,sep = ",")
+
+# Add path to concordance to ALANG commands
+ALANG$`Column grandchild` <- paste0("1:e t2 CONCPATH/",Concorda_name,".csv")
+
+# Add other variables for all ALANG rows (regions with data and all other regions)
 ALANG$`Row child` <- "3"
 ALANG$`Row grandchild` <- "1"
+ALANG$`Column child` <- "1"
 ALANG$`#` <- as.character(1:nrow(ALANG))
 ALANG$Incl <- "Y"
 ALANG$Parts <- "1"
@@ -54,7 +112,7 @@ ALANG$`Post-Map` <- ""
 ALANG$Years <- "1"
 ALANG$Margin <- "1"
 ALANG$Coef1 <- "1"
-  
+
 # Call script that writes the ALANG file to the repsective folder in the root
 source(paste0(path$root,"Rscripts/datafeeds_code/datafeed_subroutines/WriteALANG2Folder.R"))
 

diff --git a/Rscripts/datafeeds_code/datafeed_PIOLab_IRPimports.R b/Rscripts/datafeeds_code/datafeed_PIOLab_IRPimports.R
@@ -82,7 +82,7 @@ for(i in 1:nrow(data))
 
 ALANG$`Row child` <- "2"
 ALANG$`Row grandchild` <- "1-e"
-ALANG$`Column child` <- "1"
+ALANG$`Column child` <- "[1,3]"
 ALANG$`Column grandchild` <- "1-e"
 ALANG$`#` <- as.character(1:nrow(ALANG))
 ALANG$Incl <- "Y"

diff --git a/Rscripts/datafeeds_code/datafeed_PIOLab_KrausmannTotalsEoL.R b/Rscripts/datafeeds_code/datafeed_PIOLab_KrausmannTotalsEoL.R
@@ -41,7 +41,7 @@ ALANG$`Row grandchild` <- "2"
 
 ALANG$`Column parent` <- "1-e"
 ALANG$`Column child` <- "1"
-ALANG$`Column grandchild` <- "64-65"
+ALANG$`Column grandchild` <- "1-e"
 
 ALANG$`#` <- as.character(1:nrow(ALANG))
 ALANG$Incl <- "Y"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0
		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
		0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1