From 80aa7be1c2d3952f6caf18d878dc600d445c6069 Mon Sep 17 00:00:00 2001
From: DavZim <david-zimmermann@hotmail.de>
Date: Fri, 6 Dec 2024 13:40:43 +0000
Subject: [PATCH 1/3] fix sharedString deletion

---
 R/wrappers.R                   |  65 +++++++++++-------
 tests/testthat/test-wrappers.R | 117 +++++++++++++++++++++++++++++++--
 2 files changed, 156 insertions(+), 26 deletions(-)

diff --git a/R/wrappers.R b/R/wrappers.R
index e2d323b9..2d2aa4b4 100644
--- a/R/wrappers.R
+++ b/R/wrappers.R
@@ -1900,45 +1900,66 @@ deleteDataColumn <- function(wb, sheet, col) {
   }
 
   a <- wb$worksheets[[sheet]]$sheet_data
+  
+  # t: if a shared string is used or if the string is a value itself
+  # v: the shared string index or the string itself (if t == 0)
+  # in wb$sharedStrings we find the values of the shared strings by index + 1 (0 indexed!)
+  
 
   # check which elements to delete
   keep <- a$cols != col
   # if there is no column to delete, exit early
   if (all(keep)) return(invisible(0))
 
-  # delete cols in cols "col" move higher cols one down
+  # delete cols in cols "col", move higher cols one down
   a$cols <- as.integer(a$cols[keep] - 1 * (a$cols[keep] > col))
   a$rows <- a$rows[keep]
 
   # reduce the shared strings pointers if they are not used anymore
-  has_t <- !is.na(a$t) & a$t == 1
-  used_shared <- a$v[has_t] # a reference to all shared strings
-  keep_t <- keep[has_t] # these shared strings are kept
-  keep_t[is.na(keep_t)] <- FALSE
-  keep_shared <- used_shared[keep_t]
-  rem_shared <- setdiff(unique(used_shared[!keep_t]), unique(keep_shared))
-  for (v in rem_shared) {
-    to_reduce <- as.numeric(keep_shared) > as.numeric(v)
-    to_reduce[is.na(to_reduce)] <- FALSE
-    if (any(to_reduce))
-      keep_shared[to_reduce] <- as.character(as.numeric(keep_shared[to_reduce]) - 1)
-  }
-  used_shared[keep_t] <- keep_shared
-  a$v[has_t] <- used_shared
-
+  ss <- data.frame(
+    # the old index 0 indexed, as used in a$v
+    old = as.numeric(seq(length(wb$sharedStrings)) - 1),
+    # will hold the new index 0 indexed, as used in a$v
+    new = NA,
+    # the actual strings
+    string = wb$sharedStrings
+  )
+  
+  # 1. remove the values from sheet_data (a)
   a$v <- a$v[keep]
   a$t <- a$t[keep]
-
+  
+  # update the shared strings map (ss) with the new indices
+  
+  # v_this_sheet etc are the indices that are still used
+  v_this_sheet <- as.numeric(a$v[!is.na(a$t) & a$t == 1])
+  # get all string indices from other sheets, so that strings used in other sheets are not deleted!
+  v_other_sheets <- unlist(lapply(setdiff(seq_along(wb$worksheets), sheet), function(sh) {
+    a <- wb$worksheets[[sh]]$sheet_data
+    as.numeric(a$v[!is.na(a$t) & a$t == 1])
+  }))
+  
+  idx <- sort(unique(c(v_this_sheet, v_other_sheets)))
+  ss$new[ss$old %in% idx] <- seq_along(idx) - 1
+  
+  # 2. remove the values from the sharedStrings object
+  wb$sharedStrings <- wb$sharedStrings[idx + 1]
+  attr(wb$sharedStrings, "uniqueCount") <- length(idx)
+  
+  # 3. reindex the values from the sheet_data to use new shared strings indices
+  a$v[a$t == 1] <- as.character(ss$new[as.numeric(a$v[a$t == 1]) + 1])
+  
+  # update the shared strings for all other sheets
+  for (s in setdiff(seq_along(wb$worksheets), sheet)) {
+    a <- wb$worksheets[[s]]$sheet_data
+    a$v[a$t == 1] <- as.character(ss$new[as.numeric(a$v[a$t == 1]) + 1])
+  }
+  
   a$f <- updateFormula(a$f[keep], col = col)
   a$n_elements <- sum(keep)
 
   if ("data_count" %in% names(a)) a$data_count <- length(unique(a$v))
 
-  # remove the unneeded strings from sharedStrings
-  rv <- as.numeric(rem_shared) + 1
-  wb$sharedStrings <- wb$sharedStrings[-rv]
-  attr(wb$sharedStrings, "uniqueCount") <- length(unique(wb$sharedStrings))
-
   # adjust styles
   sheet_name <- wb$sheet_names[[sheet]]
   this_sheet <- sapply(wb$styleObjects, function(o) {
diff --git a/tests/testthat/test-wrappers.R b/tests/testthat/test-wrappers.R
index 146c5411..20f54faf 100644
--- a/tests/testthat/test-wrappers.R
+++ b/tests/testthat/test-wrappers.R
@@ -17,7 +17,7 @@ test_that("int2col and col2int", {
 test_that("deleteDataColumn basics", {
   wb <- createWorkbook()
   addWorksheet(wb, "tester")
-
+  
   for (i in seq(5)) {
     mat <- data.frame(x = rep(paste0(int2col(i), i), 10))
     writeData(wb, sheet = 1, startRow = 1, startCol = i, mat)
@@ -29,8 +29,8 @@ test_that("deleteDataColumn basics", {
     c("<f>=COUNTA(A2:A11)</f>", "<f>=COUNTA(B2:B11)</f>", "<f>=COUNTA(C2:C11)</f>", 
       "<f>=COUNTA(D2:D11)</f>", "<f>=COUNTA(E2:E11)</f>")
   )
-
-
+  
+  
   deleteDataColumn(wb, 1, col = 3)
   expect_equal(read.xlsx(wb),
                data.frame(x = rep("A1", 10), x = "B2", x = "D4", x = "E5", # no C3!
@@ -130,7 +130,7 @@ test_that("deleteDataColumn with formatting data", {
   
   st <- openxlsx::createStyle(textDecoration = "Bold", fontSize = 20, fontColour = "red")
   openxlsx::addStyle(wb, 1, style = st, rows = 1, cols = seq(ncol(df)))
-
+  
   sst <- wb$styleObjects[[1]]
   sst$rows <- c(1, 1)
   sst$cols <- c(1, 2)
@@ -140,3 +140,112 @@ test_that("deleteDataColumn with formatting data", {
   expect_equal(wb$styleObjects[[1]],
                sst)
 })
+
+test_that("deleteDataColumn with shared strings does not crash or change inputs", {
+  df <- data.frame("Col 1" = "Row 2 Col 1",
+                   "Col 2" = "Row 2 Col 2",
+                   "Col 3" = "Row 2 Col 3",
+                   check.names = FALSE)
+  
+  wb <- createWorkbook()
+  addWorksheet(wb, "tester")
+  writeData(wb, sheet = 1, startRow = 1, startCol = 1, x = df, colNames = TRUE)
+  
+  deleteDataColumn(wb, sheet =  1, col =  2)
+  
+  expect_equal(
+    wb$sharedStrings,
+    structure(
+      list(
+        "<si><t xml:space=\"preserve\">Col 1</t></si>",
+        "<si><t xml:space=\"preserve\">Col 3</t></si>",
+        "<si><t xml:space=\"preserve\">Row 2 Col 1</t></si>",
+        "<si><t xml:space=\"preserve\">Row 2 Col 3</t></si>"
+      ),
+      uniqueCount = 4L
+    )
+  )
+  expect_equal(
+    read.xlsx(wb),
+    data.frame(
+      "Col 1" = "Row 2 Col 1",
+      "Col 3" = "Row 2 Col 3"
+    )
+  )
+})
+
+
+test_that("deleteDataColumn with shared strings on other sheets", {
+  df <- data.frame("ABC" = "I am a shared string with sheet 2!")
+  df2 <- data.frame("AB" = "I am a shared string with sheet 2!")
+  
+  wb <- createWorkbook()
+  addWorksheet(wb, "tester")
+  writeData(wb, sheet = 1, startRow = 1, startCol = 1, x = df, colNames = TRUE)
+  
+  simplify <- function(sd) data.frame(rows = sd$rows, cols = sd$cols, t = sd$t, v = sd$v)
+  expect_equal(
+    simplify(wb$worksheets[[1]]$sheet_data),
+    data.frame(rows = c(1, 2), cols = 1, t = 1, v = c("0", "1"))
+  )
+  expect_equal(
+    wb$sharedStrings,
+    structure(
+      list(
+        "<si><t xml:space=\"preserve\">ABC</t></si>",
+        "<si><t xml:space=\"preserve\">I am a shared string with sheet 2!</t></si>"
+      ),
+      uniqueCount = 2L
+    )
+  )
+  
+  addWorksheet(wb, "tester2")
+  writeData(wb, sheet = 2, startRow = 1, startCol = 1, x = df2, colNames = TRUE)
+  
+  expect_equal(
+    simplify(wb$worksheets[[2]]$sheet_data),
+    data.frame(rows = c(1, 2), cols = 1, t = 1, v = c("2", "1"))
+  )
+  expect_equal(
+    wb$sharedStrings,
+    structure(
+      list(
+        "<si><t xml:space=\"preserve\">ABC</t></si>",
+        "<si><t xml:space=\"preserve\">I am a shared string with sheet 2!</t></si>",
+        "<si><t xml:space=\"preserve\">AB</t></si>"
+      ),
+      uniqueCount = 3L
+    )
+  )
+  
+  
+  # deleting from sheet 1 does not delete the string from sheet 2!
+  deleteDataColumn(wb, sheet =  1, col =  1)
+  
+  expect_equal(
+    simplify(wb$worksheets[[1]]$sheet_data),
+    data.frame(rows = numeric(0), cols = numeric(0), t = numeric(0), v = character(0))
+  )
+  
+  # note on sheet 2, the indices v to the shared strings have to change as well!
+  expect_equal(
+    simplify(wb$worksheets[[2]]$sheet_data),
+    data.frame(rows = c(1, 2), cols = 1, t = 1, v = c("1", "0"))
+  )
+  
+  expect_equal(
+    wb$sharedStrings,
+    structure(
+      list(
+        "<si><t xml:space=\"preserve\">I am a shared string with sheet 2!</t></si>",
+        "<si><t xml:space=\"preserve\">AB</t></si>"
+      ),
+      uniqueCount = 2L
+    )
+  )
+  
+  expect_equal(
+    read.xlsx(wb, sheet = 2),
+    data.frame(AB = "I am a shared string with sheet 2!")
+  )
+})

From a722578b400b92ed1dc46e075baff4744078a6e9 Mon Sep 17 00:00:00 2001
From: DavZim <david-zimmermann@hotmail.de>
Date: Fri, 6 Dec 2024 14:00:56 +0000
Subject: [PATCH 2/3] add safeguard against workbooks without any shared
 strings

---
 R/wrappers.R | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/R/wrappers.R b/R/wrappers.R
index 2d2aa4b4..81914479 100644
--- a/R/wrappers.R
+++ b/R/wrappers.R
@@ -1916,14 +1916,17 @@ deleteDataColumn <- function(wb, sheet, col) {
   a$rows <- a$rows[keep]
 
   # reduce the shared strings pointers if they are not used anymore
-  ss <- data.frame(
-    # the old index 0 indexed, as used in a$v
-    old = as.numeric(seq(length(wb$sharedStrings)) - 1),
-    # will hold the new index 0 indexed, as used in a$v
-    new = NA,
-    # the actual strings
-    string = wb$sharedStrings
-  )
+  ss <- data.frame(old = numeric(0), new = numeric(0), string = character(0))
+  if (length(wb$sharedStrings) > 0) {
+    ss <- data.frame(
+      # the old index 0 indexed, as used in a$v
+      old = as.numeric(seq(length(wb$sharedStrings)) - 1),
+      # will hold the new index 0 indexed, as used in a$v
+      new = NA,
+      # the actual strings
+      string = wb$sharedStrings
+    )
+  }
   
   # 1. remove the values from sheet_data (a)
   a$v <- a$v[keep]

From d438e0636b3d8d8bb2555c88ebca8b6a698a44c7 Mon Sep 17 00:00:00 2001
From: Jan Marvin Garbuszus <jan.garbuszus@rub.de>
Date: Wed, 15 Jan 2025 18:30:24 +0100
Subject: [PATCH 3/3] cleanup whitespaces in test

---
 tests/testthat/test-wrappers.R | 72 +++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/tests/testthat/test-wrappers.R b/tests/testthat/test-wrappers.R
index 20f54faf..bd66c0d2 100644
--- a/tests/testthat/test-wrappers.R
+++ b/tests/testthat/test-wrappers.R
@@ -2,22 +2,22 @@
 context("Test wrappers")
 
 test_that("int2col and col2int", {
-  
+
   nums <- 2:27
-  
+
   chrs <- c("B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",  "N",
             "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "AA")
-  
+
   expect_equal(chrs, int2col(nums))
   expect_equal(nums, col2int(chrs))
-  
+
 })
 
 
 test_that("deleteDataColumn basics", {
   wb <- createWorkbook()
   addWorksheet(wb, "tester")
-  
+
   for (i in seq(5)) {
     mat <- data.frame(x = rep(paste0(int2col(i), i), 10))
     writeData(wb, sheet = 1, startRow = 1, startCol = i, mat)
@@ -29,8 +29,8 @@ test_that("deleteDataColumn basics", {
     c("<f>=COUNTA(A2:A11)</f>", "<f>=COUNTA(B2:B11)</f>", "<f>=COUNTA(C2:C11)</f>", 
       "<f>=COUNTA(D2:D11)</f>", "<f>=COUNTA(E2:E11)</f>")
   )
-  
-  
+
+
   deleteDataColumn(wb, 1, col = 3)
   expect_equal(read.xlsx(wb),
                data.frame(x = rep("A1", 10), x = "B2", x = "D4", x = "E5", # no C3!
@@ -40,8 +40,8 @@ test_that("deleteDataColumn basics", {
     c("<f>=COUNTA(A2:A11)</f>", "<f>=COUNTA(B2:B11)</f>", 
       "<f>=COUNTA(C2:C11)</f>", "<f>=COUNTA(D2:D11)</f>")
   )
-  
-  
+
+
   deleteDataColumn(wb, 1, col = 2)
   expect_equal(read.xlsx(wb),
                data.frame(x = rep("A1", 10), x = "D4", x = "E5", # no B2!
@@ -50,7 +50,7 @@ test_that("deleteDataColumn basics", {
     setdiff(wb$worksheets[[1]]$sheet_data$f, NA),
     c("<f>=COUNTA(A2:A11)</f>", "<f>=COUNTA(B2:B11)</f>", "<f>=COUNTA(C2:C11)</f>")
   )
-  
+
   deleteDataColumn(wb, 1, col = 1)
   expect_equal(read.xlsx(wb),
                data.frame(x = rep("D4", 10), x = "E5", # no A1!
@@ -68,22 +68,22 @@ test_that("deleteDataColumn with more complicated formulae", {
   addWorksheet(wb, "tester")
   writeData(wb, sheet = 1, startRow = 1, startCol = 1,
             x = matrix(c(1, 1), ncol = 1), colNames = FALSE)
-  
+
   for (c in 2:10)
     writeFormula(wb, 1, sprintf("%s1 + 1", int2col(c - 1)),
                  startRow = 1, startCol = c)
-  
+
   for (c in 2:10)
     writeFormula(wb, 1, sprintf("%s1 + %s2", int2col(c), int2col(c - 1)),
                  startRow = 2, startCol = c)
-  
+
   for (c in 2:10)
     writeFormula(wb, 1, sprintf("%s2 + %s2", int2col(c), int2col(c + 1)),
                  startRow = 3, startCol = c)
-  
+
   deleteDataColumn(wb, 1, 3)
   # saveWorkbook(wb, "tester.xlsx") # and inspect by hand: expect lots of #REF!
-  
+
   expect_equal(read.xlsx(wb), data.frame(`1` = 1, check.names = FALSE))
   expect_equal(
     wb$worksheets[[1]]$sheet_data$f,
@@ -103,19 +103,19 @@ test_that("deleteDataColumn with wide data", {
   colnames(df) <- int2col(seq(ncols))
   writeData(wb, sheet = 1, startRow = 1, startCol = 1, x = df, colNames = TRUE)
   expect_equal(read.xlsx(wb), df)
-  
+
   deleteDataColumn(wb, 1, 2)
   expect_equal(read.xlsx(wb), df[, -2])
-  
+
   deleteDataColumn(wb, 1, 100)
   expect_equal(read.xlsx(wb), df[, -2])
-  
+
   deleteDataColumn(wb, 1, 55)
   expect_equal(read.xlsx(wb), df[, -c(2, 56)]) # 56 b.c. one col was already taken out
-  
+
   deleteDataColumn(wb, 1, 1)
   expect_equal(read.xlsx(wb), df[, -c(1, 2, 56)])
-  
+
   # delete all data
   for (i in seq(ncols - 2))
     deleteDataColumn(wb, 1, 1)
@@ -127,15 +127,15 @@ test_that("deleteDataColumn with formatting data", {
   addWorksheet(wb, "tester")
   df <- data.frame(x = 1:10, y = letters[1:10], z = 10:1)
   writeData(wb, sheet = 1, startRow = 1, startCol = 1, x = df, colNames = TRUE)
-  
+
   st <- openxlsx::createStyle(textDecoration = "Bold", fontSize = 20, fontColour = "red")
   openxlsx::addStyle(wb, 1, style = st, rows = 1, cols = seq(ncol(df)))
-  
+
   sst <- wb$styleObjects[[1]]
   sst$rows <- c(1, 1)
   sst$cols <- c(1, 2)
   deleteDataColumn(wb, 1, 2)
-  
+
   expect_length(wb$styleObjects, 1)
   expect_equal(wb$styleObjects[[1]],
                sst)
@@ -146,13 +146,13 @@ test_that("deleteDataColumn with shared strings does not crash or change inputs"
                    "Col 2" = "Row 2 Col 2",
                    "Col 3" = "Row 2 Col 3",
                    check.names = FALSE)
-  
+
   wb <- createWorkbook()
   addWorksheet(wb, "tester")
   writeData(wb, sheet = 1, startRow = 1, startCol = 1, x = df, colNames = TRUE)
-  
+
   deleteDataColumn(wb, sheet =  1, col =  2)
-  
+
   expect_equal(
     wb$sharedStrings,
     structure(
@@ -178,11 +178,11 @@ test_that("deleteDataColumn with shared strings does not crash or change inputs"
 test_that("deleteDataColumn with shared strings on other sheets", {
   df <- data.frame("ABC" = "I am a shared string with sheet 2!")
   df2 <- data.frame("AB" = "I am a shared string with sheet 2!")
-  
+
   wb <- createWorkbook()
   addWorksheet(wb, "tester")
   writeData(wb, sheet = 1, startRow = 1, startCol = 1, x = df, colNames = TRUE)
-  
+
   simplify <- function(sd) data.frame(rows = sd$rows, cols = sd$cols, t = sd$t, v = sd$v)
   expect_equal(
     simplify(wb$worksheets[[1]]$sheet_data),
@@ -198,10 +198,10 @@ test_that("deleteDataColumn with shared strings on other sheets", {
       uniqueCount = 2L
     )
   )
-  
+
   addWorksheet(wb, "tester2")
   writeData(wb, sheet = 2, startRow = 1, startCol = 1, x = df2, colNames = TRUE)
-  
+
   expect_equal(
     simplify(wb$worksheets[[2]]$sheet_data),
     data.frame(rows = c(1, 2), cols = 1, t = 1, v = c("2", "1"))
@@ -217,22 +217,22 @@ test_that("deleteDataColumn with shared strings on other sheets", {
       uniqueCount = 3L
     )
   )
-  
-  
+
+
   # deleting from sheet 1 does not delete the string from sheet 2!
   deleteDataColumn(wb, sheet =  1, col =  1)
-  
+
   expect_equal(
     simplify(wb$worksheets[[1]]$sheet_data),
     data.frame(rows = numeric(0), cols = numeric(0), t = numeric(0), v = character(0))
   )
-  
+
   # note on sheet 2, the indices v to the shared strings have to change as well!
   expect_equal(
     simplify(wb$worksheets[[2]]$sheet_data),
     data.frame(rows = c(1, 2), cols = 1, t = 1, v = c("1", "0"))
   )
-  
+
   expect_equal(
     wb$sharedStrings,
     structure(
@@ -243,7 +243,7 @@ test_that("deleteDataColumn with shared strings on other sheets", {
       uniqueCount = 2L
     )
   )
-  
+
   expect_equal(
     read.xlsx(wb, sheet = 2),
     data.frame(AB = "I am a shared string with sheet 2!")