From 4199452f97a72188b9cfa8805bea6dce572b46bb Mon Sep 17 00:00:00 2001 From: Jan Blom Date: Fri, 10 Feb 2023 14:39:30 +0100 Subject: [PATCH 1/8] Proposed solution for issue 293 (https://github.com/OHDSI/WhiteRabbit/issues/293) --- rabbit-core/pom.xml | 9 +- whiterabbit/pom.xml | 7 ++ .../whiteRabbit/scan/SourceDataScan.java | 41 +++++++++- .../whiterabbit/scan/TestSourceDataScan.java | 82 +++++++++++++++++-- 4 files changed, 128 insertions(+), 11 deletions(-) diff --git a/rabbit-core/pom.xml b/rabbit-core/pom.xml index f6c0a05d..9554827e 100644 --- a/rabbit-core/pom.xml +++ b/rabbit-core/pom.xml @@ -14,6 +14,7 @@ UTF-8 + 4.1.2 @@ -41,22 +42,22 @@ org.apache.poi poi - 4.1.2 + ${apache-poi-version} org.apache.poi poi-ooxml - 4.1.2 + ${apache-poi-version} org.apache.poi poi-excelant - 4.1.2 + ${apache-poi-version} org.apache.poi poi-ooxml-schemas - 4.1.2 + 4.1.2 diff --git a/whiterabbit/pom.xml b/whiterabbit/pom.xml index 0472b28f..f86e7321 100644 --- a/whiterabbit/pom.xml +++ b/whiterabbit/pom.xml @@ -76,6 +76,13 @@ slf4j-simple 1.7.30 + + + commons-io + commons-io + 2.11.0 + + diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java index 3bfe9637..0b17d537 100644 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java +++ b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java @@ -18,6 +18,9 @@ package org.ohdsi.whiteRabbit.scan; import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.sql.ResultSet; import java.sql.SQLException; import java.time.LocalDate; @@ -29,11 +32,13 @@ import com.epam.parso.SasFileProperties; import com.epam.parso.SasFileReader; import com.epam.parso.impl.SasFileReaderImpl; +import org.apache.commons.lang.StringUtils; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.CellStyle; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.xssf.streaming.SXSSFWorkbook; +import org.apache.commons.io.FileUtils; import org.ohdsi.databases.DbType; import org.ohdsi.databases.RichConnection; import org.ohdsi.databases.RichConnection.QueryResult; @@ -70,6 +75,15 @@ public class SourceDataScan { private LocalDateTime startTimeStamp; + static final String poiTmpPath; + + static { + try { + poiTmpPath = setUniqueTempDirStrategyForApachePoi(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } public void setSampleSize(int sampleSize) { // -1 if sample size is not restricted @@ -117,6 +131,32 @@ public void process(DbSettings dbSettings, String outputFileName) { generateReport(outputFileName); } + public static String setUniqueTempDirStrategyForApachePoi() throws IOException { + // TODO: if/when updating poi to 5.x or higher, use DefaultTempFileCreationStrategy.POIFILES instead of a string literal + final String poiFilesDir = "poifiles"; + Path defaultTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), poiFilesDir); + Path myTmpPath = defaultTmpPath; + if (Files.exists(defaultTmpPath) && !Files.isWritable(defaultTmpPath)) { + // problem: someone else (?) already created the default tmp path for poi, but we cannot use it: it's readonly for us. + // solution: create our own tmp directory, taking property "org.ohdsi.whiterabbit.poi.tmpdir" into account if set. + String poiFilesDirProperty = System.getProperty("org.ohdsi.whiterabbit.poi.tmpdir"); + if (!StringUtils.isEmpty(poiFilesDirProperty)) { + // use what the user configured. + myTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), poiFilesDirProperty, UUID.randomUUID().toString()); + } + else { + // avoid the poifiles directory entirely + myTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), UUID.randomUUID().toString()); + } + File tmpDir = Files.createDirectories(myTmpPath).toFile(); + org.apache.poi.util.TempFile.setTempFileCreationStrategy(new org.apache.poi.util.DefaultTempFileCreationStrategy(tmpDir)); + } + + // return the path that is actually being used + // not needed for production code, by handy for unit testing this + return myTmpPath.toFile().getAbsolutePath(); + } + private void processDatabase(DbSettings dbSettings) { // GBQ requires database. Put database value into domain var if (dbSettings.dbType == DbType.BIGQUERY) { @@ -735,7 +775,6 @@ public void processValue(String value) { samplingReservoir.add(DateUtilities.parseDate(trimValue)); } } - } public List> getSortedValuesWithoutSmallValues() { diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java index 078454a0..b7ac043b 100644 --- a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java @@ -1,5 +1,6 @@ package org.ohdsi.whiterabbit.scan; +import org.apache.commons.io.FileUtils; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -20,10 +21,8 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.nio.file.Paths; +import java.util.*; import static org.junit.jupiter.api.Assertions.*; @@ -92,9 +91,9 @@ void testProcess(@TempDir Path tempDir) throws IOException { sourceDataScan.process(dbSettings, outFile.toString()); assertTrue(Files.exists(outFile)); - // + // // Verify the contents of the generated xlsx file - // + // FileInputStream file = new FileInputStream(new File(outFile.toUri())); ReadXlsxFileWithHeader sheet = new ReadXlsxFileWithHeader(file); @@ -125,6 +124,62 @@ void testProcess(@TempDir Path tempDir) throws IOException { expectRowNIsLike(242, data, "visit_occurrence", "visit_type_concept_id", "", "integer", "0", "55261"); } + @Test + void testApachePoiTmpfileProblem(@TempDir Path tempDir) throws IOException { + // intends to verify solution of this bug: https://github.com/OHDSI/WhiteRabbit/issues/293 + + /* + * This tests a fix that assumes that the bug referenced here occurs in a multi-user situation where the + * first user running the scan, and causing /tmp/poifiles to created, does so by creating it read-only + * for everyone else. This directory is not automatically cleaned up, so every following user on the same + * system running the scan encounters the problem that /tmp/poifiles already exists and is read-only, + * causing a crash when the Apacho poi library attemps to create the xslx file. + * + * The class SourceDataScan has been extended with a static method, called implicitly once through a static{} + * block, to create a TempDir strategy to create a + * unique directory for each instance/run of WhiteRabbit. This effectively solves the assumes error + * situation. + * + * This test does not execute a multi-user situation, but emulates it by leaving the tmp directory in a + * read-only state after the first scan, and then confirming that a second scan fails. After that, + * a new unique tmp dir is enforced by invoking SourceDataScan.setUniqueTempDirStrategyForApachePoi(), + * and a new scan now runs successfully. + */ + // TODO: if/when updating poi to 5.x or higher, use DefaultTempFileCreationStrategy.POIFILES instead of a string literal + final String poiFilesDir = "poifiles"; + Path defaultTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), poiFilesDir); + + // attempt to resolve an already existing unworkable situation where the default tmp dir for poi files is readonly + if (Files.exists(defaultTmpPath) && !Files.isWritable(defaultTmpPath)) { + assertTrue(defaultTmpPath.toFile().setWritable(true), + String.format("This test cannot run properly if %s exists but is not writeable. Either remove it or make it writeable", + defaultTmpPath.toFile().getAbsolutePath())); + } + + // process should pass without problem, and afterwards the default tmp dir should exist + testProcess(tempDir); + assertTrue(Files.exists(defaultTmpPath)); + + // provoke the problem situation. make the default tmp dir readonly, try to process again + assertTrue(defaultTmpPath.toFile().setReadOnly()); + RuntimeException thrown = assertThrows(RuntimeException.class, () -> { + testProcess(tempDir); + }); + assertTrue(thrown.getMessage().endsWith("Permission denied")); + + // invoke the static method to set a new tmp dir, process again (should succeed) and verify that + // the new tmpdir is indeed different from the default + String myTmpDir = SourceDataScan.setUniqueTempDirStrategyForApachePoi(); + testProcess(tempDir); + assertNotEquals(defaultTmpPath.toFile().getAbsolutePath(), myTmpDir); + + // we might have left behind an unworkable situation; attempt to solve that + if (Files.exists(defaultTmpPath) && !Files.isWritable(defaultTmpPath)) { + assertTrue(defaultTmpPath.toFile().setWritable(true)); + } + + } + private boolean expectRowNIsLike(int n, List rows, String... expectedValues) { assert expectedValues.length == 6; testColumnValue(n, rows.get(n), "Table", expectedValues[0]); @@ -160,4 +215,19 @@ private DbSettings getTestDbSettings() { return dbSettings; } + + private boolean deleteDir(File file) { + if (Files.exists(file.toPath())) { + File[] contents = file.listFiles(); + if (contents != null) { + for (File f : contents) { + if (!Files.isSymbolicLink(f.toPath())) { + deleteDir(f); + } + } + } + return file.delete(); + } + return true; + } } From c2ab3ccb6ee2286476f387a3f4840840e7c25a13 Mon Sep 17 00:00:00 2001 From: Jan Blom Date: Sat, 25 Mar 2023 08:35:21 +0100 Subject: [PATCH 2/8] Add test, comments, some small corrections --- whiterabbit/pom.xml | 6 + .../whiteRabbit/scan/SourceDataScan.java | 8 +- .../ohdsi/whiterabbit/scan/ScanTestUtils.java | 26 +++ .../whiterabbit/scan/TestSourceDataScan.java | 184 ++++++++++++++++++ .../scan/TestSourceDataScanPostgreSQL.java | 28 +-- 5 files changed, 226 insertions(+), 26 deletions(-) create mode 100644 whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java diff --git a/whiterabbit/pom.xml b/whiterabbit/pom.xml index fba5e8ee..b7bee799 100644 --- a/whiterabbit/pom.xml +++ b/whiterabbit/pom.xml @@ -66,6 +66,12 @@ pom import + + + commons-io + commons-io + 2.11.0 + diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java index 0b17d537..6c964acb 100644 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java +++ b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java @@ -131,6 +131,12 @@ public void process(DbSettings dbSettings, String outputFileName) { generateReport(outputFileName); } + /* + * Implements a strategy for the mp dir to ise for files for apache poi + * Attemps to solve an issue where some users report not having write access to the poi tmp dir + * (see https://github.com/OHDSI/WhiteRabbit/issues/293). Vry likely this is caused by the poi tmp dir + * being created on a multi-user system by a user with a too restrictive file mask. + */ public static String setUniqueTempDirStrategyForApachePoi() throws IOException { // TODO: if/when updating poi to 5.x or higher, use DefaultTempFileCreationStrategy.POIFILES instead of a string literal final String poiFilesDir = "poifiles"; @@ -152,8 +158,6 @@ public static String setUniqueTempDirStrategyForApachePoi() throws IOException { org.apache.poi.util.TempFile.setTempFileCreationStrategy(new org.apache.poi.util.DefaultTempFileCreationStrategy(tmpDir)); } - // return the path that is actually being used - // not needed for production code, by handy for unit testing this return myTmpPath.toFile().getAbsolutePath(); } diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/ScanTestUtils.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/ScanTestUtils.java index c44903af..26776c48 100644 --- a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/ScanTestUtils.java +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/ScanTestUtils.java @@ -1,9 +1,12 @@ package org.ohdsi.whiterabbit.scan; import org.ohdsi.databases.DbType; +import org.ohdsi.databases.RichConnection; import org.ohdsi.ooxml.ReadXlsxFileWithHeader; import org.ohdsi.utilities.files.Row; import org.ohdsi.utilities.files.RowUtilities; +import org.ohdsi.whiteRabbit.DbSettings; +import org.testcontainers.containers.PostgreSQLContainer; import java.io.File; import java.io.FileInputStream; @@ -98,4 +101,27 @@ else if (dbType == DbType.ORACLE){ throw new RuntimeException("Unsupported DBType: " + dbType); } } + + static DbSettings getTestPostgreSQLSettings(PostgreSQLContainer container) { + DbSettings dbSettings = new DbSettings(); + dbSettings.dbType = DbType.POSTGRESQL; + dbSettings.sourceType = DbSettings.SourceType.DATABASE; + dbSettings.server = container.getJdbcUrl(); + dbSettings.database = "public"; // yes, really + dbSettings.user = container.getUsername(); + dbSettings.password = container.getPassword(); + dbSettings.tables = getTableNamesPostgreSQL(dbSettings); + + return dbSettings; + } + + static List getTableNamesPostgreSQL(DbSettings dbSettings) { + try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) { + return richConnection.getTableNames("public"); + } + } + + + + } diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java new file mode 100644 index 00000000..e01840e2 --- /dev/null +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java @@ -0,0 +1,184 @@ +package org.ohdsi.whiterabbit.scan; + +import org.apache.commons.io.FileUtils; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.ohdsi.databases.DbType; +import org.ohdsi.databases.RichConnection; +import org.ohdsi.ooxml.ReadXlsxFileWithHeader; +import org.ohdsi.utilities.files.Row; +import org.ohdsi.utilities.files.RowUtilities; +import org.ohdsi.whiteRabbit.DbSettings; +import org.ohdsi.whiteRabbit.scan.SourceDataScan; +import org.testcontainers.containers.BindMode; +import org.testcontainers.containers.PostgreSQLContainer; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.*; + +import static org.junit.jupiter.api.Assertions.*; + + +@Testcontainers +@Tag("DockerRequired") +class TestSourceDataScan { + + @Container + public static PostgreSQLContainer postgreSQL; + + static { + /* + * Since the database is only read, setting it up once suffices. + * + * Note that the init script is read locally, but accesses the CSV files from + * the resource mapped into the container. + * + * The data used in this test are actually OMOP data. One reason for this is convenience: the DDL + * for this data is know and could simply be copied instead of composed. + * Also, for the technical correctness of WhiteRabbit (does it open the database, get the table + * names and scan those tables), the actual nature of the source data does not matter. + */ + try { + postgreSQL = new PostgreSQLContainer<>("postgres:13.1") + .withUsername("test") + .withPassword("test") + .withDatabaseName("test") + .withClasspathResourceMapping( + "scan_data", + "/scan_data", + BindMode.READ_ONLY) + .withInitScript("scan_data/create_data_postgresql.sql"); + + postgreSQL.start(); + + } finally { + if (postgreSQL != null) { + postgreSQL.stop(); + } + } + } + + void testProcess(@TempDir Path tempDir) throws IOException { + Path outFile = tempDir.resolve("scanresult.xslx"); + SourceDataScan sourceDataScan = new SourceDataScan(); + DbSettings dbSettings = ScanTestUtils.getTestPostgreSQLSettings(postgreSQL); + + sourceDataScan.process(dbSettings, outFile.toString()); + ScanTestUtils.verifyScanResultsFromXSLX(outFile, dbSettings.dbType); + } + + @Test + void testApachePoiTmpfileProblem(@TempDir Path tempDir) throws IOException { + // intends to verify solution of this bug: https://github.com/OHDSI/WhiteRabbit/issues/293 + + /* + * This tests a fix that assumes that the bug referenced here occurs in a multi-user situation where the + * first user running the scan, and causing /tmp/poifiles to created, does so by creating it read-only + * for everyone else. This directory is not automatically cleaned up, so every following user on the same + * system running the scan encounters the problem that /tmp/poifiles already exists and is read-only, + * causing a crash when the Apacho poi library attemps to create the xslx file. + * + * The class SourceDataScan has been extended with a static method, called implicitly once through a static{} + * block, to create a TempDir strategy to create a + * unique directory for each instance/run of WhiteRabbit. This effectively solves the assumes error + * situation. + * + * This test does not execute a multi-user situation, but emulates it by leaving the tmp directory in a + * read-only state after the first scan, and then confirming that a second scan fails. After that, + * a new unique tmp dir is enforced by invoking SourceDataScan.setUniqueTempDirStrategyForApachePoi(), + * and a new scan now runs successfully. + */ + // TODO: if/when updating poi to 5.x or higher, use DefaultTempFileCreationStrategy.POIFILES instead of a string literal + final String poiFilesDir = "poifiles"; + Path defaultTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), poiFilesDir); + + // attempt to resolve an already existing unworkable situation where the default tmp dir for poi files is readonly + if (Files.exists(defaultTmpPath) && !Files.isWritable(defaultTmpPath)) { + assertTrue(defaultTmpPath.toFile().setWritable(true), + String.format("This test cannot run properly if %s exists but is not writeable. Either remove it or make it writeable", + defaultTmpPath.toFile().getAbsolutePath())); + } + + // process should pass without problem, and afterwards the default tmp dir should exist + testProcess(tempDir); + assertTrue(Files.exists(defaultTmpPath)); + + // provoke the problem situation. make the default tmp dir readonly, try to process again + assertTrue(defaultTmpPath.toFile().setReadOnly()); + RuntimeException thrown = assertThrows(RuntimeException.class, () -> { + testProcess(tempDir); + }); + assertTrue(thrown.getMessage().endsWith("Permission denied")); + + // invoke the static method to set a new tmp dir, process again (should succeed) and verify that + // the new tmpdir is indeed different from the default + String myTmpDir = SourceDataScan.setUniqueTempDirStrategyForApachePoi(); + testProcess(tempDir); + assertNotEquals(defaultTmpPath.toFile().getAbsolutePath(), myTmpDir); + + // we might have left behind an unworkable situation; attempt to solve that + if (Files.exists(defaultTmpPath) && !Files.isWritable(defaultTmpPath)) { + assertTrue(defaultTmpPath.toFile().setWritable(true)); + } + + } + + private boolean expectRowNIsLike(int n, List rows, String... expectedValues) { + assert expectedValues.length == 6; + testColumnValue(n, rows.get(n), "Table", expectedValues[0]); + testColumnValue(n, rows.get(n), "Field", expectedValues[1]); + testColumnValue(n, rows.get(n), "Description", expectedValues[2]); + testColumnValue(n, rows.get(n), "Type", expectedValues[3]); + testColumnValue(n, rows.get(n), "Max length", expectedValues[4]); + testColumnValue(n, rows.get(n), "N rows", expectedValues[5]); + return true; + } + + private void testColumnValue(int i, Row row, String fieldName, String expected) { + if (!expected.equals(row.get(fieldName))) { + fail(String.format("In row %d, value '%s' was expected for column '%s', but '%s' was found", + i, expected, fieldName, row.get(fieldName))); + } + } + private List getTableNames(DbSettings dbSettings) { + try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) { + return richConnection.getTableNames("public"); + } + } + + private DbSettings getTestDbSettings() { + DbSettings dbSettings = new DbSettings(); + dbSettings.dbType = DbType.POSTGRESQL; + dbSettings.sourceType = DbSettings.SourceType.DATABASE; + dbSettings.server = postgreSQL.getJdbcUrl(); + dbSettings.database = "public"; // yes, really + dbSettings.user = postgreSQL.getUsername(); + dbSettings.password = postgreSQL.getPassword(); + dbSettings.tables = getTableNames(dbSettings); + + return dbSettings; + } + + private boolean deleteDir(File file) { + if (Files.exists(file.toPath())) { + File[] contents = file.listFiles(); + if (contents != null) { + for (File f : contents) { + if (!Files.isSymbolicLink(f.toPath())) { + deleteDir(f); + } + } + } + return file.delete(); + } + return true; + } +} diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanPostgreSQL.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanPostgreSQL.java index b81cda33..0678d308 100644 --- a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanPostgreSQL.java +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanPostgreSQL.java @@ -2,7 +2,6 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import org.ohdsi.databases.DbType; import org.ohdsi.databases.RichConnection; import org.ohdsi.whiteRabbit.DbSettings; import org.ohdsi.whiteRabbit.scan.SourceDataScan; @@ -59,7 +58,7 @@ class TestSourceDataScanPostgreSQL { @Test public void connectToDatabase() { // this is also implicitly tested by testSourceDataScan(), but having it fail separately helps identify problems quicker - DbSettings dbSettings = getTestDbSettings(); + DbSettings dbSettings = ScanTestUtils.getTestPostgreSQLSettings(postgreSQL); try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) { // do nothing, connection will be closed automatically because RichConnection implements interface Closeable } @@ -68,36 +67,17 @@ public void connectToDatabase() { @Test public void testGetTableNames() { // this is also implicitly tested by testSourceDataScan(), but having it fail separately helps identify problems quicker - DbSettings dbSettings = getTestDbSettings(); - List tableNames = getTableNames(dbSettings); + DbSettings dbSettings = ScanTestUtils.getTestPostgreSQLSettings(postgreSQL); + List tableNames = ScanTestUtils.getTableNamesPostgreSQL(dbSettings); assertEquals(2, tableNames.size()); } @Test void testSourceDataScan(@TempDir Path tempDir) throws IOException { Path outFile = tempDir.resolve("scanresult.xslx"); SourceDataScan sourceDataScan = new SourceDataScan(); - DbSettings dbSettings = getTestDbSettings(); + DbSettings dbSettings = ScanTestUtils.getTestPostgreSQLSettings(postgreSQL); sourceDataScan.process(dbSettings, outFile.toString()); ScanTestUtils.verifyScanResultsFromXSLX(outFile, dbSettings.dbType); } - - private List getTableNames(DbSettings dbSettings) { - try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) { - return richConnection.getTableNames("public"); - } - } - - private DbSettings getTestDbSettings() { - DbSettings dbSettings = new DbSettings(); - dbSettings.dbType = DbType.POSTGRESQL; - dbSettings.sourceType = DbSettings.SourceType.DATABASE; - dbSettings.server = postgreSQL.getJdbcUrl(); - dbSettings.database = "public"; // always for PostgreSQL - dbSettings.user = postgreSQL.getUsername(); - dbSettings.password = postgreSQL.getPassword(); - dbSettings.tables = getTableNames(dbSettings); - - return dbSettings; - } } From 2ee18de75a88ff3ff0a24218c888794d34f3c27b Mon Sep 17 00:00:00 2001 From: Jan Blom Date: Mon, 24 Apr 2023 10:49:56 +0200 Subject: [PATCH 3/8] Add missing dependency --- rabbit-core/pom.xml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rabbit-core/pom.xml b/rabbit-core/pom.xml index 1b725e5e..ee112149 100644 --- a/rabbit-core/pom.xml +++ b/rabbit-core/pom.xml @@ -107,6 +107,11 @@ commons-logging 1.2 + + org.apache.commons + commons-compress + 1.23.0 + org.hsqldb hsqldb @@ -228,6 +233,5 @@ ant 1.10.13 - From 28631f907d1035690ae437f00396b441dfbd21de Mon Sep 17 00:00:00 2001 From: Jan Blom Date: Mon, 8 May 2023 10:37:43 +0200 Subject: [PATCH 4/8] Better test for writable directory, refactor stratety for tmp dir, add tests --- .../whiteRabbit/scan/SourceDataScan.java | 79 ++++++++++------ .../whiterabbit/scan/TestSourceDataScan.java | 93 +++++++++++-------- 2 files changed, 106 insertions(+), 66 deletions(-) diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java index 6c964acb..d82dfa29 100644 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java +++ b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java @@ -21,6 +21,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.rmi.RemoteException; import java.sql.ResultSet; import java.sql.SQLException; import java.time.LocalDate; @@ -54,10 +55,13 @@ public class SourceDataScan { - public static int MAX_VALUES_IN_MEMORY = 100000; - public static int MIN_CELL_COUNT_FOR_CSV = 1000000; - public static int N_FOR_FREE_TEXT_CHECK = 1000; - public static int MIN_AVERAGE_LENGTH_FOR_FREE_TEXT = 100; + public static int MAX_VALUES_IN_MEMORY = 100000; + public static int MIN_CELL_COUNT_FOR_CSV = 1000000; + public static int N_FOR_FREE_TEXT_CHECK = 1000; + public static int MIN_AVERAGE_LENGTH_FOR_FREE_TEXT = 100; + + public static final String POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME = "ORG_OHDSI_WHITERABBIT_POI_TMPDIR"; + public static final String POI_TMP_DIR_PROPERTY_NAME = "org.ohdsi.whiterabbit.poi.tmpdir"; private SXSSFWorkbook workbook; private char delimiter = ','; @@ -132,35 +136,59 @@ public void process(DbSettings dbSettings, String outputFileName) { } /* - * Implements a strategy for the mp dir to ise for files for apache poi - * Attemps to solve an issue where some users report not having write access to the poi tmp dir + * Implements a strategy for the tmp dir to ise for files for apache poi + * Attempts to solve an issue where some users report not having write access to the poi tmp dir * (see https://github.com/OHDSI/WhiteRabbit/issues/293). Vry likely this is caused by the poi tmp dir * being created on a multi-user system by a user with a too restrictive file mask. */ public static String setUniqueTempDirStrategyForApachePoi() throws IOException { // TODO: if/when updating poi to 5.x or higher, use DefaultTempFileCreationStrategy.POIFILES instead of a string literal final String poiFilesDir = "poifiles"; - Path defaultTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), poiFilesDir); - Path myTmpPath = defaultTmpPath; - if (Files.exists(defaultTmpPath) && !Files.isWritable(defaultTmpPath)) { - // problem: someone else (?) already created the default tmp path for poi, but we cannot use it: it's readonly for us. - // solution: create our own tmp directory, taking property "org.ohdsi.whiterabbit.poi.tmpdir" into account if set. - String poiFilesDirProperty = System.getProperty("org.ohdsi.whiterabbit.poi.tmpdir"); - if (!StringUtils.isEmpty(poiFilesDirProperty)) { - // use what the user configured. - myTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), poiFilesDirProperty, UUID.randomUUID().toString()); - } - else { - // avoid the poifiles directory entirely - myTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), UUID.randomUUID().toString()); - } - File tmpDir = Files.createDirectories(myTmpPath).toFile(); - org.apache.poi.util.TempFile.setTempFileCreationStrategy(new org.apache.poi.util.DefaultTempFileCreationStrategy(tmpDir)); + Path myTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), poiFilesDir); + String userConfiguredPoiTmpDir = getUserConfiguredPoiTmpDir(); + if (!StringUtils.isEmpty(userConfiguredPoiTmpDir)) { + myTmpPath = Paths.get(userConfiguredPoiTmpDir); + } + Files.createDirectories(myTmpPath); + if (isNotWritable(myTmpPath)) { + // avoid the poi files directory entirely + myTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), UUID.randomUUID().toString()); + Files.createDirectories(myTmpPath); + org.apache.poi.util.TempFile.setTempFileCreationStrategy(new org.apache.poi.util.DefaultTempFileCreationStrategy(myTmpPath.toFile())); + } + + if (isNotWritable(myTmpPath)){ + String message = String.format("Directory %s is not writable! (used for tmp files for Apache POI)", myTmpPath.toFile().getAbsolutePath()); + System.out.println(message); + throw new RemoteException(message); } return myTmpPath.toFile().getAbsolutePath(); } + private static String getUserConfiguredPoiTmpDir() { + // search for a user configured dir for poi tmp files. Env.var. overrules Java property. + String userConfiguredDir = System.getenv(POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME); + if (StringUtils.isEmpty(userConfiguredDir)) { + userConfiguredDir = System.getProperty(POI_TMP_DIR_PROPERTY_NAME); + } + return userConfiguredDir; + } + + private static boolean isNotWritable(Path path) { + final Path testFile = path.resolve("test.txt"); + if (Files.exists(path) && Files.isDirectory(path)) { + try { + Files.createFile(testFile); + Files.delete(testFile); + } catch (IOException e) { + return true; + } + return false; + } + return true; + } + private void processDatabase(DbSettings dbSettings) { // GBQ requires database. Put database value into domain var if (dbSettings.dbType == DbType.BIGQUERY) { @@ -530,11 +558,11 @@ else if (dbType == DbType.MSSQL || dbType == DbType.PDW) { trimmedDatabase = database.substring(1, database.length() - 1); String[] parts = table.split("\\."); query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_CATALOG='" + trimmedDatabase + "' AND TABLE_SCHEMA='" + parts[0] + - "' AND TABLE_NAME='" + parts[1] + "';"; + "' AND TABLE_NAME='" + parts[1] + "';"; } else if (dbType == DbType.AZURE) { String[] parts = table.split("\\."); query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA='" + parts[0] + - "' AND TABLE_NAME='" + parts[1] + "';"; + "' AND TABLE_NAME='" + parts[1] + "';"; } else if (dbType == DbType.MYSQL) query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '" + database + "' AND TABLE_NAME = '" + table + "';"; @@ -544,8 +572,7 @@ else if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT) else if (dbType == DbType.TERADATA) { query = "SELECT ColumnName, ColumnType FROM dbc.columns WHERE DatabaseName= '" + database.toLowerCase() + "' AND TableName = '" + table.toLowerCase() + "';"; - } - else if (dbType == DbType.BIGQUERY) { + } else if (dbType == DbType.BIGQUERY) { query = "SELECT column_name AS COLUMN_NAME, data_type as DATA_TYPE FROM " + database + ".INFORMATION_SCHEMA.COLUMNS WHERE table_name = \"" + table + "\";"; } diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java index e01840e2..efef9cbf 100644 --- a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java @@ -19,6 +19,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.lang.reflect.Field; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -66,7 +67,7 @@ class TestSourceDataScan { } } - void testProcess(@TempDir Path tempDir) throws IOException { + void testProcess(Path tempDir) throws IOException { Path outFile = tempDir.resolve("scanresult.xslx"); SourceDataScan sourceDataScan = new SourceDataScan(); DbSettings dbSettings = ScanTestUtils.getTestPostgreSQLSettings(postgreSQL); @@ -76,7 +77,7 @@ void testProcess(@TempDir Path tempDir) throws IOException { } @Test - void testApachePoiTmpfileProblem(@TempDir Path tempDir) throws IOException { + void testApachePoiTmpFileProblemWithAutomaticResolution(@TempDir Path tempDir) throws IOException, ReflectiveOperationException { // intends to verify solution of this bug: https://github.com/OHDSI/WhiteRabbit/issues/293 /* @@ -87,25 +88,26 @@ void testApachePoiTmpfileProblem(@TempDir Path tempDir) throws IOException { * causing a crash when the Apacho poi library attemps to create the xslx file. * * The class SourceDataScan has been extended with a static method, called implicitly once through a static{} - * block, to create a TempDir strategy to create a - * unique directory for each instance/run of WhiteRabbit. This effectively solves the assumes error - * situation. + * block, to create a TempDir strategy that will create a unique directory for each instance/run of WhiteRabbit. + * This effectively solves the assumed error situation. * * This test does not execute a multi-user situation, but emulates it by leaving the tmp directory in a * read-only state after the first scan, and then confirming that a second scan fails. After that, * a new unique tmp dir is enforced by invoking SourceDataScan.setUniqueTempDirStrategyForApachePoi(), * and a new scan now runs successfully. */ + + // Make sure the scenarios are tested without a user configured tmp dir, so set environment variable and + // system property to an empty value + System.setProperty(SourceDataScan.POI_TMP_DIR_PROPERTY_NAME, ""); + updateEnv(SourceDataScan.POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME, ""); // TODO: if/when updating poi to 5.x or higher, use DefaultTempFileCreationStrategy.POIFILES instead of a string literal final String poiFilesDir = "poifiles"; Path defaultTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), poiFilesDir); - // attempt to resolve an already existing unworkable situation where the default tmp dir for poi files is readonly - if (Files.exists(defaultTmpPath) && !Files.isWritable(defaultTmpPath)) { - assertTrue(defaultTmpPath.toFile().setWritable(true), - String.format("This test cannot run properly if %s exists but is not writeable. Either remove it or make it writeable", - defaultTmpPath.toFile().getAbsolutePath())); - } + // attempt to resolve an already existing unworkable situation where the default tmp dir for poi files + // exists, and is possibly readonly + cleanTmpDir(defaultTmpPath); // process should pass without problem, and afterwards the default tmp dir should exist testProcess(tempDir); @@ -121,32 +123,46 @@ void testApachePoiTmpfileProblem(@TempDir Path tempDir) throws IOException { // invoke the static method to set a new tmp dir, process again (should succeed) and verify that // the new tmpdir is indeed different from the default String myTmpDir = SourceDataScan.setUniqueTempDirStrategyForApachePoi(); - testProcess(tempDir); + testProcess(Paths.get(myTmpDir)); assertNotEquals(defaultTmpPath.toFile().getAbsolutePath(), myTmpDir); // we might have left behind an unworkable situation; attempt to solve that if (Files.exists(defaultTmpPath) && !Files.isWritable(defaultTmpPath)) { assertTrue(defaultTmpPath.toFile().setWritable(true)); } - } - private boolean expectRowNIsLike(int n, List rows, String... expectedValues) { - assert expectedValues.length == 6; - testColumnValue(n, rows.get(n), "Table", expectedValues[0]); - testColumnValue(n, rows.get(n), "Field", expectedValues[1]); - testColumnValue(n, rows.get(n), "Description", expectedValues[2]); - testColumnValue(n, rows.get(n), "Type", expectedValues[3]); - testColumnValue(n, rows.get(n), "Max length", expectedValues[4]); - testColumnValue(n, rows.get(n), "N rows", expectedValues[5]); - return true; + @Test + void testApachePoiTmpFileProblemWithUserConfiguredResolution(@TempDir Path tempDir) throws IOException, ReflectiveOperationException { + // 1. Verify that the poi tmp dir property is used, if set + Path tmpDirFromProperty = tempDir.resolve("setByProperty"); + System.setProperty(SourceDataScan.POI_TMP_DIR_PROPERTY_NAME, tmpDirFromProperty.toFile().getAbsolutePath()); + cleanTmpDir(tmpDirFromProperty); + + SourceDataScan.setUniqueTempDirStrategyForApachePoi(); // need to reset to pick up the property + testProcess(tmpDirFromProperty); + assertTrue(Files.exists(tmpDirFromProperty)); + + cleanTmpDir(tmpDirFromProperty); + + // 2. Verify that the poi tmp dir environment variable is used, if set, and overrules the property set above + Path tmpDirFromEnvironmentVariable = tempDir.resolve("setByEnvVar"); + updateEnv(SourceDataScan.POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME, tmpDirFromEnvironmentVariable.toFile().getAbsolutePath()); + cleanTmpDir(tmpDirFromEnvironmentVariable); + + SourceDataScan.setUniqueTempDirStrategyForApachePoi(); // need to reset to pick up the env. var. + testProcess(tmpDirFromEnvironmentVariable); + assertFalse(Files.exists(tmpDirFromProperty)); + assertTrue(Files.exists(tmpDirFromEnvironmentVariable)); + cleanTmpDir(tmpDirFromEnvironmentVariable); } - private void testColumnValue(int i, Row row, String fieldName, String expected) { - if (!expected.equals(row.get(fieldName))) { - fail(String.format("In row %d, value '%s' was expected for column '%s', but '%s' was found", - i, expected, fieldName, row.get(fieldName))); - } + @SuppressWarnings({ "unchecked" }) + private static void updateEnv(String name, String val) throws ReflectiveOperationException { + Map env = System.getenv(); + Field field = env.getClass().getDeclaredField("m"); + field.setAccessible(true); + ((Map) field.get(env)).put(name, val); } private List getTableNames(DbSettings dbSettings) { try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) { @@ -154,20 +170,17 @@ private List getTableNames(DbSettings dbSettings) { } } - private DbSettings getTestDbSettings() { - DbSettings dbSettings = new DbSettings(); - dbSettings.dbType = DbType.POSTGRESQL; - dbSettings.sourceType = DbSettings.SourceType.DATABASE; - dbSettings.server = postgreSQL.getJdbcUrl(); - dbSettings.database = "public"; // yes, really - dbSettings.user = postgreSQL.getUsername(); - dbSettings.password = postgreSQL.getPassword(); - dbSettings.tables = getTableNames(dbSettings); - - return dbSettings; + private static void cleanTmpDir(Path path) { + if (Files.exists(path)) { + if (!Files.isWritable(path)) { + assertTrue(path.toFile().setWritable(true), + String.format("This test cannot run properly if %s exists but is not writeable. Either remove it or make it writeable", + path.toFile().getAbsolutePath())); + } + assertTrue(deleteDir(path.toFile())); + } } - - private boolean deleteDir(File file) { + private static boolean deleteDir(File file) { if (Files.exists(file.toPath())) { File[] contents = file.listFiles(); if (contents != null) { From e762fa67379b8e8c549b1d4d8ac65f3c072f5a74 Mon Sep 17 00:00:00 2001 From: Jan Blom Date: Wed, 10 May 2023 13:48:21 +0200 Subject: [PATCH 5/8] Cleanup, review tests, avoid repeating string constant(s) --- .../ohdsi/whiteRabbit/WhiteRabbitMain.java | 6 +-- .../whiteRabbit/scan/SourceDataScan.java | 52 +++++++++++++------ .../whiterabbit/scan/TestSourceDataScan.java | 29 ++++++----- 3 files changed, 56 insertions(+), 31 deletions(-) diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/WhiteRabbitMain.java b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/WhiteRabbitMain.java index b0a44fd8..13b34149 100644 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/WhiteRabbitMain.java +++ b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/WhiteRabbitMain.java @@ -265,7 +265,7 @@ else if (iniFile.get("DATA_TYPE").equalsIgnoreCase("BigQuery")) { sourceDataScan.setMaxValues(maxValues); sourceDataScan.setCalculateNumericStats(calculateNumericStats); sourceDataScan.setNumStatsSamplerSize(numericStatsSamplerSize); - sourceDataScan.process(dbSettings, iniFile.get("WORKING_FOLDER") + "/ScanReport.xlsx"); + sourceDataScan.process(dbSettings, iniFile.get("WORKING_FOLDER") + "/" + SourceDataScan.SCAN_REPORT_FILE_NAME); } private JComponent createTabsPanel() { @@ -543,7 +543,7 @@ private JPanel createFakeDataPanel() { folderPanel.setLayout(new BoxLayout(folderPanel, BoxLayout.X_AXIS)); folderPanel.setBorder(BorderFactory.createTitledBorder("Scan report file")); scanReportFileField = new JTextField(); - scanReportFileField.setText((new File("ScanReport.xlsx").getAbsolutePath())); + scanReportFileField.setText((new File(SourceDataScan.SCAN_REPORT_FILE_NAME).getAbsolutePath())); scanReportFileField.setToolTipText("The path to the scan report that will be used as a template to generate the fake data"); folderPanel.add(scanReportFileField); JButton pickButton = new JButton("Pick file"); @@ -1051,7 +1051,7 @@ public void run() { table = folderField.getText() + "/" + table; dbSettings.tables.add(table); } - sourceDataScan.process(dbSettings, folderField.getText() + "/ScanReport.xlsx"); + sourceDataScan.process(dbSettings, folderField.getText() + "/" + SourceDataScan.SCAN_REPORT_FILE_NAME); } } catch (Exception e) { handleError(e); diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java index d82dfa29..55cefff1 100644 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java +++ b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java @@ -60,6 +60,8 @@ public class SourceDataScan { public static int N_FOR_FREE_TEXT_CHECK = 1000; public static int MIN_AVERAGE_LENGTH_FOR_FREE_TEXT = 100; + public final static String SCAN_REPORT_FILE_NAME = "ScanReport.xlsx"; + public static final String POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME = "ORG_OHDSI_WHITERABBIT_POI_TMPDIR"; public static final String POI_TMP_DIR_PROPERTY_NAME = "org.ohdsi.whiterabbit.poi.tmpdir"; @@ -142,28 +144,46 @@ public void process(DbSettings dbSettings, String outputFileName) { * being created on a multi-user system by a user with a too restrictive file mask. */ public static String setUniqueTempDirStrategyForApachePoi() throws IOException { - // TODO: if/when updating poi to 5.x or higher, use DefaultTempFileCreationStrategy.POIFILES instead of a string literal - final String poiFilesDir = "poifiles"; - Path myTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), poiFilesDir); + Path myTmpDir = getDefaultPoiTmpPath(FileUtils.getTempDirectory().toPath()); String userConfiguredPoiTmpDir = getUserConfiguredPoiTmpDir(); if (!StringUtils.isEmpty(userConfiguredPoiTmpDir)) { - myTmpPath = Paths.get(userConfiguredPoiTmpDir); + myTmpDir = setupTmpDir(Paths.get(userConfiguredPoiTmpDir)); + } else { + if (isNotWritable(myTmpDir)) { + // avoid the poi files directory entirely by creating a separate directory in the standard tmp dir + myTmpDir = setupTmpDir(FileUtils.getTempDirectory().toPath()); + } } - Files.createDirectories(myTmpPath); - if (isNotWritable(myTmpPath)) { - // avoid the poi files directory entirely - myTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), UUID.randomUUID().toString()); - Files.createDirectories(myTmpPath); - org.apache.poi.util.TempFile.setTempFileCreationStrategy(new org.apache.poi.util.DefaultTempFileCreationStrategy(myTmpPath.toFile())); + + String tmpDir = myTmpDir.toFile().getAbsolutePath(); + checkWritableTmpDir(tmpDir); + return tmpDir; + } + + public static Path getDefaultPoiTmpPath(Path tmpRoot) { + // TODO: if/when updating poi to 5.x or higher, use DefaultTempFileCreationStrategy.POIFILES instead of a string literal + final String poiFilesDir = "poifiles"; // copied from poi implementation 4.x + return tmpRoot.resolve(poiFilesDir); + } + + private static Path setupTmpDir(Path tmpDir) { + checkWritableTmpDir(tmpDir.toFile().getAbsolutePath()); + Path myTmpDir = Paths.get(tmpDir.toFile().getAbsolutePath(), UUID.randomUUID().toString()); + try { + Files.createDirectory(myTmpDir); + org.apache.poi.util.TempFile.setTempFileCreationStrategy(new org.apache.poi.util.DefaultTempFileCreationStrategy(myTmpDir.toFile())); + } catch (IOException ioException) { + throw new RuntimeException(String.format("Exception while creating directory %s", myTmpDir), ioException); } + return myTmpDir; + } - if (isNotWritable(myTmpPath)){ - String message = String.format("Directory %s is not writable! (used for tmp files for Apache POI)", myTmpPath.toFile().getAbsolutePath()); + private static void checkWritableTmpDir(String dir) { + if (isNotWritable(Paths.get(dir))) { + String message = String.format("Directory %s is not writable! (used for tmp files for Apache POI)", dir); System.out.println(message); - throw new RemoteException(message); + throw new RuntimeException(message); } - - return myTmpPath.toFile().getAbsolutePath(); } private static String getUserConfiguredPoiTmpDir() { @@ -175,7 +195,7 @@ private static String getUserConfiguredPoiTmpDir() { return userConfiguredDir; } - private static boolean isNotWritable(Path path) { + public static boolean isNotWritable(Path path) { final Path testFile = path.resolve("test.txt"); if (Files.exists(path) && Files.isDirectory(path)) { try { diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java index efef9cbf..ecdfed92 100644 --- a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java @@ -68,7 +68,7 @@ class TestSourceDataScan { } void testProcess(Path tempDir) throws IOException { - Path outFile = tempDir.resolve("scanresult.xslx"); + Path outFile = tempDir.resolve(SourceDataScan.SCAN_REPORT_FILE_NAME); SourceDataScan sourceDataScan = new SourceDataScan(); DbSettings dbSettings = ScanTestUtils.getTestPostgreSQLSettings(postgreSQL); @@ -101,24 +101,29 @@ void testApachePoiTmpFileProblemWithAutomaticResolution(@TempDir Path tempDir) t // system property to an empty value System.setProperty(SourceDataScan.POI_TMP_DIR_PROPERTY_NAME, ""); updateEnv(SourceDataScan.POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME, ""); - // TODO: if/when updating poi to 5.x or higher, use DefaultTempFileCreationStrategy.POIFILES instead of a string literal - final String poiFilesDir = "poifiles"; - Path defaultTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), poiFilesDir); + Path defaultTmpPath = SourceDataScan.getDefaultPoiTmpPath(tempDir); - // attempt to resolve an already existing unworkable situation where the default tmp dir for poi files - // exists, and is possibly readonly - cleanTmpDir(defaultTmpPath); + if (!Files.exists(defaultTmpPath)) { + Files.createDirectory(defaultTmpPath); + } else { + if (Files.exists(defaultTmpPath.resolve(SourceDataScan.SCAN_REPORT_FILE_NAME))) { + Files.delete(defaultTmpPath.resolve(SourceDataScan.SCAN_REPORT_FILE_NAME)); + } + } // process should pass without problem, and afterwards the default tmp dir should exist - testProcess(tempDir); + testProcess(defaultTmpPath); assertTrue(Files.exists(defaultTmpPath)); // provoke the problem situation. make the default tmp dir readonly, try to process again + assertTrue(Files.deleteIfExists(defaultTmpPath.resolve(SourceDataScan.SCAN_REPORT_FILE_NAME))); // or Apache Poi will happily reuse it assertTrue(defaultTmpPath.toFile().setReadOnly()); + System.out.println("defaultTmpPath: " + defaultTmpPath.toFile().getAbsolutePath()); RuntimeException thrown = assertThrows(RuntimeException.class, () -> { - testProcess(tempDir); + testProcess(defaultTmpPath); + System.out.println("Hold it!"); }); - assertTrue(thrown.getMessage().endsWith("Permission denied")); + assertTrue(thrown.getMessage().contains("Permission denied")); // invoke the static method to set a new tmp dir, process again (should succeed) and verify that // the new tmpdir is indeed different from the default @@ -137,7 +142,7 @@ void testApachePoiTmpFileProblemWithUserConfiguredResolution(@TempDir Path tempD // 1. Verify that the poi tmp dir property is used, if set Path tmpDirFromProperty = tempDir.resolve("setByProperty"); System.setProperty(SourceDataScan.POI_TMP_DIR_PROPERTY_NAME, tmpDirFromProperty.toFile().getAbsolutePath()); - cleanTmpDir(tmpDirFromProperty); + Files.createDirectories(tmpDirFromProperty); SourceDataScan.setUniqueTempDirStrategyForApachePoi(); // need to reset to pick up the property testProcess(tmpDirFromProperty); @@ -148,7 +153,7 @@ void testApachePoiTmpFileProblemWithUserConfiguredResolution(@TempDir Path tempD // 2. Verify that the poi tmp dir environment variable is used, if set, and overrules the property set above Path tmpDirFromEnvironmentVariable = tempDir.resolve("setByEnvVar"); updateEnv(SourceDataScan.POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME, tmpDirFromEnvironmentVariable.toFile().getAbsolutePath()); - cleanTmpDir(tmpDirFromEnvironmentVariable); + Files.createDirectories(tmpDirFromEnvironmentVariable); SourceDataScan.setUniqueTempDirStrategyForApachePoi(); // need to reset to pick up the env. var. testProcess(tmpDirFromEnvironmentVariable); From bbfbb8ffe22084008ab6177c75063ecae087e6a3 Mon Sep 17 00:00:00 2001 From: Jan Blom Date: Wed, 10 May 2023 13:48:56 +0200 Subject: [PATCH 6/8] Update documentation for Apachie poi tmp dir workaround --- docs/WhiteRabbit.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/WhiteRabbit.md b/docs/WhiteRabbit.md index a5b2bcf8..db877d20 100644 --- a/docs/WhiteRabbit.md +++ b/docs/WhiteRabbit.md @@ -45,6 +45,21 @@ To increase the memory (in this example to 2400m), either set the environment va To lower the memory, set one of these variables to e.g. `-Xmx600m`. If you have a 32-bit Java VM installed and problems persist, consider installing 64-bit Java. +### Temporary Directory for Apache POI +(This addresses [issue 293](https://github.com/OHDSI/WhiteRabbit/issues/293)) + +The Apache POI library is used for generated the report in Excel format. This library creates its own directory for +temporary files in the system temporary directory. It has been reported that this can cause problems in a multi-user +environment, when each user attempts to create this directory with too restrictive permissions (read-only for other +users). WhiteRabbit from version 0.10.9 tries to circumvent this automatically, but this workaround can fail due +to concurrency problems. If you want to prevent this from happening, you can set either the environment variable +```ORG_OHDSI_WHITERABBIT_POI_TMPDIR``` or the Java system property ```org.ohdsi.whiterabbit.poi.tmpdir``` when starting +WhiteRabbit (best would be to add this to the ```whiteRabbit``` or ```whiteRabbit.bat``` script). Please note that +this directory should exist before your start WhiteRabbit, and is writable by any user that may want to run +WhiteRabbit. Each user will then have a separate subdirectory, so that further conflicts should be avoided. +Also, WhiteRabbit now attempts to detect this situation before the scan starts, so that the scan is not started, and +the problem identified sooner. + ## Support All source code, descriptions and input/output examples are available on GitHub: From 9c8419429a1fd5d541dd3ff80ca972605ac5f15a Mon Sep 17 00:00:00 2001 From: Jan Blom Date: Wed, 10 May 2023 14:18:23 +0200 Subject: [PATCH 7/8] Code cleanup --- .../java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java index ecdfed92..c36e99ad 100644 --- a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java @@ -118,11 +118,9 @@ void testApachePoiTmpFileProblemWithAutomaticResolution(@TempDir Path tempDir) t // provoke the problem situation. make the default tmp dir readonly, try to process again assertTrue(Files.deleteIfExists(defaultTmpPath.resolve(SourceDataScan.SCAN_REPORT_FILE_NAME))); // or Apache Poi will happily reuse it assertTrue(defaultTmpPath.toFile().setReadOnly()); - System.out.println("defaultTmpPath: " + defaultTmpPath.toFile().getAbsolutePath()); RuntimeException thrown = assertThrows(RuntimeException.class, () -> { - testProcess(defaultTmpPath); - System.out.println("Hold it!"); - }); + testProcess(defaultTmpPath); + }); assertTrue(thrown.getMessage().contains("Permission denied")); // invoke the static method to set a new tmp dir, process again (should succeed) and verify that From cf092246ceb2e5150873071fb0d658e1b2cb790b Mon Sep 17 00:00:00 2001 From: Jan Blom Date: Wed, 10 May 2023 14:18:47 +0200 Subject: [PATCH 8/8] Review documentation --- docs/WhiteRabbit.md | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/docs/WhiteRabbit.md b/docs/WhiteRabbit.md index db877d20..8790f4f0 100644 --- a/docs/WhiteRabbit.md +++ b/docs/WhiteRabbit.md @@ -48,17 +48,19 @@ If you have a 32-bit Java VM installed and problems persist, consider installing ### Temporary Directory for Apache POI (This addresses [issue 293](https://github.com/OHDSI/WhiteRabbit/issues/293)) -The Apache POI library is used for generated the report in Excel format. This library creates its own directory for -temporary files in the system temporary directory. It has been reported that this can cause problems in a multi-user -environment, when each user attempts to create this directory with too restrictive permissions (read-only for other -users). WhiteRabbit from version 0.10.9 tries to circumvent this automatically, but this workaround can fail due -to concurrency problems. If you want to prevent this from happening, you can set either the environment variable -```ORG_OHDSI_WHITERABBIT_POI_TMPDIR``` or the Java system property ```org.ohdsi.whiterabbit.poi.tmpdir``` when starting -WhiteRabbit (best would be to add this to the ```whiteRabbit``` or ```whiteRabbit.bat``` script). Please note that -this directory should exist before your start WhiteRabbit, and is writable by any user that may want to run -WhiteRabbit. Each user will then have a separate subdirectory, so that further conflicts should be avoided. -Also, WhiteRabbit now attempts to detect this situation before the scan starts, so that the scan is not started, and -the problem identified sooner. +The Apache POI library is used for generating the scan report in Excel format. This library creates its own directory for +temporary files in the system temporary directory. In [issue 293](https://github.com/OHDSI/WhiteRabbit/issues/293) it has +been reported that this can cause problems in a multi-user environment, when multiple user attempt to create this directory +with too restrictive permissions (read-only for other users). +WhiteRabbit from version 0.10.9 attempts to circumvent this automatically, but this workaround can fail due +to concurrency problems. If you want to prevent this from happening entirely , you can set either the environment variable +```ORG_OHDSI_WHITERABBIT_POI_TMPDIR``` or the Java system property ```org.ohdsi.whiterabbit.poi.tmpdir``` to a +temporary directory of your choice when starting WhiteRabbit (best would be to add this to the ```whiteRabbit``` or +```whiteRabbit.bat``` script). Please note that this directory should exist before your start WhiteRabbit, +and that it should be writable by any user that may want to run WhiteRabbit. +For each user a separate subdirectory will be created, so that permission related conflicts should be avoided. +Also, WhiteRabbit now attempts to detect this situation before the scan starts. If this is detected, +the scan is not started, and the problem identified before the scan, instead of afterwards. ## Support All source code, descriptions and input/output examples are available on GitHub: