diff --git a/src/org/ohdsi/rabbitInAHat/ETLTestFrameWorkGenerator.java b/src/org/ohdsi/rabbitInAHat/ETLTestFrameWorkGenerator.java index 6fed692a..8c15ade4 100644 --- a/src/org/ohdsi/rabbitInAHat/ETLTestFrameWorkGenerator.java +++ b/src/org/ohdsi/rabbitInAHat/ETLTestFrameWorkGenerator.java @@ -32,22 +32,26 @@ public class ETLTestFrameWorkGenerator { public static String[] keywords = new String[] { "ADD", "ALL", "ALTER", "AND", "ANY", "AS", "ASC", "AUTHORIZATION", "BACKUP", "BEGIN", "BETWEEN", - "BREAK", "BROWSE", "BULK", "BY", "CASCADE", "CASE", "CHECK", "CHECKPOINT", "CLOSE", "CLUSTERED", "COALESCE", "COLLATE", "COLUMN", "COMMIT", - "COMPUTE", "CONSTRAINT", "CONTAINS", "CONTAINSTABLE", "CONTINUE", "CONVERT", "CREATE", "CROSS", "CURRENT", "CURRENT_DATE", "CURRENT_TIME", - "CURRENT_TIMESTAMP", "CURRENT_USER", "CURSOR", "DATABASE", "DBCC", "DEALLOCATE", "DECLARE", "DEFAULT", "DELETE", "DENY", "DESC", "DISK", - "DISTINCT", "DISTRIBUTED", "DOUBLE", "DROP", "DUMP", "ELSE", "END", "ERRLVL", "ESCAPE", "EXCEPT", "EXEC", "EXECUTE", "EXISTS", "EXIT", "EXTERNAL", - "FETCH", "FILE", "FILLFACTOR", "FOR", "FOREIGN", "FREETEXT", "FREETEXTTABLE", "FROM", "FULL", "FUNCTION", "GOTO", "GRANT", "GROUP", "HAVING", - "HOLDLOCK", "IDENTITY", "IDENTITY_INSERT", "IDENTITYCOL", "IF", "IN", "INDEX", "INNER", "INSERT", "INTERSECT", "INTO", "IS", "JOIN", "KEY", "KILL", - "LEFT", "LIKE", "LINENO", "LOAD", "MERGE", "NATIONAL", "NOCHECK", "NONCLUSTERED", "NOT", "NULL", "NULLIF", "OF", "OFF", "OFFSETS", "ON", "OPEN", - "OPENDATASOURCE", "OPENQUERY", "OPENROWSET", "OPENXML", "OPTION", "OR", "ORDER", "OUTER", "OVER", "PERCENT", "PIVOT", "PLAN", "PRECISION", - "PRIMARY", "PRINT", "PROC", "PROCEDURE", "PUBLIC", "RAISERROR", "READ", "READTEXT", "RECONFIGURE", "REFERENCES", "REPLICATION", "RESTORE", - "RESTRICT", "RETURN", "REVERT", "REVOKE", "RIGHT", "ROLLBACK", "ROWCOUNT", "ROWGUIDCOL", "RULE", "SAVE", "SCHEMA", "SECURITYAUDIT", "SELECT", - "SEMANTICKEYPHRASETABLE", "SEMANTICSIMILARITYDETAILSTABLE", "SEMANTICSIMILARITYTABLE", "SESSION_USER", "SET", "SETUSER", "SHUTDOWN", "SOME", - "STATISTICS", "SYSTEM_USER", "TABLE", "TABLESAMPLE", "TEXTSIZE", "THEN", "TO", "TOP", "TRAN", "TRANSACTION", "TRIGGER", "TRUNCATE", "TRY_CONVERT", - "TSEQUAL", "UNION", "UNIQUE", "UNPIVOT", "UPDATE", "UPDATETEXT", "USE", "USER", "VALUES", "VARYING", "VIEW", "WAITFOR", "WHEN", "WHERE", "WHILE", - "WITH", "WITHIN GROUP", "WRITETEXT" }; + "BREAK", "BROWSE", "BULK", "BY", "CASCADE", "CASE", "CHECK", "CHECKPOINT", "CLOSE", "CLUSTERED", "COALESCE", "COLLATE", "COLUMN", "COMMIT", + "COMPUTE", "CONSTRAINT", "CONTAINS", "CONTAINSTABLE", "CONTINUE", "CONVERT", "CREATE", "CROSS", "CURRENT", "CURRENT_DATE", "CURRENT_TIME", + "CURRENT_TIMESTAMP", "CURRENT_USER", "CURSOR", "DATABASE", "DBCC", "DEALLOCATE", "DECLARE", "DEFAULT", "DELETE", "DENY", "DESC", "DISK", + "DISTINCT", "DISTRIBUTED", "DOUBLE", "DROP", "DUMP", "ELSE", "END", "ERRLVL", "ESCAPE", "EXCEPT", "EXEC", "EXECUTE", "EXISTS", "EXIT", "EXTERNAL", + "FETCH", "FILE", "FILLFACTOR", "FOR", "FOREIGN", "FREETEXT", "FREETEXTTABLE", "FROM", "FULL", "FUNCTION", "GOTO", "GRANT", "GROUP", "HAVING", + "HOLDLOCK", "IDENTITY", "IDENTITY_INSERT", "IDENTITYCOL", "IF", "IN", "INDEX", "INNER", "INSERT", "INTERSECT", "INTO", "IS", "JOIN", "KEY", "KILL", + "LEFT", "LIKE", "LINENO", "LOAD", "MERGE", "NATIONAL", "NOCHECK", "NONCLUSTERED", "NOT", "NULL", "NULLIF", "OF", "OFF", "OFFSETS", "ON", "OPEN", + "OPENDATASOURCE", "OPENQUERY", "OPENROWSET", "OPENXML", "OPTION", "OR", "ORDER", "OUTER", "OVER", "PERCENT", "PIVOT", "PLAN", "PRECISION", + "PRIMARY", "PRINT", "PROC", "PROCEDURE", "PUBLIC", "RAISERROR", "READ", "READTEXT", "RECONFIGURE", "REFERENCES", "REPLICATION", "RESTORE", + "RESTRICT", "RETURN", "REVERT", "REVOKE", "RIGHT", "ROLLBACK", "ROWCOUNT", "ROWGUIDCOL", "RULE", "SAVE", "SCHEMA", "SECURITYAUDIT", "SELECT", + "SEMANTICKEYPHRASETABLE", "SEMANTICSIMILARITYDETAILSTABLE", "SEMANTICSIMILARITYTABLE", "SESSION_USER", "SET", "SETUSER", "SHUTDOWN", "SOME", + "STATISTICS", "SYSTEM_USER", "TABLE", "TABLESAMPLE", "TEXTSIZE", "THEN", "TO", "TOP", "TRAN", "TRANSACTION", "TRIGGER", "TRUNCATE", "TRY_CONVERT", + "TSEQUAL", "UNION", "UNIQUE", "UNPIVOT", "UPDATE", "UPDATETEXT", "USE", "USER", "VALUES", "VARYING", "VIEW", "WAITFOR", "WHEN", "WHERE", "WHILE", + "WITH", "WITHIN GROUP", "WRITETEXT" }; private static Set keywordSet; + private static int DEFAULT = 0; + private static int NEGATE = 1; + private static int COUNT = 2; + public static void generate(ETL etl, String filename) { keywordSet = new HashSet(); @@ -64,9 +68,12 @@ private static List generateRScript(ETL etl) { List r = new ArrayList(); createInitFunction(r, etl.getSourceDatabase()); createDeclareTestFunction(r); + createSetDefaultFunctions(r, etl.getSourceDatabase()); + createGetDefaultFunctions(r, etl.getSourceDatabase()); createAddFunctions(r, etl.getSourceDatabase()); - createExpectFunctions(r, false, etl.getTargetDatabase()); - createExpectFunctions(r, true, etl.getTargetDatabase()); + createExpectFunctions(r, DEFAULT, etl.getTargetDatabase()); + createExpectFunctions(r, NEGATE, etl.getTargetDatabase()); + createExpectFunctions(r, COUNT, etl.getTargetDatabase()); return r; } @@ -81,7 +88,7 @@ private static void createDeclareTestFunction(List r) { r.add(""); } - private static void createExpectFunctions(List r, boolean negation, Database database) { + private static void createExpectFunctions(List r, int type, Database database) { for (Table table : database.getTables()) { StringBuilder line = new StringBuilder(); String rTableName = convertToRName(table.getName()); @@ -107,10 +114,12 @@ private static void createExpectFunctions(List r, boolean negation, Data testDefs.add(""); } - if (negation) + if (type == DEFAULT) + line.append("expect_" + rTableName + " <- function("); + else if (type == NEGATE) line.append("expect_no_" + rTableName + " <- function("); else - line.append("expect_" + rTableName + " <- function("); + line.append("expect_count_" + rTableName + " <- function(rowCount, "); line.append(StringUtilities.join(argDefs, ", ")); line.append(") {"); @@ -128,10 +137,12 @@ private static void createExpectFunctions(List r, boolean negation, Data r.addAll(testDefs); - if (negation) + if (type == DEFAULT) + r.add(" statement <- paste0(statement, \") = 0 THEN 'FAIL' ELSE 'PASS' END AS status;\")"); + else if (type == NEGATE) r.add(" statement <- paste0(statement, \") != 0 THEN 'FAIL' ELSE 'PASS' END AS status;\")"); else - r.add(" statement <- paste0(statement, \") = 0 THEN 'FAIL' ELSE 'PASS' END AS status;\")"); + r.add(" statement <- paste0(statement, \") != \",rowCount ,\" THEN 'FAIL' ELSE 'PASS' END AS status;\")"); r.add(" assign(\"testSql\", c(get(\"testSql\", envir = globalenv()), statement), envir = globalenv())"); r.add(" invisible(statement)"); @@ -162,9 +173,28 @@ private static void createInitFunction(List r, Database database) { r.add(" testSql <- c(testSql, \"CREATE TABLE test_results (id INT, description VARCHAR(512), test VARCHAR(256), status VARCHAR(5));\")"); r.add(" testSql <- c(testSql, \"\")"); - r.add(" assign(\"testSql\", testSql, envir = globalenv()) "); - r.add(" assign(\"testId\", 1, envir = globalenv()) "); - r.add(" assign(\"testDescription\", \"\", envir = globalenv()) "); + r.add(" assign(\"testSql\", testSql, envir = globalenv())"); + r.add(" assign(\"testId\", 1, envir = globalenv())"); + r.add(" assign(\"testDescription\", \"\", envir = globalenv())"); + r.add(""); + r.add(" defaultValues <- new.env(parent = globalenv())"); + r.add(" assign(\"defaultValues\", defaultValues, envir = globalenv())"); + for (Table table : database.getTables()) { + String rTableName = convertToRName(table.getName()); + r.add(""); + r.add(" defaults <- list()"); + for (Field field : table.getFields()) { + String rFieldName = field.getName().replaceAll(" ", "_").replaceAll("-", "_"); + String defaultValue; + if (field.getValueCounts().length == 0) + defaultValue = ""; + else + defaultValue = field.getValueCounts()[0][0]; + if (!defaultValue.equals("")) + r.add(" defaults$" + rFieldName + " <- \"" + defaultValue + "\""); + } + r.add(" assign(\"" + rTableName + "\", defaults, envir = defaultValues)"); + } r.add("}"); r.add(""); r.add("initFramework()"); @@ -181,16 +211,10 @@ private static void createAddFunctions(List r, Database database) { for (Field field : table.getFields()) { String rFieldName = field.getName().replaceAll(" ", "_").replaceAll("-", "_"); String sqlFieldName = convertToSqlName(field.getName()); - String defaultValue; - if (field.getValueCounts().length == 0) - defaultValue = ""; - else - defaultValue = field.getValueCounts()[0][0]; - if (defaultValue.equals("")) - argDefs.add(rFieldName + " = NULL"); - else - argDefs.add(rFieldName + " = \"" + defaultValue + "\""); - + argDefs.add(rFieldName); + insertLines.add(" if (missing(" + rFieldName + ")) {"); + insertLines.add(" " + rFieldName + " <- defaults$" + rFieldName); + insertLines.add(" }"); insertLines.add(" if (!is.null(" + rFieldName + ")) {"); insertLines.add(" insertFields <- c(insertFields, \"" + sqlFieldName + "\")"); insertLines.add(" insertValues <- c(insertValues, " + rFieldName + ")"); @@ -202,7 +226,7 @@ private static void createAddFunctions(List r, Database database) { line.append(StringUtilities.join(argDefs, ", ")); line.append(") {"); r.add(line.toString()); - + r.add(" defaults <- get(\"" + rTableName + "\", envir = defaultValues)"); r.add(" insertFields <- c()"); r.add(" insertValues <- c()"); r.addAll(insertLines); @@ -221,6 +245,47 @@ private static void createAddFunctions(List r, Database database) { r.add(""); } } + + private static void createSetDefaultFunctions(List r, Database database) { + for (Table table : database.getTables()) { + StringBuilder line = new StringBuilder(); + String rTableName = convertToRName(table.getName()); + List argDefs = new ArrayList(); + List insertLines = new ArrayList(); + for (Field field : table.getFields()) { + String rFieldName = field.getName().replaceAll(" ", "_").replaceAll("-", "_"); + argDefs.add(rFieldName); + insertLines.add(" if (!missing(" + rFieldName + ")) {"); + insertLines.add(" defaults$" + rFieldName + " <- " + rFieldName); + insertLines.add(" }"); + } + + line.append("set_defaults_" + rTableName + " <- function("); + line.append(StringUtilities.join(argDefs, ", ")); + line.append(") {"); + r.add(line.toString()); + r.add(" defaults <- get(\"" + rTableName + "\", envir = defaultValues)"); + r.addAll(insertLines); + + + + r.add(" assign(\"" + rTableName + "\", defaults, envir = defaultValues)"); + r.add(" invisible(defaults)"); + r.add("}"); + r.add(""); + } + } + + private static void createGetDefaultFunctions(List r, Database database) { + for (Table table : database.getTables()) { + String rTableName = convertToRName(table.getName()); + r.add("get_defaults_" + rTableName + " <- function() {"); + r.add(" defaults <- get(\"" + rTableName + "\", envir = defaultValues)"); + r.add(" return(defaults)"); + r.add("}"); + r.add(""); + } + } private static String convertToRName(String name) { name = name.replaceAll(" ", "_").replaceAll("-", "_");