diff --git a/.classpath b/.classpath
index 870518be..ebb13e4d 100644
--- a/.classpath
+++ b/.classpath
@@ -1,28 +1,26 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/README.md b/README.md
index 2b83f78f..defc9b98 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,7 @@ Features
- Can scan databases in SQL Server, Oracle, PostgreSQL, MySQL, MS Access, Amazon RedShift, and CSV files
- The scan report contains information on tables, fields, and frequency distributions of values
- Cutoff on the minimum frequency of values to protect patient privacy
+- WhiteRabbit can be run with a graphical user interface or from the command prompt
- Interactive tool (Rabbit in a Hat) for designing the ETL using the scan report as basis
- Rabbit in a Hat generates ETL specification document according to OMOP template
@@ -49,6 +50,8 @@ WhiteRabbit
2. Unzip the download
3. Double-click on WhiteRabbit.jar to start White Rabbit.
+(See the [Wiki](http://www.ohdsi.org/web/wiki/doku.php?id=documentation:software:whiterabbit#running_from_the_command_line) for details on how to run from the command prompt instead)
+
Rabbit-In-A-Hat
1. Using the files downloaded for WhiteRabbit, double-click on RabbitInAHat.jar to start Rabbit-In-A-Hat.
@@ -66,5 +69,7 @@ WhiteRabbit is licensed under Apache License 2.0
Development
===========
White Rabbit and Rabbit in a Hat are being developed in Eclipse. Contributions are welcome.
-###Development status
-Beta testing - We invite everyone to try the software, and report any issues they may find.
+
+### Development status
+
+Production. This program is being used by many people.
diff --git a/build.xml b/build.xml
index 4fc9938c..e2f614b1 100644
--- a/build.xml
+++ b/build.xml
@@ -22,7 +22,7 @@
-
+
@@ -30,7 +30,7 @@
-
+
@@ -40,8 +40,7 @@
-
-
+
@@ -51,7 +50,6 @@
-
diff --git a/iniFileExamples/WhiteRabbit.ini b/iniFileExamples/WhiteRabbit.ini
new file mode 100644
index 00000000..64c07618
--- /dev/null
+++ b/iniFileExamples/WhiteRabbit.ini
@@ -0,0 +1,12 @@
+WORKING_FOLDER = /users/joe # Path to the folder where all output will be written
+DATA_TYPE = PostgreSQL # "Delimited text files", "MySQL", "Oracle", "SQL Server", "PostgreSQL", "MS Access", or "Redshift"
+SERVER_LOCATION = 127.0.0.1/data_base_name # Name or address of the server. For Postgres, add the database name
+USER_NAME = joe # User name for the database
+PASSWORD = supersecret # Password for the database
+DATABASE_NAME = schema_name # Name of the data schema used
+DELIMITER = , # The delimiter that separates values
+TABLES_TO_SCAN = * # Comma-delimited list of table names to scan. Use "*" (asterisk) to include all tables in the database
+SCAN_FIELD_VALUES = yes # Include a frequency count of field values in the scan report? "yes" or "no"
+MIN_CELL_COUNT = 5 # Minimum frequency for a field value to be included in the report
+MAX_DISTINCT_VALUES = 1000 # Maximum number of distinct values per field to be reported
+ROWS_PER_TABLE = 100000 # Maximum number of rows per table to be scanned for field values
diff --git a/lib/jtds-1.3.1-v20140512.jar b/lib/jtds-1.3.1-v20140512.jar
deleted file mode 100644
index 94a66872..00000000
Binary files a/lib/jtds-1.3.1-v20140512.jar and /dev/null differ
diff --git a/lib/ojdbc5.jar b/lib/ojdbc5.jar
deleted file mode 100644
index e85e3ea3..00000000
Binary files a/lib/ojdbc5.jar and /dev/null differ
diff --git a/lib/ojdbc6.jar b/lib/ojdbc6.jar
deleted file mode 100644
index b663cd24..00000000
Binary files a/lib/ojdbc6.jar and /dev/null differ
diff --git a/lib/ojdbc7.jar b/lib/ojdbc7.jar
new file mode 100644
index 00000000..fd38a6c4
Binary files /dev/null and b/lib/ojdbc7.jar differ
diff --git a/src/org/ohdsi/databases/DbType.java b/src/org/ohdsi/databases/DbType.java
index 1ec447f7..2f56b641 100644
--- a/src/org/ohdsi/databases/DbType.java
+++ b/src/org/ohdsi/databases/DbType.java
@@ -20,21 +20,22 @@
public class DbType {
public static DbType MYSQL = new DbType("mysql");
public static DbType MSSQL = new DbType("mssql");
+ public static DbType PDW = new DbType("pdw");
public static DbType ORACLE = new DbType("oracle");
public static DbType POSTGRESQL = new DbType("postgresql");
public static DbType MSACCESS = new DbType("msaccess");
- public static DbType REDSHIFT = new DbType("redshift");
-
+ public static DbType REDSHIFT = new DbType("redshift");
+
private enum Type {
- MYSQL, MSSQL, ORACLE, POSTGRESQL, MSACCESS, REDSHIFT
+ MYSQL, MSSQL, PDW, ORACLE, POSTGRESQL, MSACCESS, REDSHIFT
};
-
- private Type type;
-
+
+ private Type type;
+
public DbType(String type) {
this.type = Type.valueOf(type.toUpperCase());
}
-
+
public boolean equals(Object other) {
if (other instanceof DbType && ((DbType) other).type == type)
return true;
diff --git a/src/org/ohdsi/databases/RichConnection.java b/src/org/ohdsi/databases/RichConnection.java
index 3fcede20..de412ced 100644
--- a/src/org/ohdsi/databases/RichConnection.java
+++ b/src/org/ohdsi/databases/RichConnection.java
@@ -143,7 +143,7 @@ public List getTableNames(String database) {
String query = null;
if (dbType == DbType.MYSQL) {
query = "SHOW TABLES IN " + database;
- } else if (dbType == DbType.MSSQL) {
+ } else if (dbType == DbType.MSSQL || dbType == DbType.PDW) {
query = "SELECT name FROM " + database + ".sys.tables ORDER BY name";
} else if (dbType == DbType.ORACLE) {
query = "SELECT table_name FROM all_tables WHERE owner='" + database.toUpperCase() + "'";
@@ -160,7 +160,7 @@ public List getTableNames(String database) {
public List getFieldNames(String table) {
List names = new ArrayList();
- if (dbType == DbType.MSSQL) {
+ if (dbType == DbType.MSSQL || dbType == DbType.PDW) {
for (Row row : query("SELECT name FROM syscolumns WHERE id=OBJECT_ID('" + table + "')"))
names.add(row.get("name"));
} else if (dbType == DbType.MYSQL)
@@ -193,16 +193,14 @@ public ResultSet getMsAccessFieldNames(String table){
public long getTableSize(String tableName) {
QueryResult qr = null;
Long returnVal = null;
- if (dbType == DbType.MSSQL || dbType == DbType.MSACCESS)
+ if (dbType == DbType.MSSQL|| dbType == DbType.PDW)
+ qr = query("SELECT COUNT_BIG(*) FROM [" + tableName + "];");
+ else if (dbType == DbType.MSACCESS )
qr = query("SELECT COUNT(*) FROM [" + tableName + "];");
- //return Long.parseLong(query("SELECT COUNT(*) FROM [" + tableName + "];").iterator().next().getCells().get(0));
else
qr = query("SELECT COUNT(*) FROM " + tableName + ";");
- // return Long.parseLong(query("SELECT COUNT(*) FROM " + tableName + ";").iterator().next().getCells().get(0));
-
- // Obtain the value and close the connection
try {
- returnVal = Long.parseLong(query("SELECT COUNT(*) FROM " + tableName + ";").iterator().next().getCells().get(0));
+ returnVal = Long.parseLong(qr.iterator().next().getCells().get(0));
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
@@ -415,7 +413,7 @@ else if (maxLength > 255)
return columnNameToSqlName(name) + " text";
else
return columnNameToSqlName(name) + " varchar(255)";
- } else if (dbType == DbType.MSSQL) {
+ } else if (dbType == DbType.MSSQL || dbType == DbType.PDW) {
if (isNumeric) {
if (maxLength < 10)
return columnNameToSqlName(name) + " int";
diff --git a/src/org/ohdsi/rabbitInAHat/ETLPackageTestFrameWorkGenerator.java b/src/org/ohdsi/rabbitInAHat/ETLPackageTestFrameWorkGenerator.java
new file mode 100644
index 00000000..3459c449
--- /dev/null
+++ b/src/org/ohdsi/rabbitInAHat/ETLPackageTestFrameWorkGenerator.java
@@ -0,0 +1,377 @@
+/*******************************************************************************
+ * Copyright 2016 Observational Health Data Sciences and Informatics
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.rabbitInAHat;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.ohdsi.rabbitInAHat.dataModel.Database;
+import org.ohdsi.rabbitInAHat.dataModel.ETL;
+import org.ohdsi.rabbitInAHat.dataModel.Field;
+import org.ohdsi.rabbitInAHat.dataModel.Table;
+import org.ohdsi.utilities.StringUtilities;
+import org.ohdsi.utilities.files.WriteTextFile;
+
+public class ETLPackageTestFrameWorkGenerator {
+
+ /**
+  * Upper-case SQL keywords. generate() copies these into keywordSet, and convertToSqlName()
+  * wraps any table or field name that matches one (after upper-casing) in square brackets.
+  * NOTE(review): the entries look like the SQL Server (T-SQL) reserved word list - confirm the source.
+  */
+ public static String[] keywords = new String[] { "ADD", "ALL", "ALTER", "AND", "ANY", "AS", "ASC", "AUTHORIZATION", "BACKUP", "BEGIN", "BETWEEN",
+ "BREAK", "BROWSE", "BULK", "BY", "CASCADE", "CASE", "CHECK", "CHECKPOINT", "CLOSE", "CLUSTERED", "COALESCE", "COLLATE", "COLUMN", "COMMIT",
+ "COMPUTE", "CONSTRAINT", "CONTAINS", "CONTAINSTABLE", "CONTINUE", "CONVERT", "CREATE", "CROSS", "CURRENT", "CURRENT_DATE", "CURRENT_TIME",
+ "CURRENT_TIMESTAMP", "CURRENT_USER", "CURSOR", "DATABASE", "DBCC", "DEALLOCATE", "DECLARE", "DEFAULT", "DELETE", "DENY", "DESC", "DISK",
+ "DISTINCT", "DISTRIBUTED", "DOUBLE", "DROP", "DUMP", "ELSE", "END", "ERRLVL", "ESCAPE", "EXCEPT", "EXEC", "EXECUTE", "EXISTS", "EXIT", "EXTERNAL",
+ "FETCH", "FILE", "FILLFACTOR", "FOR", "FOREIGN", "FREETEXT", "FREETEXTTABLE", "FROM", "FULL", "FUNCTION", "GOTO", "GRANT", "GROUP", "HAVING",
+ "HOLDLOCK", "IDENTITY", "IDENTITY_INSERT", "IDENTITYCOL", "IF", "IN", "INDEX", "INNER", "INSERT", "INTERSECT", "INTO", "IS", "JOIN", "KEY", "KILL",
+ "LEFT", "LIKE", "LINENO", "LOAD", "MERGE", "NATIONAL", "NOCHECK", "NONCLUSTERED", "NOT", "NULL", "NULLIF", "OF", "OFF", "OFFSETS", "ON", "OPEN",
+ "OPENDATASOURCE", "OPENQUERY", "OPENROWSET", "OPENXML", "OPTION", "OR", "ORDER", "OUTER", "OVER", "PERCENT", "PIVOT", "PLAN", "PRECISION",
+ "PRIMARY", "PRINT", "PROC", "PROCEDURE", "PUBLIC", "RAISERROR", "READ", "READTEXT", "RECONFIGURE", "REFERENCES", "REPLICATION", "RESTORE",
+ "RESTRICT", "RETURN", "REVERT", "REVOKE", "RIGHT", "ROLLBACK", "ROWCOUNT", "ROWGUIDCOL", "RULE", "SAVE", "SCHEMA", "SECURITYAUDIT", "SELECT",
+ "SEMANTICKEYPHRASETABLE", "SEMANTICSIMILARITYDETAILSTABLE", "SEMANTICSIMILARITYTABLE", "SESSION_USER", "SET", "SETUSER", "SHUTDOWN", "SOME",
+ "STATISTICS", "SYSTEM_USER", "TABLE", "TABLESAMPLE", "TEXTSIZE", "THEN", "TO", "TOP", "TRAN", "TRANSACTION", "TRIGGER", "TRUNCATE", "TRY_CONVERT",
+ "TSEQUAL", "UNION", "UNIQUE", "UNPIVOT", "UPDATE", "UPDATETEXT", "USE", "USER", "VALUES", "VARYING", "VIEW", "WAITFOR", "WHEN", "WHERE", "WHILE",
+ "WITH", "WITHIN GROUP", "WRITETEXT" };
+
+ private static Set keywordSet;
+ private static int DEFAULT = 0;
+ private static int NEGATE = 1;
+ private static int COUNT = 2;
+
+
+ /**
+  * Generates the R test-framework script for the given ETL specification and writes it,
+  * one line at a time, to the given file.
+  *
+  * @param etl      ETL specification whose source and target databases drive the generated R functions
+  * @param filename path of the file the script is written to
+  */
+ public static void generate(ETL etl, String filename) {
+     // Rebuilt on every call; convertToSqlName() consults this set while the script is generated.
+     keywordSet = new HashSet<String>();
+     for (String keyword : keywords)
+         keywordSet.add(keyword);
+     List<String> r = generateRScript(etl);
+     WriteTextFile out = new WriteTextFile(filename);
+     try {
+         for (String line : r)
+             out.writeln(line);
+     } finally {
+         // Always release the output file, even when writing a line fails.
+         out.close();
+     }
+ }
+
+ /**
+  * Assembles the complete R script as a list of lines: framework initialisation, test
+  * declaration helpers, the per-source-table default/add functions, and the per-target-table
+  * expect/lookup functions, in that order.
+  *
+  * @param etl ETL specification providing the source and target databases
+  * @return the script lines in output order
+  */
+ private static List generateRScript(ETL etl) {
+     List script = new ArrayList();
+
+     // Functions operating on the source schema (test data insertion).
+     Database source = etl.getSourceDatabase();
+     createInitFunction(script, source);
+     createDeclareTestFunction(script);
+     createSetDefaultFunctions(script, source);
+     createGetDefaultFunctions(script, source);
+     createAddFunctions(script, source);
+
+     // Functions operating on the target (CDM) schema (expectations and lookups).
+     Database target = etl.getTargetDatabase();
+     int[] expectModes = { DEFAULT, NEGATE, COUNT };
+     for (int mode : expectModes)
+         createExpectFunctions(script, mode, target);
+     createLookupFunctions(script, target);
+
+     return script;
+ }
+
+ /**
+  * Appends the R definitions of declareTestGroup() and declareTest() to the script.
+  * The generated declareTestGroup() starts a new numbered group and writes a comment header to
+  * both SQL buffers; the generated declareTest() increments the test id, stores the description
+  * and patient ids, and writes a comment describing the test to both SQL buffers.
+  *
+  * @param r script lines collected so far; the new lines are appended to it
+  */
+ private static void createDeclareTestFunction(List r) {
+     String[] declareTestGroup = {
+         "declareTestGroup <- function(groupName) {",
+         " frameworkContext$groupIndex <- frameworkContext$groupIndex + 1 ;",
+         " frameworkContext$currentGroup <- {}",
+         "",
+         " frameworkContext$currentGroup$groupName <- groupName;",
+         " frameworkContext$currentGroup$groupItemIndex <- -1;",
+         " sql <- c(\"\",paste0(\"-- \", frameworkContext$groupIndex, \". \", groupName));",
+         " frameworkContext$insertSql = c(frameworkContext$insertSql, sql);",
+         " frameworkContext$testSql = c(frameworkContext$testSql, sql);",
+         "}",
+         ""
+     };
+     String[] declareTest = {
+         "declareTest <- function(description, source_pid = NULL, cdm_pid = NULL) {",
+         " frameworkContext$testId = frameworkContext$testId + 1;",
+         " frameworkContext$testDescription = description;",
+         " frameworkContext$patient$source_pid = source_pid;",
+         " frameworkContext$patient$cdm_pid = cdm_pid;",
+         " if (is.null(frameworkContext$currentGroup)) { ",
+         " sql <- c(paste0(\"-- Test \", frameworkContext$testId, \": \", frameworkContext$testDescription));",
+         " } else {",
+         " frameworkContext$currentGroup$groupItemIndex = frameworkContext$currentGroup$groupItemIndex + 1;",
+         " sql <- c(paste0(\"-- \", frameworkContext$groupIndex, \".\", frameworkContext$currentGroup$groupItemIndex, \" \", frameworkContext$testDescription, \" [Test ID: \", frameworkContext$testId, \"]\"));",
+         " }",
+         " frameworkContext$insertSql = c(frameworkContext$insertSql, \"--\",sql,\"--\");",
+         " frameworkContext$testSql = c(frameworkContext$testSql, \"--\",sql,\"--\");",
+         "}",
+         ""
+     };
+
+     for (String rLine : declareTestGroup)
+         r.add(rLine);
+     for (String rLine : declareTest)
+         r.add(rLine);
+ }
+
+ /**
+  * Appends one R "expect" function per table of the given database. Depending on {@code type}
+  * the function is named expect_&lt;table&gt;, expect_no_&lt;table&gt; or expect_count_&lt;table&gt;
+  * (the latter takes an extra leading rowCount argument). Each generated function builds an
+  * INSERT INTO @cdm_schema.test_results statement whose status column is PASS or FAIL based on a
+  * COUNT(*) over the table, filtered by the field arguments the caller supplied, and appends
+  * that statement to frameworkContext$testSql.
+  *
+  * @param r        script lines collected so far; the new lines are appended to it
+  * @param type     one of DEFAULT, NEGATE or COUNT; any other value is treated like COUNT
+  * @param database database whose tables and fields define the generated functions
+  */
+ private static void createExpectFunctions(List r, int type, Database database) {
+     for (Table table : database.getTables()) {
+         String rTableName = convertToRName(table.getName());
+         String sqlTableName = convertToSqlName(table.getName());
+
+         // The function header and the final pass/fail comparison are the only type-specific parts.
+         String signaturePrefix;
+         String statusLine;
+         if (type == DEFAULT) {
+             signaturePrefix = "expect_" + rTableName + " <- function(";
+             statusLine = " statement <- paste0(statement, \") = 0 THEN 'FAIL' ELSE 'PASS' END AS status;\")";
+         } else if (type == NEGATE) {
+             signaturePrefix = "expect_no_" + rTableName + " <- function(";
+             statusLine = " statement <- paste0(statement, \") != 0 THEN 'FAIL' ELSE 'PASS' END AS status;\")";
+         } else {
+             signaturePrefix = "expect_count_" + rTableName + " <- function(rowCount, ";
+             statusLine = " statement <- paste0(statement, \") != \",rowCount ,\" THEN 'FAIL' ELSE 'PASS' END AS status;\")";
+         }
+
+         // One argument and one optional WHERE-clause contribution per field.
+         List argNames = new ArrayList();
+         List fieldTests = new ArrayList();
+         for (Field field : table.getFields()) {
+             String rFieldName = convertToRName(field.getName());
+             String sqlFieldName = convertToSqlName(field.getName());
+             argNames.add(rFieldName);
+             fieldTests.add(" if (!missing(" + rFieldName + ")) {");
+             fieldTests.add(" if (is.null(" + rFieldName + ")) {");
+             fieldTests.add(" whereClauses <- c(whereClauses, \"" + sqlFieldName + " IS NULL\")");
+             fieldTests.add(" } else if (is(" + rFieldName + ", \"subQuery\")){");
+             fieldTests.add(" whereClauses <- c(whereClauses, paste0(\"" + sqlFieldName + " = (\", as.character(" + rFieldName + "), \")\"))");
+             fieldTests.add(" } else {");
+             fieldTests.add(" whereClauses <- c(whereClauses, paste0(\"" + sqlFieldName + " = '\", " + rFieldName + ",\"'\"))");
+             fieldTests.add(" }");
+             fieldTests.add(" }");
+             fieldTests.add("");
+         }
+
+         r.add(signaturePrefix + StringUtilities.join(argNames, ", ") + ") {");
+
+         // Resolve the test name and the patient identifiers recorded by declareTest().
+         String[] preamble = {
+             "",
+             " if (is.null(frameworkContext$currentGroup)) {",
+             " testName <- frameworkContext$testDescription;",
+             " } else {",
+             " testName <- paste0(frameworkContext$groupIndex, \".\", frameworkContext$currentGroup$groupItemIndex, \" \", frameworkContext$testDescription);",
+             " }",
+             "",
+             " source_pid <- frameworkContext$patient$source_pid;",
+             " if (is.null(source_pid)) {",
+             " source_pid <- \"NULL\";",
+             " } else {",
+             " source_pid <- paste0(\"'\", as.character(source_pid), \"'\");",
+             " }",
+             "",
+             " cdm_pid <- frameworkContext$patient$cdm_pid;",
+             " if (is.null(cdm_pid)) {",
+             " cdm_pid <- \"NULL\"",
+             " }",
+             ""
+         };
+         for (String rLine : preamble)
+             r.add(rLine);
+
+         String statementStart = " statement <- paste0(\"INSERT INTO @cdm_schema.test_results (id, description, test, source_pid, cdm_pid, status) SELECT "
+                 + "\", frameworkContext$testId, \" AS id, "
+                 + "'\", testName, \"' AS description, "
+                 + "'Expect " + table.getName() + "' AS test, "
+                 + "\", source_pid, \" as source_pid, "
+                 + "\", cdm_pid, \" as cdm_pid, "
+                 + "CASE WHEN(SELECT COUNT(*) FROM @cdm_schema." + sqlTableName + " WHERE \")";
+         r.add(statementStart);
+
+         r.add(" whereClauses = NULL;");
+         r.addAll(fieldTests);
+         r.add(" statement <- paste0(statement, paste0(whereClauses, collapse=\" AND \"));");
+         r.add(statusLine);
+         r.add(" frameworkContext$testSql = c(frameworkContext$testSql, statement);");
+         r.add(" invisible(statement)");
+         r.add("}");
+         r.add("");
+     }
+ }
+
+ /**
+  * Appends one R function lookup_&lt;table&gt;(fetchField, ...) per table of the given database.
+  * The generated function builds "SELECT fetchField FROM @cdm_schema.&lt;table&gt; WHERE ..." from
+  * the field arguments the caller supplied and returns it tagged with class "subQuery".
+  *
+  * @param r        script lines collected so far; the new lines are appended to it
+  * @param database database whose tables and fields define the generated functions
+  */
+ private static void createLookupFunctions(List r, Database database) {
+     for (Table table : database.getTables()) {
+         String rTableName = convertToRName(table.getName());
+         String sqlTableName = convertToSqlName(table.getName());
+
+         // One argument and one optional WHERE-clause contribution per field.
+         List argNames = new ArrayList();
+         List fieldTests = new ArrayList();
+         for (Field field : table.getFields()) {
+             String rFieldName = convertToRName(field.getName());
+             String sqlFieldName = convertToSqlName(field.getName());
+             argNames.add(rFieldName);
+             fieldTests.add(" if (!missing(" + rFieldName + ")) {");
+             fieldTests.add(" if (is.null(" + rFieldName + ")) {");
+             fieldTests.add(" whereClauses <- c(whereClauses, \"" + sqlFieldName + " IS NULL\")");
+             fieldTests.add(" } else {");
+             fieldTests.add(" whereClauses <- c(whereClauses, paste0(\"" + sqlFieldName + " = '\", " + rFieldName + ",\"'\"))");
+             fieldTests.add(" }");
+             fieldTests.add(" }");
+             fieldTests.add("");
+         }
+
+         r.add("lookup_" + rTableName + " <- function(fetchField, " + StringUtilities.join(argNames, ", ") + ") {");
+         r.add(" whereClauses = NULL;");
+         r.add(" statement <- paste0(\"SELECT \", fetchField , \" FROM @cdm_schema." + sqlTableName + " WHERE \")");
+         r.addAll(fieldTests);
+         r.add(" statement <- paste0(statement, paste0(whereClauses, collapse=\" AND \"));");
+         r.add(" class(statement) <- \"subQuery\"");
+         r.add(" return(statement)");
+         r.add("}");
+         r.add("");
+     }
+ }
+
+ /**
+  * Returns the name as it should appear in generated SQL: wrapped in square brackets when it
+  * contains a space or a dot, or when it matches an entry of the keyword set (case-insensitively);
+  * otherwise unchanged.
+  *
+  * @param name table or field name
+  * @return the name, bracketed if required
+  */
+ private static String convertToSqlName(String name) {
+     // Locale.ROOT keeps the keyword lookup independent of the JVM default locale; with a
+     // Turkish default locale "index".toUpperCase() would not produce "INDEX".
+     boolean isKeyword = keywordSet.contains(name.toUpperCase(java.util.Locale.ROOT));
+     if (name.contains(" ") || name.contains(".") || isKeyword)
+         name = "[" + name + "]";
+     return name;
+ }
+
+ /**
+  * Appends the R code that creates the frameworkContext environment and defines initFramework().
+  * The generated initFramework() resets the group index, seeds the insert SQL with a TRUNCATE
+  * statement per table, seeds the test SQL with statements that drop and recreate
+  * @cdm_schema.test_results, clears the test id, description and patient ids, and creates one
+  * environment of default field values per table. A field's default is the first entry of its
+  * value counts, unless there are none or that entry is the "List truncated..." marker.
+  *
+  * @param r        script lines collected so far; the new lines are appended to it
+  * @param database database whose tables and fields are initialised
+  */
+ private static void createInitFunction(List r, Database database) {
+     r.add("frameworkContext <- new.env(parent = emptyenv());");
+     r.add("initFramework <- function() {");
+     r.add(" frameworkContext$groupIndex <- 0;");
+     r.add(" insertSql <- c()");
+     for (Table table : database.getTables()) {
+         String sqlTableName = convertToSqlName(table.getName());
+         r.add(" insertSql <- c(insertSql, \"TRUNCATE TABLE @source_schema." + sqlTableName + ";\")");
+     }
+     r.add(" frameworkContext$insertSql <- insertSql;");
+
+     r.add(" testSql <- c()");
+     r.add(" testSql <- c(testSql, \"IF OBJECT_ID('@cdm_schema.test_results', 'U') IS NOT NULL\")");
+     r.add(" testSql <- c(testSql, \" DROP TABLE @cdm_schema.test_results;\")");
+     r.add(" testSql <- c(testSql, \"\")");
+     r.add(" testSql <- c(testSql, \"CREATE TABLE @cdm_schema.test_results (id INT, description VARCHAR(512), test VARCHAR(256), source_pid VARCHAR(50), cdm_pid int, status VARCHAR(5));\")");
+     r.add(" testSql <- c(testSql, \"\")");
+
+     r.add(" frameworkContext$testSql <- testSql;");
+     r.add(" frameworkContext$testId = 0;");
+     r.add(" frameworkContext$testDescription = \"\";");
+     r.add("");
+     r.add(" patient <- {}");
+     r.add(" patient$source_pid <- NULL");
+     r.add(" patient$cdm_pid <- NULL");
+     r.add(" frameworkContext$patient = patient;");
+     r.add("");
+     r.add(" frameworkContext$defaultValues =new.env(parent = emptyenv());");
+     for (Table table : database.getTables()) {
+         String rTableName = convertToRName(table.getName());
+         r.add("");
+         r.add(" defaults <- new.env(parent = emptyenv())");
+         for (Field field : table.getFields()) {
+             String rFieldName = field.getName().replaceAll(" ", "_").replaceAll("-", "_");
+             String defaultValue;
+             if (field.getValueCounts().length == 0 || field.getValueCounts()[0][0].equalsIgnoreCase("List truncated..."))
+                 defaultValue = "";
+             else
+                 defaultValue = field.getValueCounts()[0][0];
+             if (!defaultValue.equals("")) {
+                 // The value is embedded in a double-quoted R string literal, so backslashes and
+                 // double quotes must be escaped or the generated script will not parse.
+                 String rLiteral = defaultValue.replace("\\", "\\\\").replace("\"", "\\\"");
+                 r.add(" defaults$" + rFieldName + " <- \"" + rLiteral + "\"");
+             }
+         }
+         r.add(" frameworkContext$defaultValues$" + rTableName + " = defaults;");
+     }
+     r.add("}");
+     r.add("");
+ }
+
+ /**
+  * Appends one R function add_&lt;table&gt;(...) per table of the given database. The generated
+  * function takes one argument per field, falls back to the table's stored default for any
+  * argument that is missing, skips fields whose value is NULL, builds an
+  * INSERT INTO @source_schema.&lt;table&gt; statement from the remaining fields and appends it to
+  * frameworkContext$insertSql.
+  *
+  * @param r        script lines collected so far; the new lines are appended to it
+  * @param database database whose tables and fields define the generated functions
+  */
+ private static void createAddFunctions(List r, Database database) {
+     for (Table table : database.getTables()) {
+         String rTableName = convertToRName(table.getName());
+         String sqlTableName = convertToSqlName(table.getName());
+
+         // One argument and one default-or-value insertion snippet per field.
+         List argNames = new ArrayList();
+         List fieldSnippets = new ArrayList();
+         for (Field field : table.getFields()) {
+             String rFieldName = field.getName().replace(' ', '_').replace('-', '_');
+             String sqlFieldName = convertToSqlName(field.getName());
+             argNames.add(rFieldName);
+             fieldSnippets.add(" if (missing(" + rFieldName + ")) {");
+             fieldSnippets.add(" " + rFieldName + " <- defaults$" + rFieldName);
+             fieldSnippets.add(" }");
+             fieldSnippets.add(" if (!is.null(" + rFieldName + ")) {");
+             fieldSnippets.add(" insertFields <- c(insertFields, \"" + sqlFieldName + "\")");
+             fieldSnippets.add(" insertValues <- c(insertValues, " + rFieldName + ")");
+             fieldSnippets.add(" }");
+             fieldSnippets.add("");
+         }
+
+         r.add("add_" + rTableName + " <- function(" + StringUtilities.join(argNames, ", ") + ") {");
+         r.add(" defaults <- frameworkContext$defaultValues$" + rTableName + ";");
+         r.add(" insertFields <- c()");
+         r.add(" insertValues <- c()");
+         r.addAll(fieldSnippets);
+
+         String insertStatement = " statement <- paste0(\"INSERT INTO @source_schema." + sqlTableName + " (\", "
+                 + "paste(insertFields, collapse = \", \"), "
+                 + "\") VALUES ('\", "
+                 + "paste(insertValues, collapse = \"', '\"), "
+                 + "\"');\")";
+         r.add(insertStatement);
+
+         r.add(" frameworkContext$insertSql = c(frameworkContext$insertSql, statement);");
+         r.add(" invisible(statement);");
+         r.add("}");
+         r.add("");
+     }
+ }
+
+ /**
+  * Appends one R function set_defaults_&lt;table&gt;(...) per table of the given database. The
+  * generated function takes one argument per field and, for every argument that was supplied,
+  * stores it as that field's default in the table's defaults object.
+  *
+  * @param r        script lines collected so far; the new lines are appended to it
+  * @param database database whose tables and fields define the generated functions
+  */
+ private static void createSetDefaultFunctions(List r, Database database) {
+     for (Table table : database.getTables()) {
+         String rTableName = convertToRName(table.getName());
+
+         // One argument and one "store if supplied" snippet per field.
+         List argNames = new ArrayList();
+         List assignments = new ArrayList();
+         for (Field field : table.getFields()) {
+             String rFieldName = field.getName().replace(' ', '_').replace('-', '_');
+             argNames.add(rFieldName);
+             assignments.add(" if (!missing(" + rFieldName + ")) {");
+             assignments.add(" defaults$" + rFieldName + " <- " + rFieldName);
+             assignments.add(" }");
+         }
+
+         r.add("set_defaults_" + rTableName + " <- function(" + StringUtilities.join(argNames, ", ") + ") {");
+         r.add(" defaults <- frameworkContext$defaultValues$" + rTableName + ";");
+         r.addAll(assignments);
+         r.add(" invisible(defaults)");
+         r.add("}");
+         r.add("");
+     }
+ }
+
+ /**
+  * Appends one R function get_defaults_&lt;table&gt;() per table of the given database. The
+  * generated function returns the defaults stored for that table, i.e. the same object that
+  * set_defaults_&lt;table&gt;() and add_&lt;table&gt;() read from frameworkContext$defaultValues.
+  *
+  * @param r        script lines collected so far; the new lines are appended to it
+  * @param database database whose tables define the generated functions
+  */
+ private static void createGetDefaultFunctions(List r, Database database) {
+     for (Table table : database.getTables()) {
+         String rTableName = convertToRName(table.getName());
+         r.add("get_defaults_" + rTableName + " <- function() {");
+         // Return this table's defaults rather than the container holding every table's defaults.
+         r.add(" return(frameworkContext$defaultValues$" + rTableName + ")");
+         r.add("}");
+         r.add("");
+     }
+ }
+
+ /**
+  * Derives the identifier used for a table or field in the generated R code: a leading
+  * underscore is replaced by "U_", and every space and hyphen is replaced by an underscore.
+  *
+  * @param name table or field name
+  * @return the converted name
+  */
+ private static String convertToRName(String name) {
+     String rName = name.startsWith("_") ? "U_" + name.substring(1) : name;
+     return rName.replace(' ', '_').replace('-', '_');
+ }
+}
diff --git a/src/org/ohdsi/rabbitInAHat/ETLTestFrameWorkGenerator.java b/src/org/ohdsi/rabbitInAHat/ETLTestFrameWorkGenerator.java
index 47f3d888..c2cfb693 100644
--- a/src/org/ohdsi/rabbitInAHat/ETLTestFrameWorkGenerator.java
+++ b/src/org/ohdsi/rabbitInAHat/ETLTestFrameWorkGenerator.java
@@ -175,6 +175,8 @@ private static void createLookupFunctions(List r, Database database) {
testDefs.add(" }");
testDefs.add(" if (is.null(" + rFieldName + ")) {");
testDefs.add(" statement <- paste0(statement, \" " + sqlFieldName + " IS NULL\")");
+ testDefs.add(" } else if (is(" + rFieldName + ", \"subQuery\")){");
+ testDefs.add(" statement <- paste0(statement, \" " + sqlFieldName + " = (\", as.character(" + rFieldName + "), \")\")");
testDefs.add(" } else {");
testDefs.add(" statement <- paste0(statement, \" " + sqlFieldName + " = '\", " + rFieldName + ",\"'\")");
testDefs.add(" }");
diff --git a/src/org/ohdsi/rabbitInAHat/RabbitInAHatMain.java b/src/org/ohdsi/rabbitInAHat/RabbitInAHatMain.java
index 4f3c5e64..d8c715b6 100644
--- a/src/org/ohdsi/rabbitInAHat/RabbitInAHatMain.java
+++ b/src/org/ohdsi/rabbitInAHat/RabbitInAHatMain.java
@@ -63,6 +63,8 @@ public class RabbitInAHatMain implements ResizeListener, ActionListener {
public final static String ACTION_CMD_OPEN_SCAN_REPORT = "Open Scan Report";
public final static String ACTION_CMD_GENERATE_ETL_DOCUMENT = "Generate ETL Document";
public final static String ACTION_CMD_GENERATE_TEST_FRAMEWORK = "Generate ETL Test Framework";
+ public final static String ACTION_CMD_GENERATE_PACKAGE_TEST_FRAMEWORK = "Generate ETL Test Framework (for R Packages)";
+
public final static String ACTION_CMD_DISCARD_COUNTS = "Discard Value Counts";
public final static String ACTION_CMD_FILTER = "Filter";
public final static String ACTION_CMD_MAKE_MAPPING = "Make Mappings";
@@ -70,6 +72,7 @@ public class RabbitInAHatMain implements ResizeListener, ActionListener {
public final static String ACTION_CMD_SET_TARGET_V4 = "CDM v4";
public final static String ACTION_CMD_SET_TARGET_V5 = "CDM v5.0.0";
public final static String ACTION_CMD_SET_TARGET_V501 = "CDM v5.0.1";
+ public final static String ACTION_CMD_SET_TARGET_V510 = "CDM v5.1.0";
public final static String ACTION_ADD_STEM_TABLE = "Add stem table";
public final static String ACTION_CMD_SET_TARGET_CUSTOM = "Load Custom...";
public final static String ACTION_CMD_MARK_COMPLETED = "Mark Highlighted As Complete";
@@ -226,6 +229,11 @@ private JMenuBar createMenuBar() {
generateTestFrameworkItem.setActionCommand(ACTION_CMD_GENERATE_TEST_FRAMEWORK);
fileMenu.add(generateTestFrameworkItem);
+ JMenuItem generatePackageTestFrameworkItem = new JMenuItem(ACTION_CMD_GENERATE_PACKAGE_TEST_FRAMEWORK);
+ generatePackageTestFrameworkItem.addActionListener(this);
+ generatePackageTestFrameworkItem.setActionCommand(ACTION_CMD_GENERATE_PACKAGE_TEST_FRAMEWORK);
+ fileMenu.add(generatePackageTestFrameworkItem);
+
JMenu editMenu = new JMenu("Edit");
menuBar.add(editMenu);
@@ -256,6 +264,11 @@ private JMenuBar createMenuBar() {
targetCDMV501.addActionListener(this);
targetCDMV501.setActionCommand(ACTION_CMD_SET_TARGET_V501);
setTarget.add(targetCDMV501);
+
+ JMenuItem targetCDMV510 = new JMenuItem(ACTION_CMD_SET_TARGET_V510);
+ targetCDMV510.addActionListener(this);
+ targetCDMV510.setActionCommand(ACTION_CMD_SET_TARGET_V510);
+ setTarget.add(targetCDMV510);
JMenuItem loadTarget = new JMenuItem(ACTION_CMD_SET_TARGET_CUSTOM);
loadTarget.addActionListener(this);
@@ -387,6 +400,9 @@ public void actionPerformed(ActionEvent event) {
case ACTION_CMD_GENERATE_TEST_FRAMEWORK:
doGenerateTestFramework(chooseSavePath(FILE_FILTER_R));
break;
+ case ACTION_CMD_GENERATE_PACKAGE_TEST_FRAMEWORK:
+ doGeneratePackageTestFramework(chooseSavePath(FILE_FILTER_R));
+ break;
case ACTION_CMD_DISCARD_COUNTS:
doDiscardCounts();
break;
@@ -408,6 +424,9 @@ public void actionPerformed(ActionEvent event) {
case ACTION_CMD_SET_TARGET_V501:
doSetTargetCDM(CDMVersion.CDMV501);
break;
+ case ACTION_CMD_SET_TARGET_V510:
+ doSetTargetCDM(CDMVersion.CDMV510);
+ break;
case ACTION_CMD_SET_TARGET_CUSTOM:
doSetTargetCustom(chooseOpenPath(FILE_FILTER_CSV));
break;
@@ -440,6 +459,14 @@ private void doGenerateTestFramework(String filename) {
frame.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
}
}
+
+ /**
+  * Generates the R-package flavour of the ETL test framework into the given file, showing the
+  * wait cursor on the main frame while the generator runs. Does nothing when no file name is
+  * given (presumably the user cancelled the save dialog - confirm against chooseSavePath).
+  *
+  * @param filename path of the R file to write, or null to do nothing
+  */
+ private void doGeneratePackageTestFramework(String filename) {
+     if (filename == null)
+         return;
+     frame.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
+     try {
+         ETLPackageTestFrameWorkGenerator.generate(ObjectExchange.etl, filename);
+     } finally {
+         // Restore the cursor even if generation fails, so the UI is not left in "busy" state.
+         frame.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
+     }
+ }
private void doOpenWiki() {
try {
diff --git a/src/org/ohdsi/rabbitInAHat/dataModel/CDMV5.1.0.csv b/src/org/ohdsi/rabbitInAHat/dataModel/CDMV5.1.0.csv
new file mode 100644
index 00000000..fc7c4738
--- /dev/null
+++ b/src/org/ohdsi/rabbitInAHat/dataModel/CDMV5.1.0.csv
@@ -0,0 +1,278 @@
+TABLE_NAME,COLUMN_NAME,IS_NULLABLE,DATA_TYPE,DESCRIPTION
+LOCATION,LOCATION_ID,NO,INTEGER,A unique identifier for each geographic location.
+LOCATION,ADDRESS_1,YES,CHARACTER VARYING,"The address field 1, typically used for the street address, as it appears in the source data."
+LOCATION,ADDRESS_2,YES,CHARACTER VARYING,"The address field 2, typically used for additional detail such as buildings, suites, floors, as it appears in the source data."
+LOCATION,CITY,YES,CHARACTER VARYING,The city field as it appears in the source data.
+LOCATION,STATE,YES,CHARACTER VARYING,The state field as it appears in the source data.
+LOCATION,ZIP,YES,CHARACTER VARYING,The zip or postal code.
+LOCATION,COUNTY,YES,CHARACTER VARYING,The county.
+LOCATION,LOCATION_SOURCE_VALUE,YES,CHARACTER VARYING,The verbatim information that is used to uniquely identify the location as it appears in the source data.
+PERSON,PERSON_ID,NO,INTEGER,A unique identifier for each person.
+PERSON,GENDER_CONCEPT_ID,NO,INTEGER,A foreign key that refers to an identifier in the CONCEPT table for the unique gender of the person.
+PERSON,YEAR_OF_BIRTH,NO,INTEGER,"The year of birth of the person. For data sources with date of birth, the year is extracted. For data sources where the year of birth is not available, the approximate year of birth is derived based on any age group categorization available."
+PERSON,MONTH_OF_BIRTH,YES,INTEGER,"The month of birth of the person. For data sources that provide the precise date of birth, the month is extracted and stored in this field."
+PERSON,DAY_OF_BIRTH,YES,INTEGER,"The day of the month of birth of the person. For data sources that provide the precise date of birth, the day is extracted and stored in this field."
+PERSON,BIRTH_DATETIME,YES,DATETIME,The time of birth at the birth day
+PERSON,RACE_CONCEPT_ID,NO,INTEGER,A foreign key that refers to an identifier in the CONCEPT table for the unique race of the person.
+PERSON,ETHNICITY_CONCEPT_ID,NO,INTEGER,A foreign key that refers to the standard concept identifier in the Standardized Vocabularies for the ethnicity of the person.
+PERSON,LOCATION_ID,YES,INTEGER,"A foreign key to the place of residency for the person in the location table, where the detailed address information is stored."
+PERSON,PROVIDER_ID,YES,INTEGER,A foreign key to the primary care provider the person is seeing in the provider table.
+PERSON,CARE_SITE_ID,YES,INTEGER,"A foreign key to the site of primary care in the care_site table, where the details of the care site are stored."
+PERSON,PERSON_SOURCE_VALUE,YES,CHARACTER VARYING,An (encrypted) key derived from the person identifier in the source data. This is necessary when a use case requires a link back to the person data at the source dataset.
+PERSON,GENDER_SOURCE_VALUE,YES,CHARACTER VARYING,The source code for the gender of the person as it appears in the source data. The person's gender is mapped to a standard gender concept in the Standardized Vocabularies; the original value is stored here for reference.
+PERSON,GENDER_SOURCE_CONCEPT_ID,YES,INTEGER,A foreign key to the gender concept that refers to the code used in the source.
+PERSON,RACE_SOURCE_VALUE,YES,CHARACTER VARYING,The source code for the race of the person as it appears in the source data. The person race is mapped to a standard race concept in the Standardized Vocabularies and the original value is stored here for reference.
+PERSON,RACE_SOURCE_CONCEPT_ID,YES,INTEGER,A foreign key to the race concept that refers to the code used in the source.
+PERSON,ETHNICITY_SOURCE_VALUE,YES,CHARACTER VARYING,"The source code for the ethnicity of the person as it appears in the source data. The person ethnicity is mapped to a standard ethnicity concept in the Standardized Vocabularies and the original code is stored here for reference."
+PERSON,ETHNICITY_SOURCE_CONCEPT_ID,YES,INTEGER,A foreign key to the ethnicity concept that refers to the code used in the source.
+OBSERVATION_PERIOD,OBSERVATION_PERIOD_ID,NO,INTEGER,A unique identifier for each observation period.
+OBSERVATION_PERIOD,PERSON_ID,NO,INTEGER,A foreign key identifier to the person for whom the observation period is defined. The demographic details of that person are stored in the person table.
+OBSERVATION_PERIOD,OBSERVATION_PERIOD_START_DATE,NO,DATE,The start date of the observation period for which data are available from the data source.
+OBSERVATION_PERIOD,OBSERVATION_PERIOD_START_DATETIME,NO,DATETIME,The start time of the observation period for which data are available from the data source.
+OBSERVATION_PERIOD,OBSERVATION_PERIOD_END_DATE,NO,DATE,The end date of the observation period for which data are available from the data source.
+OBSERVATION_PERIOD,OBSERVATION_PERIOD_END_DATETIME,NO,DATETIME,The end time of the observation period for which data are available from the data source.
+OBSERVATION_PERIOD,PERIOD_TYPE_CONCEPT_ID,NO,INTEGER,A foreign key identifier to the predefined concept in the Standardized Vocabularies reflecting the source of the observation period information
+CARE_SITE,CARE_SITE_ID,NO,INTEGER,A unique identifier for each Care Site.
+CARE_SITE,CARE_SITE_NAME,YES,CHARACTER VARYING,The description or name of the Care Site
+CARE_SITE,PLACE_OF_SERVICE_CONCEPT_ID,YES,INTEGER,A foreign key that refers to a Place of Service Concept ID in the Standardized Vocabularies.
+CARE_SITE,LOCATION_ID,YES,INTEGER,"A foreign key to the geographic Location of the Care Site in the LOCATION table, where the detailed address information is stored."
+CARE_SITE,CARE_SITE_SOURCE_VALUE,YES,CHARACTER VARYING,"The identifier for the Care Site in the source data, stored here for reference."
+CARE_SITE,PLACE_OF_SERVICE_SOURCE_VALUE,YES,CHARACTER VARYING,"The source code for the Place of Service as it appears in the source data, stored here for reference."
+VISIT_OCCURRENCE,VISIT_OCCURRENCE_ID,NO,INTEGER,A unique identifier for each Person's visit or encounter at a healthcare provider.
+VISIT_OCCURRENCE,PERSON_ID,NO,INTEGER,A foreign key identifier to the Person for whom the visit is recorded. The demographic details of that Person are stored in the PERSON table.
+VISIT_OCCURRENCE,VISIT_CONCEPT_ID,NO,INTEGER,A foreign key that refers to a visit Concept identifier in the Standardized Vocabularies.
+VISIT_OCCURRENCE,VISIT_START_DATE,NO,DATE,The start date of the visit.
+VISIT_OCCURRENCE,VISIT_START_DATETIME,YES,DATETIME,The time the visit started.
+VISIT_OCCURRENCE,VISIT_END_DATE,NO,DATE,The end date of the visit. If this is a one-day visit the end date should match the start date.
+VISIT_OCCURRENCE,VISIT_END_DATETIME,YES,DATETIME,The time the visit ended.
+VISIT_OCCURRENCE,VISIT_TYPE_CONCEPT_ID,NO,INTEGER,A foreign key to the predefined Concept identifier in the Standardized Vocabularies reflecting the type of source data from which the visit record is derived.
+VISIT_OCCURRENCE,PROVIDER_ID,YES,INTEGER,A foreign key to the provider in the provider table who was associated with the visit.
+VISIT_OCCURRENCE,CARE_SITE_ID,YES,INTEGER,A foreign key to the care site in the care site table that was visited.
+VISIT_OCCURRENCE,VISIT_SOURCE_VALUE,YES,CHARACTER VARYING,The source code for the visit as it appears in the source data.
+VISIT_OCCURRENCE,VISIT_SOURCE_CONCEPT_ID,YES,INTEGER,A foreign key to a Concept that refers to the code used in the source.
+PROVIDER,PROVIDER_ID,NO,INTEGER,A unique identifier for each Provider.
+PROVIDER,PROVIDER_NAME,YES,CHARACTER VARYING,A description of the Provider.
+PROVIDER,NPI,YES,CHARACTER VARYING,The National Provider Identifier (NPI) of the provider.
+PROVIDER,DEA,YES,CHARACTER VARYING,The Drug Enforcement Administration (DEA) number of the provider.
+PROVIDER,SPECIALTY_CONCEPT_ID,YES,INTEGER,A foreign key to a Standard Specialty Concept ID in the Standardized Vocabularies.
+PROVIDER,CARE_SITE_ID,YES,INTEGER,A foreign key to the main Care Site where the provider is practicing.
+PROVIDER,YEAR_OF_BIRTH,YES,INTEGER,The year of birth of the Provider.
+PROVIDER,GENDER_CONCEPT_ID,YES,INTEGER,The gender of the Provider.
+PROVIDER,PROVIDER_SOURCE_VALUE,YES,CHARACTER VARYING,"The identifier used for the Provider in the source data, stored here for reference."
+PROVIDER,SPECIALTY_SOURCE_VALUE,YES,CHARACTER VARYING,"The source code for the Provider specialty as it appears in the source data, stored here for reference."
+PROVIDER,SPECIALTY_SOURCE_CONCEPT_ID,YES,INTEGER,A foreign key to a Concept that refers to the code used in the source.
+PROVIDER,GENDER_SOURCE_VALUE,YES,CHARACTER VARYING,"The gender code for the Provider as it appears in the source data, stored here for reference."
+PROVIDER,GENDER_SOURCE_CONCEPT_ID,YES,INTEGER,A foreign key to a Concept that refers to the code used in the source.
+CONDITION_OCCURRENCE,CONDITION_OCCURRENCE_ID,NO,INTEGER,A unique identifier for each Condition Occurrence event.
+CONDITION_OCCURRENCE,PERSON_ID,NO,INTEGER,A foreign key identifier to the Person who is experiencing the condition. The demographic details of that Person are stored in the PERSON table.
+CONDITION_OCCURRENCE,CONDITION_CONCEPT_ID,NO,INTEGER,A foreign key that refers to a Standard Condition Concept identifier in the Standardized Vocabularies.
+CONDITION_OCCURRENCE,CONDITION_START_DATE,NO,DATE,The date when the instance of the Condition is recorded.
+CONDITION_OCCURRENCE,CONDITION_START_DATETIME,NO,DATETIME,The time when the instance of the Condition is recorded.
+CONDITION_OCCURRENCE,CONDITION_END_DATE,YES,DATE,The date when the instance of the Condition is considered to have ended.
+CONDITION_OCCURRENCE,CONDITION_END_DATETIME,YES,DATETIME,The time when the instance of the Condition is considered to have ended.
+CONDITION_OCCURRENCE,CONDITION_TYPE_CONCEPT_ID,NO,INTEGER,"A foreign key to the predefined Concept identifier in the Standardized Vocabularies reflecting the source data from which the condition was recorded, the level of standardization, and the type of occurrence."
+CONDITION_OCCURRENCE,STOP_REASON,YES,CHARACTER VARYING,"The reason that the condition was no longer present, as indicated in the source data."
+CONDITION_OCCURRENCE,PROVIDER_ID,YES,INTEGER,A foreign key to the Provider in the PROVIDER table who was responsible for capturing (diagnosing) the Condition.
+CONDITION_OCCURRENCE,VISIT_OCCURRENCE_ID,YES,INTEGER,A foreign key to the visit in the VISIT table during which the Condition was determined (diagnosed).
+CONDITION_OCCURRENCE,CONDITION_SOURCE_VALUE,YES,CHARACTER VARYING,The source code for the condition as it appears in the source data. This code is mapped to a standard condition concept in the Standardized Vocabularies and the original code is stored here for reference.
+CONDITION_OCCURRENCE,CONDITION_SOURCE_CONCEPT_ID,YES,INTEGER,A foreign key to a Condition Concept that refers to the code used in the source.
+DEATH,PERSON_ID,NO,INTEGER,A foreign key identifier to the deceased person. The demographic details of that person are stored in the person table.
+DEATH,DEATH_DATE,NO,DATE,"The date the person was deceased. If the precise date including day or month is not known or not allowed, December is used as the default month, and the last day of the month the default day."
+DEATH,DEATH_DATETIME,YES,DATETIME,The time the person was deceased.
+DEATH,DEATH_TYPE_CONCEPT_ID,NO,INTEGER,A foreign key referring to the predefined concept identifier in the Standardized Vocabularies reflecting how the death was represented in the source data.
+DEATH,CAUSE_CONCEPT_ID,YES,INTEGER,A foreign key referring to a standard concept identifier in the Standardized Vocabularies for conditions.
+DEATH,CAUSE_SOURCE_VALUE,YES,CHARACTER VARYING,"The source code for the cause of death as it appears in the source data. This code is mapped to a standard concept in the Standardized Vocabularies and the original code is stored here for reference."
+DEATH,CAUSE_SOURCE_CONCEPT_ID,YES,INTEGER,"A foreign key to the concept that refers to the code used in the source. Note, this variable name is abbreviated to ensure it will be allowable across database platforms."
+DRUG_EXPOSURE,DRUG_EXPOSURE_ID,NO,INTEGER,A system-generated unique identifier for each Drug utilization event.
+DRUG_EXPOSURE,PERSON_ID,NO,INTEGER,A foreign key identifier to the person who is subjected to the Drug. The demographic details of that person are stored in the person table.
+DRUG_EXPOSURE,DRUG_CONCEPT_ID,NO,INTEGER,A foreign key that refers to a Standard Concept identifier in the Standardized Vocabularies for the Drug concept.
+DRUG_EXPOSURE,DRUG_EXPOSURE_START_DATE,NO,DATE,"The start date for the current instance of Drug utilization. Valid entries include a start date of a prescription, the date a prescription was filled, or the date on which a Drug administration procedure was recorded."
+DRUG_EXPOSURE,DRUG_EXPOSURE_START_DATETIME,NO,DATETIME,The start time for the current instance of Drug utilization.
+DRUG_EXPOSURE,DRUG_EXPOSURE_END_DATE,YES,DATE,The end date for the current instance of Drug utilization. It is not available from all sources.
+DRUG_EXPOSURE,DRUG_EXPOSURE_END_DATETIME,YES,DATETIME,The end time for the current instance of Drug utilization. It is not available from all sources.
+DRUG_EXPOSURE,DRUG_TYPE_CONCEPT_ID,NO,INTEGER,A foreign key to the predefined Concept identifier in the Standardized Vocabularies reflecting the type of Drug Exposure recorded. It indicates how the Drug Exposure was represented in the source data.
+DRUG_EXPOSURE,STOP_REASON,YES,CHARACTER VARYING,"The reason the Drug was stopped. Reasons include regimen completed, changed, removed, etc."
+DRUG_EXPOSURE,REFILLS,YES,INTEGER,"The number of refills after the initial prescription. The initial prescription is not counted, values start with 0."
+DRUG_EXPOSURE,QUANTITY,YES,NUMERIC,The quantity of drug as recorded in the original prescription or dispensing record.
+DRUG_EXPOSURE,DAYS_SUPPLY,YES,INTEGER,The number of days of supply of the medication as recorded in the original prescription or dispensing record.
+DRUG_EXPOSURE,SIG,YES,TEXT,The directions ('signetur') on the Drug prescription as recorded in the original prescription (and printed on the container) or dispensing record.
+DRUG_EXPOSURE,ROUTE_CONCEPT_ID,YES,INTEGER,A foreign key to a predefined concept in the Standardized Vocabularies reflecting the route of administration.
+DRUG_EXPOSURE,EFFECTIVE_DRUG_DOSE,YES,NUMERIC,Numerical value of Drug dose for this Drug Exposure record.
+DRUG_EXPOSURE,DOSE_UNIT_CONCEPT_ID,YES,INTEGER,A foreign key to a predefined concept in the Standardized Vocabularies reflecting the unit the effective_drug_dose value is expressed.
+DRUG_EXPOSURE,LOT_NUMBER,YES,CHARACTER VARYING,An identifier assigned to a particular quantity or lot of Drug product from the manufacturer.
+DRUG_EXPOSURE,PROVIDER_ID,YES,INTEGER,A foreign key to the provider in the provider table who initiated (prescribed or administered) the Drug Exposure.
+DRUG_EXPOSURE,VISIT_OCCURRENCE_ID,YES,INTEGER,A foreign key to the visit in the visit table during which the Drug Exposure was initiated.
+DRUG_EXPOSURE,DRUG_SOURCE_VALUE,YES,CHARACTER VARYING,"The source code for the Drug as it appears in the source data. This code is mapped to a Standard Drug concept in the Standardized Vocabularies and the original code is stored here for reference."
+DRUG_EXPOSURE,DRUG_SOURCE_CONCEPT_ID,YES,INTEGER,A foreign key to a Drug Concept that refers to the code used in the source.
+DRUG_EXPOSURE,ROUTE_SOURCE_VALUE,YES,CHARACTER VARYING,The information about the route of administration as detailed in the source.
+DRUG_EXPOSURE,DOSE_UNIT_SOURCE_VALUE,YES,CHARACTER VARYING,The information about the dose unit as detailed in the source.
+DEVICE_EXPOSURE,DEVICE_EXPOSURE_ID,NO,INTEGER,A system-generated unique identifier for each Device Exposure.
+DEVICE_EXPOSURE,PERSON_ID,NO,INTEGER,A foreign key identifier to the Person who is subjected to the Device. The demographic details of that person are stored in the Person table.
+DEVICE_EXPOSURE,DEVICE_CONCEPT_ID,NO,INTEGER,A foreign key that refers to a Standard Concept identifier in the Standardized Vocabularies for the Device concept.
+DEVICE_EXPOSURE,DEVICE_EXPOSURE_START_DATE,NO,DATE,The date the Device or supply was applied or used.
+DEVICE_EXPOSURE,DEVICE_EXPOSURE_START_DATETIME,NO,DATETIME,The time the Device or supply was applied or used.
+DEVICE_EXPOSURE,DEVICE_EXPOSURE_END_DATE,YES,DATE,The date the Device or supply was removed from use.
+DEVICE_EXPOSURE,DEVICE_EXPOSURE_END_DATETIME,YES,DATETIME,The time the Device or supply was removed from use.
+DEVICE_EXPOSURE,DEVICE_TYPE_CONCEPT_ID,NO,INTEGER,A foreign key to the predefined Concept identifier in the Standardized Vocabularies reflecting the type of Device Exposure recorded. It indicates how the Device Exposure was represented in the source data.
+DEVICE_EXPOSURE,UNIQUE_DEVICE_ID,YES,CHARACTER VARYING,A UDI or equivalent identifying the instance of the Device used in the Person.
+DEVICE_EXPOSURE,QUANTITY,YES,INTEGER,The number of individual Devices used for the exposure.
+DEVICE_EXPOSURE,PROVIDER_ID,YES,INTEGER,A foreign key to the provider in the PROVIDER table who initiated or administered the Device.
+DEVICE_EXPOSURE,VISIT_OCCURRENCE_ID,YES,INTEGER,A foreign key to the visit in the VISIT table during which the device was used.
+DEVICE_EXPOSURE,DEVICE_SOURCE_VALUE,YES,CHARACTER VARYING,The source code for the Device as it appears in the source data. This code is mapped to a standard Device Concept in the Standardized Vocabularies and the original code is stored here for reference.
+DEVICE_EXPOSURE,DEVICE_SOURCE_CONCEPT_ID,YES,INTEGER,A foreign key to a Device Concept that refers to the code used in the source.
+PROCEDURE_OCCURRENCE,PROCEDURE_OCCURRENCE_ID,NO,INTEGER,A system-generated unique identifier for each Procedure Occurrence.
+PROCEDURE_OCCURRENCE,PERSON_ID,NO,INTEGER,A foreign key identifier to the Person who is subjected to the Procedure. The demographic details of that Person are stored in the PERSON table.
+PROCEDURE_OCCURRENCE,PROCEDURE_CONCEPT_ID,NO,INTEGER,A foreign key that refers to a standard procedure Concept identifier in the Standardized Vocabularies.
+PROCEDURE_OCCURRENCE,PROCEDURE_DATE,NO,DATE,The date on which the Procedure was performed.
+PROCEDURE_OCCURRENCE,PROCEDURE_TYPE_CONCEPT_ID,NO,INTEGER,A foreign key to the predefined Concept identifier in the Standardized Vocabularies reflecting the type of source data from which the procedure record is derived.
+PROCEDURE_OCCURRENCE,MODIFIER_CONCEPT_ID,YES,INTEGER,A foreign key to a Standard Concept identifier for a modifier to the Procedure (e.g. bilateral)
+PROCEDURE_OCCURRENCE,QUANTITY,YES,INTEGER,The quantity of procedures ordered or administered.
+PROCEDURE_OCCURRENCE,PROVIDER_ID,YES,INTEGER,A foreign key to the provider in the provider table who was responsible for carrying out the procedure.
+PROCEDURE_OCCURRENCE,VISIT_OCCURRENCE_ID,YES,INTEGER,A foreign key to the visit in the visit table during which the Procedure was carried out.
+PROCEDURE_OCCURRENCE,PROCEDURE_SOURCE_VALUE,YES,CHARACTER VARYING,"The source code for the Procedure as it appears in the source data. This code is mapped to a standard procedure Concept in the Standardized Vocabularies and the original code is stored here for reference. Procedure source codes are typically ICD-9-Proc, CPT-4, HCPCS or OPCS-4 codes."
+PROCEDURE_OCCURRENCE,PROCEDURE_SOURCE_CONCEPT_ID,YES,INTEGER,A foreign key to a Procedure Concept that refers to the code used in the source.
+PROCEDURE_OCCURRENCE,QUALIFIER_SOURCE_VALUE,YES,CHARACTER VARYING,The source code for the qualifier as it appears in the source data.
+MEASUREMENT,MEASUREMENT_ID,NO,INTEGER,A unique identifier for each Measurement.
+MEASUREMENT,PERSON_ID,NO,INTEGER,A foreign key identifier to the Person about whom the measurement was recorded. The demographic details of that Person are stored in the PERSON table.
+MEASUREMENT,MEASUREMENT_CONCEPT_ID,NO,INTEGER,A foreign key to the standard measurement concept identifier in the Standardized Vocabularies.
+MEASUREMENT,MEASUREMENT_DATE,NO,DATE,The date of the Measurement.
+MEASUREMENT,MEASUREMENT_DATETIME,YES,DATETIME,"The time of the Measurement. Some database systems don't have a datatype of time. To accommodate all temporal analyses, datatype datetime can be used (combining measurement_date and measurement_time)."
+MEASUREMENT,MEASUREMENT_TYPE_CONCEPT_ID,NO,INTEGER,A foreign key to the predefined Concept in the Standardized Vocabularies reflecting the provenance from where the Measurement record was recorded.
+MEASUREMENT,OPERATOR_CONCEPT_ID,YES,INTEGER,"A foreign key identifier to the predefined Concept in the Standardized Vocabularies reflecting the mathematical operator that is applied to the value_as_number. Operators are <, <=, =, >=, >."
+MEASUREMENT,VALUE_AS_NUMBER,YES,NUMERIC,A Measurement result where the result is expressed as a numeric value.
+MEASUREMENT,VALUE_AS_CONCEPT_ID,YES,INTEGER,"A foreign key to a Measurement result represented as a Concept from the Standardized Vocabularies (e.g., positive/negative, present/absent, low/high, etc.)."
+MEASUREMENT,UNIT_CONCEPT_ID,YES,INTEGER,A foreign key to a Standard Concept ID of Measurement Units in the Standardized Vocabularies.
+MEASUREMENT,RANGE_LOW,YES,NUMERIC,The lower limit of the normal range of the Measurement result. The lower range is assumed to be of the same unit of measure as the Measurement value.
+MEASUREMENT,RANGE_HIGH,YES,NUMERIC,The upper limit of the normal range of the Measurement. The upper range is assumed to be of the same unit of measure as the Measurement value.
+MEASUREMENT,PROVIDER_ID,YES,INTEGER,A foreign key to the provider in the PROVIDER table who was responsible for initiating or obtaining the measurement.
+MEASUREMENT,VISIT_OCCURRENCE_ID,YES,INTEGER,A foreign key to the Visit in the VISIT_OCCURRENCE table during which the Measurement was recorded.
+MEASUREMENT,MEASUREMENT_SOURCE_VALUE,YES,CHARACTER VARYING,The Measurement name as it appears in the source data. This code is mapped to a Standard Concept in the Standardized Vocabularies and the original code is stored here for reference.
+MEASUREMENT,MEASUREMENT_SOURCE_CONCEPT_ID,YES,INTEGER,A foreign key to a Concept in the Standard Vocabularies that refers to the code used in the source.
+MEASUREMENT,UNIT_SOURCE_VALUE,YES,CHARACTER VARYING,The source code for the unit as it appears in the source data. This code is mapped to a standard unit concept in the Standardized Vocabularies and the original code is stored here for reference.
+MEASUREMENT,VALUE_SOURCE_VALUE,YES,CHARACTER VARYING,The source value associated with the content of the value_as_number or value_as_concept as stored in the source data.
+OBSERVATION,OBSERVATION_ID,NO,INTEGER,A unique identifier for each observation.
+OBSERVATION,PERSON_ID,NO,INTEGER,A foreign key identifier to the Person about whom the observation was recorded. The demographic details of that Person are stored in the PERSON table.
+OBSERVATION,OBSERVATION_CONCEPT_ID,NO,INTEGER,A foreign key to the standard observation concept identifier in the Standardized Vocabularies.
+OBSERVATION,OBSERVATION_DATE,NO,DATE,The date of the observation.
+OBSERVATION,OBSERVATION_DATETIME,YES,DATETIME,The time of the observation.
+OBSERVATION,OBSERVATION_TYPE_CONCEPT_ID,NO,INTEGER,A foreign key to the predefined concept identifier in the Standardized Vocabularies reflecting the type of the observation.
+OBSERVATION,VALUE_AS_NUMBER,YES,NUMERIC,The observation result stored as a number. This is applicable to observations where the result is expressed as a numeric value.
+OBSERVATION,VALUE_AS_STRING,YES,CHARACTER VARYING,The observation result stored as a string. This is applicable to observations where the result is expressed as verbatim text.
+OBSERVATION,VALUE_AS_CONCEPT_ID,YES,INTEGER,"A foreign key to an observation result stored as a Concept ID. This is applicable to observations where the result can be expressed as a Standard Concept from the Standardized Vocabularies (e.g., positive/negative, present/absent, low/high, etc.)."
+OBSERVATION,QUALIFIER_CONCEPT_ID,YES,INTEGER,"A foreign key to a Standard Concept ID for a qualifier (e.g., severity of drug-drug interaction alert)"
+OBSERVATION,UNIT_CONCEPT_ID,YES,INTEGER,A foreign key to a Standard Concept ID of measurement units in the Standardized Vocabularies.
+OBSERVATION,PROVIDER_ID,YES,INTEGER,A foreign key to the provider in the PROVIDER table who was responsible for making the observation.
+OBSERVATION,VISIT_OCCURRENCE_ID,YES,INTEGER,A foreign key to the visit in the VISIT_OCCURRENCE table during which the observation was recorded.
+OBSERVATION,OBSERVATION_SOURCE_VALUE,YES,CHARACTER VARYING,"The observation code as it appears in the source data. This code is mapped to a Standard Concept in the Standardized Vocabularies and the original code is stored here for reference."
+OBSERVATION,OBSERVATION_SOURCE_CONCEPT_ID,YES,INTEGER,A foreign key to a Concept that refers to the code used in the source.
+OBSERVATION,UNIT_SOURCE_VALUE,YES,CHARACTER VARYING,"The source code for the unit as it appears in the source data. This code is mapped to a standard unit concept in the Standardized Vocabularies and the original code is stored here for reference."
+OBSERVATION,QUALIFIER_SOURCE_VALUE,YES,CHARACTER VARYING,The source value associated with a qualifier to characterize the observation
+NOTE,NOTE_ID,NO,INTEGER,A unique identifier for each note.
+NOTE,PERSON_ID,NO,INTEGER,A foreign key identifier to the Person about whom the Note was recorded. The demographic details of that Person are stored in the PERSON table.
+NOTE,NOTE_DATE,NO,DATE,The date the note was recorded.
+NOTE,NOTE_DATETIME,YES,DATETIME,The time the note was recorded.
+NOTE,NOTE_TYPE_CONCEPT_ID,NO,INTEGER,"A foreign key to the predefined Concept in the Standardized Vocabularies reflecting the type, origin or provenance of the Note."
+NOTE,NOTE_TEXT,NO,TEXT,The content of the Note.
+NOTE,PROVIDER_ID,YES,INTEGER,A foreign key to the Provider in the PROVIDER table who took the Note.
+NOTE,VISIT_OCCURRENCE_ID,YES,INTEGER,Foreign key to the Visit in the VISIT_OCCURRENCE table when the Note was taken.
+NOTE,NOTE_SOURCE_VALUE,YES,CHARACTER VARYING,"The source value associated with the origin of the Note, as standardized using the note_type_concept_id"
+SPECIMEN,SPECIMEN_ID,NO,INTEGER,A unique identifier for each specimen.
+SPECIMEN,PERSON_ID,NO,INTEGER,A foreign key identifier to the Person for whom the Specimen is recorded.
+SPECIMEN,SPECIMEN_CONCEPT_ID,NO,INTEGER,A foreign key referring to a Standard Concept identifier in the Standardized Vocabularies for the Specimen.
+SPECIMEN,SPECIMEN_TYPE_CONCEPT_ID,NO,INTEGER,A foreign key referring to the Concept identifier in the Standardized Vocabularies reflecting the system of record from which the Specimen was represented in the source data.
+SPECIMEN,SPECIMEN_DATE,NO,DATE,The date the specimen was obtained from the Person.
+SPECIMEN,SPECIMEN_DATETIME,YES,DATETIME,The time on the date when the Specimen was obtained from the person.
+SPECIMEN,QUANTITY,YES,NUMERIC,The amount of specimen collection from the person during the sampling procedure.
+SPECIMEN,UNIT_CONCEPT_ID,YES,INTEGER,A foreign key to a Standard Concept identifier for the Unit associated with the numeric quantity of the Specimen collection.
+SPECIMEN,ANATOMIC_SITE_CONCEPT_ID,YES,INTEGER,A foreign key to a Standard Concept identifier for the anatomic location of specimen collection.
+SPECIMEN,DISEASE_STATUS_CONCEPT_ID,YES,INTEGER,A foreign key to a Standard Concept identifier for the Disease Status of specimen collection.
+SPECIMEN,SPECIMEN_SOURCE_ID,YES,CHARACTER VARYING,The Specimen identifier as it appears in the source data.
+SPECIMEN,SPECIMEN_SOURCE_VALUE,YES,CHARACTER VARYING,"The Specimen value as it appears in the source data. This value is mapped to a Standard Concept in the Standardized Vocabularies and the original code is stored here for reference."
+SPECIMEN,UNIT_SOURCE_VALUE,YES,CHARACTER VARYING,The information about the Unit as detailed in the source.
+SPECIMEN,ANATOMIC_SITE_SOURCE_VALUE,YES,CHARACTER VARYING,The information about the anatomic site as detailed in the source.
+SPECIMEN,DISEASE_STATUS_SOURCE_VALUE,YES,CHARACTER VARYING,The information about the disease status as detailed in the source.
+FACT_RELATIONSHIP,DOMAIN_CONCEPT_ID_1,NO,INTEGER,"The concept representing the domain of fact one, from which the corresponding table can be inferred."
+FACT_RELATIONSHIP,FACT_ID_1,NO,INTEGER,The unique identifier in the table corresponding to the domain of fact one.
+FACT_RELATIONSHIP,DOMAIN_CONCEPT_ID_2,NO,INTEGER,"The concept representing the domain of fact two, from which the corresponding table can be inferred."
+FACT_RELATIONSHIP,FACT_ID_2,NO,INTEGER,The unique identifier in the table corresponding to the domain of fact two.
+FACT_RELATIONSHIP,RELATIONSHIP_CONCEPT_ID,NO,INTEGER,A foreign key to a Standard Concept ID of relationship in the Standardized Vocabularies.
+PAYER_PLAN_PERIOD,PAYER_PLAN_PERIOD_ID,NO,INTEGER,"An identifier for each unique combination of payer, plan, family code and time span."
+PAYER_PLAN_PERIOD,PERSON_ID,NO,INTEGER,A foreign key identifier to the Person covered by the payer. The demographic details of that Person are stored in the PERSON table.
+PAYER_PLAN_PERIOD,PAYER_PLAN_PERIOD_START_DATE,NO,DATE,The start date of the payer plan period.
+PAYER_PLAN_PERIOD,PAYER_PLAN_PERIOD_END_DATE,NO,DATE,The end date of the payer plan period.
+PAYER_PLAN_PERIOD,PAYER_SOURCE_VALUE,YES,CHARACTER VARYING,The source code for the payer as it appears in the source data.
+PAYER_PLAN_PERIOD,PLAN_SOURCE_VALUE,YES,CHARACTER VARYING,The source code for the Person's health benefit plan as it appears in the source data.
+PAYER_PLAN_PERIOD,FAMILY_SOURCE_VALUE,YES,CHARACTER VARYING,The source code for the Person's family as it appears in the source data.
+DRUG_ERA,DRUG_ERA_ID,NO,INTEGER,A unique identifier for each Drug Era.
+DRUG_ERA,PERSON_ID,NO,INTEGER,A foreign key identifier to the Person who is subjected to the Drug during the Drug Era. The demographic details of that Person are stored in the PERSON table.
+DRUG_ERA,DRUG_CONCEPT_ID,NO,INTEGER,A foreign key that refers to a Standard Concept identifier in the Standardized Vocabularies for the Ingredient Concept.
+DRUG_ERA,DRUG_ERA_START_DATE,NO,DATE,The start date for the Drug Era constructed from the individual instances of Drug Exposures. It is the start date of the very first chronologically recorded instance of utilization of a Drug.
+DRUG_ERA,DRUG_ERA_END_DATE,NO,DATE,The end date for the drug era constructed from the individual instance of drug exposures. It is the end date of the final continuously recorded instance of utilization of a drug.
+DRUG_ERA,DRUG_EXPOSURE_COUNT,YES,INTEGER,The number of individual Drug Exposure occurrences used to construct the Drug Era.
+DRUG_ERA,GAP_DAYS,YES,INTEGER,The number of observed days of gap between Drug Exposure records which was tolerated in the building of the Drug Era record.
+DOSE_ERA,DOSE_ERA_ID,NO,INTEGER,A unique identifier for each Dose Era.
+DOSE_ERA,PERSON_ID,NO,INTEGER,A foreign key identifier to the Person who is subjected to the drug during the drug era. The demographic details of that Person are stored in the PERSON table.
+DOSE_ERA,DRUG_CONCEPT_ID,NO,INTEGER,A foreign key that refers to a Standard Concept identifier in the Standardized Vocabularies for the active Ingredient Concept.
+DOSE_ERA,UNIT_CONCEPT_ID,NO,INTEGER,A foreign key that refers to a Standard Concept identifier in the Standardized Vocabularies for the unit concept.
+DOSE_ERA,DOSE_VALUE,NO,NUMERIC,The numeric value of the dose.
+DOSE_ERA,DOSE_ERA_START_DATE,NO,DATE,The start date for the Dose Era constructed from the individual instances of drug exposures. It is the start date of the very first chronologically recorded instance of utilization of a drug.
+DOSE_ERA,DOSE_ERA_END_DATE,NO,DATE,The end date for the Dose Era constructed from the individual instances of drug exposures. It is the end date of the final continuously recorded instance of utilization of a drug.
+CONDITION_ERA,CONDITION_ERA_ID,NO,INTEGER,A unique identifier for each Condition Era.
+CONDITION_ERA,PERSON_ID,NO,INTEGER,A foreign key identifier to the Person who is experiencing the Condition during the Condition Era. The demographic details of that Person are stored in the PERSON table.
+CONDITION_ERA,CONDITION_CONCEPT_ID,NO,INTEGER,A foreign key that refers to a standard Condition Concept identifier in the Standardized Vocabularies.
+CONDITION_ERA,CONDITION_ERA_START_DATE,NO,DATE,The start date for the Condition Era constructed from the individual instances of Condition Occurrences. It is the start date of the very first chronologically recorded instance of the condition.
+CONDITION_ERA,CONDITION_ERA_END_DATE,NO,DATE,The end date for the Condition Era constructed from the individual instances of Condition Occurrences. It is the end date of the final continuously recorded instance of the Condition.
+CONDITION_ERA,CONDITION_OCCURRENCE_COUNT,YES,INTEGER,The number of individual Condition Occurrences used to construct the condition era.
+CDM_SOURCE,CDM_SOURCE_NAME,NO,CHARACTER VARYING,The full name of the source
+CDM_SOURCE,CDM_SOURCE_ABBREVIATION,YES,CHARACTER VARYING,An abbreviation of the name
+CDM_SOURCE,CDM_HOLDER,YES,CHARACTER VARYING,The name of the organization responsible for the development of the CDM instance
+CDM_SOURCE,SOURCE_DESCRIPTION,YES,TEXT,A description of the source data origin and purpose for collection. The description may contain a summary of the period of time that is expected to be covered by this dataset.
+CDM_SOURCE,SOURCE_DOCUMENTATION_REFERENCE,YES,CHARACTER VARYING,URL or other external reference to location of source documentation
+CDM_SOURCE,CDM_ETL_REFERENCE,YES,CHARACTER VARYING,URL or other external reference to location of ETL specification documentation and ETL source code
+CDM_SOURCE,SOURCE_RELEASE_DATE,YES,DATE,"The date for which the source data are most current, such as the last day of data capture"
+CDM_SOURCE,CDM_RELEASE_DATE,YES,DATE,The date when the CDM was instantiated
+CDM_SOURCE,CDM_VERSION,YES,CHARACTER VARYING,The version of CDM used
+CDM_SOURCE,VOCABULARY_VERSION,YES,CHARACTER VARYING,The version of the vocabulary used
+COHORT,COHORT_DEFINITION_ID,NO,INTEGER,A foreign key to a record in the COHORT_DEFINITION table containing relevant Cohort Definition information.
+COHORT,SUBJECT_ID,NO,INTEGER,"A foreign key to the subject in the cohort. These could be referring to records in the PERSON, PROVIDER, VISIT_OCCURRENCE table."
+COHORT,COHORT_START_DATE,NO,DATE,"The date when the Cohort Definition criteria for the Person, Provider or Visit first match."
+COHORT,COHORT_END_DATE,NO,DATE,"The date when the Cohort Definition criteria for the Person, Provider or Visit no longer match or the Cohort membership was terminated."
+COHORT_DEFINITION,COHORT_DEFINITION_ID,NO,INTEGER,A unique identifier for each Cohort.
+COHORT_DEFINITION,COHORT_DEFINITION_NAME,NO,CHARACTER VARYING,A short description of the Cohort.
+COHORT_DEFINITION,COHORT_DEFINITION_DESCRIPTION,YES,TEXT,A complete description of the Cohort definition
+COHORT_DEFINITION,DEFINITION_TYPE_CONCEPT_ID,NO,INTEGER,Type defining what kind of Cohort Definition the record represents and how the syntax may be executed
+COHORT_DEFINITION,COHORT_DEFINITION_SYNTAX,YES,TEXT,Syntax or code to operationalize the Cohort definition
+COHORT_DEFINITION,SUBJECT_CONCEPT_ID,NO,INTEGER,"A foreign key to the Concept which defines the domain of subjects that are members of the cohort (e.g., Person, Provider, Visit)."
+COHORT_DEFINITION,COHORT_INSTANTIATION_DATE,YES,DATE,A date to indicate when the Cohort was instantiated in the COHORT table
+COHORT_ATTRIBUTE,COHORT_DEFINITION_ID,NO,INTEGER,A foreign key to a record in the COHORT_DEFINITION table containing relevant Cohort Definition information.
+COHORT_ATTRIBUTE,COHORT_START_DATE,NO,DATE,"The date when the Cohort Definition criteria for the Person, Provider or Visit first match."
+COHORT_ATTRIBUTE,COHORT_END_DATE,NO,DATE,"The date when the Cohort Definition criteria for the Person, Provider or Visit no longer match or the Cohort membership was terminated."
+COHORT_ATTRIBUTE,SUBJECT_ID,NO,INTEGER,"A foreign key to the subject in the Cohort. These could be referring to records in the PERSON, PROVIDER, VISIT_OCCURRENCE table."
+COHORT_ATTRIBUTE,ATTRIBUTE_DEFINITION_ID,NO,INTEGER,A foreign key to a record in the ATTRIBUTE_DEFINITION table containing relevant Attribute Definition information.
+COHORT_ATTRIBUTE,VALUE_AS_NUMBER,YES,NUMERIC,The attribute result stored as a number. This is applicable to attributes where the result is expressed as a numeric value.
+COHORT_ATTRIBUTE,VALUE_AS_CONCEPT_ID,YES,INTEGER,The attribute result stored as a Concept ID. This is applicable to attributes where the result is expressed as a categorical value.
+ATTRIBUTE_DEFINITION,ATTRIBUTE_DEFINITION_ID,NO,INTEGER,A unique identifier for each Attribute.
+ATTRIBUTE_DEFINITION,ATTRIBUTE_NAME,NO,CHARACTER VARYING,A short description of the Attribute.
+ATTRIBUTE_DEFINITION,ATTRIBUTE_DESCRIPTION,YES,TEXT,A complete description of the Attribute definition
+ATTRIBUTE_DEFINITION,ATTRIBUTE_TYPE_CONCEPT_ID,NO,INTEGER,Type defining what kind of Attribute Definition the record represents and how the syntax may be executed
+ATTRIBUTE_DEFINITION,ATTRIBUTE_SYNTAX,YES,TEXT,Syntax or code to operationalize the Attribute definition
+COST,COST_ID,NO,INTEGER,A unique identifier for each COST record.
+COST,COST_EVENT_ID,NO,INTEGER,"A foreign key identifier to the event (e.g. Measurement, Procedure, Visit, Drug Exposure, etc) record for which cost data are recorded."
+COST,COST_DOMAIN_ID,NO,CHARACTER VARYING,"The concept representing the domain of the cost event, from which the corresponding table can be inferred that contains the entity for which cost information is recorded."
+COST,COST_TYPE_CONCEPT_ID,NO,INTEGER,"A foreign key identifier to a concept in the CONCEPT table for the provenance or the source of the COST data: Calculated from insurance claim information, provider revenue, calculated from cost-to-charge ratio, reported from accounting database, etc."
+COST,CURRENCY_CONCEPT_ID,YES,INTEGER,"A foreign key identifier to the concept representing the 3-letter code used to delineate international currencies, such as USD for US Dollar."
+COST,TOTAL_CHARGE,YES,NUMERIC,"The total amount charged by some provider of goods or services (e.g. hospital, physician, pharmacy, dme provider) to payers (insurance companies, the patient)."
+COST,TOTAL_COST,YES,NUMERIC,The cost incurred by the provider of goods or services.
+COST,TOTAL_PAID,YES,NUMERIC,The total amount actually paid from all payers for goods or services of the provider.
+COST,PAID_BY_PAYER,YES,NUMERIC,The amount paid by the Payer for the goods or services.
+COST,PAID_BY_PATIENT,YES,NUMERIC,The total amount paid by the Person as a share of the expenses.
+COST,PAID_PATIENT_COPAY,YES,NUMERIC,The amount paid by the Person as a fixed contribution to the expenses.
+COST,PAID_PATIENT_COINSURANCE,YES,NUMERIC,"The amount paid by the Person as a joint assumption of risk. Typically, this is a percentage of the expenses defined by the Payer Plan after the Person's deductible is exceeded."
+COST,PAID_PATIENT_DEDUCTIBLE,YES,NUMERIC,The amount paid by the Person that is counted toward the deductible defined by the Payer Plan. paid_patient_deductible does contribute to the paid_by_patient variable.
+COST,PAID_BY_PRIMARY,YES,NUMERIC,The amount paid by a primary Payer through the coordination of benefits.
+COST,PAID_INGREDIENT_COST,YES,NUMERIC,"The amount paid by the Payer to a pharmacy for the drug, excluding the amount paid for dispensing the drug. paid_ingredient_cost contributes to the paid_by_payer field if this field is populated with a nonzero value."
+COST,PAID_DISPENSING_FEE,YES,NUMERIC,"The amount paid by the Payer to a pharmacy for dispensing a drug, excluding the amount paid for the drug ingredient. paid_dispensing_fee contributes to the paid_by_payer field if this field is populated with a nonzero value."
+COST,PAYER_PLAN_PERIOD_ID,YES,INTEGER,"A foreign key to the PAYER_PLAN_PERIOD table, where the details of the Payer, Plan and Family are stored. Record the payer_plan_id that relates to the payer who contributed to the paid_by_payer field."
+COST,AMOUNT_ALLOWED,YES,NUMERIC,The contracted amount agreed between the payer and provider.
+COST,REVENUE_CODE_CONCEPT_ID,YES,INTEGER,A foreign key referring to a Standard Concept ID in the Standardized Vocabularies for Revenue codes.
+COST,REVENUE_CODE_SOURCE_VALUE,YES,CHARACTER VARYING,"The source code for the Revenue code as it appears in the source data, stored here for reference."
\ No newline at end of file
diff --git a/src/org/ohdsi/rabbitInAHat/dataModel/Database.java b/src/org/ohdsi/rabbitInAHat/dataModel/Database.java
index ad32dfc7..f85d96ed 100644
--- a/src/org/ohdsi/rabbitInAHat/dataModel/Database.java
+++ b/src/org/ohdsi/rabbitInAHat/dataModel/Database.java
@@ -35,7 +35,7 @@
public class Database implements Serializable {
public enum CDMVersion {
- CDMV4("CDMV4.csv"), CDMV5("CDMV5.csv"), CDMV501("CDMV5.0.1.csv");
+ CDMV4("CDMV4.csv"), CDMV5("CDMV5.csv"), CDMV501("CDMV5.0.1.csv"), CDMV510("CDMV5.1.0.csv");
private final String fileName;
diff --git a/src/org/ohdsi/rabbitInAHat/dataModel/StemTableDefaultMappingV5.0.1.csv b/src/org/ohdsi/rabbitInAHat/dataModel/StemTableDefaultMappingV5.0.1.csv
index a6b7e10d..3c1110cf 100644
--- a/src/org/ohdsi/rabbitInAHat/dataModel/StemTableDefaultMappingV5.0.1.csv
+++ b/src/org/ohdsi/rabbitInAHat/dataModel/StemTableDefaultMappingV5.0.1.csv
@@ -11,6 +11,7 @@ STEM_TABLE,START_DATE,CONDITION_OCCURRENCE,CONDITION_START_DATE
STEM_TABLE,END_DATE,CONDITION_OCCURRENCE,CONDITION_END_DATE
STEM_TABLE,STOP_REASON,CONDITION_OCCURRENCE,STOP_REASON
STEM_TABLE,PERSON_ID,DRUG_EXPOSURE,PERSON_ID
+STEM_TABLE,ID,DRUG_EXPOSURE,DRUG_EXPOSURE_ID
STEM_TABLE,CONCEPT_ID,DRUG_EXPOSURE,DRUG_CONCEPT_ID
STEM_TABLE,START_DATE,DRUG_EXPOSURE,DRUG_EXPOSURE_START_DATE
STEM_TABLE,END_DATE,DRUG_EXPOSURE,DRUG_EXPOSURE_END_DATE
@@ -31,6 +32,7 @@ STEM_TABLE,SOURCE_CONCEPT_ID,DRUG_EXPOSURE,DRUG_SOURCE_CONCEPT_ID
STEM_TABLE,ROUTE_SOURCE_VALUE,DRUG_EXPOSURE,ROUTE_SOURCE_VALUE
STEM_TABLE,DOSE_UNIT_SOURCE_VALUE,DRUG_EXPOSURE,DOSE_UNIT_SOURCE_VALUE
STEM_TABLE,PERSON_ID,DEVICE_EXPOSURE,PERSON_ID
+STEM_TABLE,ID,DEVICE_EXPOSURE,DEVICE_EXPOSURE_ID
STEM_TABLE,CONCEPT_ID,DEVICE_EXPOSURE,DEVICE_CONCEPT_ID
STEM_TABLE,START_DATE,DEVICE_EXPOSURE,DEVICE_EXPOSURE_START_DATE
STEM_TABLE,END_DATE,DEVICE_EXPOSURE,DEVICE_EXPOSURE_END_DATE
@@ -42,6 +44,7 @@ STEM_TABLE,VISIT_OCCURRENCE_ID,DEVICE_EXPOSURE,VISIT_OCCURRENCE_ID
STEM_TABLE,SOURCE_VALUE,DEVICE_EXPOSURE,DEVICE_SOURCE_VALUE
STEM_TABLE,SOURCE_CONCEPT_ID,DEVICE_EXPOSURE,DEVICE_SOURCE_CONCEPT_ID
STEM_TABLE,PERSON_ID,MEASUREMENT,PERSON_ID
+STEM_TABLE,ID,MEASUREMENT,MEASUREMENT_ID
STEM_TABLE,CONCEPT_ID,MEASUREMENT,MEASUREMENT_CONCEPT_ID
STEM_TABLE,START_DATE,MEASUREMENT,MEASUREMENT_DATE
STEM_TABLE,START_TIME,MEASUREMENT,MEASUREMENT_TIME
@@ -59,6 +62,7 @@ STEM_TABLE,SOURCE_CONCEPT_ID,MEASUREMENT,MEASUREMENT_SOURCE_CONCEPT_ID
STEM_TABLE,UNIT_SOURCE_VALUE,MEASUREMENT,UNIT_SOURCE_VALUE
STEM_TABLE,VALUE_SOURCE_VALUE,MEASUREMENT,VALUE_SOURCE_VALUE
STEM_TABLE,PERSON_ID,OBSERVATION,PERSON_ID
+STEM_TABLE,ID,OBSERVATION,OBSERVATION_ID
STEM_TABLE,CONCEPT_ID,OBSERVATION,OBSERVATION_CONCEPT_ID
STEM_TABLE,START_DATE,OBSERVATION,OBSERVATION_DATE
STEM_TABLE,START_TIME,OBSERVATION,OBSERVATION_TIME
@@ -75,6 +79,7 @@ STEM_TABLE,SOURCE_CONCEPT_ID,OBSERVATION,OBSERVATION_SOURCE_CONCEPT_ID
STEM_TABLE,UNIT_SOURCE_VALUE,OBSERVATION,UNIT_SOURCE_VALUE
STEM_TABLE,QUALIFIER_SOURCE_VALUE,OBSERVATION,QUALIFIER_SOURCE_VALUE
STEM_TABLE,PERSON_ID,PROCEDURE_OCCURRENCE,PERSON_ID
+STEM_TABLE,ID,PROCEDURE_OCCURRENCE,PROCEDURE_OCCURRENCE_ID
STEM_TABLE,CONCEPT_ID,PROCEDURE_OCCURRENCE,PROCEDURE_CONCEPT_ID
STEM_TABLE,START_DATE,PROCEDURE_OCCURRENCE,PROCEDURE_DATE
STEM_TABLE,TYPE_CONCEPT_ID,PROCEDURE_OCCURRENCE,PROCEDURE_TYPE_CONCEPT_ID
@@ -85,9 +90,18 @@ STEM_TABLE,VISIT_OCCURRENCE_ID,PROCEDURE_OCCURRENCE,VISIT_OCCURRENCE_ID
STEM_TABLE,SOURCE_VALUE,PROCEDURE_OCCURRENCE,PROCEDURE_SOURCE_VALUE
STEM_TABLE,SOURCE_CONCEPT_ID,PROCEDURE_OCCURRENCE,PROCEDURE_SOURCE_CONCEPT_ID
STEM_TABLE,QUALIFIER_SOURCE_VALUE,PROCEDURE_OCCURRENCE,QUALIFIER_SOURCE_VALUE
-STEM_TABLE,PERSON_ID,DEATH,PERSON_ID
-STEM_TABLE,START_DATE,DEATH,DEATH_DATE
-STEM_TABLE,TYPE_CONCEPT_ID,DEATH,DEATH_TYPE_CONCEPT_ID
-STEM_TABLE,CONCEPT_ID,DEATH,CAUSE_CONCEPT_ID
-STEM_TABLE,SOURCE_VALUE,DEATH,CAUSE_SOURCE_VALUE
-STEM_TABLE,SOURCE_CONCEPT_ID,DEATH,CAUSE_SOURCE_CONCEPT_ID
+STEM_TABLE,PERSON_ID,SPECIMEN,PERSON_ID
+STEM_TABLE,ID,SPECIMEN,SPECIMEN_ID
+STEM_TABLE,CONCEPT_ID,SPECIMEN,SPECIMEN_CONCEPT_ID
+STEM_TABLE,TYPE_CONCEPT_ID,SPECIMEN,SPECIMEN_TYPE_CONCEPT_ID
+STEM_TABLE,START_DATE,SPECIMEN,SPECIMEN_DATE
+STEM_TABLE,START_TIME,SPECIMEN,SPECIMEN_TIME
+STEM_TABLE,QUANTITY,SPECIMEN,QUANTITY
+STEM_TABLE,UNIT_CONCEPT_ID,SPECIMEN,UNIT_CONCEPT_ID
+STEM_TABLE,ANATOMIC_SITE_CONCEPT_ID,SPECIMEN,ANATOMIC_SITE_CONCEPT_ID
+STEM_TABLE,DISEASE_STATUS_CONCEPT_ID,SPECIMEN,DISEASE_STATUS_CONCEPT_ID
+STEM_TABLE,SPECIMEN_SOURCE_ID,SPECIMEN,SPECIMEN_SOURCE_ID
+STEM_TABLE,SOURCE_VALUE,SPECIMEN,SPECIMEN_SOURCE_VALUE
+STEM_TABLE,UNIT_SOURCE_VALUE,SPECIMEN,UNIT_SOURCE_VALUE
+STEM_TABLE,ANATOMIC_SITE_SOURCE_VALUE,SPECIMEN,ANATOMIC_SITE_SOURCE_VALUE
+STEM_TABLE,DISEASE_STATUS_SOURCE_VALUE,SPECIMEN,DISEASE_STATUS_SOURCE_VALUE
diff --git a/src/org/ohdsi/rabbitInAHat/dataModel/StemTableV5.0.1.csv b/src/org/ohdsi/rabbitInAHat/dataModel/StemTableV5.0.1.csv
index c509e34f..a4fade39 100644
--- a/src/org/ohdsi/rabbitInAHat/dataModel/StemTableV5.0.1.csv
+++ b/src/org/ohdsi/rabbitInAHat/dataModel/StemTableV5.0.1.csv
@@ -35,3 +35,8 @@ STEM_TABLE,VALUE_AS_CONCEPT_ID,YES,INTEGER,"A foreign key to a result represente
STEM_TABLE,VALUE_AS_NUMBER,YES,DECIMAL,A result where the result is expressed as a numeric value.
STEM_TABLE,VALUE_AS_STRING,YES,CHARACTER VARYING,The result stored as a string. This is applicable where the result is expressed as verbatim text.
STEM_TABLE,VALUE_SOURCE_VALUE,YES,CHARACTER VARYING,The source value associated with the content of the value_as_number or value_as_concept as stored in the source data.
+STEM_TABLE,ANATOMIC_SITE_CONCEPT_ID,YES,INTEGER,A foreign key to a Standard Concept identifier for the anatomic location of specimen collection.
+STEM_TABLE,DISEASE_STATUS_CONCEPT_ID,YES,INTEGER,A foreign key to a Standard Concept identifier for the Disease Status of specimen collection.
+STEM_TABLE,SPECIMEN_SOURCE_ID,YES,INTEGER,The Specimen identifier as it appears in the source data.
+STEM_TABLE,ANATOMIC_SITE_SOURCE_VALUE,YES,CHARACTER VARYING,The information about the anatomic site as detailed in the source.
+STEM_TABLE,DISEASE_STATUS_SOURCE_VALUE,YES,CHARACTER VARYING,The information about the disease status as detailed in the source.
diff --git a/src/org/ohdsi/utilities/WhiteRabbitLauncher.java b/src/org/ohdsi/utilities/WhiteRabbitLauncher.java
index 241cf9d7..9ac93460 100644
--- a/src/org/ohdsi/utilities/WhiteRabbitLauncher.java
+++ b/src/org/ohdsi/utilities/WhiteRabbitLauncher.java
@@ -12,7 +12,7 @@ public static void main(String[] args) throws Exception {
float heapSizeMegs = (Runtime.getRuntime().maxMemory() / 1024) / 1024;
- if (heapSizeMegs > MIN_HEAP) {
+ if (heapSizeMegs > MIN_HEAP || args.length > 0) {
System.out.println("Launching with current VM");
WhiteRabbitMain.main(args);
} else {
diff --git a/src/org/ohdsi/utilities/files/IniFile.java b/src/org/ohdsi/utilities/files/IniFile.java
new file mode 100644
index 00000000..0639fa3d
--- /dev/null
+++ b/src/org/ohdsi/utilities/files/IniFile.java
@@ -0,0 +1,46 @@
+/*******************************************************************************
+ * Copyright 2016 Observational Health Data Sciences and Informatics
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.utilities.files;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class IniFile { // Reads "key = value" settings from a text file; keys are matched case-insensitively
+	private final Map<String, String> settings = new HashMap<String, String>();
+	/** Parses the file line by line; lines without '=' add no setting, a repeated key keeps its last value. */
+	public IniFile(String filename){
+		for (String line : new ReadTextFile(filename)){
+			int indexOfHash = line.lastIndexOf('#'); // everything from the LAST '#' on the line is dropped as a comment
+			if (indexOfHash != -1)
+				line = line.substring(0,indexOfHash);
+			// Split on the first '=' so that a value may itself contain '='
+			int indexOfEqualsSigns = line.indexOf('=');
+			// Key is trimmed and lower-cased, value is trimmed; lines without '=' are skipped
+			if (indexOfEqualsSigns != -1)
+				settings.put(line.substring(0,indexOfEqualsSigns).trim().toLowerCase(), line.substring(indexOfEqualsSigns+1).trim());
+		}
+	}
+	/** Returns the value stored for fieldName (case-insensitive), or "" when no such field was read. */
+	public String get(String fieldName){
+		String value = settings.get(fieldName.toLowerCase());
+		if (value == null)
+			return "";
+		else
+			return value;
+	}
+}
diff --git a/src/org/ohdsi/utilities/files/MultiRowIterator.java b/src/org/ohdsi/utilities/files/MultiRowIterator.java
index def1ef65..c358251c 100644
--- a/src/org/ohdsi/utilities/files/MultiRowIterator.java
+++ b/src/org/ohdsi/utilities/files/MultiRowIterator.java
@@ -1,158 +1,159 @@
-/*******************************************************************************
- * Copyright 2016 Observational Health Data Sciences and Informatics
- *
- * This file is part of WhiteRabbit
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-package org.ohdsi.utilities.files;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-
-import org.ohdsi.utilities.files.MultiRowIterator.MultiRowSet;
-
-/**
- * Allows iteration over multiple tables (as Iterator) simultaneously, synchronized by the value of the [linkingColumn]. Assumes all tables are sorted by
- * the [linkingColumn].
- *
- * @author MSCHUEMI
- */
-public class MultiRowIterator implements Iterator {
-
- private Iterator[] iterators;
- private String[] tableNames;
- private Row[] nextRows;
- private MultiRowSet buffer;
- private String linkingColumn;
- private boolean sortedNumerically;
-
- public MultiRowIterator(String linkingColumn, String[] tableNames, Iterator... tableIterators) {
- this(linkingColumn, false, tableNames, tableIterators);
- }
-
- public MultiRowIterator(String linkingColumn, boolean sortedNumerically, String[] tableNames, Iterator[] tableIterators) {
- this.iterators = tableIterators;
- this.linkingColumn = linkingColumn;
- this.tableNames = tableNames;
- this.sortedNumerically = sortedNumerically;
- startRead();
- }
-
- private void startRead() {
- nextRows = new Row[iterators.length];
- for (int i = 0; i < iterators.length; i++)
- if (iterators[i].hasNext())
- nextRows[i] = iterators[i].next();
- else
- nextRows[i] = null;
- readNext();
- }
-
- @Override
- public boolean hasNext() {
- return (buffer != null);
- }
-
- @Override
- public MultiRowSet next() {
- MultiRowSet result = buffer;
- readNext();
- return result;
- }
-
- private void readNext() {
- String lowestLinkingColumn = findLowestLinkingColumn(nextRows);
- if (lowestLinkingColumn == null) {
- buffer = null;
- return;
- }
- buffer = new MultiRowSet(tableNames);
- buffer.linkingId = lowestLinkingColumn;
- for (int i = 0; i < iterators.length; i++) {
- Iterator iterator = iterators[i];
- while (nextRows[i] != null && nextRows[i].get(linkingColumn).equals(lowestLinkingColumn)) {
- buffer.get(tableNames[i]).add(nextRows[i]);
- if (iterator.hasNext())
- nextRows[i] = iterator.next();
- else
- nextRows[i] = null;
- }
- }
- }
-
- private String findLowestLinkingColumn(Row[] rows) {
- String linkingId = null;
- for (Row row : rows)
- if (row != null && (linkingId == null || compare(row.get(linkingColumn), linkingId) < 0))
- linkingId = row.get(linkingColumn);
- return linkingId;
- }
-
- private int compare(String value1, String value2) {
- if (sortedNumerically)
- return efficientLongCompare(value1, value2);
- else
- return value1.compareTo(value2);
- }
-
- private int efficientLongCompare(String value1, String value2) {
- if (value1.length() > value2.length())
- return 1;
- else if (value1.length() < value2.length())
- return -1;
- else
- return value1.compareTo(value2);
- }
-
- @Override
- public void remove() {
- System.err.println("Calling unimplemented remove method in class " + this.getClass().getName());
- }
-
- public static class MultiRowSet extends HashMap> {
- private static final long serialVersionUID = 1164317535150664720L;
-
- public String linkingId;
-
- public MultiRowSet(String[] tableNames) {
- for (String tableName : tableNames) {
- put(tableName, new ArrayList());
- }
- }
-
- public List getNonEmptyTableNames() {
- List result = new ArrayList();
- for (String tableName : keySet())
- if (get(tableName).size() != 0)
- result.add(tableName);
- return result;
- }
-
- /**
- * returns the total number of rows (summed across the tables)
- *
- * @return
- */
- public int totalSize() {
- int size = 0;
- for (List rows : values())
- size += rows.size();
- return size;
- }
-
- }
-
-}
+/*******************************************************************************
+ * Copyright 2016 Observational Health Data Sciences and Informatics
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.utilities.files;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+
+import org.ohdsi.utilities.files.MultiRowIterator.MultiRowSet;
+
+/**
+ * Allows iteration over multiple tables (as Iterator) simultaneously, synchronized by the value of the [linkingColumn]. Assumes all tables are sorted by
+ * the [linkingColumn].
+ *
+ * @author MSCHUEMI
+ */
+public class MultiRowIterator implements Iterator<MultiRowSet> {
+	// iterators[i] reads the table named tableNames[i]
+	private Iterator<Row>[] iterators;
+	private String[] tableNames;
+	private Row[] nextRows; // look-ahead row per table; null once that table is exhausted
+	private MultiRowSet buffer; // set handed out by the following next() call; null when nothing is left
+	private String linkingColumn;
+	private boolean sortedNumerically;
+	/** Creates an iterator that orders linking values as plain strings (String.compareTo). */
+	@SafeVarargs
+	public MultiRowIterator(String linkingColumn, String[] tableNames, Iterator<Row>... tableIterators) {
+		this(linkingColumn, false, tableNames, tableIterators);
+	}
+	/** Creates an iterator and immediately reads ahead; sortedNumerically selects the length-first comparison. */
+	public MultiRowIterator(String linkingColumn, boolean sortedNumerically, String[] tableNames, Iterator<Row>[] tableIterators) {
+		this.iterators = tableIterators;
+		this.linkingColumn = linkingColumn;
+		this.tableNames = tableNames;
+		this.sortedNumerically = sortedNumerically;
+		startRead();
+	}
+	/** Loads the first row of every table (null for an empty table) and prepares the first set. */
+	private void startRead() {
+		nextRows = new Row[iterators.length];
+		for (int i = 0; i < iterators.length; i++)
+			if (iterators[i].hasNext())
+				nextRows[i] = iterators[i].next();
+			else
+				nextRows[i] = null;
+		readNext();
+	}
+
+	@Override
+	public boolean hasNext() {
+		return (buffer != null);
+	}
+	/** Returns the prepared set and reads ahead; yields null (no exception) once every table is exhausted. */
+	@Override
+	public MultiRowSet next() {
+		MultiRowSet result = buffer;
+		readNext();
+		return result;
+	}
+	/** Fills buffer with all rows, from every table, that carry the lowest pending linking value. */
+	private void readNext() {
+		String lowestLinkingColumn = findLowestLinkingColumn(nextRows);
+		if (lowestLinkingColumn == null) {
+			buffer = null; // no look-ahead rows left in any table: iteration is finished
+			return;
+		}
+		buffer = new MultiRowSet(tableNames);
+		buffer.linkingId = lowestLinkingColumn;
+		for (int i = 0; i < iterators.length; i++) {
+			Iterator<Row> iterator = iterators[i];
+			while (nextRows[i] != null && nextRows[i].get(linkingColumn).equals(lowestLinkingColumn)) {
+				buffer.get(tableNames[i]).add(nextRows[i]);
+				if (iterator.hasNext())
+					nextRows[i] = iterator.next();
+				else
+					nextRows[i] = null;
+			}
+		}
+	}
+	/** Returns the smallest linking value among the non-null look-ahead rows, or null when all are null. */
+	private String findLowestLinkingColumn(Row[] rows) {
+		String linkingId = null;
+		for (Row row : rows)
+			if (row != null && (linkingId == null || compare(row.get(linkingColumn), linkingId) < 0))
+				linkingId = row.get(linkingColumn);
+		return linkingId;
+	}
+	/** Compares two linking values using the ordering chosen at construction time. */
+	private int compare(String value1, String value2) {
+		if (sortedNumerically)
+			return efficientLongCompare(value1, value2);
+		else
+			return value1.compareTo(value2);
+	}
+	/** Shorter string first, then String.compareTo; numeric order only for non-negative integers without leading zeros. */
+	private int efficientLongCompare(String value1, String value2) {
+		if (value1.length() > value2.length())
+			return 1;
+		else if (value1.length() < value2.length())
+			return -1;
+		else
+			return value1.compareTo(value2);
+	}
+	/** Not implemented: only reports the call on stderr, nothing is removed and nothing is thrown. */
+	@Override
+	public void remove() {
+		System.err.println("Calling unimplemented remove method in class " + this.getClass().getName());
+	}
+	/** Rows sharing one linking value, grouped by table name; every table name given to the constructor has an entry. */
+	public static class MultiRowSet extends HashMap<String, List<Row>> {
+		private static final long serialVersionUID = 1164317535150664720L;
+		// Linking-column value shared by the rows of this set (assigned by MultiRowIterator.readNext)
+		public String linkingId;
+		/** Creates a set holding an empty row list for each of the given table names. */
+		public MultiRowSet(String[] tableNames) {
+			for (String tableName : tableNames) {
+				put(tableName, new ArrayList<Row>());
+			}
+		}
+		/** Returns the names of the tables that contributed at least one row to this set. */
+		public List<String> getNonEmptyTableNames() {
+			List<String> result = new ArrayList<String>();
+			for (String tableName : keySet())
+				if (get(tableName).size() != 0)
+					result.add(tableName);
+			return result;
+		}
+
+		/**
+		 * Returns the total number of rows (summed across the tables).
+		 *
+		 * @return the sum of the sizes of all per-table row lists
+		 */
+		public int totalSize() {
+			int size = 0;
+			for (List<Row> rows : values())
+				size += rows.size();
+			return size;
+		}
+
+	}
+
+}
diff --git a/src/org/ohdsi/utilities/files/WriteCSVFileWithHeader.java b/src/org/ohdsi/utilities/files/WriteCSVFileWithHeader.java
index c5ef85b7..dd1ea499 100644
--- a/src/org/ohdsi/utilities/files/WriteCSVFileWithHeader.java
+++ b/src/org/ohdsi/utilities/files/WriteCSVFileWithHeader.java
@@ -15,8 +15,12 @@ public class WriteCSVFileWithHeader {
private boolean headerWritten = false;
public WriteCSVFileWithHeader(String fileName) {
+ this(fileName, CSVFormat.RFC4180);
+ }
+
+ public WriteCSVFileWithHeader(String fileName, CSVFormat format) {
try {
- printer = new CSVPrinter(new FileWriter(fileName), CSVFormat.RFC4180);
+ printer = new CSVPrinter(new FileWriter(fileName), format);
} catch (IOException e) {
throw new RuntimeException(e.getMessage());
}
diff --git a/src/org/ohdsi/whiteRabbit/DbSettings.java b/src/org/ohdsi/whiteRabbit/DbSettings.java
index 0b8a9f00..31784dec 100644
--- a/src/org/ohdsi/whiteRabbit/DbSettings.java
+++ b/src/org/ohdsi/whiteRabbit/DbSettings.java
@@ -20,6 +20,7 @@
import java.util.ArrayList;
import java.util.List;
+import org.apache.commons.csv.CSVFormat;
import org.ohdsi.databases.DbType;
public class DbSettings {
@@ -39,4 +40,5 @@ public class DbSettings {
// CSV file settings
public char delimiter = ',';
+ public CSVFormat csvFormat = CSVFormat.RFC4180;
}
diff --git a/src/org/ohdsi/whiteRabbit/WhiteRabbitMain.java b/src/org/ohdsi/whiteRabbit/WhiteRabbitMain.java
index 651cf1ae..3d29034b 100644
--- a/src/org/ohdsi/whiteRabbit/WhiteRabbitMain.java
+++ b/src/org/ohdsi/whiteRabbit/WhiteRabbitMain.java
@@ -40,6 +40,7 @@
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
+import java.util.Vector;
import javax.swing.BorderFactory;
import javax.swing.Box;
@@ -69,10 +70,12 @@
import javax.swing.event.ChangeListener;
import javax.swing.filechooser.FileNameExtensionFilter;
+import org.apache.commons.csv.CSVFormat;
import org.ohdsi.databases.DbType;
import org.ohdsi.databases.RichConnection;
import org.ohdsi.utilities.DirectoryUtilities;
import org.ohdsi.utilities.StringUtilities;
+import org.ohdsi.utilities.files.IniFile;
import org.ohdsi.whiteRabbit.fakeDataGenerator.FakeDataGenerator;
import org.ohdsi.whiteRabbit.scan.SourceDataScan;
@@ -88,26 +91,26 @@ public class WhiteRabbitMain implements ActionListener {
private JTextField folderField;
private JTextField scanReportFileField;
- private JComboBox scanRowCount;
- private JComboBox scanValuesCount;
+ private JComboBox scanRowCount;
+ private JComboBox scanValuesCount;
private JCheckBox scanValueScan;
private JSpinner scanMinCellCount;
private JSpinner generateRowCount;
- private JComboBox sourceType;
- private JComboBox targetType;
+ private JComboBox sourceType;
+ private JComboBox targetType;
private JTextField targetUserField;
private JTextField targetPasswordField;
private JTextField targetServerField;
private JTextField targetDatabaseField;
private JTextField sourceDelimiterField;
- private JTextField targetDelimiterField;
+ private JComboBox targetCSVFormat;
private JTextField sourceServerField;
private JTextField sourceUserField;
private JTextField sourcePasswordField;
private JTextField sourceDatabaseField;
private JButton addAllButton;
- private JList tableList;
- private List tables = new ArrayList();
+ private JList tableList;
+ private Vector tables = new Vector();
private boolean sourceIsFiles = true;
private boolean targetIsFiles = false;
@@ -118,27 +121,119 @@ public static void main(String[] args) {
}
public WhiteRabbitMain(String[] args) {
- frame = new JFrame("White Rabbit");
+ if (args.length == 2 && args[0].equalsIgnoreCase("-ini"))
+ launchCommandLine(args[1]);
+ else {
+ frame = new JFrame("White Rabbit");
+
+ frame.addWindowListener(new WindowAdapter() {
+ public void windowClosing(WindowEvent e) {
+ System.exit(0);
+ }
+ });
+ frame.setLayout(new BorderLayout());
+ frame.setJMenuBar(createMenuBar());
- frame.addWindowListener(new WindowAdapter() {
- public void windowClosing(WindowEvent e) {
- System.exit(0);
- }
- });
- frame.setLayout(new BorderLayout());
- frame.setJMenuBar(createMenuBar());
+ JComponent tabsPanel = createTabsPanel();
+ JComponent consolePanel = createConsolePanel();
+
+ frame.add(consolePanel, BorderLayout.CENTER);
+ frame.add(tabsPanel, BorderLayout.NORTH);
- JComponent tabsPanel = createTabsPanel();
- JComponent consolePanel = createConsolePanel();
+ loadIcons(frame);
+ frame.pack();
+ frame.setVisible(true);
+ ObjectExchange.frame = frame;
+ }
+ }
- frame.add(consolePanel, BorderLayout.CENTER);
- frame.add(tabsPanel, BorderLayout.NORTH);
+ /**
+ * Runs a source data scan without the GUI, reading every setting from an ini file and handing
+ * the result to SourceDataScan.process together with the path WORKING_FOLDER/ScanReport.xlsx.
+ *
+ * Keys read: DATA_TYPE, DELIMITER, USER_NAME, PASSWORD, SERVER_LOCATION, DATABASE_NAME,
+ * TABLES_TO_SCAN, WORKING_FOLDER, ROWS_PER_TABLE, SCAN_FIELD_VALUES, MIN_CELL_COUNT and
+ * MAX_DISTINCT_VALUES.
+ *
+ * @param iniFileName
+ * path of the ini file holding the scan settings
+ * @throws IllegalArgumentException
+ * if DELIMITER is empty, DATA_TYPE is not recognised, or TABLES_TO_SCAN is "*" while
+ * DATA_TYPE is "Delimited text files"
+ * @throws NumberFormatException
+ * if ROWS_PER_TABLE, MIN_CELL_COUNT or MAX_DISTINCT_VALUES is not an integer
+ */
+ private void launchCommandLine(String iniFileName) {
+ IniFile iniFile = new IniFile(iniFileName);
+ String dataType = iniFile.get("DATA_TYPE");
+ String workingFolder = iniFile.get("WORKING_FOLDER");
+ DbSettings dbSettings = new DbSettings();
+ if (dataType.equalsIgnoreCase("Delimited text files")) {
+ dbSettings.dataType = DbSettings.CSVFILES;
+ String delimiter = iniFile.get("DELIMITER");
+ if (delimiter.length() == 0) // charAt(0) below would otherwise fail with a bare index error
+ throw new IllegalArgumentException("DELIMITER cannot be empty for delimited text files");
+ if (delimiter.equalsIgnoreCase("tab"))
+ dbSettings.delimiter = '\t';
+ else
+ dbSettings.delimiter = delimiter.charAt(0);
+ } else {
+ dbSettings.dataType = DbSettings.DATABASE;
+ dbSettings.user = iniFile.get("USER_NAME");
+ dbSettings.password = iniFile.get("PASSWORD");
+ dbSettings.server = iniFile.get("SERVER_LOCATION");
+ dbSettings.database = iniFile.get("DATABASE_NAME");
+ if (dataType.equalsIgnoreCase("MySQL"))
+ dbSettings.dbType = DbType.MYSQL;
+ else if (dataType.equalsIgnoreCase("Oracle"))
+ dbSettings.dbType = DbType.ORACLE;
+ else if (dataType.equalsIgnoreCase("PostgreSQL"))
+ dbSettings.dbType = DbType.POSTGRESQL;
+ else if (dataType.equalsIgnoreCase("Redshift"))
+ dbSettings.dbType = DbType.REDSHIFT;
+ else if (dataType.equalsIgnoreCase("MS Access"))
+ dbSettings.dbType = DbType.MSACCESS;
+ else if (dataType.equalsIgnoreCase("SQL Server") || dataType.equalsIgnoreCase("PDW")) {
+ dbSettings.dbType = dataType.equalsIgnoreCase("PDW") ? DbType.PDW : DbType.MSSQL;
+ // A user name of the form "domain/user" is split into its two parts; anything else
+ // (including an empty name, i.e. Windows authentication) is passed through unchanged
+ String[] parts = dbSettings.user.split("/");
+ if (parts.length == 2) {
+ dbSettings.domain = parts[0];
+ dbSettings.user = parts[1];
+ }
+ } else
+ throw new IllegalArgumentException("Unknown DATA_TYPE in ini file: " + dataType);
+ }
+ String tablesToScan = iniFile.get("TABLES_TO_SCAN");
+ if (tablesToScan.equalsIgnoreCase("*")) {
+ // "*" is resolved by asking the database for its table names, which has no meaning for files
+ if (dbSettings.dataType == DbSettings.CSVFILES)
+ throw new IllegalArgumentException("TABLES_TO_SCAN = * requires a database; list the files to scan explicitly");
+ RichConnection connection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType);
+ try {
+ for (String table : connection.getTableNames(dbSettings.database))
+ dbSettings.tables.add(table);
+ } finally {
+ connection.close(); // release the connection even when listing the tables fails
+ }
+ } else {
+ for (String table : tablesToScan.split(",")) {
+ table = table.trim(); // tolerate "a, b" style lists
+ if (table.length() == 0)
+ continue;
+ if (dbSettings.dataType == DbSettings.CSVFILES)
+ table = workingFolder + "/" + table;
+ dbSettings.tables.add(table);
+ }
+ }
- loadIcons(frame);
- frame.pack();
- frame.setVisible(true);
- ObjectExchange.frame = frame;
- executeParameters(args);
+ SourceDataScan sourceDataScan = new SourceDataScan();
+ int maxRows = Integer.parseInt(iniFile.get("ROWS_PER_TABLE"));
+ boolean scanValues = iniFile.get("SCAN_FIELD_VALUES").equalsIgnoreCase("yes");
+ int minCellCount = Integer.parseInt(iniFile.get("MIN_CELL_COUNT"));
+ int maxValues = Integer.parseInt(iniFile.get("MAX_DISTINCT_VALUES"));
+ sourceDataScan.process(dbSettings, maxRows, scanValues, minCellCount, maxValues, workingFolder + "/ScanReport.xlsx");
}
private JComponent createTabsPanel() {
@@ -189,7 +260,7 @@ public void actionPerformed(ActionEvent e) {
sourcePanel.setLayout(new GridLayout(0, 2));
sourcePanel.setBorder(BorderFactory.createTitledBorder("Source data location"));
sourcePanel.add(new JLabel("Data type"));
- sourceType = new JComboBox(new String[] { "Delimited text files", "MySQL", "Oracle", "SQL Server", "PostgreSQL", "MS Access", "Redshift" });
+ sourceType = new JComboBox(new String[] { "Delimited text files", "MySQL", "Oracle", "SQL Server", "PostgreSQL", "MS Access", "PDW", "Redshift" });
sourceType.setToolTipText("Select the type of source data available");
sourceType.addItemListener(new ItemListener() {
@@ -286,7 +357,7 @@ private JPanel createScanPanel() {
JPanel tablePanel = new JPanel();
tablePanel.setLayout(new BorderLayout());
tablePanel.setBorder(new TitledBorder("Tables to scan"));
- tableList = new JList();
+ tableList = new JList();
tableList.setToolTipText("Specify the tables (or CSV files) to be scanned here");
tablePanel.add(new JScrollPane(tableList), BorderLayout.CENTER);
@@ -349,14 +420,14 @@ public void stateChanged(ChangeEvent arg0) {
scanOptionsPanel.add(Box.createHorizontalGlue());
scanOptionsPanel.add(new JLabel("Max distinct values "));
- scanValuesCount = new JComboBox(new String[] { "100", "1,000", "10,000" });
+ scanValuesCount = new JComboBox(new String[] { "100", "1,000", "10,000" });
scanValuesCount.setSelectedIndex(1);
scanValuesCount.setToolTipText("Maximum number of distinct values per field to be reported");
scanOptionsPanel.add(scanValuesCount);
scanOptionsPanel.add(Box.createHorizontalGlue());
scanOptionsPanel.add(new JLabel("Rows per table "));
- scanRowCount = new JComboBox(new String[] { "100,000", "500,000", "1 million", "all" });
+ scanRowCount = new JComboBox(new String[] { "100,000", "500,000", "1 million", "all" });
scanRowCount.setSelectedIndex(0);
scanRowCount.setToolTipText("Maximum number of rows per table to be scanned for field values");
scanOptionsPanel.add(scanRowCount);
@@ -419,7 +490,7 @@ public void actionPerformed(ActionEvent e) {
targetPanel.setLayout(new GridLayout(0, 2));
targetPanel.setBorder(BorderFactory.createTitledBorder("Target data location"));
targetPanel.add(new JLabel("Data type"));
- targetType = new JComboBox(new String[] { "Delimited text files", "MySQL", "Oracle", "SQL Server", "PostgreSQL" });
+ targetType = new JComboBox(new String[] { "Delimited text files", "MySQL", "Oracle", "SQL Server", "PostgreSQL" });
// targetType = new JComboBox(new String[] { "Delimited text files", "MySQL" });
targetType.setToolTipText("Select the type of source data available");
targetType.addItemListener(new ItemListener() {
@@ -431,7 +502,7 @@ public void itemStateChanged(ItemEvent arg0) {
targetUserField.setEnabled(!targetIsFiles);
targetPasswordField.setEnabled(!targetIsFiles);
targetDatabaseField.setEnabled(!targetIsFiles);
- targetDelimiterField.setEnabled(targetIsFiles);
+ targetCSVFormat.setEnabled(targetIsFiles);
if (!targetIsFiles && arg0.getItem().toString().equals("Oracle")) {
targetServerField
@@ -476,11 +547,12 @@ public void itemStateChanged(ItemEvent arg0) {
targetDatabaseField.setEnabled(false);
targetPanel.add(targetDatabaseField);
- targetPanel.add(new JLabel("Delimiter"));
- targetDelimiterField = new JTextField(",");
- targetDelimiterField.setToolTipText("The delimiter that separates values. Enter 'tab' for tab.");
- targetDelimiterField.setEnabled(true);
- targetPanel.add(targetDelimiterField);
+ targetPanel.add(new JLabel("CSV Format"));
+ targetCSVFormat = new JComboBox<>(
+ new String[] { "Default (comma, CRLF)", "TDF (tab, CRLF)", "MySQL (tab, LF)", "RFC4180", "Excel CSV" });
+ targetCSVFormat.setToolTipText("The format of the output");
+ targetCSVFormat.setEnabled(true);
+ targetPanel.add(targetCSVFormat);
c.gridx = 0;
c.gridy = 1;
@@ -566,39 +638,6 @@ private Image loadIcon(String name, JFrame f) {
return null;
}
- private void executeParameters(String[] args) {
- String mode = null;
- for (String arg : args) {
- if (arg.startsWith("-")) {
- mode = arg.toLowerCase();
- } else {
- if (mode.equals("-folder"))
- folderField.setText(arg);
- if (mode.equals("-targetpassword"))
- targetPasswordField.setText(arg);
- if (mode.equals("-targetserver"))
- targetServerField.setText(arg);
- if (mode.equals("-targettype"))
- targetType.setSelectedItem(arg);
- if (mode.equals("-targetdatabase"))
- targetDatabaseField.setText(arg);
- if (mode.equals("-targetuser"))
- targetUserField.setText(arg);
- if (mode.equals("-sourceserver"))
- sourceServerField.setText(arg);
- if (mode.equals("-sourcetype"))
- sourceType.setSelectedItem(arg);
- if (mode.equals("-sourcedatabase"))
- sourceDatabaseField.setText(arg);
- if (mode.equals("-sourceuser"))
- sourceUserField.setText(arg);
- if (mode.equals("-sourcepassword"))
- sourcePasswordField.setText(arg);
- mode = null;
- }
- }
- }
-
private void pickFolder() {
JFileChooser fileChooser = new JFileChooser(new File(folderField.getText()));
fileChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
@@ -616,9 +655,13 @@ private void pickScanReportFile() {
}
+ /**
+ * Removes the entries currently selected in the table list from the tables to scan, then
+ * refreshes the list once.
+ */
private void removeTables() {
- for (Object item : tableList.getSelectedValues()) {
+ for (String item : tableList.getSelectedValuesList()) {
tables.remove(item);
- tableList.setListData(tables.toArray());
}
+ tableList.setListData(tables); // refresh once after the loop instead of once per removed item
}
@@ -630,7 +669,7 @@ private void addAllTables() {
for (String table : connection.getTableNames(sourceDbSettings.database)) {
if (!tables.contains(table))
tables.add((String) table);
- tableList.setListData(tables.toArray());
+ tableList.setListData(tables);
}
connection.close();
}
@@ -652,7 +691,7 @@ private void pickTables() {
String tableName = DirectoryUtilities.getRelativePath(new File(folderField.getText()), table);
if (!tables.contains(tableName))
tables.add(tableName);
- tableList.setListData(tables.toArray());
+ tableList.setListData(tables);
}
}
@@ -669,7 +708,7 @@ private void pickTables() {
for (Object item : selectionDialog.getSelectedItems()) {
if (!tables.contains(item))
tables.add((String) item);
- tableList.setListData(tables.toArray());
+ tableList.setListData(tables);
}
}
}
@@ -714,6 +753,15 @@ else if (sourceType.getSelectedItem().toString().equals("SQL Server")) {
dbSettings.domain = parts[0];
}
}
+ } else if (sourceType.getSelectedItem().toString().equals("PDW")) { // "else" keeps PDW in the same if/else-if chain as the other data types
+ dbSettings.dbType = DbType.PDW;
+ if (sourceUserField.getText().length() != 0) { // Not using windows authentication
+ String[] parts = sourceUserField.getText().split("/");
+ if (parts.length == 2) {
+ dbSettings.user = parts[1];
+ dbSettings.domain = parts[0];
+ }
+ }
} else if (sourceType.getSelectedItem().toString().equals("MS Access"))
dbSettings.dbType = DbType.MSACCESS;
}
@@ -771,14 +819,27 @@ private DbSettings getTargetDbSettings() {
DbSettings dbSettings = new DbSettings();
if (targetType.getSelectedItem().equals("Delimited text files")) {
dbSettings.dataType = DbSettings.CSVFILES;
- if (targetDelimiterField.getText().length() == 0) {
- JOptionPane.showMessageDialog(frame, "Delimiter field cannot be empty for target files", "Error", JOptionPane.ERROR_MESSAGE);
- return null;
+
+ switch((String) targetCSVFormat.getSelectedItem()) {
+ case "Default (comma, CRLF)":
+ dbSettings.csvFormat = CSVFormat.DEFAULT;
+ break;
+ case "RFC4180":
+ dbSettings.csvFormat = CSVFormat.RFC4180;
+ break;
+ case "Excel CSV":
+ dbSettings.csvFormat = CSVFormat.EXCEL;
+ break;
+ case "TDF (tab, CRLF)":
+ dbSettings.csvFormat = CSVFormat.TDF;
+ break;
+ case "MySQL (tab, LF)":
+ dbSettings.csvFormat = CSVFormat.MYSQL;
+ break;
+ default:
+ dbSettings.csvFormat = CSVFormat.RFC4180;
}
- if (targetDelimiterField.getText().toLowerCase().equals("tab"))
- dbSettings.delimiter = '\t';
- else
- dbSettings.delimiter = targetDelimiterField.getText().charAt(0);
+
} else {
dbSettings.dataType = DbSettings.DATABASE;
dbSettings.user = targetUserField.getText();
@@ -800,6 +861,15 @@ else if (sourceType.getSelectedItem().toString().equals("SQL Server")) {
dbSettings.domain = parts[0];
}
}
+ } else if (sourceType.getSelectedItem().toString().equals("PDW")) {
+ dbSettings.dbType = DbType.PDW;
+ if (sourceUserField.getText().length() != 0) { // Not using windows authentication
+ String[] parts = sourceUserField.getText().split("/");
+ if (parts.length == 2) {
+ dbSettings.user = parts[1];
+ dbSettings.domain = parts[0];
+ }
+ }
}
if (dbSettings.database.trim().length() == 0) {
@@ -930,7 +1000,7 @@ private class DBTableSelectionDialog extends JDialog implements ActionListener {
private JButton yesButton = null;
private JButton noButton = null;
private boolean answer = false;
- private JList list;
+ private JList list;
public boolean getAnswer() {
return answer;
@@ -948,7 +1018,7 @@ public DBTableSelectionDialog(JFrame frame, boolean modal, String tableNames) {
JLabel message = new JLabel("Select tables");
panel.add(message, BorderLayout.NORTH);
- list = new JList(tableNames.split("\t"));
+ list = new JList(tableNames.split("\t"));
JScrollPane scrollPane = new JScrollPane(list);
panel.add(scrollPane, BorderLayout.CENTER);
@@ -976,8 +1046,8 @@ public void actionPerformed(ActionEvent e) {
}
}
- public Object[] getSelectedItems() {
- return list.getSelectedValues();
+ public List getSelectedItems() {
+ return list.getSelectedValuesList();
}
}
@@ -1011,9 +1081,6 @@ private void handleError(Exception e) {
private JMenuBar createMenuBar() {
JMenuBar menuBar = new JMenuBar();
- JMenu fileMenu = new JMenu("File");
- int menuShortcutMask = Toolkit.getDefaultToolkit().getMenuShortcutKeyMask();
-
JMenu helpMenu = new JMenu("Help");
menuBar.add(helpMenu);
JMenuItem helpItem = new JMenuItem(ACTION_CMD_HELP);
diff --git a/src/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java b/src/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java
index 7b3149b6..d96ccfab 100644
--- a/src/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java
+++ b/src/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java
@@ -88,7 +88,7 @@ public void generateData(DbSettings dbSettings, int maxRowsPerTable, String file
if (!name.toLowerCase().endsWith(".csv"))
name = name + ".csv";
System.out.println("Generating table " + name);
- WriteCSVFileWithHeader out = new WriteCSVFileWithHeader(name);
+ WriteCSVFileWithHeader out = new WriteCSVFileWithHeader(name, dbSettings.csvFormat);
for (Row row : generateRows(table))
out.write(row);
out.close();
diff --git a/src/org/ohdsi/whiteRabbit/scan/SourceDataScan.java b/src/org/ohdsi/whiteRabbit/scan/SourceDataScan.java
index 03fd0cb9..dffbaa46 100644
--- a/src/org/ohdsi/whiteRabbit/scan/SourceDataScan.java
+++ b/src/org/ohdsi/whiteRabbit/scan/SourceDataScan.java
@@ -282,25 +282,34 @@ private List processDatabaseTable(String table, RichConnection connec
}
+ /**
+ * Builds the SELECT statement for the rows of one table: the whole table when sampleSize is -1,
+ * otherwise a statement using the sampling or row-limiting syntax of the current dbType.
+ */
private QueryResult fetchRowsFromTable(RichConnection connection, String table, long rowCount) {
- String query;
- if (dbType == DbType.MSSQL || dbType == DbType.MSACCESS)
+ // Plain select as the default: the Oracle branch below assigns nothing when sampleSize is
+ // not smaller than rowCount, which would otherwise leave the query null
+ String query = "SELECT * FROM " + table;
+
+ if (sampleSize == -1) {
+ if (dbType == DbType.MSSQL || dbType == DbType.PDW || dbType == DbType.MSACCESS)
query = "SELECT * FROM [" + table + "]";
else
query = "SELECT * FROM " + table;
-
- if (sampleSize != -1) {
+ } else {
if (dbType == DbType.MSSQL)
- query += " TABLESAMPLE (" + sampleSize + " ROWS)";
+ query = "SELECT * FROM [" + table + "] TABLESAMPLE (" + sampleSize + " ROWS)";
else if (dbType == DbType.MYSQL)
- query += " ORDER BY RAND() LIMIT " + sampleSize;
+ query = "SELECT * FROM " + table + " ORDER BY RAND() LIMIT " + sampleSize;
+ else if (dbType == DbType.PDW)
+ query = "SELECT TOP " + sampleSize + " * FROM [" + table + "] ORDER BY RAND()";
else if (dbType == DbType.ORACLE) {
if (sampleSize < rowCount) {
double percentage = 100 * sampleSize / (double) rowCount;
if (percentage < 100)
- query += " SAMPLE(" + percentage + ")";
+ query = "SELECT * FROM " + table + " SAMPLE(" + percentage + ")";
}
} else if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT)
- query += " ORDER BY RANDOM() LIMIT " + sampleSize;
+ query = "SELECT * FROM " + table + " ORDER BY RANDOM() LIMIT " + sampleSize;
else if (dbType == DbType.MSACCESS)
query = "SELECT " + "TOP " + sampleSize + " * FROM [" + table + "]";
}
@@ -328,7 +331,7 @@ private List fetchTableStructure(RichConnection connection, String ta
String query = null;
if (dbType == DbType.ORACLE)
query = "SELECT COLUMN_NAME,DATA_TYPE FROM ALL_TAB_COLUMNS WHERE table_name = '" + table + "' AND owner = '" + database.toUpperCase() + "'";
- else if (dbType == DbType.MSSQL) {
+ else if (dbType == DbType.MSSQL || dbType == DbType.PDW) {
String trimmedDatabase = database;
if (database.startsWith("[") && database.endsWith("]"))
trimmedDatabase = database.substring(1, database.length() - 1);