Updated siny app for mskai and phenotype library 2

thehyve · Feb 24, 2021 · aed5143 · aed5143
1 parent b0c8582
commit aed5143
Show file tree

Hide file tree

Showing 15 changed files with 793 additions and 428 deletions.
diff --git a/MskaiCohortDiagnostics/R/DataPulls.R b/MskaiCohortDiagnostics/R/DataPulls.R
@@ -13,7 +13,7 @@ loadRecommenderStandardFromDatabase <-
         renderTranslateQuerySql(
           connection = dataSource$connection,
           sql = sql,
-          results_database_schema = 'concept_prevalence',
+          # results_database_schema = dataSource$resultsDatabaseSchema,
           vocabulary_database_schema = dataSource$vocabularyDatabaseSchema,
           source_list = conceptList,
           snakeCaseToCamelCase = TRUE
@@ -40,7 +40,7 @@ loadRecommenderSourceFromDatabase <-
         renderTranslateQuerySql(
           connection = dataSource$connection,
           sql = sql,
-          results_database_schema = 'concept_prevalence',
+          # results_database_schema = dataSource$resultsDatabaseSchema,
           vocabulary_database_schema = dataSource$vocabularyDatabaseSchema,
           source_list = conceptList,
           snakeCaseToCamelCase = TRUE
@@ -246,8 +246,8 @@ resolveConceptSetExpressionUsingDatabase <- function(dataSource = .GlobalEnv,
   # get all conceptIds (as dataframe) that are excluded in concept set expression with descendants
   excludedConceptIdsWithDescendants <- descendantConcepts %>% 
     dplyr::filter(.data$ancestorConceptId %in% (conceptSetExpressionTable %>% 
-                                                  dplyr::filter(.data$isExcluded == TRUE && 
-                                                                  .data$includeDescendants == TRUE) %>% 
+                                                  dplyr::filter(.data$isExcluded == TRUE) %>% 
+                                                  dplyr::filter(.data$includeDescendants == TRUE) %>% 
                                                   dplyr::pull(.data$conceptId)))
 
   # conceptIds in conceptSetExpression table
@@ -403,4 +403,26 @@ getDetailsForConceptIds <- function(dataSource = dataSource,
     ) %>%  
     dplyr::arrange(1)
   return(data)
+}
+
+
+getConceptPrevalenceCountsForConceptIds <- function(dataSource = .GlobalEnv,
+                                                    conceptIdsList) {
+
+  sql <- "select *
+          from concept_prevalence.cp_master
+          where concept_id in (@concept_list);"
+  if (length(conceptIdsList) > 0) {
+    data <-
+      renderTranslateQuerySql(
+        connection = dataSource$connection,
+        concept_list = conceptIdsList,
+        sql = sql, 
+        snakeCaseToCamelCase = TRUE
+      ) %>%  
+      dplyr::arrange(1)
+    return(data) 
+  } else {
+    return(dplyr::tibble())
+  }
 }
diff --git a/MskaiCohortDiagnostics/R/HelperFunctions.R b/MskaiCohortDiagnostics/R/HelperFunctions.R
@@ -8,7 +8,8 @@ cohortReference <- function(outputId) {
 standardDataTable <- function(data, 
                               selectionMode = "single",
                               selected = c(1),
-                              searching = TRUE) {
+                              searching = TRUE,
+                              pageLength = 10) {
 
   dataTableFilter =
     list(position = 'top',
@@ -17,7 +18,7 @@ standardDataTable <- function(data,
 
   dataTableOption =
     list(
-      pageLength = 10,
+      pageLength = pageLength,
       lengthMenu = list(c(5, 10, 20, -1), c("5", "10", "20", "All")),
       lengthChange = TRUE,
       searching = searching,
@@ -66,7 +67,18 @@ standardDataTable <- function(data,
                                            'temporalChoices',
                                            'covariateName',
                                            'conceptId',
-                                           'databaseId'
+                                           'databaseId',
+                                           'standard', 
+                                           'invalidReason',
+                                           'invalid',
+                                           'conceptCode', 
+                                           'isExcluded',
+                                           'excluded',
+                                           'includeDescendants',
+                                           'descendants',
+                                           'includeMapped',
+                                           'mapped',
+                                           'conceptInSet'
                                            )
 
   convertVariableToFactor <- function(data, variables) {
@@ -114,7 +126,7 @@ standardDataTable <- function(data,
   colNames <- colnames(data)
   listRounds <-
     c(colNames[stringr::str_detect(string = tolower(colNames),
-                                   pattern = 'entries|subjects|count|min|max|p10|p25|median|p75|p90|max|before|onvisitstart|after|duringvisit')]
+                                   pattern = 'entries|subjects|count|min|max|p10|p25|median|p75|p90|max|before|onvisitstart|after|duringvisit|dbc|drc|rc')]
       , colNames[stringr::str_detect(string = colNames,
                                      pattern = paste0(database$databaseId, collapse = "|"))])
   listDecimal <-
@@ -270,3 +282,29 @@ convertMdToHtml <- function(markdown) {
 #   }
 #   return(cohort)
 # }
+
+
+pivotIndexBreakDownData <- function(data, variable, phenotypeLibraryMode = TRUE) {
+  pivotByPhenotypeCohort <- c('phenotypeId', 'phenotypeName', 'cohortId', 'cohortName', 'conceptId', 'conceptName')
+  pivotByCohort <- c('cohortId', 'cohortName', 'conceptId', 'conceptName')
+  if (phenotypeLibraryMode) {
+    if (nrow(data) > 0) {
+      data <- data %>%
+        dplyr::select(dplyr::all_of(pivotByPhenotypeCohort), 'databaseId', dplyr::all_of(variable)) %>%
+        tidyr::pivot_wider(id_cols = pivotByPhenotypeCohort,
+                           values_from = dplyr::all_of(variable),
+                           names_from = 'databaseId',
+                           values_fill = 0)
+    } else {dplyr::tibble('no data')}
+  } else {
+    if (nrow(data) > 0) {
+    data <- data %>%
+      dplyr::select(dplyr::all_of(pivotByCohort), 'databaseId', dplyr::all_of(variable)) %>%
+      tidyr::pivot_wider(id_cols = pivotByCohort,
+                         values_from = dplyr::all_of(variable),
+                         names_from = 'databaseId',
+                         values_fill = 0)
+    } else {dplyr::tibble('no data')}
+  }
+  return(data)
+}
diff --git a/MskaiCohortDiagnostics/R/Results.R b/MskaiCohortDiagnostics/R/Results.R
@@ -216,15 +216,13 @@ getIndexEventBreakdown <- function(dataSource = .GlobalEnv,
     dplyr::left_join(y = cohortCounts,
                      by = c("cohortId" = "cohortId",
                             "databaseId" = "databaseId")) %>% 
-    dplyr::mutate(percent = .data$conceptCount/.data$cohortEntries) %>%
     dplyr::relocate(
       .data$databaseId,
       .data$cohortId,
       .data$conceptId,
       .data$conceptName,
       .data$conceptCount
-    ) %>% 
-    dplyr::select(-.data$cohortEntries)
+    )
   return(data)
 }
 

diff --git a/MskaiCohortDiagnostics/R/Tables.R b/MskaiCohortDiagnostics/R/Tables.R
@@ -2,6 +2,13 @@ library(magrittr)
 
 prepareTable1 <- function(covariates,
                           pathToCsv = "Table1Specs.csv") {
+  if ('conceptName' %in% colnames(covariates)) {
+    covariates <- covariates %>% 
+      dplyr::mutate(covariateName = conceptName)
+  }
+  # remove negative values that are created because of minCellCount
+  covariates <- covariates %>% 
+    dplyr::filter(.data$mean > 0)
   space <- "&nbsp;"
   specifications <- readr::read_csv(
     file = pathToCsv,

diff --git a/MskaiCohortDiagnostics/global.R b/MskaiCohortDiagnostics/global.R
@@ -309,7 +309,21 @@ if (isValidConnection && databaseMode && !foundPremergedFile) {
       )
     }
   }
-
+  for (table in c("recommender_set")) {
+    if (table %in% resultsTablesOnServer &&
+        !exists(x = snakeCaseToCamelCase(string = table)) &&
+        !isEmpty(
+          connection = connectionPool,
+          tableName = table,
+          resultsDatabaseSchema = 'concept_prevalence'
+        )) {
+      assign(
+        x = snakeCaseToCamelCase(table),
+        value = dplyr::tibble(),
+        envir = .GlobalEnv
+      )
+    }
+  }
   dataSource <-
     createDatabaseDataSource(
       connection = connectionPool,
@@ -385,13 +399,11 @@ if (exists("cohort")) {
     }
     cohortMetaData <- dplyr::bind_rows(cohortMetaData) %>%
       readr::type_convert(col_types = readr::cols())
-    if ('referent_concept_id' %in% colnames(cohortMetaData)) {
+    if ('referentConceptId' %in% colnames(cohortMetaData)) {
       referentConceptIds <-
         c(referentConceptIds,
-          cohortMetaData$referent_concept_id) %>% unique()
+          cohortMetaData$referentConceptId) %>% unique()
     }
-    colnames(cohortMetaData) <-
-      snakeCaseToCamelCase(colnames(cohortMetaData))
   }
 } else {
   stop("Cohort table not found in data source")
@@ -586,5 +598,4 @@ if (exists(x = "phenotypeDescription")) {
   appTitle <- phenotypeLibraryModeDefaultTitle
 } else {
   appTitle <- cohortDiagnosticModeDefaultTitle
-}
-
+}
diff --git a/MskaiCohortDiagnostics/resultsDataModelSpecification.csv b/MskaiCohortDiagnostics/resultsDataModelSpecification.csv
@@ -99,6 +99,7 @@ inclusion_rule_stats,cohort_id,bigint,Yes,Yes,No,Yes
 inclusion_rule_stats,database_id,varchar,Yes,Yes,No,Yes
 index_event_breakdown,concept_id,int,Yes,Yes,No,Yes
 index_event_breakdown,concept_count,float,Yes,No,No,Yes
+index_event_breakdown,subject_count,float,Yes,No,No,Yes
 index_event_breakdown,cohort_id,bigint,Yes,Yes,No,Yes
 index_event_breakdown,database_id,varchar,Yes,Yes,No,Yes
 orphan_concept,cohort_id,bigint,Yes,Yes,No,Yes