diff --git a/src/main/resources/mappers/clickhouse/studyview/ClickhouseStudyViewFilterMapper.xml b/src/main/resources/mappers/clickhouse/studyview/ClickhouseStudyViewFilterMapper.xml index c38ff1f574a..56acf3f9277 100644 --- a/src/main/resources/mappers/clickhouse/studyview/ClickhouseStudyViewFilterMapper.xml +++ b/src/main/resources/mappers/clickhouse/studyview/ClickhouseStudyViewFilterMapper.xml @@ -433,13 +433,15 @@ ( - WITH study_attribute_levels AS ( - SELECT DISTINCT cancer_study_identifier - FROM clinical_data_derived - WHERE attribute_name = #{clinicalDataFilter.attributeId} - AND type='${type}' + WITH , + study_attribute_levels AS ( + SELECT DISTINCT cs.cancer_study_identifier as cancer_study_identifier + FROM clinical_attribute_meta cam + INNER JOIN cancer_study cs ON cam.cancer_study_id = cs.cancer_study_id + WHERE cam.attr_id = #{clinicalDataFilter.attributeId} + AND cam.patient_attribute = IF('${type}' = 'patient', 1, 0) - AND cancer_study_identifier IN + AND cs.cancer_study_identifier IN #{studyId} @@ -448,6 +450,7 @@ + ( SELECT DISTINCT sd.${unique_id} FROM sample_derived sd INNER JOIN study_attribute_levels sal ON sd.cancer_study_identifier = sal.cancer_study_identifier @@ -467,6 +470,12 @@ = 'NA') + + UNION ALL + + -- Also include samples from studies without the attribute (they are also NA) + + ) @@ -502,15 +511,57 @@ + + study_with_attribute AS ( + SELECT DISTINCT cs.cancer_study_identifier as cancer_study_identifier + FROM clinical_attribute_meta cam + INNER JOIN cancer_study cs ON cam.cancer_study_id = cs.cancer_study_id + WHERE cam.attr_id = #{clinicalDataFilter.attributeId} + + AND cs.cancer_study_identifier IN + + #{studyId} + + + ), + study_without_attribute AS ( + SELECT DISTINCT cs.cancer_study_identifier as cancer_study_identifier + FROM cancer_study cs + WHERE cs.cancer_study_identifier NOT IN (SELECT cancer_study_identifier FROM study_with_attribute) + + AND cs.cancer_study_identifier IN + + #{studyId} + + + ) + + + + SELECT DISTINCT sd.${unique_id} + FROM sample_derived sd + INNER JOIN study_without_attribute swa ON sd.cancer_study_identifier = swa.cancer_study_identifier + + + + + + + + + + ( - WITH study_attribute_levels AS ( - SELECT DISTINCT cancer_study_identifier - FROM clinical_data_derived - WHERE attribute_name = #{clinicalDataFilter.attributeId} - AND type='${type}' + WITH , + study_attribute_levels AS ( + SELECT DISTINCT cs.cancer_study_identifier as cancer_study_identifier + FROM clinical_attribute_meta cam + INNER JOIN cancer_study cs ON cam.cancer_study_id = cs.cancer_study_id + WHERE cam.attr_id = #{clinicalDataFilter.attributeId} + AND cam.patient_attribute = IF('${type}' = 'patient', 1, 0) - AND cancer_study_identifier IN + AND cs.cancer_study_identifier IN #{studyId} @@ -546,6 +597,13 @@ + + + UNION ALL + + -- When NA is selected: also include samples from studies without the attribute + + ) diff --git a/src/test/java/org/cbioportal/infrastructure/repository/clickhouse/sample/ClickhouseSampleMapperTest.java b/src/test/java/org/cbioportal/infrastructure/repository/clickhouse/sample/ClickhouseSampleMapperTest.java index ed686507b6e..f0634162beb 100644 --- a/src/test/java/org/cbioportal/infrastructure/repository/clickhouse/sample/ClickhouseSampleMapperTest.java +++ b/src/test/java/org/cbioportal/infrastructure/repository/clickhouse/sample/ClickhouseSampleMapperTest.java @@ -131,9 +131,8 @@ public void getSamplesFilteredByClinicalData() { mapper.getFilteredSamples( StudyViewFilterFactory.make( studyViewFilter, List.of(), studyViewFilter.getStudyIds(), null)); - // Only 7 study_genie_pub samples with genuine "NA" age data - // acc_tcga samples are excluded since that study has no age attribute defined - assertEquals(7, filteredSamples5.size()); + // 4 acc_tcga + 7 study_genie_pub samples with "NA" AGE data or no AGE data + assertEquals(11, filteredSamples5.size()); // NA + UNKNOWN studyViewFilter.setClinicalDataFilters( @@ -147,8 +146,8 @@ public void getSamplesFilteredByClinicalData() { mapper.getFilteredSamples( StudyViewFilterFactory.make( studyViewFilter, List.of(), studyViewFilter.getStudyIds(), null)); - // 7 genuine NA samples from study_genie_pub + 1 UNKNOWN sample - assertEquals(8, filteredSamples6.size()); + // 11 NA + 1 UNKNOWN + assertEquals(12, filteredSamples6.size()); // null age filter (start, end, and value are null) // should return all samples with age attribute @@ -161,16 +160,21 @@ public void getSamplesFilteredByClinicalData() { assertEquals(27, filteredSamples7.size()); // NA dead filter + // study_genie_pub: patients with dead='NA' or no dead data (17 samples) + // + acc_tcga: all samples (study has no dead attribute, treated as NA) (4 samples) studyViewFilter.setClinicalDataFilters( List.of(newClinicalDataFilter("dead", List.of(newDataFilterValue(null, null, "NA"))))); var filteredSamples8 = mapper.getFilteredSamples( StudyViewFilterFactory.make( studyViewFilter, List.of(), studyViewFilter.getStudyIds(), null)); - assertEquals(17, filteredSamples8.size()); + assertEquals(21, filteredSamples8.size()); // null age filter + NA dead filter (test null numerical + any categorical filter) - // should return same as NA dead filter test + // null age filter returns samples with age attribute (27 samples from study_genie_pub only, + // acc_tcga has no age) + // NA dead filter returns 21 samples (17 from study_genie_pub + 4 from acc_tcga) + // INTERSECT: only study_genie_pub samples that pass both filters = 17 samples studyViewFilter.setClinicalDataFilters( List.of( newClinicalDataFilter("age", List.of(newDataFilterValue(null, null, null))),