diff --git a/src/main/resources/mappers/clickhouse/studyview/ClickhouseStudyViewFilterMapper.xml b/src/main/resources/mappers/clickhouse/studyview/ClickhouseStudyViewFilterMapper.xml
index c38ff1f574a..56acf3f9277 100644
--- a/src/main/resources/mappers/clickhouse/studyview/ClickhouseStudyViewFilterMapper.xml
+++ b/src/main/resources/mappers/clickhouse/studyview/ClickhouseStudyViewFilterMapper.xml
@@ -433,13 +433,15 @@
(
- WITH study_attribute_levels AS (
- SELECT DISTINCT cancer_study_identifier
- FROM clinical_data_derived
- WHERE attribute_name = #{clinicalDataFilter.attributeId}
- AND type='${type}'
+ WITH ,
+ study_attribute_levels AS (
+ SELECT DISTINCT cs.cancer_study_identifier as cancer_study_identifier
+ FROM clinical_attribute_meta cam
+ INNER JOIN cancer_study cs ON cam.cancer_study_id = cs.cancer_study_id
+ WHERE cam.attr_id = #{clinicalDataFilter.attributeId}
+ AND cam.patient_attribute = IF('${type}' = 'patient', 1, 0)
- AND cancer_study_identifier IN
+ AND cs.cancer_study_identifier IN
#{studyId}
@@ -448,6 +450,7 @@
+ (
SELECT DISTINCT sd.${unique_id}
FROM sample_derived sd
INNER JOIN study_attribute_levels sal ON sd.cancer_study_identifier = sal.cancer_study_identifier
@@ -467,6 +470,12 @@
= 'NA')
+
+ UNION ALL
+
+ -- Also include samples from studies without the attribute (they are also NA)
+
+ )
@@ -502,15 +511,57 @@
+
+ study_with_attribute AS (
+ SELECT DISTINCT cs.cancer_study_identifier as cancer_study_identifier
+ FROM clinical_attribute_meta cam
+ INNER JOIN cancer_study cs ON cam.cancer_study_id = cs.cancer_study_id
+ WHERE cam.attr_id = #{clinicalDataFilter.attributeId}
+
+ AND cs.cancer_study_identifier IN
+
+ #{studyId}
+
+
+ ),
+ study_without_attribute AS (
+ SELECT DISTINCT cs.cancer_study_identifier as cancer_study_identifier
+ FROM cancer_study cs
+ WHERE cs.cancer_study_identifier NOT IN (SELECT cancer_study_identifier FROM study_with_attribute)
+
+ AND cs.cancer_study_identifier IN
+
+ #{studyId}
+
+
+ )
+
+
+
+ SELECT DISTINCT sd.${unique_id}
+ FROM sample_derived sd
+ INNER JOIN study_without_attribute swa ON sd.cancer_study_identifier = swa.cancer_study_identifier
+
+
+
+
+
+
+
+
+
+
(
- WITH study_attribute_levels AS (
- SELECT DISTINCT cancer_study_identifier
- FROM clinical_data_derived
- WHERE attribute_name = #{clinicalDataFilter.attributeId}
- AND type='${type}'
+ WITH ,
+ study_attribute_levels AS (
+ SELECT DISTINCT cs.cancer_study_identifier as cancer_study_identifier
+ FROM clinical_attribute_meta cam
+ INNER JOIN cancer_study cs ON cam.cancer_study_id = cs.cancer_study_id
+ WHERE cam.attr_id = #{clinicalDataFilter.attributeId}
+ AND cam.patient_attribute = IF('${type}' = 'patient', 1, 0)
- AND cancer_study_identifier IN
+ AND cs.cancer_study_identifier IN
#{studyId}
@@ -546,6 +597,13 @@
+
+
+ UNION ALL
+
+ -- When NA is selected: also include samples from studies without the attribute
+
+
)
diff --git a/src/test/java/org/cbioportal/infrastructure/repository/clickhouse/sample/ClickhouseSampleMapperTest.java b/src/test/java/org/cbioportal/infrastructure/repository/clickhouse/sample/ClickhouseSampleMapperTest.java
index ed686507b6e..f0634162beb 100644
--- a/src/test/java/org/cbioportal/infrastructure/repository/clickhouse/sample/ClickhouseSampleMapperTest.java
+++ b/src/test/java/org/cbioportal/infrastructure/repository/clickhouse/sample/ClickhouseSampleMapperTest.java
@@ -131,9 +131,8 @@ public void getSamplesFilteredByClinicalData() {
mapper.getFilteredSamples(
StudyViewFilterFactory.make(
studyViewFilter, List.of(), studyViewFilter.getStudyIds(), null));
- // Only 7 study_genie_pub samples with genuine "NA" age data
- // acc_tcga samples are excluded since that study has no age attribute defined
- assertEquals(7, filteredSamples5.size());
+ // 4 acc_tcga samples (no AGE attribute) + 7 study_genie_pub samples with "NA" AGE data
+ assertEquals(11, filteredSamples5.size());
// NA + UNKNOWN
studyViewFilter.setClinicalDataFilters(
@@ -147,8 +146,8 @@ public void getSamplesFilteredByClinicalData() {
mapper.getFilteredSamples(
StudyViewFilterFactory.make(
studyViewFilter, List.of(), studyViewFilter.getStudyIds(), null));
- // 7 genuine NA samples from study_genie_pub + 1 UNKNOWN sample
- assertEquals(8, filteredSamples6.size());
+ // 11 NA (4 acc_tcga + 7 study_genie_pub) + 1 UNKNOWN
+ assertEquals(12, filteredSamples6.size());
// null age filter (start, end, and value are null)
// should return all samples with age attribute
@@ -161,16 +160,21 @@ public void getSamplesFilteredByClinicalData() {
assertEquals(27, filteredSamples7.size());
// NA dead filter
+ // study_genie_pub: patients with dead='NA' or no dead data (17 samples)
+ // + acc_tcga: all samples (study has no dead attribute, treated as NA) (4 samples)
studyViewFilter.setClinicalDataFilters(
List.of(newClinicalDataFilter("dead", List.of(newDataFilterValue(null, null, "NA")))));
var filteredSamples8 =
mapper.getFilteredSamples(
StudyViewFilterFactory.make(
studyViewFilter, List.of(), studyViewFilter.getStudyIds(), null));
- assertEquals(17, filteredSamples8.size());
+ assertEquals(21, filteredSamples8.size());
// null age filter + NA dead filter (test null numerical + any categorical filter)
- // should return same as NA dead filter test
+ // null age filter returns samples with age attribute (27 samples from study_genie_pub only,
+ // acc_tcga has no age)
+ // NA dead filter returns 21 samples (17 from study_genie_pub + 4 from acc_tcga)
+ // INTERSECT: only study_genie_pub samples that pass both filters = 17 samples
studyViewFilter.setClinicalDataFilters(
List.of(
newClinicalDataFilter("age", List.of(newDataFilterValue(null, null, null))),