Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -433,13 +433,15 @@
</foreach>

(
WITH study_attribute_levels AS (
SELECT DISTINCT cancer_study_identifier
FROM clinical_data_derived
WHERE attribute_name = #{clinicalDataFilter.attributeId}
AND type='${type}'
WITH <include refid="org.cbioportal.infrastructure.repository.clickhouse.studyview.ClickhouseStudyViewFilterMapper.studyAttributeFilterCTEs"/>,
study_attribute_levels AS (
SELECT DISTINCT cs.cancer_study_identifier as cancer_study_identifier
FROM clinical_attribute_meta cam
INNER JOIN cancer_study cs ON cam.cancer_study_id = cs.cancer_study_id
WHERE cam.attr_id = #{clinicalDataFilter.attributeId}
AND cam.patient_attribute = IF('${type}' = 'patient', 1, 0)
<if test="studyViewFilterContext.customDataFilterCancerStudies != null and !studyViewFilterContext.customDataFilterCancerStudies.isEmpty()">
AND cancer_study_identifier IN
AND cs.cancer_study_identifier IN
<foreach item="studyId" collection="studyViewFilterContext.customDataFilterCancerStudies" open="(" separator="," close=")">
#{studyId}
</foreach>
Expand All @@ -448,6 +450,7 @@

<!-- if 'NA' is selected, prepare NA samples/patients -->
<if test="userSelectsNA">
(
SELECT DISTINCT sd.${unique_id}
FROM sample_derived sd
INNER JOIN study_attribute_levels sal ON sd.cancer_study_identifier = sal.cancer_study_identifier
Expand All @@ -467,6 +470,12 @@
<include refid="org.cbioportal.infrastructure.repository.clickhouse.studyview.ClickhouseStudyViewFilterMapper.normalizeAttributeValue">
<property name="attribute_value" value="categorical_clinical_data.attribute_value"/>
</include> = 'NA')

UNION ALL

-- Also include samples from studies without the attribute (they are also NA)
<include refid="org.cbioportal.infrastructure.repository.clickhouse.studyview.ClickhouseStudyViewFilterMapper.samplesFromStudiesWithoutAttributeAsNA"/>
)
</if>

<!-- if both 'NA' and non-NA are selected, union them together -->
Expand Down Expand Up @@ -502,15 +511,57 @@
</if>
</sql>

<sql id="studyAttributeFilterCTEs">
    <!--
      Two comma-separated CTE definitions, emitted without a leading WITH keyword:
      the including statement supplies "WITH" before this fragment and "," after it.
        study_with_attribute    : identifiers of studies that have a clinical_attribute_meta
                                  row whose attr_id equals the filtered attribute id
        study_without_attribute : identifiers of studies in cancer_study that are not in
                                  study_with_attribute
      When customDataFilterCancerStudies is non-empty, both sets are restricted to it.
    -->
    study_with_attribute AS (
        SELECT DISTINCT study.cancer_study_identifier AS cancer_study_identifier
        FROM clinical_attribute_meta attr_meta
        INNER JOIN cancer_study study
            ON study.cancer_study_id = attr_meta.cancer_study_id
        WHERE attr_meta.attr_id = #{clinicalDataFilter.attributeId}
        <if test="studyViewFilterContext.customDataFilterCancerStudies != null and !studyViewFilterContext.customDataFilterCancerStudies.isEmpty()">
            AND study.cancer_study_identifier IN
            <foreach item="customStudyId" collection="studyViewFilterContext.customDataFilterCancerStudies" open="(" separator="," close=")">
                #{customStudyId}
            </foreach>
        </if>
    ),
    study_without_attribute AS (
        SELECT DISTINCT study.cancer_study_identifier AS cancer_study_identifier
        FROM cancer_study study
        WHERE study.cancer_study_identifier NOT IN (
            SELECT cancer_study_identifier
            FROM study_with_attribute
        )
        <if test="studyViewFilterContext.customDataFilterCancerStudies != null and !studyViewFilterContext.customDataFilterCancerStudies.isEmpty()">
            AND study.cancer_study_identifier IN
            <foreach item="customStudyId" collection="studyViewFilterContext.customDataFilterCancerStudies" open="(" separator="," close=")">
                #{customStudyId}
            </foreach>
        </if>
    )
</sql>

<sql id="samplesFromStudiesWithoutAttributeAsNA">
    <!--
      Distinct ${unique_id} values of sample_derived rows whose study appears in the
      study_without_attribute CTE. That CTE must already be defined by the enclosing
      statement's WITH clause.
    -->
    SELECT DISTINCT sd.${unique_id}
    FROM sample_derived sd
    WHERE sd.cancer_study_identifier IN (
        SELECT cancer_study_identifier
        FROM study_without_attribute
    )
</sql>

<sql id="categoricalClinicalDataCountFilter">
<!-- check if 'NA' is selected -->
<bind name="userSelectsNA" value="false" />
<foreach item="dataFilterValue" collection="clinicalDataFilter.values">
<if test="dataFilterValue.value == 'NA'">
<bind name="userSelectsNA" value="true" />
</if>
</foreach>

(
WITH study_attribute_levels AS (
SELECT DISTINCT cancer_study_identifier
FROM clinical_data_derived
WHERE attribute_name = #{clinicalDataFilter.attributeId}
AND type='${type}'
WITH <include refid="org.cbioportal.infrastructure.repository.clickhouse.studyview.ClickhouseStudyViewFilterMapper.studyAttributeFilterCTEs"/>,
study_attribute_levels AS (
SELECT DISTINCT cs.cancer_study_identifier as cancer_study_identifier
FROM clinical_attribute_meta cam
INNER JOIN cancer_study cs ON cam.cancer_study_id = cs.cancer_study_id
WHERE cam.attr_id = #{clinicalDataFilter.attributeId}
AND cam.patient_attribute = IF('${type}' = 'patient', 1, 0)
<if test="studyViewFilterContext.customDataFilterCancerStudies != null and !studyViewFilterContext.customDataFilterCancerStudies.isEmpty()">
AND cancer_study_identifier IN
AND cs.cancer_study_identifier IN
<foreach item="studyId" collection="studyViewFilterContext.customDataFilterCancerStudies" open="(" separator="," close=")">
#{studyId}
</foreach>
Expand Down Expand Up @@ -546,6 +597,13 @@
</otherwise>
</choose>
</foreach>

<if test="userSelectsNA">
UNION ALL

-- When NA is selected: also include samples from studies without the attribute
<include refid="org.cbioportal.infrastructure.repository.clickhouse.studyview.ClickhouseStudyViewFilterMapper.samplesFromStudiesWithoutAttributeAsNA"/>
</if>
)
</sql>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,8 @@ public void getSamplesFilteredByClinicalData() {
mapper.getFilteredSamples(
StudyViewFilterFactory.make(
studyViewFilter, List.of(), studyViewFilter.getStudyIds(), null));
// Only 7 study_genie_pub samples with genuine "NA" age data
// acc_tcga samples are excluded since that study has no age attribute defined
assertEquals(7, filteredSamples5.size());
// 4 acc_tcga + 7 study_genie_pub samples with "NA" AGE data or no AGE data
assertEquals(11, filteredSamples5.size());

// NA + UNKNOWN
studyViewFilter.setClinicalDataFilters(
Expand All @@ -147,8 +146,8 @@ public void getSamplesFilteredByClinicalData() {
mapper.getFilteredSamples(
StudyViewFilterFactory.make(
studyViewFilter, List.of(), studyViewFilter.getStudyIds(), null));
// 7 genuine NA samples from study_genie_pub + 1 UNKNOWN sample
assertEquals(8, filteredSamples6.size());
// 11 NA + 1 UNKNOWN
assertEquals(12, filteredSamples6.size());

// null age filter (start, end, and value are null)
// should return all samples with age attribute
Expand All @@ -161,16 +160,21 @@ public void getSamplesFilteredByClinicalData() {
assertEquals(27, filteredSamples7.size());

// NA dead filter
// study_genie_pub: patients with dead='NA' or no dead data (17 samples)
// + acc_tcga: all samples (study has no dead attribute, treated as NA) (4 samples)
studyViewFilter.setClinicalDataFilters(
List.of(newClinicalDataFilter("dead", List.of(newDataFilterValue(null, null, "NA")))));
var filteredSamples8 =
mapper.getFilteredSamples(
StudyViewFilterFactory.make(
studyViewFilter, List.of(), studyViewFilter.getStudyIds(), null));
assertEquals(17, filteredSamples8.size());
assertEquals(21, filteredSamples8.size());

// null age filter + NA dead filter (test null numerical + any categorical filter)
// should return same as NA dead filter test
// null age filter returns samples with age attribute (27 samples from study_genie_pub only,
// acc_tcga has no age)
// NA dead filter returns 21 samples (17 from study_genie_pub + 4 from acc_tcga)
// INTERSECT: only study_genie_pub samples that pass both filters = 17 samples
studyViewFilter.setClinicalDataFilters(
List.of(
newClinicalDataFilter("age", List.of(newDataFilterValue(null, null, null))),
Expand Down
Loading