Skip to content

Commit

Permalink
Merge pull request #4679 from broadinstitute/exclude-search
Browse files: browse the repository at this point in the history
Exclude search section
  • Loading branch information
hanars authored Mar 3, 2025
2 parents cc768a0 + 9d0507b commit fc0c46a
Show file tree
Hide file tree
Showing 12 changed files with 122 additions and 37 deletions.
7 changes: 6 additions & 1 deletion hail_search/queries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,9 @@ def _prefilter_entries_table(self, ht, **kwargs):
return ht

def _filter_annotated_table(self, ht, gene_ids=None, rs_ids=None, frequencies=None, in_silico=None, pathogenicity=None,
parsed_annotations=None, is_comp_het=False, **kwargs):
parsed_annotations=None, is_comp_het=False, exclude=None, **kwargs):

ht = self._filter_excluded(ht, exclude)

ht = self._filter_by_gene_ids(ht, hl.set(gene_ids) if gene_ids else None)

Expand All @@ -729,6 +731,9 @@ def _annotate_filtered_gene_transcripts(cls, ht, gene_ids):
**{FILTERED_GENE_TRANSCRIPTS: ht[cls.TRANSCRIPTS_FIELD].filter(lambda t: gene_ids.contains(t.gene_id))}
)

def _filter_excluded(self, ht, exclude):
return ht

def _filter_by_gene_ids(self, ht, gene_ids):
if gene_ids is None:
return ht
Expand Down
7 changes: 7 additions & 0 deletions hail_search/queries/mito.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,13 @@ def _get_allowed_transcripts_filter(allowed_consequence_ids):
if canonical_consequences else allowed_consequence_ids
).contains)

def _filter_excluded(self, ht, exclude):
for key in self.PATHOGENICITY_FILTERS.keys():
path_terms = (exclude or {}).get(key)
if path_terms:
ht = ht.filter(hl.is_missing(ht[key]) | ~self._has_path_expr(ht, path_terms, key))
return ht

def _get_annotation_override_fields(self, annotations, pathogenicity=None, **kwargs):
annotation_overrides = super()._get_annotation_override_fields(annotations, **kwargs)
for key in self.PATHOGENICITY_FILTERS.keys():
Expand Down
6 changes: 6 additions & 0 deletions hail_search/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,7 @@ async def test_annotations_filter(self):
[VARIANT1, VARIANT2, MITO_VARIANT1, MITO_VARIANT3], pathogenicity=pathogenicity, sample_data=FAMILY_2_ALL_SAMPLE_DATA,
)

exclude = {'clinvar': pathogenicity['clinvar'][1:]}
pathogenicity['clinvar'] = pathogenicity['clinvar'][:1]
annotations = {'SCREEN': ['CTCF-only', 'DNase-only'], 'UTRAnnotator': ['5_prime_UTR_stop_codon_loss_variant']}
selected_transcript_variant_2 = {**VARIANT2, 'selectedMainTranscriptId': 'ENST00000408919'}
Expand All @@ -864,6 +865,11 @@ async def test_annotations_filter(self):
sample_data=FAMILY_2_ALL_SAMPLE_DATA,
)

await self._assert_expected_search(
[VARIANT1, VARIANT4, MITO_VARIANT3], exclude=exclude, pathogenicity=pathogenicity,
annotations=annotations, sample_data=FAMILY_2_ALL_SAMPLE_DATA,
)

await self._assert_expected_search(
[], pathogenicity=pathogenicity, annotations=annotations, sample_data=FAMILY_2_VARIANT_SAMPLE_DATA,
genome_version='GRCh37',
Expand Down
7 changes: 3 additions & 4 deletions seqr/utils/search/elasticsearch/es_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,13 +201,13 @@ def _filter(self, new_filter):
self._search = self._search.filter(new_filter)
return self

def filter_variants(self, inheritance_mode=None, inheritance_filter=None, genes=None, intervals=None, rs_ids=None, variant_ids=None, locus=None,
def filter_variants(self, inheritance_mode=None, inheritance_filter=None, genes=None, intervals=None, rs_ids=None, variant_ids=None, exclude_locations=False,
frequencies=None, pathogenicity=None, in_silico=None, annotations=None, annotations_secondary=None,
quality_filter=None, custom_query=None, skip_genotype_filter=False, dataset_type=None, secondary_dataset_type=None, **kwargs):

self._filter_custom(custom_query)

self._filter_by_location(genes, intervals, variant_ids, rs_ids, locus)
self._filter_by_location(genes, intervals, variant_ids, rs_ids, exclude_locations)

self._parse_annotation_overrides(annotations, pathogenicity)

Expand Down Expand Up @@ -272,9 +272,8 @@ def _parse_annotation_overrides(self, annotations, pathogenicity):
if new_svs:
self._consequence_overrides[NEW_SV_FIELD] = new_svs

def _filter_by_location(self, genes, intervals, variant_ids, rs_ids, locus):
def _filter_by_location(self, genes, intervals, variant_ids, rs_ids, exclude_locations):
if genes or intervals:
exclude_locations = locus and locus.get('excludeLocations')
self._filter(_location_filter(genes, intervals, exclude_locations))
if genes and not exclude_locations:
self._filtered_gene_ids = set(genes.keys())
Expand Down
2 changes: 1 addition & 1 deletion seqr/utils/search/elasticsearch/es_utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1603,7 +1603,7 @@ def test_filtered_get_es_variants(self):
'in_silico': {'cadd': '11.5', 'sift': 'D', 'fathmm': 'D'},
'inheritance': {'mode': 'de_novo'},
'customQuery': {'term': {'customFlag': 'flagVal'}},
'locus': {'rawItems': 'WASH7P, chr2:1234-5678, chr7:100-10100%10', 'excludeLocations': True},
'exclude': {'rawItems': 'WASH7P, chr2:1234-5678, chr7:100-10100%10'},
})

results_model = VariantSearchResults.objects.create(variant_search=search_model)
Expand Down
3 changes: 1 addition & 2 deletions seqr/utils/search/hail_search_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,8 @@ def _get_sort_metadata(sort, samples):


def _parse_location_search(search):
locus = search.pop('locus', None) or {}
parsed_locus = search.pop('parsedLocus')
exclude_locations = locus.get('excludeLocations')
exclude_locations = parsed_locus.get('exclude_locations')

genes = parsed_locus.get('genes') or {}
intervals = parsed_locus.get('intervals')
Expand Down
7 changes: 4 additions & 3 deletions seqr/utils/search/hail_search_utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def _test_minimal_search_call(self, expected_search_body=None, call_offset=-1, u
self.assertEqual(executed_request.url.split('/')[-1], url_path)
self.assertDictEqual(json.loads(executed_request.body), expected_search)

def _test_expected_search_call(self, search_fields=None, gene_ids=None, intervals=None, exclude_intervals= None,
def _test_expected_search_call(self, search_fields=None, gene_ids=None, intervals=None, exclude_intervals=False,
rs_ids=None, variant_ids=None, dataset_type=None, secondary_dataset_type=None,
frequencies=None, inheritance_mode='de_novo', inheritance_filter=None,
quality_filter=None, sort='xpos', sort_metadata=None, **kwargs):
Expand Down Expand Up @@ -107,9 +107,10 @@ def test_query_variants(self):
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(**LOCATION_SEARCH, sample_data=EXPECTED_SAMPLE_DATA)

self.search_model.search['locus']['excludeLocations'] = True
locus = self.search_model.search.pop('locus')
self.search_model.search['exclude'] = {'rawItems': locus['rawItems']}
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(**EXCLUDE_LOCATION_SEARCH)
self._test_expected_search_call(exclude={}, **EXCLUDE_LOCATION_SEARCH)

self.search_model.search = {
'inheritance': {'mode': 'recessive', 'filter': {'affected': {
Expand Down
47 changes: 40 additions & 7 deletions seqr/utils/search/search_utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,23 @@ def _test_invalid_search_params(self, search_func):
search_func(self.results_model, user=self.user)
self.assertEqual(str(cm.exception), 'Invalid genes/intervals: DDX11L1, ENSG00000223972')

self.search_model.search['exclude'] = self.search_model.search['locus']
with self.assertRaises(InvalidSearchException) as cm:
search_func(self.results_model, user=self.user)
self.assertEqual(str(cm.exception), 'Cannot specify both Location and Excluded Genes/Intervals')

self.search_model.search['locus'] = {}
with self.assertRaises(InvalidSearchException) as cm:
search_func(self.results_model, user=self.user)
self.assertEqual(str(cm.exception), 'Invalid genes/intervals: DDX11L1, ENSG00000223972')

self.search_model.search['pathogenicity'] = {'clinvar': ['pathogenic', 'vus']}
self.search_model.search['exclude'] = {'clinvar': ['benign', 'vus']}
with self.assertRaises(InvalidSearchException) as cm:
search_func(self.results_model, user=self.user)
self.assertEqual(str(cm.exception), 'ClinVar pathogenicity vus is both included and excluded')

self.search_model.search['exclude'] = {}
self.search_model.search['inheritance'] = {'mode': 'recessive'}
with self.assertRaises(InvalidSearchException) as cm:
query_variants(self.results_model)
Expand Down Expand Up @@ -264,19 +280,22 @@ def test_invalid_search_query_variants(self):

def _test_expected_search_call(self, mock_get_variants, results_cache, search_fields=None, genes=None, intervals=None,
rs_ids=None, variant_ids=None, parsed_variant_ids=None, inheritance_mode='de_novo',
dataset_type=None, secondary_dataset_type=None, omitted_sample_guids=None, **kwargs):
dataset_type=None, secondary_dataset_type=None, omitted_sample_guids=None,
exclude_locations=False, exclude=None, **kwargs):
expected_search = {
'inheritance_mode': inheritance_mode,
'inheritance_filter': {},
'parsedLocus': {
'genes': genes, 'intervals': intervals, 'rs_ids': rs_ids, 'variant_ids': variant_ids,
'parsed_variant_ids': parsed_variant_ids,
'parsed_variant_ids': parsed_variant_ids, 'exclude_locations': exclude_locations,
},
'skipped_samples': mock.ANY,
'dataset_type': dataset_type,
'secondary_dataset_type': secondary_dataset_type,
}
expected_search.update({field: self.search_model.search[field] for field in search_fields or []})
if exclude:
expected_search['exclude'] = exclude

mock_get_variants.assert_called_with(mock.ANY, expected_search, self.user, results_cache, '37', **kwargs)
searched_samples = self.affected_search_samples
Expand Down Expand Up @@ -333,7 +352,7 @@ def _mock_get_variants(families, search, user, previous_search_results, genome_v
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(
mock_get_variants, results_cache, sort='xpos', page=1, num_results=2, skip_genotype_filter=False,
search_fields=['locus'], rs_ids=[], variant_ids=['1-248367227-TC-T', '2-103343353-GAGA-G'],
rs_ids=[], variant_ids=['1-248367227-TC-T', '2-103343353-GAGA-G'],
parsed_variant_ids=[('1', 248367227, 'TC', 'T'), ('2', 103343353, 'GAGA', 'G')], dataset_type='SNV_INDEL',
omitted_sample_guids=['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733', 'S000149_hg00733'],
)
Expand All @@ -342,14 +361,14 @@ def _mock_get_variants(families, search, user, previous_search_results, genome_v
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(
mock_get_variants, results_cache, sort='xpos', page=1, num_results=100, skip_genotype_filter=False,
search_fields=['locus'], rs_ids=['rs9876'], variant_ids=[], parsed_variant_ids=[],
rs_ids=['rs9876'], variant_ids=[], parsed_variant_ids=[],
)

self.search_model.search['locus']['rawItems'] = 'WASH7P, chr2:1234-5678, chr7:100-10100%10, ENSG00000186092'
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(
mock_get_variants, results_cache, sort='xpos', page=1, num_results=100, skip_genotype_filter=False,
search_fields=['locus'], genes={
genes={
'ENSG00000227232': mock.ANY, 'ENSG00000186092': mock.ANY,
}, intervals=[
{'chrom': '2', 'start': 1234, 'end': 5678, 'offset': None},
Expand All @@ -362,11 +381,25 @@ def _mock_get_variants(families, search, user, previous_search_results, genome_v
self.assertEqual(parsed_genes['ENSG00000227232']['geneSymbol'], 'WASH7P')
self.assertEqual(parsed_genes['ENSG00000186092']['geneSymbol'], 'OR4F5')

self.search_model.search.update({'pathogenicity': {'clinvar': ['pathogenic', 'likely_pathogenic']}, 'locus': {}})
locus = self.search_model.search.pop('locus')
self.search_model.search['exclude'] = {'clinvar': ['benign'], 'rawItems': locus['rawItems']}
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(
mock_get_variants, results_cache, sort='xpos', page=1, num_results=100, skip_genotype_filter=False,
genes={
'ENSG00000227232': mock.ANY, 'ENSG00000186092': mock.ANY,
}, intervals=[
{'chrom': '2', 'start': 1234, 'end': 5678, 'offset': None},
{'chrom': '7', 'start': 100, 'end': 10100, 'offset': 0.1},
], exclude_locations=True, exclude={'clinvar': ['benign']},
)

del self.search_model.search['exclude']['rawItems']
self.search_model.search.update({'pathogenicity': {'clinvar': ['pathogenic', 'likely_pathogenic']}})
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(
mock_get_variants, results_cache, sort='xpos', page=1, num_results=100, skip_genotype_filter=False,
search_fields=['pathogenicity', 'locus'], dataset_type='SNV_INDEL', omitted_sample_guids=SV_SAMPLES,
search_fields=['exclude', 'pathogenicity'], dataset_type='SNV_INDEL', omitted_sample_guids=SV_SAMPLES,
)

self.search_model.search = {
Expand Down
18 changes: 15 additions & 3 deletions seqr/utils/search/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,14 +243,20 @@ def _query_variants(search_model, user, previous_search_results, sort=None, num_
genome_version = _get_search_genome_version(families)
_validate_sort(sort, families)

locus = search.pop('locus', None) or {}
exclude = search.get('exclude', None) or {}
exclude_locations = bool(exclude.get('rawItems'))
if locus and exclude_locations:
raise InvalidSearchException('Cannot specify both Location and Excluded Genes/Intervals')

rs_ids = None
variant_ids = None
parsed_variant_ids = None
genes, intervals, invalid_items = parse_locus_list_items(search.get('locus', {}), genome_version=genome_version)
genes, intervals, invalid_items = parse_locus_list_items(locus or exclude, genome_version=genome_version)
if invalid_items:
raise InvalidSearchException('Invalid genes/intervals: {}'.format(', '.join(invalid_items)))
if not (genes or intervals):
rs_ids, variant_ids, parsed_variant_ids, invalid_items = _parse_variant_items(search.get('locus', {}))
rs_ids, variant_ids, parsed_variant_ids, invalid_items = _parse_variant_items(locus)
if invalid_items:
raise InvalidSearchException('Invalid variants: {}'.format(', '.join(invalid_items)))
if rs_ids and variant_ids:
Expand All @@ -259,10 +265,16 @@ def _query_variants(search_model, user, previous_search_results, sort=None, num_
if variant_ids:
num_results = len(variant_ids)

exclude.pop('rawItems', None)
if exclude.get('clinvar') and (search.get('pathogenicity') or {}).get('clinvar'):
duplicates = set(search['pathogenicity']['clinvar']).intersection(exclude['clinvar'])
if duplicates:
raise InvalidSearchException(f'ClinVar pathogenicity {", ".join(sorted(duplicates))} is both included and excluded')

parsed_search = {
'parsedLocus': {
'genes': genes, 'intervals': intervals, 'rs_ids': rs_ids, 'variant_ids': variant_ids,
'parsed_variant_ids': parsed_variant_ids,
'parsed_variant_ids': parsed_variant_ids, 'exclude_locations': exclude_locations,
},
}
parsed_search.update(search)
Expand Down
46 changes: 33 additions & 13 deletions ui/pages/Search/components/VariantSearchFormPanelConfigs.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import {
INHERITANCE_MODE_LOOKUP, LOCUS_FIELD_NAME,
NUM_ALT_OPTIONS,
PANEL_APP_FIELD_NAME,
CLINVAR_FIELD,
PATHOGENICITY_FIELDS,
PATHOGENICITY_FILTER_OPTIONS, QUALITY_FILTER_OPTIONS, SV_GROUPS, SV_GROUPS_NO_NEW, VARIANT_ANNOTATION_LAYOUT_GROUPS,
} from '../constants'
Expand Down Expand Up @@ -107,11 +108,14 @@ const SELECTED_MOIS_FIELD_NAME = 'selectedMOIs'
const PANEL_APP_COLORS = [...new Set(
Object.entries(PANEL_APP_CONFIDENCE_LEVELS).sort((a, b) => b[0] - a[0]).map(config => config[1]),
)]
// Name/label/help text copied from LOCUS_LIST_ITEMS_FIELD so several
// form-field configs can spread in the same genes/intervals definition.
const BASE_LOCUS_FIELD = {
  name: LOCUS_LIST_ITEMS_FIELD.name,
  label: LOCUS_LIST_ITEMS_FIELD.label,
  labelHelp: LOCUS_LIST_ITEMS_FIELD.labelHelp,
}
export const LOCATION_FIELDS = [
{
name: LOCUS_LIST_ITEMS_FIELD.name,
label: LOCUS_LIST_ITEMS_FIELD.label,
labelHelp: LOCUS_LIST_ITEMS_FIELD.labelHelp,
...BASE_LOCUS_FIELD,
component: LocusListItemsFilter,
width: 9,
shouldShow: locus => !locus[PANEL_APP_FIELD_NAME],
Expand Down Expand Up @@ -157,15 +161,6 @@ export const LOCATION_FIELDS = [
shouldShow: locus => !locus[PANEL_APP_FIELD_NAME],
shouldDisable: locus => !locus[LOCUS_LIST_ITEMS_FIELD.name],
},
{
name: 'excludeLocations',
component: LocusListItemsFilter,
filterComponent: AlignedBooleanCheckbox,
label: 'Exclude locations',
labelHelp: 'Search for variants not in the specified genes/ intervals',
width: 10,
shouldDisable: locus => !!locus[VARIANT_FIELD_NAME],
},
]

export const SNP_QUALITY_FILTER_FIELDS = [
Expand Down Expand Up @@ -320,14 +315,15 @@ const JsonSelectPropsWithAll = (options, all) => ({
})

export const PATHOGENICITY_PANEL_NAME = 'pathogenicity'
// Field props for checkbox-group pathogenicity inputs; `format` substitutes an
// empty array for a falsy value.
const PATHOGENICITY_FIELD_PROPS = { control: AlignedCheckboxGroup, format: val => val || [] }
export const PATHOGENICITY_PANEL = {
name: PATHOGENICITY_PANEL_NAME,
headerProps: {
title: 'Pathogenicity',
inputProps: JsonSelectPropsWithAll(PATHOGENICITY_FILTER_OPTIONS, ANY_PATHOGENICITY_FILTER),
},
fields: PATHOGENICITY_FIELDS,
fieldProps: { control: AlignedCheckboxGroup, format: val => val || [] },
fieldProps: PATHOGENICITY_FIELD_PROPS,
helpText: 'Filter by reported pathogenicity. This overrides the annotation filter, the frequency filter, and the call quality filter. Variants will be returned if they have the specified transcript consequence AND the specified frequencies AND all individuals pass all specified quality filters OR if the variant has the specified pathogenicity and a frequency up to 0.05.',
}
export const HGMD_HEADER_INPUT_PROPS = JsonSelectPropsWithAll(
Expand Down Expand Up @@ -504,3 +500,27 @@ export const QUALITY_PANEL = {
},
fieldProps: { control: LazyLabeledSlider, format: val => val || null },
}

// Free-text genes/intervals input for the exclude panel.
const EXCLUDE_LOCUS_FIELD = { ...BASE_LOCUS_FIELD, component: Form.TextArea, rows: 8 }

// ClinVar checkbox group reusing the pathogenicity field props.
const EXCLUDE_CLINVAR_FIELD = { ...CLINVAR_FIELD, ...PATHOGENICITY_FIELD_PROPS, width: 8 }

// Reduced field list exposed on the panel as `esEnabledFields`: locations only, no ClinVar exclusion.
const ES_EXCLUDE_FIELDS = [EXCLUDE_LOCUS_FIELD]
const EXCLUDE_FIELDS = [EXCLUDE_CLINVAR_FIELD, ...ES_EXCLUDE_FIELDS]

export const EXCLUDE_PANEL = {
  name: 'exclude',
  headerProps: { title: 'Exclude' },
  fields: EXCLUDE_FIELDS,
  esEnabledFields: ES_EXCLUDE_FIELDS,
  helpText: 'Exclude variants from the search results based on the specified criteria. This filter will override any other filters applied.',
}
7 changes: 5 additions & 2 deletions ui/pages/Search/components/VariantSearchFormPanels.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
FREQUENCY_PANEL,
LOCATION_PANEL,
QUALITY_PANEL,
EXCLUDE_PANEL,
ANNOTATION_SECONDARY_NAME,
PATHOGENICITY_PANEL_NAME,
HGMD_HEADER_INPUT_PROPS,
Expand Down Expand Up @@ -94,6 +95,7 @@ const PANELS = [
FREQUENCY_PANEL,
LOCATION_PANEL,
QUALITY_PANEL,
EXCLUDE_PANEL,
]

const stopPropagation = e => e.stopPropagation()
Expand Down Expand Up @@ -130,10 +132,10 @@ const formatField = (field, name, esEnabled, { formatNoEsLabel, ...fieldProps })

const PanelContent = React.memo(({
name, fields, fieldProps, helpText, fieldLayout, fieldLayoutInput, esEnabled, noPadding, datasetTypes,
datasetTypeFields, datasetTypeFieldLayoutInput,
datasetTypeFields, datasetTypeFieldLayoutInput, esEnabledFields,
}) => {
const layoutInput = (datasetTypeFieldLayoutInput || {})[datasetTypes] || fieldLayoutInput
const panelFields = (datasetTypeFields || {})[datasetTypes] || fields
const panelFields = (datasetTypeFields || {})[datasetTypes] || (esEnabled && esEnabledFields) || fields
const fieldComponents = panelFields && configuredFields(
{ fields: panelFields.map(field => formatField(field, name, esEnabled, fieldProps || {})) },
)
Expand Down Expand Up @@ -165,6 +167,7 @@ PanelContent.propTypes = {
fieldLayoutInput: PropTypes.arrayOf(PropTypes.string),
datasetTypeFieldLayoutInput: PropTypes.object,
esEnabled: PropTypes.bool,
esEnabledFields: PropTypes.arrayOf(PropTypes.object),
noPadding: PropTypes.bool,
}

Expand Down
Loading

0 comments on commit fc0c46a

Please sign in to comment.