Skip to content

Commit

Permalink
Merge pull request #3 from opentargets/ag_efo_latest_obo
Browse files Browse the repository at this point in the history
Ag efo latest obo, update therapeutic areas and remove splitting query strings
  • Loading branch information
cmalangone authored Jun 18, 2020
2 parents b24ad50 + 96fac1d commit 3376dd2
Show file tree
Hide file tree
Showing 12 changed files with 429 additions and 292 deletions.
5 changes: 5 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
Version 0.0.14:
- Query strings are now searched exactly as given; the previous behaviour of splitting at commas and semicolons and using the largest substring has been removed.
- Use latest EFO OBO file instead of hard-coded v2018-01-15
- Updated OT top nodes with current therapeutic areas for checking if mapped term is in OT

Version 0.0.13:
- even more weird and wonderful packaging problems

Expand Down
4 changes: 2 additions & 2 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ recommonmark = "*"
sphinx-rtd-theme = "*"

[packages]
requests = "==2.19.1"
requests = "*"
obonet = "*"
click = "*"

[requires]
python_version = "3.6"
python_version = "3.7"
587 changes: 351 additions & 236 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.13
0.0.14
59 changes: 50 additions & 9 deletions ontoma/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,62 @@

URLS = {

'EFO':'https://github.com/EBISPOT/efo/raw/v2018-01-15/efo.obo',
'EFO':'https://www.ebi.ac.uk/efo/efo.obo',
'HP':'http://purl.obolibrary.org/obo/hp.obo',
'OMIM_EFO_MAP':'https://raw.githubusercontent.com/opentargets/platform_semantic/master/resources/xref_mappings/omim_to_efo.txt',
'ZOOMA_EFO_MAP':'https://raw.githubusercontent.com/opentargets/platform_semantic/master/resources/zooma/cttv_indications_3.txt',
'MONDO':'http://purl.obolibrary.org/obo/mondo.obo'
}
OT_TOP_NODES = {
'http://www.ebi.ac.uk/efo/EFO_0000408',
'http://www.ebi.ac.uk/efo/EFO_0000651',
'http://www.ebi.ac.uk/efo/EFO_0001444',
'http://purl.obolibrary.org/obo/GO_0008150',
'http://www.ifomis.org/bfo/1.1/snap#Function',
'http://www.ebi.ac.uk/efo/EFO_0000546',
'http://www.ebi.ac.uk/efo/EFO_0003935',
'http://purl.obolibrary.org/obo/HP_0000118'
'http://www.ebi.ac.uk/efo/EFO_0005932', # Animal disease
'http://purl.obolibrary.org/obo/GO_0008150', # Biological process
'http://purl.obolibrary.org/obo/MONDO_0045024', # Cell proliferation disorder
'http://purl.obolibrary.org/obo/MONDO_0021205', # Disease of ear
'http://purl.obolibrary.org/obo/MONDO_0024458', # Disease of visual system
'http://www.ebi.ac.uk/efo/EFO_0001379', # Endocrine system disease
'http://www.ebi.ac.uk/efo/EFO_0010282', # Gastrointestinal disease
'http://www.ebi.ac.uk/efo/OTAR_0000018', # Genetic, familial or congenital disease
'http://www.ebi.ac.uk/efo/EFO_0000508', # Genetic disorder (child of OTAR_0000018)
'http://www.ebi.ac.uk/efo/OTAR_0000019', # Familial disease (child of OTAR_0000018, does not have any children)
'http://www.ebi.ac.uk/efo/MONDO_0018797', # Genetic cardiac malformation (child of OTAR_0000018)
'http://www.ebi.ac.uk/efo/MONDO_0000839', # Congenital abnormality (child of OTAR_0000018)
'http://www.ebi.ac.uk/efo/EFO_0000319', # Cardiovascular disease
'http://www.ebi.ac.uk/efo/EFO_0005803', # Hematologic disease
'http://www.ebi.ac.uk/efo/EFO_0000540', # Immune system disease
'http://www.ebi.ac.uk/efo/EFO_0005741', # Infectious disease
'http://www.ebi.ac.uk/efo/OTAR_0000009', # Injury, poisoning or other complication
'http://www.ebi.ac.uk/efo/EFO_0008546' , # Poisoning (child of OTAR_0000009)
'http://www.ebi.ac.uk/efo/EFO_1000903' , # Drug-induced akathisia (child of OTAR_0000009)
'http://www.ebi.ac.uk/efo/EFO_1000904' , # Drug-Induced dyskinesia (child of OTAR_0000009)
'http://www.ebi.ac.uk/efo/MONDO_0001423' , # Drug-induced mental disorder (child of OTAR_0000009)
'http://www.ebi.ac.uk/efo/EFO_0004228' , # Drug-induced liver injury (child of OTAR_0000009)
'http://www.ebi.ac.uk/efo/EFO_0009482' , # Drug allergy (child of OTAR_0000009)
'http://www.ebi.ac.uk/efo/EFO_0009518' , # Complication (child of OTAR_0000009)
'http://www.ebi.ac.uk/efo/EFO_0000546' , # Injury (child of OTAR_0000009)
'http://www.ebi.ac.uk/efo/EFO_0003099' , # Cushing syndrome (child of OTAR_0000009)
'http://www.ebi.ac.uk/efo/MONDO_0016474' , # Drug-induced lupus erythematosus (child of OTAR_0000009)
'http://www.ebi.ac.uk/efo/EFO_0005400' , # chemotherapy-induced alopecia (child of OTAR_0000009)
'http://www.ebi.ac.uk/efo/EFO_0010285', # Integumentary system disease
'http://www.ebi.ac.uk/efo/EFO_0001444', # Measurement
'http://www.ebi.ac.uk/efo/OTAR_0000006', # Musculoskeletal or connective tissue disease
'http://www.ebi.ac.uk/efo/EFO_1001986', # Connective tissue disease (child of OTAR_0000006)
'http://www.ebi.ac.uk/efo/EFO_0009676', # Musculoskeletal system disease (child of OTAR_0000006)
'http://www.ebi.ac.uk/efo/EFO_0000618', # Nervous system disease
'http://purl.obolibrary.org/obo/MONDO_0024297', # Nutritional or metabolic disease
'http://www.ebi.ac.uk/efo/EFO_0009605', # Pancreas disease
'http://www.ebi.ac.uk/efo/EFO_0000651', # Phenotype
'http://www.ebi.ac.uk/efo/OTAR_0000014', # Pregnancy or perinatal disease
'http://www.ebi.ac.uk/efo/EFO_0009683', # Puerperal disorder (child of OTAR_0000014)
'http://www.ebi.ac.uk/efo/EFO_0009682', # Pregnancy disorder (child of OTAR_0000014)
'http://www.ebi.ac.uk/efo/EFO_0010238', # Perinatal disease (child of OTAR_0000014)
'http://purl.obolibrary.org/obo/MONDO_0002025', # Psychiatric disorder
'http://www.ebi.ac.uk/efo/OTAR_0000017', # Reproductive system or breast disease
'http://www.ebi.ac.uk/efo/EFO_0000512', # Reproductive system disease (child of OTAR_0000017)
'http://www.ebi.ac.uk/efo/EFO_0009483', # Breast disease (child of OTAR_0000017)
'http://www.ebi.ac.uk/efo/OTAR_0000010', # Respiratory or thoracic disease
'http://www.ebi.ac.uk/efo/EFO_0000684', # Respiratory system disease (child of OTAR_0000010)
'http://www.ebi.ac.uk/efo/MONDO_0000651', # Thoracic disease (child of OTAR_0000010)
'http://www.ebi.ac.uk/efo/EFO_0009690', # Urinary system disease
}

FIELDS = ['query','term','label','source','quality','action']
2 changes: 1 addition & 1 deletion ontoma/downloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def get_omim_to_efo_mappings(url):
'''returns a dictionary that maps OMIM codes to EFO_uri
>>> d = get_omim_to_efo_mappings(URLS['OMIM_EFO_MAP'])
>>> d['609909']
[{'iri': 'http://www.orpha.net/ORDO/Orphanet_154', 'label': 'Familial isolated dilated cardiomyopathy'}, {'iri': 'http://www.orpha.net/ORDO/Orphanet_217607', 'label': 'Familial dilated cardiomyopathy'}]
[{'iri': 'http://www.orpha.net/ORDO/Orphanet_217607', 'label': 'Familial dilated cardiomyopathy'}, {'iri': 'http://www.orpha.net/ORDO/Orphanet_154', 'label': 'Familial isolated dilated cardiomyopathy'}]
'''
mappings = {}
logger.debug("OMIM to EFO mappings - requesting from URL %s", url)
Expand Down
21 changes: 5 additions & 16 deletions ontoma/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,6 @@ def make_uri(ontology_short_form):
"Short form: {} not recognized".format(ontology_code))


def largest_fragment(sentence):
return max(re.split("[,;]+",sentence),key=len).strip()


class OnToma(object):
'''Open Targets ontology mapping wrapper
Expand Down Expand Up @@ -159,7 +155,7 @@ class OnToma(object):
OMIM code lookup
>>> t.omim_lookup('230650')
'http://www.orpha.net/ORDO/Orphanet_354'
'http://www.orpha.net/ORDO/Orphanet_79257'
>>> t.zooma_lookup('asthma')
'http://www.ebi.ac.uk/efo/EFO_0000270'
Expand All @@ -168,11 +164,6 @@ class OnToma(object):
>>> t.mondo_lookup('asthma')
'http://purl.obolibrary.org/obo/MONDO_0004979'
Searching the ICD9 code for 'other dermatoses' returns EFO's skin disease:
>>> t.icd9_lookup('696')
'EFO:0000676'
There is also a semi-intelligent wrapper, which tries to guess the
best matching strategy:
Expand Down Expand Up @@ -295,6 +286,7 @@ def otzooma_map_lookup(self, name):

def icd9_lookup(self, icd9code):
'''Searches the ICD9CM <=> EFO mappings returned from the OXO API
FIXME: Results don't seem to be deterministic; some mappings appear and disappear between calls, e.g. t.icd9_lookup('696')
'''
return self._icd9_to_efo[icd9code]

Expand Down Expand Up @@ -348,11 +340,8 @@ def _is_included(self, iri, ontology=None):
we select for open targets
'''
if not ontology:
if 'HP_' in iri:
ontology = 'hp'
else:
# default to checking ancestry in EFO
ontology = 'efo'
# default to checking ancestry in EFO
ontology = 'efo'
try:
for ancestor in self._ols.get_ancestors(ontology, iri):
if ancestor['iri'] in OT_TOP_NODES:
Expand Down Expand Up @@ -421,7 +410,7 @@ def find_term(self, query, code=None, suggest=False, verbose=False):
'for %s in %s mappings. ', e, code)
return None
else:
found = self._find_term_from_string(largest_fragment(query), suggest)
found = self._find_term_from_string(query, suggest)
if found:
msg = 'Found {} for {} from {} - {} - {}'.format(
make_uri(found['term']),
Expand Down
6 changes: 3 additions & 3 deletions ontoma/ols.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ class OlsClient:
>>> ols.besthit('hypogammaglobulinemia',ontology='efo')['label']
'Osteopetrosis - hypogammaglobulinemia'
>>> ols.besthit('hypogammaglobulinemia',ontology='efo',exact=True) is None
True
>>> ols.besthit('hypogammaglobulinemia',ontology='efo',exact=True)['label']
'Agammaglobulinemia'
>>> r = ols.search('asthma',ontology=['efo'],query_fields=['synonym'],field_list=['iri','label'])
>>> 'http://www.ebi.ac.uk/efo/EFO_0004591' in [syn['iri'] for syn in r]
Expand All @@ -98,7 +98,7 @@ class OlsClient:
'EFO_1001054'
>>> [x['short_form'] for x in ols.select('alzheimer')[:2]]
['NCIT_C2866', 'NCIT_C38778']
['PW_0000015', 'DOID_0080348']
You can also pass your favourite parameters at class instantiation:
Expand Down
17 changes: 1 addition & 16 deletions ontoma/oxo.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,7 @@

class OxoClient:
''' OXO wrapper class
>>> oxo = OxoClient()
>>> len(oxo._sources)
940
>>> first_result = list(oxo.search(input_source="ICD9CM"))[:1][0]
>>> first_result['curie']
'ICD9CM:730.92'
>>> for r in oxo.search(ids=['ICD9CM:171.6'],input_source="ICD9CM"):
... print(r['label'])
Malignant neoplasm of connective and other soft tissue of pelvis
>>> icd9s = oxo.make_mappings(input_source="ICD9CM", distance=2)
>>> icd9s['733.0']
'EFO:0003882'
FIXME Results don't seem to be deterministic, some mappings appear and disappear between calls, e.g. icd9s = oxo.make_mappings(input_source="ICD9CM", distance=2); icd9s['733.09']
'''

def __init__(self, base_url=OXO.rstrip('/')):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_file_batch_input(rootdir):
result = runner.invoke(ontoma, [test_file,'-'])
assert result.exit_code == 0
assert 'http://www.ebi.ac.uk/efo/EFO_0000270' in result.output
assert 'http://www.orpha.net/ORDO/Orphanet_309842' in result.output
assert 'http://purl.obolibrary.org/obo/MONDO_0002279' in result.output

def test_batch_matching(rootdir):

Expand Down
11 changes: 7 additions & 4 deletions tests/test_ontoma.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@ def test_find_term_excludes(ontclient):
assert not ontclient.find_term('breast')

def test_suggest_hp_term_not_excluded(ontclient):
assert ontclient.find_term('hypogammaglobulinemia') == 'http://purl.obolibrary.org/obo/HP_0004313'
assert ontclient.find_term('hypogammaglobulinemia') == 'http://www.orpha.net/ORDO/Orphanet_229720'

def test_catch_ordo(ontclient):
assert ontclient.find_term('Camptodactyly-arthropathy-coxa-vara-pericarditis syndrome') == 'http://www.orpha.net/ORDO/Orphanet_2848'
assert not ontclient.find_term('208250')
assert ontclient.find_term('208250',suggest=True) == 'http://www.orpha.net/ORDO/Orphanet_2848'
assert ontclient.find_term('Camptodactyly-arthropathy-coxa-vara-pericarditis syndrome') == 'http://www.ebi.ac.uk/efo/EFO_0009028'
assert ontclient.find_term('208250') == 'http://www.ebi.ac.uk/efo/EFO_0009028'
assert ontclient.find_term('208250',suggest=True) == 'http://www.ebi.ac.uk/efo/EFO_0009028'

def test_query_comma(ontclient):
assert ontclient.find_term('3-methylglutaconic aciduria, type III') == 'http://www.orpha.net/ORDO/Orphanet_67047'
5 changes: 2 additions & 3 deletions tests/test_phewascat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@ def test_find_term_asthma(ontclient):
assert ontclient.find_term('asthma') == 'http://www.ebi.ac.uk/efo/EFO_0000270'

def test_efo_direct_match(ontclient):
assert ontclient.find_term('Dementias') == 'http://purl.obolibrary.org/obo/HP_0000726'
assert ontclient.find_term('Dementias') == 'http://www.ebi.ac.uk/efo/EFO_0004718'

def test_otzooma_mappings_whitespace(ontclient):
assert ontclient.find_term('Prostate cancer') == 'http://www.ebi.ac.uk/efo/EFO_0001663'
assert ontclient.find_term('Prostate cancer') == 'http://purl.obolibrary.org/obo/MONDO_0008315'

def test_efo_match_with_apostrophe(ontclient):
assert ontclient.find_term('Alzheimer\'s disease') == 'http://www.ebi.ac.uk/efo/EFO_0000249'
assert ontclient.find_term('290.1', code="ICD9CM") == 'http://www.ebi.ac.uk/efo/EFO_0000249'

def test_match_with_hpo(ontclient):
assert ontclient.find_term('Jaundice') == 'http://purl.obolibrary.org/obo/HP_0000952'
Expand Down

0 comments on commit 3376dd2

Please sign in to comment.