Skip to content

Commit

Permalink
Merge pull request #180 from monarch-initiative/d2g-exclusions
Browse files Browse the repository at this point in the history
Disease-Gene exclusions
  • Loading branch information
joeflack4 authored Dec 15, 2024
2 parents dfb9cb0 + 1be4a68 commit 20f7580
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 19 deletions.
8 changes: 8 additions & 0 deletions data/exclusions-disease-gene.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
omim_id mondo_id mondo_label orcid exclusion_reason_comment
OMIM:603956 MONDO:0002974 cervical cancer https://orcid.org/0000-0002-4142-7153 evidence of various genes involved
OMIM:619151 MONDO:0030894 "AMED syndrome, digenic" https://orcid.org/0000-0002-4142-7153 digenic
OMIM:158901 MONDO:0008031 https://orcid.org/0000-0002-4142-7153 digenic
OMIM:108770 MONDO:0007171 atrial standstill 1 https://orcid.org/0000-0002-4142-7153 digenic
OMIM:620040 MONDO:0031057 "dyskeratosis congenita, digenic" https://orcid.org/0000-0002-4142-7153 digenic
OMIM:619478 MONDO:0030355 "facioscapulohumeral muscular dystrophy 4, digenic" https://orcid.org/0000-0002-4142-7153 digenic
OMIM:300818 MONDO:0010438 paroxysmal nocturnal hemoglobinuria 1 https://orcid.org/0000-0002-4142-7153 "disease caused by a somatic mutation, therefore a gene association stating this is due to a germline mutation should not be added"
1 change: 1 addition & 0 deletions omim2obo/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
DATA_DIR = ROOT_DIR / 'data'
ENV_PATH = ROOT_DIR / '.env'
REVIEW_CASES_PATH = ROOT_DIR / 'review.tsv'
DISEASE_GENE_EXCLUSIONS_PATH = DATA_DIR / 'exclusions-disease-gene.tsv'

with open(DATA_DIR / 'dipper/GLOBAL_TERMS.yaml') as file:
GLOBAL_TERMS = yaml.safe_load(file)
Expand Down
47 changes: 29 additions & 18 deletions omim2obo/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
Assumptions
1. Mappings obtained from official OMIM files as described above are interpreted correctly (e.g. skos:exactMatch).
"""
from typing import Set
from typing import Optional, Set

import yaml
from hashlib import md5
Expand All @@ -64,7 +64,7 @@
from omim2obo.parsers.omim_entry_parser import REVIEW_CASES, cleanup_title, get_alt_and_included_titles_and_symbols, \
get_pubs, get_mapped_ids, log_review_cases, recapitalize_acronyms_in_titles
from omim2obo.parsers.omim_txt_parser import * # todo: change to specific imports

from omim2obo.utils.utils import get_d2g_exclusions_by_curator

# Vars
OUTPATH = os.path.join(ROOT_DIR / 'omim.ttl')
Expand Down Expand Up @@ -123,21 +123,22 @@ def add_subclassof_restriction(graph: Graph, predicate: URIRef, some_values_from
return b


def add_subclassof_restriction_with_evidence(
graph: Graph, predicate: URIRef, some_values_from: URIRef, on: URIRef, evidence: Union[str, Literal]
def add_subclassof_restriction_with_evidence_and_source(
graph: Graph, predicate: URIRef, some_values_from: URIRef, on: URIRef, evidence: Union[str, Literal],
source: Optional[URIRef] = None,
):
"""Creates a subClassOf someValuesFrom restriction, and adds an axiom annotating it with evidence and, if given, a source."""
evidence = Literal(evidence) if type(evidence) is str else evidence
# Add restriction on MIM class
b: BNode = add_subclassof_restriction(graph, predicate, some_values_from, on)
# Add axiom to restriction
b2 = BNode()
graph.add((b2, RDF['type'], OWL['Axiom']))
graph.add((b2, OWL['annotatedSource'], on))
graph.add((b2, OWL['annotatedProperty'], RDFS['subClassOf']))
graph.add((b2, OWL['annotatedTarget'], b))
graph.add((b2, BIOLINK['has_evidence'], evidence))
graph.add((b2, RDFS['comment'], evidence))
annotation_pred_vals = [
(BIOLINK['has_evidence'], evidence),
(RDFS['comment'], evidence)
]
annotation_pred_vals += [(oboInOwl.source, source)] if source else []

add_axiom_annotations(graph, on, RDFS['subClassOf'], b, annotation_pred_vals)


# Classes
Expand Down Expand Up @@ -200,6 +201,7 @@ def omim2obo(use_cache: bool = False):
# - Non-OMIM triples
graph.add((URIRef('http://purl.obolibrary.org/obo/mondo/omim.owl'), RDF.type, OWL.Ontology))
graph.add((URIRef(oboInOwl.hasSynonymType), RDF.type, OWL.AnnotationProperty))
graph.add((URIRef(oboInOwl.source), RDF.type, OWL.AnnotationProperty))
graph.add((URIRef(MONDONS.omim_included), RDF.type, OWL.AnnotationProperty))
graph.add((URIRef(OMO['0003000']), RDF.type, OWL.AnnotationProperty))
graph.add((BIOLINK['has_evidence'], RDF.type, OWL.AnnotationProperty))
Expand Down Expand Up @@ -362,11 +364,13 @@ def omim2obo(use_cache: bool = False):
'gene_id': gene_mim, 'phenotype_label': p_lab, 'mapping_key': p_map_key, 'mapping_label': p_map_lab})

# - Add relations (subclass restrictions)
exclusions_p_mim_orcid_map = get_d2g_exclusions_by_curator()
for p_mim, assocs in phenotype_genes.items():
for assoc in assocs:
gene_mim, p_lab, p_map_key, p_map_lab = assoc['gene_id'], assoc['phenotype_label'], \
assoc['mapping_key'], assoc['mapping_label']
evidence = f'Evidence: ({p_map_key}) {p_map_lab}'
p_mim_excluded = p_mim in exclusions_p_mim_orcid_map

# Skip: No phenotype or unknown defect
# - not p_mim: Skip because not an association to another MIM (Provenance:
Expand All @@ -376,26 +380,33 @@ def omim2obo(use_cache: bool = False):
if not p_mim or p_map_key == '1':
continue

# Add restrictions: Gene->Disease non-causal (disease-defining) relationships
# Add restrictions: Gene->Disease non-causal / non-disease-defining relationships
# - RO:0003302 docs: see MORBIDMAP_PHENOTYPE_MAPPING_KEY_PREDICATES
if p_map_key != '3': # 3 = 'causal' (disease-defining). Handled separately below.
g2d_pred = MORBIDMAP_PHENOTYPE_MAPPING_KEY_PREDICATES[p_map_key] if len(assocs) == 1 else RO['0003302']
add_subclassof_restriction_with_evidence(graph, g2d_pred, OMIM[p_mim], OMIM[gene_mim], evidence)
# - Mapping key 3 = 'causal' (disease-defining). Handled separately below.
if p_map_key != '3' or p_mim_excluded:
g2d_pred = MORBIDMAP_PHENOTYPE_MAPPING_KEY_PREDICATES[p_map_key] \
if len(assocs) == 1 and not p_mim_excluded \
else RO['0003302']
orcid: Optional[URIRef] = exclusions_p_mim_orcid_map[p_mim] if p_mim_excluded else None
add_subclassof_restriction_with_evidence_and_source(
graph, g2d_pred, OMIM[p_mim], OMIM[gene_mim], evidence, orcid)
continue

# Skip non-causal / non-disease-defining cases
if len(assocs) > 1 or p_map_key != '3' or not p2g_is_definitive(p_lab):
if len(assocs) > 1 or not p2g_is_definitive(p_lab): # or cases above: (p_map_key != '3') & p_mim_excluded
continue

# Log review.tsv cases
log_review_cases(p_mim, p_lab, p_map_key, gene_mim, gene_phenotypes, omim_types)

# Add restrictions: Disease-defining ('causal germline mutation')
# - Disease --(RO:0004003 'has material basis in germline mutation in')--> Gene
# https://www.ebi.ac.uk/ols4/ontologies/ro/properties?iri=http://purl.obolibrary.org/obo/RO_0004003
add_subclassof_restriction_with_evidence(
add_subclassof_restriction_with_evidence_and_source(
graph, RO['0004003'], OMIM[gene_mim], OMIM[p_mim], evidence)
# - Gene --(RO:0004013 'is causal germline mutation in')--> Disease
# https://www.ebi.ac.uk/ols4/ontologies/ro/properties?iri=http://purl.obolibrary.org/obo/RO_0004013
add_subclassof_restriction_with_evidence(
add_subclassof_restriction_with_evidence_and_source(
graph, RO['0004013'], OMIM[p_mim], OMIM[gene_mim], evidence)

# PUBMED, UMLS
Expand Down
1 change: 1 addition & 0 deletions omim2obo/namespaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@
# publication/citation/reference sources
DOI = Namespace('http://dx.doi.org/') # Digital Object identifier
GENEREVIEWS = Namespace('http://www.ncbi.nlm.nih.gov/books/') # NCBI gene and diseases
ORCID = Namespace('https://orcid.org/') # Open Researcher and Contributor ID
# more bogus IRIs
ISBN = Namespace('https://monarchinitiative.org/ISBN_') # International Standard Book Number
ISBN_10 = Namespace('https://monarchinitiative.org/ISBN10_') # Same as ISBN has 10 digits pre 2007
Expand Down
18 changes: 17 additions & 1 deletion omim2obo/utils/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
"""Misc utilities"""
from typing import List, Union
from typing import Dict, List, Optional, Union

import pandas as pd

from omim2obo.config import DISEASE_GENE_EXCLUSIONS_PATH
from omim2obo.namespaces import ORCID


# todo: also in mondo-ingest. Refactor into mondolib: https://github.com/monarch-initiative/mondolib/issues/13
Expand All @@ -14,3 +19,14 @@ def remove_angle_brackets(uris: Union[str, List[str]]) -> Union[str, List[str]]:
x = x[:-1] if x.endswith('>') else x
uris2.append(x)
return uris2[0] if str_input else uris2


def get_d2g_exclusions_by_curator(path=DISEASE_GENE_EXCLUSIONS_PATH) -> Dict[str, Optional[str]]:
    """Get disease-gene exclusions, keyed by phenotype MIM number.

    Reads the tab-separated exclusions file at `path` and uses its `omim_id` and `orcid` columns.

    :param path: Location of the exclusions TSV. Defaults to DISEASE_GENE_EXCLUSIONS_PATH.
    :return: Dict with the phenotype MIM number (the part of `omim_id` after the colon) as keys, and as values the
     curator's ORCID as a term in the ORCID namespace, or None when the `orcid` cell is blank. If a MIM appears on
     more than one row, the last row wins.
    """
    orcid_prefix = str(ORCID)
    # dtype=str: keep every cell textual so that the string operations below never see inferred numeric types.
    df = pd.read_csv(path, sep='\t', dtype=str).fillna('')
    exclusions = {}
    for omim_id, orcid in zip(df['omim_id'], df['orcid']):
        phenotype_mim = omim_id.split(':')[1]
        orcid = orcid.strip()
        # The file stores ORCIDs as full URLs. Indexing the namespace with a full URL would prepend the namespace a
        # second time (https://orcid.org/https://orcid.org/...), so reduce the value to its local part first.
        if orcid.startswith(orcid_prefix):
            orcid = orcid[len(orcid_prefix):]
        exclusions[phenotype_mim] = ORCID[orcid] if orcid else None
    return exclusions
1 change: 1 addition & 0 deletions sparql/disease-gene-relationships.sparql
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ WHERE {

FILTER(
?PredUri IN (
<http://purl.obolibrary.org/obo/RO_0003302>,
<http://purl.obolibrary.org/obo/RO_0003303>,
<http://purl.obolibrary.org/obo/RO_0003304>,
<http://purl.obolibrary.org/obo/RO_0004013>,
Expand Down

0 comments on commit 20f7580

Please sign in to comment.