Skip to content

Commit

Permalink
feat: update development stage ontology term ID validation rules (#1009)
Browse files Browse the repository at this point in the history
  • Loading branch information
nayib-jose-gloria authored Sep 12, 2024
1 parent 8052c80 commit efb4ce5
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -290,22 +290,30 @@ components:
rule: "organism_ontology_term_id == 'NCBITaxon:9606'"
error_message_suffix: >-
When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens),
'development_stage_ontology_term_id' MUST be a term id of 'HsapDv' or unknown.
'development_stage_ontology_term_id' MUST be the most accurate descendant of 'HsapDv:0000001' or unknown.
type: curie
curie_constraints:
ontologies:
- HsapDv
allowed:
ancestors:
HsapDv:
- HsapDv:0000001
exceptions:
- unknown
- # If organism is Mouse
rule: "organism_ontology_term_id == 'NCBITaxon:10090'"
error_message_suffix: >-
When 'organism_ontology_term_id' is 'NCBITaxon:10090' (Mus musculus),
'development_stage_ontology_term_id' MUST be a term id of 'MmusDv' or unknown.
'development_stage_ontology_term_id' MUST be the most accurate descendant of 'MmusDv:0000001' or unknown.
type: curie
curie_constraints:
ontologies:
- MmusDv
allowed:
ancestors:
MmusDv:
- MmusDv:0000001
exceptions:
- unknown
# If organism is neither human nor mouse
Expand Down
68 changes: 52 additions & 16 deletions cellxgene_schema_cli/tests/test_schema_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,42 +566,78 @@ def test_cell_type_ontology_term_id(self, validator_with_adata, term):
f"ERROR: '{term}' in 'cell_type_ontology_term_id' is not allowed."
] or validator.errors == [f"ERROR: '{term}' in 'cell_type_ontology_term_id' is a deprecated term id of 'CL'."]

def test_development_stage_ontology_term_id_human(self, validator_with_adata):
@pytest.mark.parametrize(
"development_stage_ontology_term_id,error",
[
(
"CL:000001",
"ERROR: 'CL:000001' in 'development_stage_ontology_term_id' is not a valid ontology term id of 'HsapDv'.",
),
(
"HsapDv:0000000",
"ERROR: 'HsapDv:0000000' in 'development_stage_ontology_term_id' is not an allowed term id.",
),
(
"HsapDv:0000001",
"ERROR: 'HsapDv:0000001' in 'development_stage_ontology_term_id' is not an allowed term id.",
),
],
)
def test_development_stage_ontology_term_id_human(
self, validator_with_adata, development_stage_ontology_term_id, error
):
"""
development_stage_ontology_term_id categorical with str categories. If unavailable, this MUST be "unknown".
If organism_ontology_term_id is "NCBITaxon:9606" for Homo sapiens,
this MUST be the most accurate HsapDv term.
this MUST be the most accurate HsapDv:0000001 descendant.
"""
validator = validator_with_adata
obs = validator.adata.obs
obs.loc[obs.index[0], "organism_ontology_term_id"] = "NCBITaxon:9606"
obs.loc[obs.index[0], "development_stage_ontology_term_id"] = "EFO:0000001"
obs.loc[obs.index[0], "development_stage_ontology_term_id"] = development_stage_ontology_term_id
validator.validate_adata()
assert validator.errors == [
"ERROR: 'EFO:0000001' in 'development_stage_ontology_term_id' is "
"not a valid ontology term id of 'HsapDv'. When 'organism_ontology_term_id' is 'NCBITaxon:9606' "
"(Homo sapiens), 'development_stage_ontology_term_id' MUST be a term id of 'HsapDv' or unknown."
]
error_message_suffix = validator.schema_def["components"]["obs"]["columns"][
"development_stage_ontology_term_id"
]["dependencies"][0]["error_message_suffix"]
assert validator.errors == [self.get_format_error_message(error_message_suffix, error)]

def test_development_stage_ontology_term_id_mouse(self, validator_with_adata):
@pytest.mark.parametrize(
"development_stage_ontology_term_id,error",
[
(
"CL:000001",
"ERROR: 'CL:000001' in 'development_stage_ontology_term_id' is not a valid ontology term id of 'MmusDv'.",
),
(
"MmusDv:0000000",
"ERROR: 'MmusDv:0000000' in 'development_stage_ontology_term_id' is not an allowed term id.",
),
(
"MmusDv:0000001",
"ERROR: 'MmusDv:0000001' in 'development_stage_ontology_term_id' is not an allowed term id.",
),
],
)
def test_development_stage_ontology_term_id_mouse(
self, validator_with_adata, development_stage_ontology_term_id, error
):
"""
If organism_ontology_term_id is "NCBITaxon:10090" for Mus musculus,
this MUST be the most accurate MmusDv term
this MUST be the most accurate MmusDv:0000001 descendant.
"""
validator = validator_with_adata
obs = validator.adata.obs
obs.loc[obs.index[0], "organism_ontology_term_id"] = "NCBITaxon:10090"
obs.loc[obs.index[0], "development_stage_ontology_term_id"] = "EFO:0000001"
obs.loc[obs.index[0], "development_stage_ontology_term_id"] = development_stage_ontology_term_id
obs.loc[
obs.index[0],
"self_reported_ethnicity_ontology_term_id",
] = "na"
validator.validate_adata()
assert validator.errors == [
"ERROR: 'EFO:0000001' in 'development_stage_ontology_term_id' is "
"not a valid ontology term id of 'MmusDv'. When 'organism_ontology_term_id' is 'NCBITaxon:10090' "
"(Mus musculus), 'development_stage_ontology_term_id' MUST be a term id of 'MmusDv' or unknown."
]
error_message_suffix = validator.schema_def["components"]["obs"]["columns"][
"development_stage_ontology_term_id"
]["dependencies"][1]["error_message_suffix"]
assert validator.errors == [self.get_format_error_message(error_message_suffix, error)]

def test_development_stage_ontology_term_id_all_species(self, validator_with_adata):
"""
Expand Down

0 comments on commit efb4ce5

Please sign in to comment.