Skip to content

Commit

Permalink
[r] Support for AnVIL duos_id (#6620)
Browse files: browse the repository at this point in the history
  • Loading branch information
dsotirho-ucsc committed Dec 20, 2024
1 parent e248a84 commit ab479b3
Show file tree
Hide file tree
Showing 11 changed files with 46 additions and 10 deletions.
5 changes: 5 additions & 0 deletions src/azul/plugins/metadata/anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
'registered_identifier',
'title',
'data_modality',
'duos_id',
]
},
'donors': {
Expand Down Expand Up @@ -351,6 +352,10 @@ def verbatim_pfb_schema(self,
is_polymorphic=is_duos_type)
]
if is_duos_type:
field_schemas.append(self._pfb_schema_from_anvil_column(table_name=table_name,
column_name='duos_id',
anvil_datatype='string',
is_polymorphic=True))
field_schemas.append(self._pfb_schema_from_anvil_column(table_name=table_name,
column_name='description',
anvil_datatype='string',
Expand Down
1 change: 1 addition & 0 deletions src/azul/plugins/metadata/anvil/indexer/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,7 @@ def _duos_types(cls) -> FieldTypes:
return {
'document_id': null_str,
'description': null_str,
'duos_id': null_str,
}

def _duos(self, dataset: EntityReference) -> MutableJSON:
Expand Down
1 change: 1 addition & 0 deletions src/azul/plugins/metadata/anvil/service/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ def _non_pivotal_fields_by_entity_type(self) -> dict[str, set[str]]:
},
'datasets': {
'dataset_id',
'duos_id',
'title'
},
'diagnoses': {
Expand Down
5 changes: 3 additions & 2 deletions src/azul/plugins/repository/tdr_anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,15 +479,16 @@ def _supplementary_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBund

def _duos_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle:
assert not bundle_fqid.is_batched, bundle_fqid
duos_info = self.tdr.get_duos(bundle_fqid.source)
duos_id, duos_info = self.tdr.get_duos(bundle_fqid.source)
description = None if duos_info is None else duos_info.get('studyDescription')
ref, row = self._get_dataset(bundle_fqid.source.spec)
expected_entity_id = change_version(bundle_fqid.uuid,
self.bundle_uuid_version,
self.datarepo_row_uuid_version)
assert ref.entity_id == expected_entity_id, (ref, bundle_fqid)
bundle = TDRAnvilBundle(fqid=bundle_fqid)
bundle.add_entity(ref, self._version, {'description': description})
entity_row = {'duos_id': duos_id, 'description': description}
bundle.add_entity(ref, self._version, entity_row)
# Classify as orphan to suppress the emission of a contribution
bundle.add_entity(ref, self._version, dict(row), is_orphan=True)
return bundle
Expand Down
14 changes: 10 additions & 4 deletions src/azul/terra.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,19 +646,25 @@ def for_registered_user(cls, authentication: OAuth2) -> 'TDRClient':
def drs_client(self) -> DRSClient:
return DRSClient(http_client=self._http_client)

def get_duos(self, source: TDRSourceRef) -> Optional[MutableJSON]:
def get_duos(self,
source: TDRSourceRef
) -> tuple[str, MutableJSON] | tuple[None, None]:
response = self._retrieve_source(source)
try:
duos_id = response['duosFirecloudGroup']['duosId']
except (KeyError, TypeError):
log.warning('No DUOS ID available for %r', source.spec)
return None
return None, None
else:
url = self._duos_endpoint('dataset', 'registration', duos_id)
response = self._request('GET', url)
if response.status == 404:
log.warning('No DUOS dataset registration with ID %r from %r',
duos_id, source.spec)
return None
return None, None
else:
return self._check_response(url, response)
response = self._check_response(url, response)
consent_group = one(response['consentGroups'])
require(duos_id == consent_group['datasetIdentifier'],
'Mismatched identifiers', duos_id, consent_group)
return duos_id, response

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions test/indexer/test_anvil.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def setUpClass(cls) -> None:

mock_duos_url = furl('https:://mock_duos.lan')

duos_id = 'foo'
duos_id = 'DUOS-000000'
duos_description = 'Study description from DUOS'

@classmethod
Expand All @@ -93,6 +93,9 @@ def _patch_duos(cls) -> None:
}
})),
Mock(spec=HTTPResponse, status=200, data=json.dumps({
'consentGroups': [{
'datasetIdentifier': cls.duos_id
}],
'studyDescription': cls.duos_description
}))
]))
Expand Down Expand Up @@ -251,8 +254,9 @@ def test_dataset_description(self):
# These fields are populated only in the primary bundle
self.assertEqual(dataset_ref.entity_id, contents['document_id'])
self.assertEqual(['phs000693'], contents['registered_identifier'])
# This field is populated only in the DUOS bundle
# These fields are populated only in the DUOS bundle
self.assertEqual('Study description from DUOS', contents['description'])
self.assertEqual('DUOS-000000', contents['duos_id'])
else:
self.fail(qualifier)
self.assertDictEqual(doc_counts, {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@
"datarepo_row_id": null,
"dataset_id": null,
"description": "Study description from DUOS",
"duos_id": "DUOS-000000",
"owner": null,
"principal_investigator": null,
"registered_identifier": null,
Expand Down Expand Up @@ -282,6 +283,7 @@
"datarepo_row_id": "2370f948-2783-4eb6-afea-e022897f4dcf",
"dataset_id": "52ee7665-7033-63f2-a8d9-ce8e32666739",
"description": null,
"duos_id": null,
"owner": [
"Debbie Nickerson"
],
Expand Down
8 changes: 8 additions & 0 deletions test/service/data/manifest/verbatim/pfb/anvil/pfb_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,14 @@
"string"
]
},
{
"name": "duos_id",
"namespace": "anvil_dataset",
"type": [
"null",
"string"
]
},
{
"name": "owner",
"namespace": "anvil_dataset",
Expand Down
6 changes: 6 additions & 0 deletions test/service/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1845,6 +1845,12 @@ def test_compact_manifest(self):
'',
''
),
(
'datasets.duos_id',
'',
'',
'',
),
(
'donors.document_id',
'',
Expand Down
5 changes: 3 additions & 2 deletions test/service/test_response_anvil.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,7 +892,7 @@ def test_entity_indices(self):
}
],
'datasets': [
{}
{'duos_id': ['DUOS-000000']}
],
'diagnoses': [],
'donors': [],
Expand Down Expand Up @@ -1220,7 +1220,8 @@ def test_entity_indices(self):
None
],
'accessible': True,
'description': 'Study description from DUOS'
'description': 'Study description from DUOS',
'duos_id': 'DUOS-000000'
}
],
'diagnoses': [
Expand Down

0 comments on commit ab479b3

Please sign in to comment.