From 3acd295dfdd385b9a45b0ef9f5f5e2facd96adb4 Mon Sep 17 00:00:00 2001 From: Daniel Sotirhos Date: Wed, 30 Oct 2024 11:03:18 -0700 Subject: [PATCH] [r] Support for AnVIL duos_id (#6620) --- src/azul/plugins/metadata/anvil/__init__.py | 5 +++++ src/azul/plugins/metadata/anvil/indexer/transform.py | 1 + src/azul/plugins/metadata/anvil/service/response.py | 1 + src/azul/plugins/repository/tdr_anvil/__init__.py | 3 ++- .../2370f948-2783-aeb6-afea-e022897f4dcf.tdr.anvil.json | 1 + test/indexer/test_anvil.py | 8 ++++++-- .../data/manifest/verbatim/pfb/anvil/pfb_entities.json | 2 ++ .../data/manifest/verbatim/pfb/anvil/pfb_schema.json | 8 ++++++++ test/service/test_manifest.py | 6 ++++++ 9 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/azul/plugins/metadata/anvil/__init__.py b/src/azul/plugins/metadata/anvil/__init__.py index ab4c55441c..35be1410e3 100644 --- a/src/azul/plugins/metadata/anvil/__init__.py +++ b/src/azul/plugins/metadata/anvil/__init__.py @@ -156,6 +156,7 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping: 'registered_identifier', 'title', 'data_modality', + 'duos_id', ] }, 'donors': { @@ -351,6 +352,10 @@ def verbatim_pfb_schema(self, is_polymorphic=is_duos_type) ] if is_duos_type: + field_schemas.append(self._pfb_schema_from_anvil_column(table_name=table_name, + column_name='duos_id', + anvil_datatype='string', + is_polymorphic=True)) field_schemas.append(self._pfb_schema_from_anvil_column(table_name=table_name, column_name='description', anvil_datatype='string', diff --git a/src/azul/plugins/metadata/anvil/indexer/transform.py b/src/azul/plugins/metadata/anvil/indexer/transform.py index c5bc430299..df8b958935 100644 --- a/src/azul/plugins/metadata/anvil/indexer/transform.py +++ b/src/azul/plugins/metadata/anvil/indexer/transform.py @@ -498,6 +498,7 @@ def _duos_types(cls) -> FieldTypes: return { 'document_id': null_str, 'description': null_str, + 'duos_id': null_str, } def _duos(self, dataset: EntityReference) -> MutableJSON: diff --git a/src/azul/plugins/metadata/anvil/service/response.py b/src/azul/plugins/metadata/anvil/service/response.py index 8d0be3c129..6175bd6473 100644 --- a/src/azul/plugins/metadata/anvil/service/response.py +++ b/src/azul/plugins/metadata/anvil/service/response.py @@ -210,6 +210,7 @@ def _non_pivotal_fields_by_entity_type(self) -> dict[str, set[str]]: }, 'datasets': { 'dataset_id', + 'duos_id', 'title' }, 'diagnoses': { diff --git a/src/azul/plugins/repository/tdr_anvil/__init__.py b/src/azul/plugins/repository/tdr_anvil/__init__.py index 031b2264f3..ce27a3bf30 100644 --- a/src/azul/plugins/repository/tdr_anvil/__init__.py +++ b/src/azul/plugins/repository/tdr_anvil/__init__.py @@ -490,7 +490,8 @@ def _duos_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: self.datarepo_row_uuid_version) assert ref.entity_id == expected_entity_id, (ref, bundle_fqid) bundle = TDRAnvilBundle(fqid=bundle_fqid) - bundle.add_entity(ref, self._version, {'description': description}) + entity_row = {'duos_id': duos_id, 'description': description} + bundle.add_entity(ref, self._version, entity_row) # Classify as orphan to suppress the emission of a contribution bundle.add_entity(ref, self._version, dict(row), is_orphan=True) return bundle diff --git a/test/indexer/data/2370f948-2783-aeb6-afea-e022897f4dcf.tdr.anvil.json b/test/indexer/data/2370f948-2783-aeb6-afea-e022897f4dcf.tdr.anvil.json index 9859b200f1..b029436606 100644 --- a/test/indexer/data/2370f948-2783-aeb6-afea-e022897f4dcf.tdr.anvil.json +++ b/test/indexer/data/2370f948-2783-aeb6-afea-e022897f4dcf.tdr.anvil.json @@ -2,6 +2,7 @@ "entities": { "anvil_dataset/2370f948-2783-4eb6-afea-e022897f4dcf": { "description": "Study description from DUOS", + "duos_id": "DUOS-000000", "version": "2022-06-01T00:00:00.000000Z" } }, diff --git a/test/indexer/test_anvil.py b/test/indexer/test_anvil.py index 1cbc52ba0e..e6f0114dd0 100644 --- a/test/indexer/test_anvil.py +++ b/test/indexer/test_anvil.py @@ -75,7 +75,7 @@ def setUpClass(cls) -> None: mock_duos_url = furl('https:://mock_duos.lan') - duos_id = 'foo' + duos_id = 'DUOS-000000' duos_description = 'Study description from DUOS' @classmethod @@ -93,6 +93,9 @@ def _patch_duos(cls) -> None: } })), Mock(spec=HTTPResponse, status=200, data=json.dumps({ + 'consentGroups': [{ + 'datasetIdentifier': cls.duos_id + }], 'studyDescription': cls.duos_description })) ])) @@ -251,8 +254,9 @@ def test_dataset_description(self): # These fields are populated only in the primary bundle self.assertEqual(dataset_ref.entity_id, contents['document_id']) self.assertEqual(['phs000693'], contents['registered_identifier']) - # This field is populated only in the DUOS bundle + # These fields are populated only in the DUOS bundle self.assertEqual('Study description from DUOS', contents['description']) + self.assertEqual('DUOS-000000', contents['duos_id']) else: self.fail(qualifier) self.assertDictEqual(doc_counts, { diff --git a/test/service/data/manifest/verbatim/pfb/anvil/pfb_entities.json b/test/service/data/manifest/verbatim/pfb/anvil/pfb_entities.json index 8cb9a00eda..145153fcb3 100644 --- a/test/service/data/manifest/verbatim/pfb/anvil/pfb_entities.json +++ b/test/service/data/manifest/verbatim/pfb/anvil/pfb_entities.json @@ -110,6 +110,7 @@ "datarepo_row_id": null, "dataset_id": null, "description": "Study description from DUOS", + "duos_id": "DUOS-000000", "owner": null, "principal_investigator": null, "registered_identifier": null, @@ -282,6 +283,7 @@ "datarepo_row_id": "2370f948-2783-4eb6-afea-e022897f4dcf", "dataset_id": "52ee7665-7033-63f2-a8d9-ce8e32666739", "description": null, + "duos_id": null, "owner": [ "Debbie Nickerson" ], diff --git a/test/service/data/manifest/verbatim/pfb/anvil/pfb_schema.json b/test/service/data/manifest/verbatim/pfb/anvil/pfb_schema.json index 9bdd6fcf66..1f0d38f6f7 100644 --- a/test/service/data/manifest/verbatim/pfb/anvil/pfb_schema.json +++ b/test/service/data/manifest/verbatim/pfb/anvil/pfb_schema.json @@ -560,6 +560,14 @@ "string" ] }, + { + "name": "duos_id", + "namespace": "anvil_dataset", + "type": [ + "null", + "string" + ] + }, { "name": "owner", "namespace": "anvil_dataset", diff --git a/test/service/test_manifest.py b/test/service/test_manifest.py index 6eb1e52e0a..60777aea0a 100644 --- a/test/service/test_manifest.py +++ b/test/service/test_manifest.py @@ -1833,6 +1833,12 @@ def test_compact_manifest(self): '', '' ), + ( + 'datasets.duos_id', + '', + '', + '', + ), ( 'donors.document_id', '',