From c6a49161d2e62efd479835bd3bbca6cbc2be15ef Mon Sep 17 00:00:00 2001 From: uermel Date: Mon, 4 Nov 2024 11:56:10 -0800 Subject: [PATCH 1/5] expand tomogram hash --- ingestion_tools/scripts/importers/db/tomogram.py | 8 +++++++- ingestion_tools/scripts/importers/tomogram.py | 5 ++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/ingestion_tools/scripts/importers/db/tomogram.py b/ingestion_tools/scripts/importers/db/tomogram.py index 0396ee9ca..609bf101d 100644 --- a/ingestion_tools/scripts/importers/db/tomogram.py +++ b/ingestion_tools/scripts/importers/db/tomogram.py @@ -39,7 +39,13 @@ def get_data_map(self) -> dict[str, Any]: @classmethod def get_id_fields(cls) -> list[str]: - return ["tomogram_voxel_spacing_id", "deposition_id", "processing", "reconstruction_method"] + return [ + "tomogram_voxel_spacing_id", + "deposition_id", + "processing", + "processing_software", + "reconstruction_method", + ] @classmethod def get_db_model_class(cls) -> type[BaseModel]: diff --git a/ingestion_tools/scripts/importers/tomogram.py b/ingestion_tools/scripts/importers/tomogram.py index 006b3882e..5b98304e8 100644 --- a/ingestion_tools/scripts/importers/tomogram.py +++ b/ingestion_tools/scripts/importers/tomogram.py @@ -46,6 +46,7 @@ def _generate_hash_key( metadata.get("alignment_metadata_path", kwargs.get("alignment_metadata_path", "")), metadata.get("reconstruction_method", ""), metadata.get("processing", ""), + metadata.get("processing_software", ""), str(metadata.get("deposition_id", int(parents["deposition"].name))), ], ) @@ -79,7 +80,9 @@ def __init__( allow_imports=allow_imports, ) - self.alignment_metadata_path = config.to_formatted_path(alignment_metadata_path or self.get_alignment_metadata_path()) + self.alignment_metadata_path = config.to_formatted_path( + alignment_metadata_path or self.get_alignment_metadata_path(), + ) self.identifier = TomogramIdentifierHelper.get_identifier( config, self.get_base_metadata(), From 2f79ff0254bfcfcbae158cdf192446802ba35c05 Mon Sep 17 00:00:00 2001 From: Manasa Venkatakrishnan Date: Mon, 4 Nov 2024 13:20:27 -0800 Subject: [PATCH 2/5] Fix tests --- ingestion_tools/scripts/tests/db_import/populate_db.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ingestion_tools/scripts/tests/db_import/populate_db.py b/ingestion_tools/scripts/tests/db_import/populate_db.py index 7c8a91220..e44892c10 100644 --- a/ingestion_tools/scripts/tests/db_import/populate_db.py +++ b/ingestion_tools/scripts/tests/db_import/populate_db.py @@ -231,6 +231,7 @@ def populate_tomograms() -> None: scale1_dimensions="", scale2_dimensions="", processing="raw", + processing_software="tomo3D", deposition_id=DEPOSITION_ID1, offset_x=0, offset_y=0, From 50c6b02ec2c69bb82144b379366becd8236aa160 Mon Sep 17 00:00:00 2001 From: Manasa Venkatakrishnan Date: Mon, 4 Nov 2024 13:20:46 -0800 Subject: [PATCH 3/5] Update api v2 --- apiv2/db_import/importers/tomogram.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/apiv2/db_import/importers/tomogram.py b/apiv2/db_import/importers/tomogram.py index 22b67227e..179192964 100644 --- a/apiv2/db_import/importers/tomogram.py +++ b/apiv2/db_import/importers/tomogram.py @@ -9,9 +9,15 @@ class TomogramItem(ItemDBImporter): - # TODO - add the alignment_id field once that data is added the first time. - # id_fields = ["run_id", "tomogram_voxel_spacing_id", "deposition_id", "alignment_id", "processing", "reconstruction_method""] - id_fields = ["run_id", "tomogram_voxel_spacing_id", "deposition_id", "processing", "reconstruction_method"] + id_fields = [ + "alignment_id", + "deposition_id", + "processing", + "processing_software", + "reconstruction_method", + "run_id", + "tomogram_voxel_spacing_id", + ] model_class = models.Tomogram direct_mapped_fields = { "name": ["run_name"], @@ -101,8 +107,6 @@ def get_finder_args(self) -> dict[str, Any]: class TomogramAuthorItem(ItemDBImporter): - # TODO - add the alignment_id field once that data is added the first time. - # id_fields = ["run_id", "tomogram_voxel_spacing_id", "deposition_id", "alignment_id", "processing", "reconstruction_method""] id_fields = ["tomogram_id", "name"] model_class = models.TomogramAuthor direct_mapped_fields = { From d597221f330ed46879b73265bc54235dec93b3c1 Mon Sep 17 00:00:00 2001 From: Manasa Venkatakrishnan Date: Mon, 4 Nov 2024 14:15:47 -0800 Subject: [PATCH 4/5] Update api v2 --- apiv2/db_import/importers/tomogram.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apiv2/db_import/importers/tomogram.py b/apiv2/db_import/importers/tomogram.py index 179192964..d685e4ae4 100644 --- a/apiv2/db_import/importers/tomogram.py +++ b/apiv2/db_import/importers/tomogram.py @@ -9,8 +9,9 @@ class TomogramItem(ItemDBImporter): + # TODO - add the alignment_id field once that data is added the first time. id_fields = [ - "alignment_id", + # "alignment_id", "deposition_id", "processing", "processing_software", From 48659b9d6babe8bb9c14124c34d2b96ef86ec0e4 Mon Sep 17 00:00:00 2001 From: Manasa Venkatakrishnan Date: Mon, 4 Nov 2024 14:39:51 -0800 Subject: [PATCH 5/5] Update api v2 test --- apiv2/db_import/tests/populate_db.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apiv2/db_import/tests/populate_db.py b/apiv2/db_import/tests/populate_db.py index 862977df0..b5f23afcd 100644 --- a/apiv2/db_import/tests/populate_db.py +++ b/apiv2/db_import/tests/populate_db.py @@ -294,6 +294,7 @@ def populate_tomograms(session: sa.orm.Session) -> Tomogram: scale1_dimensions="", scale2_dimensions="", processing="raw", + processing_software="tomo3D", offset_x=0, offset_y=0, offset_z=0, @@ -440,7 +441,9 @@ def populate_annotations(session: sa.orm.Session) -> Annotation: id=ANNOTATION_ID, run_id=RUN1_ID, deposition_id=DEPOSITION_ID2, - s3_metadata_path="s3://test-public-bucket/30001/RUN1/Reconstructions/VoxelSpacing12.300/Annotations/100/foo-1.0.json", + s3_metadata_path=( + "s3://test-public-bucket/30001/RUN1/Reconstructions/VoxelSpacing12.300/Annotations/100/foo-1.0.json" + ), https_metadata_path="foo", deposition_date="2025-04-01", release_date="2025-06-01",