From 8866566582182af8302d59dae8b4e947424c7a1f Mon Sep 17 00:00:00 2001 From: Guilherme de Freitas Date: Fri, 9 Feb 2024 16:33:00 +0000 Subject: [PATCH] Release v1.3.0 --- CHANGELOG.rst | 9 ++ pyproject.toml | 2 +- src/pato/crud/groups.py | 49 +++++--- src/pato/crud/sessions.py | 124 +++++++++++++++++++-- src/pato/models/parameters.py | 19 ++-- src/pato/models/response.py | 36 +++--- src/pato/routes/groups.py | 9 +- src/pato/routes/proposals.py | 18 ++- tests/collections/test_collection.py | 10 ++ tests/collections/test_spa_reprocessing.py | 7 +- tests/parameters/test_spa.py | 8 +- tests/sessions/test_create_dc.py | 57 ++++++++++ 12 files changed, 283 insertions(+), 65 deletions(-) create mode 100644 tests/sessions/test_create_dc.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 02791da..cb1e97d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,15 @@ Changelog ========== ++++++++++ +v1.3.0 (07/02/2024) ++++++++++ + +**Added** + +- Data collection creation endpoint (:code:`/proposals/{propId}/sessions/{sessionId}/dataCollections`) +- :code:`sortBy` argument to data collection listing endpoint + +++++++++ v1.2.3 (05/02/2024) +++++++++ diff --git a/pyproject.toml b/pyproject.toml index f852493..745de02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "mysql-connector-python~=8.2.0", "pydantic~=2.5.3", "types-requests", - "lims-utils~=0.1.1" + "lims-utils~=0.1.2" ] dynamic = ["version"] license.file = "LICENSE" diff --git a/src/pato/crud/groups.py b/src/pato/crud/groups.py index 4fb6e41..00c76c4 100644 --- a/src/pato/crud/groups.py +++ b/src/pato/crud/groups.py @@ -13,9 +13,11 @@ from sqlalchemy import select from ..auth import User +from ..models.parameters import DataCollectionSortTypes from ..models.response import DataCollectionGroupSummaryResponse, DataCollectionSummary from ..utils.auth import check_session from ..utils.database import db, paginate, unravel +from ..utils.generic import parse_proposal def 
get_collection_groups( @@ -26,6 +28,7 @@ def get_collection_groups( search: Optional[str], user: User, ) -> Paged[DataCollectionGroupSummaryResponse]: + query = ( select( *unravel(DataCollectionGroup), @@ -40,14 +43,18 @@ def get_collection_groups( .group_by(DataCollectionGroup.dataCollectionGroupId) ) - if search is not None: + if search is not None and search != "": query = query.filter(DataCollectionGroup.comments.contains(search)) if proposal: + proposal_reference = parse_proposal(proposal) session_id_query = ( select(BLSession.sessionId) .select_from(Proposal) - .where(f.concat(Proposal.proposalCode, Proposal.proposalNumber) == proposal) + .where( + Proposal.proposalCode == proposal_reference.code, + Proposal.proposalNumber == proposal_reference.number, + ) .join(BLSession) ) @@ -59,6 +66,7 @@ def get_collection_groups( query = query.filter( DataCollectionGroup.sessionId == db.session.scalar(session_id_query) ) + else: query = query.filter( DataCollectionGroup.sessionId.in_( @@ -74,24 +82,31 @@ def get_collections( page: int, groupId: Optional[int], search: Optional[str], + sortBy: DataCollectionSortTypes, user: User, onlyTomograms: bool, ) -> Paged[DataCollectionSummary]: + sort = ( + Tomogram.globalAlignmentQuality.desc() + if sortBy == "globalAlignmentQuality" + else DataCollection.dataCollectionId + ) + + base_sub_query = ( + select( + f.row_number().over(order_by=sort).label("index"), + *unravel(DataCollection), + f.count(Tomogram.tomogramId).label("tomograms"), + Tomogram.globalAlignmentQuality, + ) + .select_from(DataCollection) + .join(BLSession, BLSession.sessionId == DataCollection.SESSIONID) + .join(Tomogram, isouter=(not onlyTomograms)) + .group_by(DataCollection.dataCollectionId) + ) + sub_with_row = check_session( - ( - select( - f.row_number() - .over(order_by=DataCollection.dataCollectionId) - .label("index"), - *unravel(DataCollection), - f.count(Tomogram.tomogramId).label("tomograms"), - ) - .select_from(DataCollection) - .join(BLSession, 
BLSession.sessionId == DataCollection.SESSIONID) - .join(Tomogram, isouter=(not onlyTomograms)) - .group_by(DataCollection.dataCollectionId) - .order_by(DataCollection.dataCollectionId) - ), + base_sub_query, user, ) @@ -104,7 +119,7 @@ def get_collections( query = select(*sub_result.c) - if search is not None: + if search is not None and search != "": query = query.filter(sub_result.c.comments.contains(search)) return paginate(query, limit, page, slow_count=True) diff --git a/src/pato/crud/sessions.py b/src/pato/crud/sessions.py index fdc73b2..353d44a 100644 --- a/src/pato/crud/sessions.py +++ b/src/pato/crud/sessions.py @@ -1,17 +1,49 @@ +import pathlib from datetime import datetime from typing import Optional from fastapi import HTTPException, status from lims_utils.models import Paged -from lims_utils.tables import BLSession, DataCollectionGroup, Proposal -from sqlalchemy import Label, and_, or_, select -from sqlalchemy import func as f +from lims_utils.tables import BLSession, DataCollection, DataCollectionGroup, Proposal +from sqlalchemy import Label, and_, extract, func, insert, or_, select from ..auth import User +from ..models.parameters import DataCollectionCreationParameters from ..models.response import SessionResponse from ..utils.auth import check_session from ..utils.database import db, fast_count, paginate, unravel -from ..utils.generic import ProposalReference, parse_proposal +from ..utils.generic import ProposalReference, check_session_active, parse_proposal + + +def _validate_session_active(proposalReference: ProposalReference): + """Check if session is active and return session ID""" + session = db.session.scalar( + select(BLSession) + .select_from(Proposal) + .join(BLSession) + .filter( + BLSession.visit_number == proposalReference.visit_number, + Proposal.proposalNumber == proposalReference.number, + Proposal.proposalCode == proposalReference.code, + ) + ) + + if not check_session_active(session.endDate): + raise HTTPException( + 
status_code=status.HTTP_423_LOCKED, + detail="Reprocessing cannot be fired on an inactive session", + ) + + return session.sessionId + + +def _check_raw_files_exist(file_directory: str, glob_path: str): + """Check if raw data files exist in the filesystem""" + if not any(pathlib.Path(file_directory).glob(glob_path)): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="No raw files found in session directory", + ) def get_sessions( @@ -27,12 +59,16 @@ def get_sessions( countCollections: bool, ) -> Paged[SessionResponse]: fields: list[Label[str] | Label[int]] = [ - f.concat(Proposal.proposalCode, Proposal.proposalNumber).label("parentProposal") + func.concat(Proposal.proposalCode, Proposal.proposalNumber).label( + "parentProposal" + ) ] if countCollections: fields.append( - f.count(DataCollectionGroup.dataCollectionGroupId).label("collectionGroups") + func.count(DataCollectionGroup.dataCollectionGroupId).label( + "collectionGroups" + ) ) query = select(*unravel(BLSession), *fields) @@ -65,7 +101,7 @@ def get_sessions( if maxStartDate is not None: query = query.filter(BLSession.startDate <= maxStartDate) - if search is not None: + if search is not None and search != "": query = query.filter( or_( BLSession.beamLineName.contains(search), @@ -89,7 +125,7 @@ def get_session(proposalReference: ProposalReference): query = ( select( *unravel(BLSession), - f.concat(Proposal.proposalCode, Proposal.proposalNumber).label( + func.concat(Proposal.proposalCode, Proposal.proposalNumber).label( "parentProposal" ), ) @@ -110,3 +146,75 @@ def get_session(proposalReference: ProposalReference): ) return session + + +def create_data_collection( + proposalReference: ProposalReference, params: DataCollectionCreationParameters +): + session_id = _validate_session_active(proposalReference) + + session = db.session.execute( + select( + BLSession.beamLineName, + BLSession.endDate, + extract("year", BLSession.startDate).label("year"), + func.concat( + 
Proposal.proposalCode, + Proposal.proposalNumber, + "-", + BLSession.visit_number, + ).label("name"), + ) + .filter(BLSession.sessionId == session_id) + .join(Proposal, Proposal.proposalId == BLSession.proposalId) + ).one() + + # TODO: Make the path string pattern configurable? + file_directory = f"/dls/{session.beamLineName}/data/{session.year}/{session.name}/{params.fileDirectory}/" + glob_path = f"GridSquare_*/Data/*{params.fileExtension}" + + _check_raw_files_exist(file_directory, glob_path) + + existing_data_collection = db.session.scalar( + select(DataCollection.dataCollectionId) + .filter( + DataCollection.imageDirectory == file_directory, + DataCollection.fileTemplate == glob_path, + DataCollectionGroup.sessionId == session_id, + ) + .join(DataCollectionGroup) + .limit(1) + ) + + if existing_data_collection is not None: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Data collection already exists", + ) + + dcg_id = db.session.scalar( + insert(DataCollectionGroup).returning( + DataCollectionGroup.dataCollectionGroupId + ), + { + "sessionId": session_id, + "comments": "Created by PATo", + "experimentType": "EM", + }, + ) + + data_collection = db.session.scalar( + insert(DataCollection).returning(DataCollection), + { + "dataCollectionGroupId": dcg_id, + "endTime": session.endDate, + "runStatus": "Created by PATo", + "imageDirectory": file_directory, + "fileTemplate": glob_path, + "imageSuffix": params.fileExtension, + }, + ) + + db.session.commit() + + return data_collection diff --git a/src/pato/models/parameters.py b/src/pato/models/parameters.py index b8cf10f..45572bb 100644 --- a/src/pato/models/parameters.py +++ b/src/pato/models/parameters.py @@ -10,9 +10,6 @@ "do_class3d", "mask_diameter", "extract_boxsize", - "extract_small_boxsize", - "do_class2d_pass2", - "do_class3d_pass2", "autopick_LoG_diam_min", "autopick_LoG_diam_max", "use_fsc_criterion", @@ -22,7 +19,6 @@ _omit_when_autocalculating = [ "mask_diameter", 
"extract_box_size", - "extract_small_boxsize", ] @@ -55,13 +51,8 @@ class SPAReprocessingParameters(BaseModel): extract_boxsize: Optional[float] = Field( ge=0.1, le=1024, alias="boxSize", default=None ) - extract_small_boxsize: Optional[float] = Field( - ge=0.1, le=1024, alias="downsampleBoxSize", default=None - ) performCalculation: bool = Field(default=True, exclude=True) use_fsc_criterion: Optional[bool] = Field(default=False, alias="useFscCriterion") - do_class2d_pass2: Optional[bool] = Field(default=True, alias="perform2DSecondPass") - do_class3d_pass2: Optional[bool] = Field(default=False, alias="perform3DSecondPass") autopick_LoG_diam_min: Optional[float] = Field( ge=0.02, le=1024.0, alias="minimumDiameter", default=None ) @@ -110,3 +101,13 @@ def check_dynamically_required_fields(self): raise ValueError("maximumDiameter must be greater than minimumDiameter") return self + + +class DataCollectionCreationParameters(BaseModel): + fileDirectory: str + fileExtension: str + + +# mypy doesn't support type aliases yet + +DataCollectionSortTypes = Literal["dataCollectionId", "globalAlignmentQuality"] diff --git a/src/pato/models/response.py b/src/pato/models/response.py index 499136b..42f8a95 100644 --- a/src/pato/models/response.py +++ b/src/pato/models/response.py @@ -80,12 +80,29 @@ class SessionResponse(OrmBaseModel): dataCollectionGroupId: Optional[int] = None -class DataCollectionSummary(OrmBaseModel): +class BaseDataCollectionOut(OrmBaseModel): dataCollectionId: int = Field(..., lt=1e9, description="Data Collection ID") - comments: Optional[str] + dataCollectionGroupId: int + index: Optional[int] = None startTime: Optional[datetime] = Field( ..., description="Start time of the dataCollection" ) + endTime: Optional[datetime] = Field( + ..., description="End time of the dataCollection" + ) + experimenttype: Optional[str] = Field(..., max_length=24) + fileTemplate: Optional[str] = Field(..., max_length=255) + imageSuffix: Optional[str] = Field(..., 
max_length=24) + imageDirectory: Optional[str] = Field( + ..., + max_length=255, + description="The directory where files reside - should end with a slash", + ) + imagePrefix: Optional[str] = Field(..., max_length=45) + + +class DataCollectionSummary(BaseDataCollectionOut): + comments: Optional[str] pixelSizeOnImage: Optional[float] = Field( ..., description="Pixel size on image, calculated from magnification", @@ -98,11 +115,6 @@ class DataCollectionSummary(OrmBaseModel): imageSizeY: Optional[int] = Field( ..., description="Image size in y, in case crop has been used." ) - experimenttype: Optional[str] = Field(..., max_length=24) - index: int - endTime: Optional[datetime] = Field( - ..., description="End time of the dataCollection" - ) runStatus: Optional[str] = Field(..., max_length=255) axisStart: Optional[float] axisEnd: Optional[float] @@ -112,20 +124,12 @@ class DataCollectionSummary(OrmBaseModel): startImageNumber: Optional[int] numberOfPasses: Optional[int] exposureTime: Optional[float] - imageDirectory: Optional[str] = Field( - ..., - max_length=255, - description="The directory where files reside - should end with a slash", - ) - imagePrefix: Optional[str] = Field(..., max_length=45) - imageSuffix: Optional[str] = Field(..., max_length=24) imageContainerSubPath: Optional[str] = Field( ..., max_length=255, description="""Internal path of a HDF5 file pointing to the data for this data collection""", ) - fileTemplate: Optional[str] = Field(..., max_length=255) wavelength: Optional[float] resolution: Optional[float] detectorDistance: Optional[float] @@ -156,7 +160,6 @@ class DataCollectionSummary(OrmBaseModel): averageTemperature: Optional[float] actualCenteringPosition: Optional[str] = Field(..., max_length=255) beamShape: Optional[str] = Field(..., max_length=24) - dataCollectionGroupId: int detectorId: Optional[int] screeningOrigId: Optional[int] startPositionId: Optional[int] @@ -193,6 +196,7 @@ class DataCollectionSummary(OrmBaseModel): phasePlate: 
Optional[str] dataCollectionPlanId: Optional[int] tomograms: int + globalAlignmentQuality: Optional[float] = None @field_validator("phasePlate", mode="before") def to_bool_str(cls, v): diff --git a/src/pato/routes/groups.py b/src/pato/routes/groups.py index 94d9999..12cf10b 100644 --- a/src/pato/routes/groups.py +++ b/src/pato/routes/groups.py @@ -5,6 +5,7 @@ from ..auth import User from ..crud import groups as crud +from ..models.parameters import DataCollectionSortTypes from ..models.response import DataCollectionGroupSummaryResponse, DataCollectionSummary router = APIRouter( @@ -33,9 +34,15 @@ def get_collections( page: dict[str, int] = Depends(pagination), search: Optional[str] = None, onlyTomograms: bool = False, + sortBy: DataCollectionSortTypes = "dataCollectionId", user=Depends(User), ): """List collections belonging to a data collection group""" return crud.get_collections( - groupId=groupId, search=search, user=user, onlyTomograms=onlyTomograms, **page + groupId=groupId, + search=search, + user=user, + onlyTomograms=onlyTomograms, + sortBy=sortBy, + **page, ) diff --git a/src/pato/routes/proposals.py b/src/pato/routes/proposals.py index 21650ac..b3c3e09 100644 --- a/src/pato/routes/proposals.py +++ b/src/pato/routes/proposals.py @@ -1,10 +1,11 @@ -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Body, Depends, status from lims_utils.models import Paged, pagination from ..auth import Permissions, User from ..crud import proposals as crud from ..crud import sessions as sessions_crud -from ..models.response import ProposalResponse, SessionResponse +from ..models.parameters import DataCollectionCreationParameters +from ..models.response import BaseDataCollectionOut, ProposalResponse, SessionResponse router = APIRouter( tags=["Proposals"], @@ -28,3 +29,16 @@ def get_proposals( def get_session(proposalReference=Depends(Permissions.session)): """Get individual session""" return sessions_crud.get_session(proposalReference) + + +@router.post( 
+ "/{proposalReference}/sessions/{visitNumber}/dataCollections", + response_model=BaseDataCollectionOut, + status_code=status.HTTP_201_CREATED, +) +def create_data_collection( + proposalReference=Depends(Permissions.session), + parameters: DataCollectionCreationParameters = Body(), +): + """Create data collection""" + return sessions_crud.create_data_collection(proposalReference, parameters) diff --git a/tests/collections/test_collection.py b/tests/collections/test_collection.py index d4141ba..58a7097 100644 --- a/tests/collections/test_collection.py +++ b/tests/collections/test_collection.py @@ -87,3 +87,13 @@ def test_get_pixel_size_null(mock_user, client): resp = client.get("/dataGroups/988855/dataCollections") assert resp.status_code == 200 assert resp.json()["items"][0]["pixelSizeOnImage"] is None + + +@pytest.mark.parametrize("mock_user", [admin], indirect=True) +def test_sort_by_global_alignment_quality(mock_user, client): + """Get data collections sorted by alignment quality""" + resp = client.get( + "/dataGroups/5440740/dataCollections?sortBy=globalAlignmentQuality" + ) + assert resp.status_code == 200 + assert resp.json()["items"][0]["dataCollectionId"] == 6017413 diff --git a/tests/collections/test_spa_reprocessing.py b/tests/collections/test_spa_reprocessing.py index 1cf9fa1..0409210 100644 --- a/tests/collections/test_spa_reprocessing.py +++ b/tests/collections/test_spa_reprocessing.py @@ -26,14 +26,11 @@ def active_mock(collectionId: int): "maximumDiameter": "140", "maskDiameter": "154", "boxSize": "140", - "downsampleBoxSize": "48", "import_images": "/dls/m06/data/2022/bi23047-76/raw/GridSquare_*/Data/*.mrc", "acquisition_software": "EPU", "gainReferenceFile": "gain.mrc", "performCalculation": False, "useFscCriterion": False, - "perform2DSecondPass": False, - "perform3DSecondPass": False, } @@ -55,7 +52,7 @@ def test_post(mock_permissions, client): assert resp.status_code == 202 parameters = _get_parameters(resp.json()["processingJobId"]) - assert 
len(parameters) == 21 + assert len(parameters) == 18 def test_inactive_session(mock_permissions, client): @@ -92,7 +89,7 @@ def test_post_perform_calculation(mock_permissions, client): assert resp.status_code == 202 parameters = _get_parameters(resp.json()["processingJobId"]) - assert len(parameters) == 19 + assert len(parameters) == 17 @patch("pato.crud.collections._validate_session_active", new=active_mock) diff --git a/tests/parameters/test_spa.py b/tests/parameters/test_spa.py index e029feb..8298611 100644 --- a/tests/parameters/test_spa.py +++ b/tests/parameters/test_spa.py @@ -16,9 +16,6 @@ "do_class3d", "mask_diameter", "extract_boxsize", - "extract_small_boxsize", - "do_class2d_pass2", - "do_class3d_pass2", "autopick_LoG_diam_min", "autopick_LoG_diam_max", "use_fsc_criterion", @@ -28,7 +25,6 @@ _omit_when_autocalculating = [ "mask_diameter", "extract_box_size", - "extract_small_boxsize", ] @@ -88,8 +84,8 @@ def test_empty_string_to_none(): minimumDiameter=3, maximumDiameter=4, maskDiameter="", - downsampleBoxSize="", + extractDownscale="", ) assert params.mask_diameter is None - assert params.extract_small_boxsize is None + assert params.extract_downscale is None diff --git a/tests/sessions/test_create_dc.py b/tests/sessions/test_create_dc.py new file mode 100644 index 0000000..356a4a4 --- /dev/null +++ b/tests/sessions/test_create_dc.py @@ -0,0 +1,57 @@ +from unittest.mock import patch + + +def active_mock(_): + return 27464088 + + +def raw_check_mock(_, _1): + return + + +full_params = {"fileDirectory": "raw", "fileExtension": ".tif"} + + +@patch("pato.crud.sessions._check_raw_files_exist", new=raw_check_mock) +@patch("pato.crud.sessions._validate_session_active", new=active_mock) +def test_post(mock_permissions, client): + """Create new data collection in session""" + resp = client.post( + "/proposals/cm31111/sessions/5/dataCollections", + json={**full_params, "fileDirectory": "raw2"}, + ) + assert resp.status_code == 201 + + data = resp.json() + + 
assert data["imageDirectory"] == "/dls/m12/data/2022/cm31111-5/raw2/" + + +@patch("pato.crud.sessions._check_raw_files_exist", new=raw_check_mock) +@patch("pato.crud.sessions._validate_session_active", new=active_mock) +def test_create_existing_collection(mock_permissions, client): + """Raise exception if a collection pointing to the specified folders already exists""" + resp = client.post( + "/proposals/cm31111/sessions/5/dataCollections", + json=full_params, + ) + assert resp.status_code == 400 + + +@patch("pato.crud.sessions._validate_session_active", new=active_mock) +def test_inexistent_files(mock_permissions, client): + """Raise exception if raw files do not exist""" + resp = client.post( + "/proposals/cm31111/sessions/5/dataCollections", + json=full_params, + ) + assert resp.status_code == 404 + + +def test_inactive_session(mock_permissions, client): + """Raise exception if session is inactive""" + resp = client.post( + "/proposals/cm31111/sessions/5/dataCollections", + json=full_params, + ) + assert resp.status_code == 423