Release v1.8.0
gfrn committed Aug 28, 2024
1 parent 09c78cc commit b374a2e
Showing 16 changed files with 291 additions and 66 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.rst
@@ -2,6 +2,17 @@
 Changelog
 ==========
 
++++++++++
+v1.8.0 (28/08/2024)
++++++++++
+
+**Added**
+
+- Particle count per defocus value endpoint (:code:`/dataCollections/{collectionId}/ctf`)
+- Particle count per resolution bin endpoint (:code:`/dataCollections/{collectionId}/particleCountPerResolution`)
+- Custom model upload endpoint
+- Sample handling redirect endpoint
+
 +++++++++
 v1.7.0 (20/06/2024)
 +++++++++
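The two new data endpoints return ItemList[DataPoint] payloads — a list of {x, y} items, per the response models wired up in src/pato/crud/collections.py below. A minimal client-side sketch in Python; the base URL, collection ID, and lack of auth handling are assumptions, not part of this commit:

import requests

BASE_URL = "https://pato.example.ac.uk/api"  # hypothetical deployment URL
collection_id = 123  # hypothetical data collection ID

# Particle count per defocus value: items are {x: defocus, y: particle count}
ctf = requests.get(f"{BASE_URL}/dataCollections/{collection_id}/ctf").json()

# Particle count per resolution bin: items are {x: bin label, y: particle count}
res = requests.get(
    f"{BASE_URL}/dataCollections/{collection_id}/particleCountPerResolution"
).json()

for point in res["items"]:
    print(point["x"], point["y"])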
2 changes: 1 addition & 1 deletion Dockerfile
@@ -3,7 +3,7 @@
 # The devcontainer should use the build target and run as root with podman
 # or docker with user namespaces.
 #
-FROM docker.io/library/python:3.12.1-slim-bullseye as build
+FROM docker.io/library/python:3.12.4-slim-bookworm as build
 
 # Add any system dependencies for the developer/build environment here
 RUN apt-get update && apt-get upgrade -y && \
3 changes: 2 additions & 1 deletion config.json
@@ -36,7 +36,8 @@
     "contact_email": "[email protected]",
     "smtp_port": 8025,
     "smtp_server": "mail.service.com",
-    "active_session_cutoff": 5
+    "active_session_cutoff": 5,
+    "sample_handling_url": "https://ebic-sample-handling.diamond.ac.uk"
   },
   "enable_cors": false
 }
13 changes: 7 additions & 6 deletions pyproject.toml
@@ -14,16 +14,17 @@ classifiers = [
 ]
 description = "PATO's backend"
 dependencies = [
+    "python-multipart~=0.0.9",
     "pika~=1.3.2",
-    "SQLAlchemy~=2.0.16",
-    "fastapi~=0.109.0",
-    "uvicorn[standard]~=0.22.0",
-    "requests~=2.31.0",
+    "SQLAlchemy~=2.0.31",
+    "fastapi~=0.111.0",
+    "uvicorn[standard]~=0.30.1",
+    "requests~=2.32.3",
     "mysqlclient~=2.1.1",
     "mysql-connector-python~=8.2.0",
     "pydantic~=2.5.3",
     "types-requests",
-    "lims-utils~=0.1.2"
+    "lims-utils~=0.2.2"
 ]
 dynamic = ["version"]
 license.file = "LICENSE"
@@ -111,7 +112,7 @@ setenv =
 [tool.ruff]
 src = ["src", "tests"]
 line-length = 120
-select = [
+lint.select = [
     "C4", # flake8-comprehensions - https://beta.ruff.rs/docs/rules/#flake8-comprehensions-c4
     "E", # pycodestyle errors - https://beta.ruff.rs/docs/rules/#error-e
     "F", # pyflakes rules - https://beta.ruff.rs/docs/rules/#pyflakes-f
79 changes: 76 additions & 3 deletions src/pato/crud/collections.py
@@ -12,21 +12,28 @@
     DataCollectionGroup,
     MotionCorrection,
     Movie,
+    ParticlePicker,
     ProcessingJob,
     ProcessingJobParameter,
     Proposal,
     TiltImageAlignment,
     Tomogram,
 )
-from sqlalchemy import Column, and_, case, extract, func, select
+from sqlalchemy import Column, ColumnElement, Select, and_, case, extract, func, select
 
 from ..models.parameters import (
     SPAReprocessingParameters,
     TomogramReprocessingParameters,
 )
-from ..models.response import FullMovie, ProcessingJobResponse, TomogramFullResponse
+from ..models.response import (
+    DataPoint,
+    FullMovie,
+    ItemList,
+    ProcessingJobResponse,
+    TomogramFullResponse,
+)
 from ..utils.database import db, paginate
-from ..utils.generic import check_session_active
+from ..utils.generic import check_session_active, parse_count
 from ..utils.pika import pika_publisher
 
 _job_status_description = case(
@@ -292,3 +299,69 @@ def get_processing_jobs(
     )
 
     return paginate(query, limit, page, slow_count=False)
+
+
+def _with_ctf_joins(query: Select, collectionId: int):
+    return (
+        query.select_from(ProcessingJob)
+        .filter(ProcessingJob.dataCollectionId == collectionId)
+        .join(AutoProcProgram)
+        .join(MotionCorrection)
+        .join(CTF, CTF.motionCorrectionId == MotionCorrection.motionCorrectionId)
+        .join(
+            ParticlePicker,
+            ParticlePicker.firstMotionCorrectionId
+            == MotionCorrection.motionCorrectionId,
+        )
+    )
+
+
+def get_ctf(collectionId: int):
+    data = db.session.execute(
+        _with_ctf_joins(
+            select(
+                CTF.estimatedDefocus.label("x"),
+                ParticlePicker.numberOfParticles.label("y"),
+            ),
+            collectionId,
+        ).group_by(MotionCorrection.imageNumber)
+    ).all()
+
+    return ItemList[DataPoint](items=data)
+
+
+def _histogram_sum_bin(condition: ColumnElement):
+    return func.coalesce(
+        func.sum(
+            case(
+                (
+                    condition,
+                    ParticlePicker.numberOfParticles,
+                ),
+            )
+        ),
+        0,
+    )
+
+
+def get_particle_count_per_resolution(collectionId: int) -> ItemList[DataPoint]:
+    data = parse_count(
+        _with_ctf_joins(
+            select(
+                _histogram_sum_bin(CTF.estimatedResolution < 1).label("<1"),
+                *[
+                    _histogram_sum_bin(
+                        and_(
+                            CTF.estimatedResolution >= i,
+                            CTF.estimatedResolution < i + 1,
+                        )
+                    ).label(str(i))
+                    for i in range(1, 9)
+                ],
+                _histogram_sum_bin(CTF.estimatedResolution >= 9).label(">9"),
+            ),
+            collectionId,
+        )
+    )
+
+    return data
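get_particle_count_per_resolution computes the whole histogram in a single row: one labelled conditional SUM per bin, which parse_count then unpacks into {x, y} items. For illustration only, a pure-Python sketch of the same binning (bin edges mirror the query above):

def bin_particle_counts(rows):
    """rows: (estimatedResolution, numberOfParticles) pairs, one per micrograph."""
    bins = {"<1": 0, **{str(i): 0 for i in range(1, 9)}, ">9": 0}
    for resolution, particles in rows:
        if resolution < 1:
            bins["<1"] += particles
        elif resolution >= 9:
            bins[">9"] += particles
        else:
            bins[str(int(resolution))] += particles  # resolution falls in [i, i + 1)
    return bins

# bin_particle_counts([(0.8, 100), (3.2, 250), (3.9, 50), (12.0, 7)])
# -> {"<1": 100, ..., "3": 300, ..., ">9": 7}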
30 changes: 8 additions & 22 deletions src/pato/crud/generic.py
@@ -1,6 +1,5 @@
 from typing import Literal
 
-from fastapi import HTTPException, status
 from lims_utils.tables import (
     CTF,
     AutoProcProgram,
@@ -11,11 +10,11 @@
     ProcessingJob,
     RelativeIceThickness,
 )
-from sqlalchemy import Column, and_, case, literal_column, select
+from sqlalchemy import Column, and_, case, select
 from sqlalchemy import func as f
 
 from ..models.response import DataPoint, ItemList
-from ..utils.database import db
+from ..utils.generic import parse_count
 
 
 def _generate_buckets(bin: float, minimum: float, column: Column):
@@ -33,22 +32,9 @@ def _generate_buckets(bin: float, minimum: float, column: Column):
                     )
                 )
             ).label(str(bin * i + minimum))
-            for i in range(0, 10)
+            for i in range(0, 8)
         ],
-        f.count(case((column >= bin * 10 + minimum, 1))).label(f">{bin*10+minimum}"),
+        f.count(case((column >= bin * 8 + minimum, 1))).label(f">{bin*8+minimum}"),
     )
-
-
-def _parse_count(query):
-    data = db.session.execute(query.order_by(literal_column("1"))).mappings().one()
-    if not any(value != 0 for value in data.values()):
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail="No items found",
-        )
-
-    return ItemList[DataPoint](
-        items=[{"x": key, "y": value} for (key, value) in dict(data).items()]
-    )
 
 
@@ -73,7 +59,7 @@ def get_ice_histogram(
         .join(RelativeIceThickness)
     )
 
-    return _parse_count(query)
+    return parse_count(query)
 
 
 def get_motion(
@@ -94,7 +80,7 @@ def get_motion(
         .join(MotionCorrection)
     )
 
-    return _parse_count(query)
+    return parse_count(query)
 
 
 def get_resolution(
@@ -116,7 +102,7 @@ def get_resolution(
         .join(CTF)
     )
 
-    return _parse_count(query)
+    return parse_count(query)
 
 
 def get_particle_count(
@@ -137,4 +123,4 @@ def get_particle_count(
         .join(ParticlePicker)
     )
 
-    return _parse_count(query)
+    return parse_count(query)
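The module-private _parse_count helper has moved to ..utils.generic as parse_count, now shared with collections.py above. The new module is not shown in this diff; presumably it keeps the shape of the deleted helper — a sketch under that assumption (the import paths are guesses):

# src/pato/utils/generic.py (sketch — not part of this diff)
from fastapi import HTTPException, status
from sqlalchemy import literal_column

from ..models.response import DataPoint, ItemList
from .database import db


def parse_count(query) -> ItemList[DataPoint]:
    # Histogram queries return a single row of labelled columns,
    # e.g. {"<1": 0, "1": 42, ..., ">9": 7}.
    data = db.session.execute(query.order_by(literal_column("1"))).mappings().one()

    # An all-zero row means nothing matched the filters.
    if not any(value != 0 for value in data.values()):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No items found",
        )

    return ItemList[DataPoint](
        items=[{"x": key, "y": value} for (key, value) in dict(data).items()]
    )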
84 changes: 55 additions & 29 deletions src/pato/crud/sessions.py
@@ -1,12 +1,14 @@
 import pathlib
+import shutil
 from datetime import datetime
 from typing import Optional
 
-from fastapi import HTTPException, status
+from fastapi import HTTPException, UploadFile, status
 from lims_utils.auth import GenericUser
+from lims_utils.logging import app_logger
 from lims_utils.models import Paged
 from lims_utils.tables import BLSession, DataCollection, DataCollectionGroup, Proposal
-from sqlalchemy import Label, and_, extract, func, insert, or_, select
+from sqlalchemy import Label, and_, func, insert, or_, select
 
 from ..models.parameters import DataCollectionCreationParameters
 from ..models.response import SessionAllowsReprocessing, SessionResponse
@@ -15,17 +17,19 @@
 from ..utils.database import db, paginate, unravel
 from ..utils.generic import ProposalReference, check_session_active, parse_proposal
 
+HDF5_FILE_SIGNATURE = b"\x89\x48\x44\x46\x0d\x0a\x1a\x0a"
+
 
-def _validate_session_active(proposalReference: ProposalReference):
+def _validate_session_active(proposal_reference: ProposalReference):
     """Check if session is active and return session ID"""
     session = db.session.scalar(
         select(BLSession)
         .select_from(Proposal)
         .join(BLSession)
         .filter(
-            BLSession.visit_number == proposalReference.visit_number,
-            Proposal.proposalNumber == proposalReference.number,
-            Proposal.proposalCode == proposalReference.code,
+            BLSession.visit_number == proposal_reference.visit_number,
+            Proposal.proposalNumber == proposal_reference.number,
+            Proposal.proposalCode == proposal_reference.code,
         )
     )
 
@@ -35,7 +39,20 @@ def _validate_session_active(proposalReference: ProposalReference):
             detail="Reprocessing cannot be fired on an inactive session",
         )
 
-    return session.sessionId
+    assert session is not None
+
+    return session
+
+
+def _get_folder_and_visit(prop_ref: ProposalReference):
+    session = _validate_session_active(prop_ref)
+    year = session.startDate.year
+
+    # TODO: Make the path string pattern configurable?
+    return (
+        f"/dls/{session.beamLineName}/data/{year}/{prop_ref.code}{prop_ref.number}-{prop_ref.visit_number}",
+        session,
+    )
 
 
 def _check_raw_files_exist(file_directory: str, glob_path: str):
@@ -153,26 +170,8 @@ def get_session(proposalReference: ProposalReference):
 def create_data_collection(
     proposalReference: ProposalReference, params: DataCollectionCreationParameters
 ):
-    session_id = _validate_session_active(proposalReference)
-
-    session = db.session.execute(
-        select(
-            BLSession.beamLineName,
-            BLSession.endDate,
-            extract("year", BLSession.startDate).label("year"),
-            func.concat(
-                Proposal.proposalCode,
-                Proposal.proposalNumber,
-                "-",
-                BLSession.visit_number,
-            ).label("name"),
-        )
-        .filter(BLSession.sessionId == session_id)
-        .join(Proposal, Proposal.proposalId == BLSession.proposalId)
-    ).one()
-
-    # TODO: Make the path string pattern configurable?
-    file_directory = f"/dls/{session.beamLineName}/data/{session.year}/{session.name}/{params.fileDirectory}/"
+    session_folder, session = _get_folder_and_visit(proposalReference)
+    file_directory = f"{session_folder}/{params.fileDirectory}/"
     glob_path = f"GridSquare_*/Data/*{params.fileExtension}"
 
     _check_raw_files_exist(file_directory, glob_path)
@@ -182,7 +181,7 @@ def create_data_collection(
         .filter(
             DataCollection.imageDirectory == file_directory,
             DataCollection.fileTemplate == glob_path,
-            DataCollectionGroup.sessionId == session_id,
+            DataCollectionGroup.sessionId == session.sessionId,
         )
         .join(DataCollectionGroup)
         .limit(1)
@@ -199,7 +198,7 @@ def create_data_collection(
                 DataCollectionGroup.dataCollectionGroupId
             ),
             {
-                "sessionId": session_id,
+                "sessionId": session.sessionId,
                 "comments": "Created by PATo",
                 "experimentType": "EM",
             },
@@ -237,3 +236,30 @@ def check_reprocessing_enabled(proposalReference: ProposalReference):
     return SessionAllowsReprocessing(
         allowReprocessing=((bool(Config.mq.user)) and check_session_active(end_date)),
     )
+
+
+def upload_processing_model(file: UploadFile, proposal_reference: ProposalReference):
+    file_path = (
+        f"{_get_folder_and_visit(proposal_reference)[0]}/processing/{file.filename}"
+    )
+    file_signature = file.file.read(8)
+    file.file.seek(0)
+
+    if file_signature != HDF5_FILE_SIGNATURE:
+        raise HTTPException(
+            detail="Invalid file type (must be HDF5 file)",
+            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
+        )
+
+    try:
+        with open(file_path, "wb") as f:
+            shutil.copyfileobj(file.file, f)
+    except OSError as e:
+        file.file.close()
+        app_logger.error(f"Failed to upload {file.filename}: {e}")
+        raise HTTPException(
+            detail="Failed to upload file",
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+        )
+
+    file.file.close()
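The upload handler only accepts files that begin with the eight-byte HDF5 signature. A client-side sketch of a matching upload; the route path is an assumption (the route registration is not part of this diff) — only the signature check comes from the code above:

import requests

# Hypothetical endpoint path — check PATo's router for the real one.
url = "https://pato.example.ac.uk/api/proposals/cm12345/sessions/1/processingModel"

with open("model.h5", "rb") as f:
    # The server rejects anything whose first 8 bytes aren't the HDF5 signature
    assert f.read(8) == b"\x89HDF\r\n\x1a\n"
    f.seek(0)
    response = requests.post(url, files={"file": ("model.h5", f)})

response.raise_for_status()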