Skip to content

Commit

Permalink
Release v1.3.0
Browse files Browse the repository at this point in the history
  • Loading branch information
gfrn committed Feb 9, 2024
1 parent 2388ec3 commit 8866566
Show file tree
Hide file tree
Showing 12 changed files with 283 additions and 65 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@
Changelog
==========

+++++++++++++++++++
v1.3.0 (07/02/2024)
+++++++++++++++++++

**Added**

- Data collection creation endpoint (:code:`/proposals/{propId}/sessions/{sessionId}/dataCollections`)
- :code:`sortBy` argument to data collection listing endpoint

+++++++++++++++++++
v1.2.3 (05/02/2024)
+++++++++++++++++++
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ dependencies = [
"mysql-connector-python~=8.2.0",
"pydantic~=2.5.3",
"types-requests",
"lims-utils~=0.1.1"
"lims-utils~=0.1.2"
]
dynamic = ["version"]
license.file = "LICENSE"
Expand Down
49 changes: 32 additions & 17 deletions src/pato/crud/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@
from sqlalchemy import select

from ..auth import User
from ..models.parameters import DataCollectionSortTypes
from ..models.response import DataCollectionGroupSummaryResponse, DataCollectionSummary
from ..utils.auth import check_session
from ..utils.database import db, paginate, unravel
from ..utils.generic import parse_proposal


def get_collection_groups(
Expand All @@ -26,6 +28,7 @@ def get_collection_groups(
search: Optional[str],
user: User,
) -> Paged[DataCollectionGroupSummaryResponse]:

query = (
select(
*unravel(DataCollectionGroup),
Expand All @@ -40,14 +43,18 @@ def get_collection_groups(
.group_by(DataCollectionGroup.dataCollectionGroupId)
)

if search is not None:
if search is not None and search != "":
query = query.filter(DataCollectionGroup.comments.contains(search))

if proposal:
proposal_reference = parse_proposal(proposal)
session_id_query = (
select(BLSession.sessionId)
.select_from(Proposal)
.where(f.concat(Proposal.proposalCode, Proposal.proposalNumber) == proposal)
.where(
Proposal.proposalCode == proposal_reference.code,
Proposal.proposalNumber == proposal_reference.number,
)
.join(BLSession)
)

Expand All @@ -59,6 +66,7 @@ def get_collection_groups(
query = query.filter(
DataCollectionGroup.sessionId == db.session.scalar(session_id_query)
)

else:
query = query.filter(
DataCollectionGroup.sessionId.in_(
Expand All @@ -74,24 +82,31 @@ def get_collections(
page: int,
groupId: Optional[int],
search: Optional[str],
sortBy: DataCollectionSortTypes,
user: User,
onlyTomograms: bool,
) -> Paged[DataCollectionSummary]:
sort = (
Tomogram.globalAlignmentQuality.desc()
if sortBy == "globalAlignmentQuality"
else DataCollection.dataCollectionId
)

base_sub_query = (
select(
f.row_number().over(order_by=sort).label("index"),
*unravel(DataCollection),
f.count(Tomogram.tomogramId).label("tomograms"),
Tomogram.globalAlignmentQuality,
)
.select_from(DataCollection)
.join(BLSession, BLSession.sessionId == DataCollection.SESSIONID)
.join(Tomogram, isouter=(not onlyTomograms))
.group_by(DataCollection.dataCollectionId)
)

sub_with_row = check_session(
(
select(
f.row_number()
.over(order_by=DataCollection.dataCollectionId)
.label("index"),
*unravel(DataCollection),
f.count(Tomogram.tomogramId).label("tomograms"),
)
.select_from(DataCollection)
.join(BLSession, BLSession.sessionId == DataCollection.SESSIONID)
.join(Tomogram, isouter=(not onlyTomograms))
.group_by(DataCollection.dataCollectionId)
.order_by(DataCollection.dataCollectionId)
),
base_sub_query,
user,
)

Expand All @@ -104,7 +119,7 @@ def get_collections(

query = select(*sub_result.c)

if search is not None:
if search is not None and search != "":
query = query.filter(sub_result.c.comments.contains(search))

return paginate(query, limit, page, slow_count=True)
124 changes: 116 additions & 8 deletions src/pato/crud/sessions.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,49 @@
import pathlib
from datetime import datetime
from typing import Optional

from fastapi import HTTPException, status
from lims_utils.models import Paged
from lims_utils.tables import BLSession, DataCollectionGroup, Proposal
from sqlalchemy import Label, and_, or_, select
from sqlalchemy import func as f
from lims_utils.tables import BLSession, DataCollection, DataCollectionGroup, Proposal
from sqlalchemy import Label, and_, extract, func, insert, or_, select

from ..auth import User
from ..models.parameters import DataCollectionCreationParameters
from ..models.response import SessionResponse
from ..utils.auth import check_session
from ..utils.database import db, fast_count, paginate, unravel
from ..utils.generic import ProposalReference, parse_proposal
from ..utils.generic import ProposalReference, check_session_active, parse_proposal


def _validate_session_active(proposalReference: ProposalReference):
    """Check that the referenced session exists and is still active.

    Args:
        proposalReference: Parsed proposal code/number plus visit number.

    Returns:
        The session ID of the matching session.

    Raises:
        HTTPException: 404 if no matching session exists; 423 if the
            session's end date shows it is no longer active.
    """
    session = db.session.scalar(
        select(BLSession)
        .select_from(Proposal)
        .join(BLSession)
        .filter(
            BLSession.visit_number == proposalReference.visit_number,
            Proposal.proposalNumber == proposalReference.number,
            Proposal.proposalCode == proposalReference.code,
        )
    )

    # Guard against an unknown proposal/visit: previously a missing row made
    # session None and the endDate access below raised an unhandled
    # AttributeError (HTTP 500) instead of a clean client error.
    if session is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Session not found",
        )

    # NOTE(review): this helper is also called from data collection creation,
    # where the "Reprocessing" wording is slightly misleading — consider a
    # caller-supplied detail message.
    if not check_session_active(session.endDate):
        raise HTTPException(
            status_code=status.HTTP_423_LOCKED,
            detail="Reprocessing cannot be fired on an inactive session",
        )

    return session.sessionId


def _check_raw_files_exist(file_directory: str, glob_path: str):
"""Check if raw data files exist in the filesystem"""
if not any(pathlib.Path(file_directory).glob(glob_path)):
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No raw files found in session directory",
)


def get_sessions(
Expand All @@ -27,12 +59,16 @@ def get_sessions(
countCollections: bool,
) -> Paged[SessionResponse]:
fields: list[Label[str] | Label[int]] = [
f.concat(Proposal.proposalCode, Proposal.proposalNumber).label("parentProposal")
func.concat(Proposal.proposalCode, Proposal.proposalNumber).label(
"parentProposal"
)
]

if countCollections:
fields.append(
f.count(DataCollectionGroup.dataCollectionGroupId).label("collectionGroups")
func.count(DataCollectionGroup.dataCollectionGroupId).label(
"collectionGroups"
)
)

query = select(*unravel(BLSession), *fields)
Expand Down Expand Up @@ -65,7 +101,7 @@ def get_sessions(
if maxStartDate is not None:
query = query.filter(BLSession.startDate <= maxStartDate)

if search is not None:
if search is not None and search != "":
query = query.filter(
or_(
BLSession.beamLineName.contains(search),
Expand All @@ -89,7 +125,7 @@ def get_session(proposalReference: ProposalReference):
query = (
select(
*unravel(BLSession),
f.concat(Proposal.proposalCode, Proposal.proposalNumber).label(
func.concat(Proposal.proposalCode, Proposal.proposalNumber).label(
"parentProposal"
),
)
Expand All @@ -110,3 +146,75 @@ def get_session(proposalReference: ProposalReference):
)

return session


def create_data_collection(
    proposalReference: ProposalReference, params: DataCollectionCreationParameters
):
    """Create a new data collection (and owning group) in an active session.

    Validates that the session is active, that raw files exist on disk under
    the conventional visit path, and that an identical data collection does
    not already exist, before inserting a DataCollectionGroup row and a
    DataCollection row and committing.

    Args:
        proposalReference: Parsed proposal code/number plus visit number.
        params: User-supplied file directory (relative to the visit) and raw
            file extension.

    Returns:
        The newly inserted DataCollection row.

    Raises:
        HTTPException: 423 if the session is inactive (via
            _validate_session_active), 404 if no raw files match (via
            _check_raw_files_exist), 400 if an identical collection exists.
    """
    session_id = _validate_session_active(proposalReference)

    # Fetch the pieces needed to build the filesystem path: beamline, start
    # year and the visit name (e.g. "cm12345-6").
    session = db.session.execute(
        select(
            BLSession.beamLineName,
            BLSession.endDate,
            extract("year", BLSession.startDate).label("year"),
            func.concat(
                Proposal.proposalCode,
                Proposal.proposalNumber,
                "-",
                BLSession.visit_number,
            ).label("name"),
        )
        .filter(BLSession.sessionId == session_id)
        .join(Proposal, Proposal.proposalId == BLSession.proposalId)
    ).one()

    # TODO: Make the path string pattern configurable?
    file_directory = f"/dls/{session.beamLineName}/data/{session.year}/{session.name}/{params.fileDirectory}/"
    glob_path = f"GridSquare_*/Data/*{params.fileExtension}"

    # Raises 404 if nothing matches the glob on disk.
    _check_raw_files_exist(file_directory, glob_path)

    # Reject duplicates: same directory + template within the same session.
    existing_data_collection = db.session.scalar(
        select(DataCollection.dataCollectionId)
        .filter(
            DataCollection.imageDirectory == file_directory,
            DataCollection.fileTemplate == glob_path,
            DataCollectionGroup.sessionId == session_id,
        )
        .join(DataCollectionGroup)
        .limit(1)
    )

    if existing_data_collection is not None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Data collection already exists",
        )

    # Insert the owning group first; its generated ID is needed for the
    # collection row below.
    dcg_id = db.session.scalar(
        insert(DataCollectionGroup).returning(
            DataCollectionGroup.dataCollectionGroupId
        ),
        {
            "sessionId": session_id,
            "comments": "Created by PATo",
            "experimentType": "EM",
        },
    )

    data_collection = db.session.scalar(
        insert(DataCollection).returning(DataCollection),
        {
            "dataCollectionGroupId": dcg_id,
            "endTime": session.endDate,
            "runStatus": "Created by PATo",
            "imageDirectory": file_directory,
            "fileTemplate": glob_path,
            "imageSuffix": params.fileExtension,
        },
    )

    # Single commit covers both inserts, so a failure leaves no orphan group.
    db.session.commit()

    return data_collection
19 changes: 10 additions & 9 deletions src/pato/models/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@
"do_class3d",
"mask_diameter",
"extract_boxsize",
"extract_small_boxsize",
"do_class2d_pass2",
"do_class3d_pass2",
"autopick_LoG_diam_min",
"autopick_LoG_diam_max",
"use_fsc_criterion",
Expand All @@ -22,7 +19,6 @@
_omit_when_autocalculating = [
"mask_diameter",
"extract_box_size",
"extract_small_boxsize",
]


Expand Down Expand Up @@ -55,13 +51,8 @@ class SPAReprocessingParameters(BaseModel):
extract_boxsize: Optional[float] = Field(
ge=0.1, le=1024, alias="boxSize", default=None
)
extract_small_boxsize: Optional[float] = Field(
ge=0.1, le=1024, alias="downsampleBoxSize", default=None
)
performCalculation: bool = Field(default=True, exclude=True)
use_fsc_criterion: Optional[bool] = Field(default=False, alias="useFscCriterion")
do_class2d_pass2: Optional[bool] = Field(default=True, alias="perform2DSecondPass")
do_class3d_pass2: Optional[bool] = Field(default=False, alias="perform3DSecondPass")
autopick_LoG_diam_min: Optional[float] = Field(
ge=0.02, le=1024.0, alias="minimumDiameter", default=None
)
Expand Down Expand Up @@ -110,3 +101,13 @@ def check_dynamically_required_fields(self):
raise ValueError("maximumDiameter must be greater than minimumDiameter")

return self


class DataCollectionCreationParameters(BaseModel):
    """Request body for the data collection creation endpoint."""

    # Directory holding the raw data, relative to the visit's data directory
    fileDirectory: str
    # Raw data file extension used when globbing for movies (e.g. ".tiff")
    fileExtension: str


# mypy doesn't support type aliases yet

# Allowed values for the `sortBy` query parameter on the data collection
# listing endpoint.
DataCollectionSortTypes = Literal["dataCollectionId", "globalAlignmentQuality"]
Loading

0 comments on commit 8866566

Please sign in to comment.