Write BIDS json sidecar for each modalities #21

balbasty · 2024-11-13T21:31:05Z

We should write BIDS-compatible JSON sidecars for most of these files. But we need data-generating sites to provide us with proper metadata.

See: https://bids-specification.readthedocs.io/en/stable/modality-specific-files/microscopy.html#microscopy-metadata-sidecar-json

We should probably also save info about the data-generating sites themselves, similar to what's in the MRI sidecars

See: https://bids-specification.readthedocs.io/en/stable/modality-specific-files/magnetic-resonance-imaging-data.html#institution-information

chourroutm · 2024-11-21T19:29:18Z

This is the BIDS sidecar JSON we got for the HiP-CT data: micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.json; it is automatically derived from the metadata generated with the HiP-CT release (@dstansby, maybe you'd like to elaborate?)

balbasty · 2024-11-21T20:23:50Z

Thanks @chourroutm ! That's exactly what we'd like to do for the other modalities :)

chourroutm · 2024-11-26T11:22:16Z

Are you interested in our script to populate it?

"metadata.py"

from pathlib import Path
import json
import pandas

# Input metadata file, resolved relative to the current working directory.
path_input_metadata = Path("./metadata.json")

# Decode explicitly as UTF-8 instead of relying on the platform's default
# locale encoding, which may differ (e.g. on Windows).
with open(path_input_metadata, 'r', encoding='utf-8') as input_file:
    input_metadata = json.load(input_file)

# --- Entity labels ----------------------------------------------------------
# Per BIDS convention, the label that follows the `sub-` / `sample-` / `ses-`
# prefix is alphanumeric only, without dashes.

# Numeric placeholder `01`; a `participant_name` is introduced alongside it.
participant_id = "sub-01"

# `brain` is used as the sample label here... it needs to be adapted!
sample_id = "sample-brain"

# Numeric placeholder `01`. Sessions have a unique `session_doi` (this may
# vary across facilities) and a unique 3-entry key:
# `acq_time` + `proposal_facility` + `proposal_reference`
session_id = "ses-01"

# --- Schema and output locations --------------------------------------------
# BIDS convention
path_participants_schema = Path("..") / "participants.json"

# BIDS convention
path_samples_schema = Path("..") / "samples.json"

path_sessions_schema = Path("..") / participant_id / f"{participant_id}_sessions.json"

# This file is not part of the BIDS structure but it was convenient to use the
# same JSON template to define the keys.
path_sidecar_file_to_dataset_schema = Path(".") / "sidecar_file_to_dataset_schema.json"

# This file IS part of the BIDS structure
path_sidecar_file_to_dataset = (
    Path("..")
    / participant_id
    / session_id
    / "micr"
    / f"{participant_id}_{session_id}_{sample_id}_XPCT.json"
)

# Helper: convert a snake_case string to TitleCase, e.g. "n_ref" -> "NRef"
def to_title_case(snake_str):
    """Title-case every underscore-separated piece of `snake_str` and join them.

    The underscores themselves are dropped from the result.
    """
    return ''.join(map(str.title, snake_str.split('_')))

# Recursive function to update all keys to title case
def keys_to_title_case(obj):
    """Return a copy of `obj` in which every dict key is converted to TitleCase.

    Dicts are rebuilt with each key passed through `to_title_case`; their
    values, and the items of lists, are processed recursively so that nested
    dicts are converted as well. Any other value is returned unchanged.
    """
    if isinstance(obj, dict):
        # Recurse into the values too, otherwise keys of nested dicts would
        # keep their original snake_case spelling.
        return {
            to_title_case(key): keys_to_title_case(value)
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [keys_to_title_case(item) for item in obj]
    return obj

# Build the one-row participants table: columns come from the keys of the
# participants schema, values from the `donor` entry of the input metadata.
with open(path_participants_schema, 'r', encoding='utf-8') as participants_schema_file:
    participants_schema = json.load(participants_schema_file)
print(participants_schema.keys())
# Every column declared in the schema starts out empty (None).
participants_obj = dict.fromkeys(participants_schema, None)
participants_obj['participant_id'] = participant_id # BIDS convention
participants_obj['participant_name'] = input_metadata['donor']['id']
participants_obj['age'] = input_metadata['donor']['age']
participants_obj['sex'] = input_metadata['donor']['sex']
participants_obj['weight'] = input_metadata['donor']['weight']
participants_obj['height'] = input_metadata['donor']['height']
participants_obj['medical_information'] = input_metadata['donor']['medical_history'] + "; died from " + input_metadata['donor']['cause_of_death']
participants_obj['species'] = "homo sapiens" # BIDS convention
participants_obj['institute'] = "Laboratoire d'Anatomie des Alpes Françaises" if input_metadata['donor']['id'].startswith("LADAF") else None

# The record is wrapped in a list so that it becomes a single row:
# `pandas.DataFrame` raises ValueError when given a dict made only of scalars.
# Missing values are written as `n/a`, as BIDS expects in tabular files.
pandas.DataFrame([participants_obj]).to_csv(
    path_participants_schema.with_suffix(".tsv"),
    sep='\t',
    index=False,
    na_rep="n/a",
)

# Build the one-row samples table: columns come from the keys of the samples
# schema.
with open(path_samples_schema, 'r', encoding='utf-8') as samples_schema_file:
    samples_schema = json.load(samples_schema_file)
print(samples_schema.keys())
# Every column declared in the schema starts out empty (None).
samples_obj = dict.fromkeys(samples_schema, None)
samples_obj['sample_id'] = sample_id
samples_obj['participant_id'] = participant_id
samples_obj['sample_type'] = "tissue" # BIDS convention
#samples_obj['sample_info'] = ""
#samples_obj['sample_preparation'] = ""

# The record is wrapped in a list so that it becomes a single row:
# `pandas.DataFrame` raises ValueError when given a dict made only of scalars.
# Missing values are written as `n/a`, as BIDS expects in tabular files.
pandas.DataFrame([samples_obj]).to_csv(
    path_samples_schema.with_suffix(".tsv"),
    sep='\t',
    index=False,
    na_rep="n/a",
)

# Build the one-row sessions table: columns come from the keys of the sessions
# schema, values from the `proposal` entry of the input metadata.
with open(path_sessions_schema, 'r', encoding='utf-8') as sessions_schema_file:
    sessions_schema = json.load(sessions_schema_file)
print(sessions_schema.keys())
# Every column declared in the schema starts out empty (None).
sessions_obj = dict.fromkeys(sessions_schema, None)
sessions_obj['session_id'] = session_id # BIDS convention
# Note that the following line might not be accurate as it is not necessarily 
# the first day of the beamtime
#sessions_obj['acq_time'] = f"{input_metadata['scan']['date']}T00:00:00"
sessions_obj['proposal_facility'] = "ESRF"
sessions_obj['proposal_reference'] = input_metadata['proposal']['proposal_number']
sessions_obj['proposal_title'] = input_metadata['proposal']['title']
sessions_obj['proposal_members'] = ", ".join(input_metadata['proposal']['proposers'])

# The record is wrapped in a list so that it becomes a single row:
# `pandas.DataFrame` raises ValueError when given a dict made only of scalars.
# Missing values are written as `n/a`, as BIDS expects in tabular files.
pandas.DataFrame([sessions_obj]).to_csv(
    path_sessions_schema.with_suffix(".tsv"),
    sep='\t',
    index=False,
    na_rep="n/a",
)

# Build the JSON sidecar for the dataset: keys come from the sidecar schema,
# values from the input metadata and from constants specific to this dataset.
with open(path_sidecar_file_to_dataset_schema, 'r', encoding='utf-8') as sidecar_file_to_dataset_schema_file:
    sidecar_file_to_dataset_schema = json.load(sidecar_file_to_dataset_schema_file)
print(sidecar_file_to_dataset_schema.keys())
# NB: this file uses TitleCase for the keys of the JSON.
sidecar_file_to_dataset_obj = dict.fromkeys(sidecar_file_to_dataset_schema, None)
sidecar_file_to_dataset_obj['InstitutionName'] = "European Synchrotron Radiation Facility"
sidecar_file_to_dataset_obj['StationName'] = "BM18 EBS" if input_metadata['name'].endswith("_bm18") else None
sidecar_file_to_dataset_obj['BodyPart'] = "BRAIN" if input_metadata['organ'] == "brain" else None
sidecar_file_to_dataset_obj['SampleEnvironment'] = "ex vivo"
sidecar_file_to_dataset_obj['SampleFixation'] = "formalin"
#sidecar_file_to_dataset_obj['SampleEmbedding'] = "mixed agar gel at 70% ethanol"
# The same resolution value is repeated for the three entries of PixelSize.
sidecar_file_to_dataset_obj['PixelSize'] = [input_metadata['resolution_um']] * 3
sidecar_file_to_dataset_obj['PixelSizeUnits'] = "um"
sidecar_file_to_dataset_obj['AcquisitionParameters'] = keys_to_title_case(input_metadata['scan'])
# `dict.fromkeys` initialised 'ProcessingParameters' to None, which cannot be
# subscripted; build the nested dict explicitly before storing it.
volume_x, volume_y, volume_z = input_metadata['shape']
sidecar_file_to_dataset_obj['ProcessingParameters'] = {
    'RefApproach': None,
    'VolumeX': volume_x,
    'VolumeY': volume_y,
    'VolumeZ': volume_z,
}

# Make sure the target folder exists, then open the sidecar for WRITING
# ('r' mode would make `json.dump` fail, and requires the file to exist).
path_sidecar_file_to_dataset.parent.mkdir(parents=True, exist_ok=True)
with open(path_sidecar_file_to_dataset, 'w', encoding='utf-8') as sidecar_file_to_dataset_file:
    json.dump(sidecar_file_to_dataset_obj, sidecar_file_to_dataset_file)

"sidecar_file_to_dataset_schema.json"

{
        "InstitutionName": {
            "Description": "facility where the data was acquired, e.g. ESRF"
        },
        "StationName": {
            "Description": "the name of the instrument within the facility such as the beamline, e.g. BM18 EBS"
        },
        "BodyPart": {
            "Description": "per BIDS convention, see column 'Body Part Examined' from https://dicom.nema.org/medical/dicom/current/output/chtml/part16/chapter_L.html#chapter_L"
        },
        "SampleEnvironment": {
            "Description": "per BIDS convention, how the sample was scanned",
            "Levels": {
                "ex vivo": "ex vivo",
                "in vivo": "in vivo",
                "in vitro": "in vitro"
                }
        },
        "SampleFixation": {
            "Description": "free-text details of the fixation"
        },
        "SampleEmbedding": {
            "Description": "free-text details of the embedding"
        },
        "PixelSize": {
            "Description": "per BIDS convention, three-value list"
        },
        "PixelSizeUnits": {
            "Description": "per BIDS convention",
            "Levels": {
                "mm": "millimeters",
                "um": "micrometers",
                "nm": "nanometers"
                }
        },
        "AcquisitionParameters": {
            "Description": "free-content entry"
        },
        "ProcessingParameters": {
            "Description": "free-content entry, should contain VolumeX, VolumeY and VolumeZ in pixels"
        }
}

"metadata.json" (with dummy data)

{
    "name": "",
    "organ": "brain",
    "organ_context": null,
    "scan_type": "",
    "roi": "",
    "resolution_um": 1,
    "shape": [
        1000,
        1000,
        1000
    ],
    "donor": {
        "id": "",
        "age": 70,
        "sex": "",
        "weight": 100,
        "height": 200,
        "cause_of_death": "",
        "date_of_death": "1970-01-01",
        "medical_history": "",
        "diabetes": null,
        "hypertension": null,
        "smoker": null
    },
    "scan": {
        "date": "1970-01-01",
        "energy": 1,
        "current_start": null,
        "filling_mode": null,
        "n_projections": 1,
        "n_ref": 1,
        "n_dark": 1,
        "latency_time": null,
        "exposure_time": 1,
        "accumulation_n_frames": null,
        "scan_type": "",
        "scan_range": 1,
        "n_scans": 1,
        "acquisition": "",
        "half_acquisition_value": 1,
        "quarter_acquisition_value": 1,
        "z_step": null,
        "scan_time": null,
        "filters": null,
        "scintillator": null,
        "optic": null,
        "distance_sample_detector": 1,
        "sensor_name": null,
        "sensor_mode": null,
        "sensor_magnification": null,
        "sensor_roi_x_size": 1,
        "sensor_roi_y_size": 1,
        "pixel_size": 1,
        "xray_magnification": null
    },
    "proposal": {
        "proposal_number": "xx1111",
        "title": "",
        "proposers": [
            ""
        ]
    }
}

The missing files follow this BIDS structure: https://github.com/chourroutm/bids-xpct-example/tree/main/micr_XPCTzarr

balbasty added the enhancement New feature or request label Nov 13, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Write BIDS json sidecar for each modalities #21

Write BIDS json sidecar for each modalities #21

balbasty commented Nov 13, 2024

chourroutm commented Nov 21, 2024

balbasty commented Nov 21, 2024

chourroutm commented Nov 26, 2024

Write BIDS json sidecar for each modalities #21

Write BIDS json sidecar for each modalities #21

Comments

balbasty commented Nov 13, 2024

chourroutm commented Nov 21, 2024

balbasty commented Nov 21, 2024

chourroutm commented Nov 26, 2024