Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Write BIDS json sidecar for each modality #21

Open
balbasty opened this issue Nov 13, 2024 · 3 comments
Open

Write BIDS json sidecar for each modality #21

balbasty opened this issue Nov 13, 2024 · 3 comments
Labels
enhancement New feature or request

Comments

@balbasty
Copy link
Collaborator

We should write BIDS-compatible sidecar JSONs for most of these files. But we need data-generating sites to provide us with proper metadata.

See: https://bids-specification.readthedocs.io/en/stable/modality-specific-files/microscopy.html#microscopy-metadata-sidecar-json

We should probably also save info about the data-generating sites themselves, similar to what's in the MRI sidecars

See: https://bids-specification.readthedocs.io/en/stable/modality-specific-files/magnetic-resonance-imaging-data.html#institution-information

@balbasty balbasty added the enhancement New feature or request label Nov 13, 2024
@chourroutm
Copy link

This is the BIDS sidecar JSON we got for the HiP-CT data: micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.json; it is automatically derived from the metadata generated with HiP-CT release (@dstansby maybe you'd like to develop?)

@balbasty
Copy link
Collaborator Author

Thanks @chourroutm ! That's exactly what we'd like to do for the other modalities :)

@chourroutm
Copy link

Are you interested in our script to populate it?

"metadata.py"

from pathlib import Path
import json
import pandas

# Input metadata file; every output written by this script is populated from
# the dictionary loaded here.
path_input_metadata = Path("./metadata.json")

# JSON text is UTF-8; pass the encoding explicitly so that non-ASCII donor or
# proposal strings do not depend on the platform's locale encoding.
with open(path_input_metadata, 'r', encoding='utf-8') as input_file:
    input_metadata = json.load(input_file)

# BIDS convention
path_participants_schema = Path("../participants.json")

# Per BIDS convention, the second part of `participant_id` is only alphanumeric
# without dashes; thus, I chose to use the numeric placeholder `01` and
# introduce a `participant_name` alongside
participant_id = "sub-01"

# BIDS convention
path_samples_schema = Path("../samples.json")

# Per BIDS convention, the second part of `sample_id` is only alphanumeric
# without dashes; thus, I chose to use `brain` here... it needs to be adapted!
sample_id = "sample-brain"

path_sessions_schema = Path(f"../{participant_id}/{participant_id}_sessions.json")

# Per BIDS convention, the second part of `session_id` is only alphanumeric
# without dashes; thus, I chose to use the numeric placeholder `01`. Sessions
# have a unique `session_doi` (this may vary across facilities) and a unique
# 3-entry key: `acq_time` + `proposal_facility` + `proposal_reference`
session_id = "ses-01"

# This file is not part of the BIDS structure but it was convenient to use the
# same JSON template to define the keys.
path_sidecar_file_to_dataset_schema = Path("./sidecar_file_to_dataset_schema.json")

# This file IS part of the BIDS structure
path_sidecar_file_to_dataset = Path(f"../{participant_id}/{session_id}/micr/{participant_id}_{session_id}_{sample_id}_XPCT.json")

# Helper function to convert a string to title case
def to_title_case(snake_str):
    """Return *snake_str* with underscores removed and each part title-cased.

    The string is split on ``_``, ``str.title`` is applied to every piece and
    the pieces are concatenated, e.g. ``"n_projections"`` -> ``"NProjections"``.
    """
    return "".join(map(str.title, snake_str.split("_")))

# Recursive function to update all keys to title case
def keys_to_title_case(obj):
    """Return a copy of *obj* in which every dict key is converted with
    ``to_title_case``.

    Dicts and lists are walked recursively, so keys of dicts nested inside
    other dicts or inside lists are converted as well. Any other value is
    returned unchanged. The input object is not modified.
    """
    if isinstance(obj, dict):
        # Recurse into the values too; otherwise only the outermost level of
        # keys would be renamed.
        return {
            to_title_case(key): keys_to_title_case(value)
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [keys_to_title_case(item) for item in obj]
    return obj

# Build the single participant row and write it as a TSV file next to the
# participants JSON schema (same path, `.tsv` suffix).
with open(path_participants_schema, 'r', encoding='utf-8') as participants_schema_file:
    participants_schema = json.load(participants_schema_file)
print(participants_schema.keys())
# Start with every column declared in the schema set to None, then fill in
# the values available from the input metadata.
participants_obj = dict.fromkeys(participants_schema.keys(), None)
participants_obj['participant_id'] = participant_id # BIDS convention
participants_obj['participant_name'] = input_metadata['donor']['id']
participants_obj['age'] = input_metadata['donor']['age']
participants_obj['sex'] = input_metadata['donor']['sex']
participants_obj['weight'] = input_metadata['donor']['weight']
participants_obj['height'] = input_metadata['donor']['height']
participants_obj['medical_information'] = input_metadata['donor']['medical_history'] + "; died from " + input_metadata['donor']['cause_of_death']
participants_obj['species'] = "homo sapiens" # BIDS convention
participants_obj['institute'] = (
    "Laboratoire d'Anatomie des Alpes Françaises"
    if input_metadata['donor']['id'].startswith("LADAF")
    else None
)

# The dict holds only scalars, so it must be wrapped in a list to form a
# one-row table (pandas raises ValueError on a bare dict of scalars).
# Unfilled columns are written as `n/a`, the BIDS marker for missing values.
pandas.DataFrame([participants_obj]).to_csv(
    path_participants_schema.with_suffix(".tsv"),
    sep='\t',
    index=False,
    na_rep='n/a',
)

# Build the single sample row and write it as a TSV file next to the samples
# JSON schema (same path, `.tsv` suffix).
with open(path_samples_schema, 'r', encoding='utf-8') as samples_schema_file:
    samples_schema = json.load(samples_schema_file)
print(samples_schema.keys())
# Every column declared in the schema starts as None.
samples_obj = dict.fromkeys(samples_schema.keys(), None)
samples_obj['sample_id'] = sample_id
samples_obj['participant_id'] = participant_id
samples_obj['sample_type'] = "tissue" # BIDS convention
#samples_obj['sample_info'] = ""
#samples_obj['sample_preparation'] = ""

# The dict holds only scalars, so it must be wrapped in a list to form a
# one-row table (pandas raises ValueError on a bare dict of scalars).
# Unfilled columns are written as `n/a`, the BIDS marker for missing values.
pandas.DataFrame([samples_obj]).to_csv(
    path_samples_schema.with_suffix(".tsv"),
    sep='\t',
    index=False,
    na_rep='n/a',
)

# Build the single session row and write it as a TSV file next to the
# sessions JSON schema (same path, `.tsv` suffix).
with open(path_sessions_schema, 'r', encoding='utf-8') as sessions_schema_file:
    sessions_schema = json.load(sessions_schema_file)
print(sessions_schema.keys())
# Every column declared in the schema starts as None.
sessions_obj = dict.fromkeys(sessions_schema.keys(), None)
sessions_obj['session_id'] = session_id # BIDS convention
# Note that the following line might not be accurate as it is not necessarily
# the first day of the beamtime
#sessions_obj['acq_time'] = f"{input_metadata['scan']['date']}T00:00:00"
sessions_obj['proposal_facility'] = "ESRF"
sessions_obj['proposal_reference'] = input_metadata['proposal']['proposal_number']
sessions_obj['proposal_title'] = input_metadata['proposal']['title']
sessions_obj['proposal_members'] = ", ".join(input_metadata['proposal']['proposers'])

# The dict holds only scalars, so it must be wrapped in a list to form a
# one-row table (pandas raises ValueError on a bare dict of scalars).
# Unfilled columns are written as `n/a`, the BIDS marker for missing values.
pandas.DataFrame([sessions_obj]).to_csv(
    path_sessions_schema.with_suffix(".tsv"),
    sep='\t',
    index=False,
    na_rep='n/a',
)

# Build the JSON sidecar for the dataset and write it to
# `path_sidecar_file_to_dataset`.
with open(path_sidecar_file_to_dataset_schema, 'r', encoding='utf-8') as sidecar_file_to_dataset_schema_file:
    sidecar_file_to_dataset_schema = json.load(sidecar_file_to_dataset_schema_file)
print(sidecar_file_to_dataset_schema.keys())
# NB: this file uses TitleCase for the keys of the JSON.
# Every key declared in the schema starts as None (written as JSON `null`
# unless it is filled in below).
sidecar_file_to_dataset_obj = dict.fromkeys(sidecar_file_to_dataset_schema.keys(), None)
sidecar_file_to_dataset_obj['InstitutionName'] = "European Synchrotron Radiation Facility"
sidecar_file_to_dataset_obj['StationName'] = "BM18 EBS" if input_metadata['name'].endswith("_bm18") else None
sidecar_file_to_dataset_obj['BodyPart'] = "BRAIN" if input_metadata['organ'] == "brain" else None
sidecar_file_to_dataset_obj['SampleEnvironment'] = "ex vivo"
sidecar_file_to_dataset_obj['SampleFixation'] = "formalin"
#sidecar_file_to_dataset_obj['SampleEmbedding'] = "mixed agar gel at 70% ethanol"
# The same resolution value is repeated for the three entries of PixelSize.
sidecar_file_to_dataset_obj['PixelSize'] = [input_metadata['resolution_um']] * 3
sidecar_file_to_dataset_obj['PixelSizeUnits'] = "um"
sidecar_file_to_dataset_obj['AcquisitionParameters'] = keys_to_title_case(input_metadata['scan'])
# `dict.fromkeys` left this entry as None, so it has to be created as a dict
# before its sub-keys can be set. The three entries of `shape` are assigned
# to VolumeX, VolumeY, VolumeZ in that order.
volume_x, volume_y, volume_z = input_metadata['shape']
sidecar_file_to_dataset_obj['ProcessingParameters'] = {
    'RefApproach': None,
    'VolumeX': volume_x,
    'VolumeY': volume_y,
    'VolumeZ': volume_z,
}

# Make sure the target folder exists, then open the sidecar for WRITING
# (`json.dump` cannot write to a handle opened in read mode).
path_sidecar_file_to_dataset.parent.mkdir(parents=True, exist_ok=True)
with open(path_sidecar_file_to_dataset, 'w', encoding='utf-8') as sidecar_file_to_dataset_file:
    json.dump(sidecar_file_to_dataset_obj, sidecar_file_to_dataset_file)

"sidecar_file_to_dataset_schema.json"

{
        "InstitutionName": {
            "Description": "facility where the data was acquired, e.g. ESRF"
        },
        "StationName": {
            "Description": "the name of the instrument within the facility such as the beamline, e.g. BM18 EBS"
        },
        "BodyPart": {
            "Description": "per BIDS convention, see column 'Body Part Examined' from https://dicom.nema.org/medical/dicom/current/output/chtml/part16/chapter_L.html#chapter_L"
        },
        "SampleEnvironment": {
            "Description": "per BIDS convention, how the sample was scanned",
            "Levels": {
                "ex vivo": "ex vivo",
                "in vivo": "in vivo",
                "in vitro": "in vitro"
                }
        },
        "SampleFixation": {
            "Description": "free-text details of the fixation"
        },
        "SampleEmbedding": {
            "Description": "free-text details of the embedding"
        },
        "PixelSize": {
            "Description": "per BIDS convention, three-value list"
        },
        "PixelSizeUnits": {
            "Description": "per BIDS convention",
            "Levels": {
                "mm": "millimeters",
                "um": "micrometers",
                "nm": "nanometers"
                }
        },
        "AcquisitionParameters": {
            "Description": "free-content entry"
        },
        "ProcessingParameters": {
            "Description": "free-content entry, should contain VolumeX, VolumeY and VolumeZ in pixels"
        }
}    

"metadata.json" (with dummy data)

{
    "name": "",
    "organ": "brain",
    "organ_context": null,
    "scan_type": "",
    "roi": "",
    "resolution_um": 1,
    "shape": [
        1000,
        1000,
        1000
    ],
    "donor": {
        "id": "",
        "age": 70,
        "sex": "",
        "weight": 100,
        "height": 200,
        "cause_of_death": "",
        "date_of_death": "1970-01-01",
        "medical_history": "",
        "diabetes": null,
        "hypertension": null,
        "smoker": null
    },
    "scan": {
        "date": "1970-01-01",
        "energy": 1,
        "current_start": null,
        "filling_mode": null,
        "n_projections": 1,
        "n_ref": 1,
        "n_dark": 1,
        "latency_time": null,
        "exposure_time": 1,
        "accumulation_n_frames": null,
        "scan_type": "",
        "scan_range": 1,
        "n_scans": 1,
        "acquisition": "",
        "half_acquisition_value": 1,
        "quarter_acquisition_value": 1,
        "z_step": null,
        "scan_time": null,
        "filters": null,
        "scintillator": null,
        "optic": null,
        "distance_sample_detector": 1,
        "sensor_name": null,
        "sensor_mode": null,
        "sensor_magnification": null,
        "sensor_roi_x_size": 1,
        "sensor_roi_y_size": 1,
        "pixel_size": 1,
        "xray_magnification": null
    },
    "proposal": {
        "proposal_number": "xx1111",
        "title": "",
        "proposers": [
            ""
        ]
    }
}

The missing files follow this BIDS structure: https://github.com/chourroutm/bids-xpct-example/tree/main/micr_XPCTzarr

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
enhancement New feature or request
Projects
None yet
Development

No branches or pull requests

2 participants