-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Write BIDS JSON sidecar for each modality #21
Comments
This is the BIDS sidecar JSON we got for the HiP-CT data: micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.json; it is automatically derived from the metadata generated with HiP-CT release (@dstansby maybe you'd like to develop?) |
Thanks @chourroutm ! That's exactly what we'd like to do for the other modalities :) |
Are you interested in our script to populate it? "metadata.py"
from pathlib import Path
import json
import pandas
# Source metadata file from which every table and sidecar below is populated.
path_input_metadata = Path("./metadata.json")
# JSON is UTF-8 by specification; pass the encoding explicitly so that
# non-ASCII donor/proposal text is not decoded with the platform's locale
# default.
with open(path_input_metadata, 'r', encoding="utf-8") as input_file:
    input_metadata = json.load(input_file)
# BIDS convention: JSON file whose keys name the columns of the participants
# table; the table itself is written next to it with a `.tsv` suffix.
path_participants_schema = Path("../participants.json")
# Per BIDS convention, the label part of `participant_id` (after `sub-`) is
# only alphanumeric without dashes; thus, I chose to use the numeric
# placeholder `01` and introduce a `participant_name` alongside, which holds
# the donor's original identifier.
participant_id = "sub-01"
# BIDS convention: JSON file whose keys name the columns of the samples table;
# the table itself is written next to it with a `.tsv` suffix.
path_samples_schema = Path("../samples.json")
# Per BIDS convention, the label part of `sample_id` (after `sample-`) is only
# alphanumeric without dashes; thus, I chose to use `brain` here... it needs
# to be adapted!
sample_id = "sample-brain"
# JSON file whose keys name the columns of this participant's sessions table;
# it lives inside the participant's own folder.
path_sessions_schema = Path(f"../{participant_id}/{participant_id}_sessions.json")
# Per BIDS convention, the second part of `session_id` is only alphanumeric
# without dashes; thus, I chose to use the numeric placeholder `01`. Sessions
# have a unique `session_doi` (this may vary across facilities) and a unique
# 3-entry key: `acq_time` + `proposal_facility` + `proposal_reference`
session_id = "ses-01"
# This file is not part of the BIDS structure but it was convenient to use the
# same JSON template to define the keys (only its top-level keys are used).
path_sidecar_file_to_dataset_schema = Path("./sidecar_file_to_dataset_schema.json")
# This file IS part of the BIDS structure: it is the sidecar JSON that this
# script writes for the XPCT image of the given participant/session/sample.
path_sidecar_file_to_dataset = Path(f"../{participant_id}/{session_id}/micr/{participant_id}_{session_id}_{sample_id}_XPCT.json")
# Helper function to convert a string to title case
def to_title_case(snake_str: str) -> str:
    """Convert a snake_case string to TitleCase.

    The string is split on underscores, each piece is passed through
    ``str.title()`` and the pieces are concatenated without a separator,
    e.g. ``"n_projections"`` becomes ``"NProjections"``.

    Note that ``str.title()`` lower-cases every letter that does not start a
    word, so existing capitals inside a piece are not preserved.
    """
    components = snake_str.split('_')
    return ''.join(x.title() for x in components)
# Recursive function to update all keys to title case
def keys_to_title_case(obj):
    """Return a copy of *obj* with every dict key converted to TitleCase.

    Dicts are rebuilt with their keys passed through ``to_title_case`` and
    their values converted recursively, so keys of nested dicts (including
    dicts inside lists) are renamed too. Lists are rebuilt element by
    element. Any other value is returned unchanged.

    The input object is not modified.
    """
    if isinstance(obj, dict):
        # Recurse into the value as well: without this, only the outermost
        # level of keys would be renamed.
        return {
            to_title_case(key): keys_to_title_case(value)
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [keys_to_title_case(item) for item in obj]
    return obj
# --- participants.tsv -------------------------------------------------------
# The schema JSON is only used for its top-level keys, which become the
# column names of the table.
with open(path_participants_schema, 'r', encoding="utf-8") as participants_schema_file:
    participants_schema = json.load(participants_schema_file)
print(participants_schema.keys())
# Start with every schema column empty, then fill in what the metadata provides.
participants_obj = dict.fromkeys(participants_schema)
participants_obj['participant_id'] = participant_id  # BIDS convention
participants_obj['participant_name'] = input_metadata['donor']['id']
participants_obj['age'] = input_metadata['donor']['age']
participants_obj['sex'] = input_metadata['donor']['sex']
participants_obj['weight'] = input_metadata['donor']['weight']
participants_obj['height'] = input_metadata['donor']['height']
participants_obj['medical_information'] = input_metadata['donor']['medical_history'] + "; died from " + input_metadata['donor']['cause_of_death']
participants_obj['species'] = "homo sapiens"  # BIDS convention
participants_obj['institute'] = "Laboratoire d'Anatomie des Alpes Françaises" if input_metadata['donor']['id'].startswith("LADAF") else None
# Wrap the dict in a list so that pandas builds a one-row table; a bare dict
# of scalar values raises "If using all scalar values, you must pass an index".
# BIDS requires missing values in tabular files to be written as `n/a`.
pandas.DataFrame([participants_obj]).to_csv(
    path_participants_schema.with_suffix(".tsv"),
    sep='\t',
    index=False,
    na_rep="n/a",
)
# --- samples.tsv ------------------------------------------------------------
# The schema JSON is only used for its top-level keys, which become the
# column names of the table.
with open(path_samples_schema, 'r', encoding="utf-8") as samples_schema_file:
    samples_schema = json.load(samples_schema_file)
print(samples_schema.keys())
# Start with every schema column empty, then fill in the known values.
samples_obj = dict.fromkeys(samples_schema)
samples_obj['sample_id'] = sample_id
samples_obj['participant_id'] = participant_id
samples_obj['sample_type'] = "tissue"  # BIDS convention
# Not populated yet; these columns stay empty unless filled in here.
#samples_obj['sample_info'] = ""
#samples_obj['sample_preparation'] = ""
# Wrap the dict in a list so that pandas builds a one-row table; a bare dict
# of scalar values raises "If using all scalar values, you must pass an index".
# BIDS requires missing values in tabular files to be written as `n/a`.
pandas.DataFrame([samples_obj]).to_csv(
    path_samples_schema.with_suffix(".tsv"),
    sep='\t',
    index=False,
    na_rep="n/a",
)
# --- sub-XX_sessions.tsv ----------------------------------------------------
# The schema JSON is only used for its top-level keys, which become the
# column names of the table.
with open(path_sessions_schema, 'r', encoding="utf-8") as sessions_schema_file:
    sessions_schema = json.load(sessions_schema_file)
print(sessions_schema.keys())
# Start with every schema column empty, then fill in what the metadata provides.
sessions_obj = dict.fromkeys(sessions_schema)
sessions_obj['session_id'] = session_id  # BIDS convention
# Note that the following line might not be accurate as it is not necessarily
# the first day of the beamtime
#sessions_obj['acq_time'] = f"{input_metadata['scan']['date']}T00:00:00"
sessions_obj['proposal_facility'] = "ESRF"
sessions_obj['proposal_reference'] = input_metadata['proposal']['proposal_number']
sessions_obj['proposal_title'] = input_metadata['proposal']['title']
# The list of proposers is flattened to a single comma-separated cell.
sessions_obj['proposal_members'] = ", ".join(input_metadata['proposal']['proposers'])
# Wrap the dict in a list so that pandas builds a one-row table; a bare dict
# of scalar values raises "If using all scalar values, you must pass an index".
# BIDS requires missing values in tabular files to be written as `n/a`.
pandas.DataFrame([sessions_obj]).to_csv(
    path_sessions_schema.with_suffix(".tsv"),
    sep='\t',
    index=False,
    na_rep="n/a",
)
# --- XPCT sidecar JSON ------------------------------------------------------
# The schema JSON is only used for its top-level keys, which become the
# keys of the sidecar.
with open(path_sidecar_file_to_dataset_schema, 'r', encoding="utf-8") as sidecar_file_to_dataset_schema_file:
    sidecar_file_to_dataset_schema = json.load(sidecar_file_to_dataset_schema_file)
print(sidecar_file_to_dataset_schema.keys())
# NB: this file uses TitleCase for the keys of the JSON.
sidecar_file_to_dataset_obj = dict.fromkeys(sidecar_file_to_dataset_schema)
sidecar_file_to_dataset_obj['InstitutionName'] = "European Synchrotron Radiation Facility"
sidecar_file_to_dataset_obj['StationName'] = "BM18 EBS" if input_metadata['name'].endswith("_bm18") else None
sidecar_file_to_dataset_obj['BodyPart'] = "BRAIN" if input_metadata['organ'] == "brain" else None
sidecar_file_to_dataset_obj['SampleEnvironment'] = "ex vivo"
sidecar_file_to_dataset_obj['SampleFixation'] = "formalin"
#sidecar_file_to_dataset_obj['SampleEmbedding'] = "mixed agar gel at 70% ethanol"
# The single `resolution_um` value is repeated for the three-value PixelSize list.
sidecar_file_to_dataset_obj['PixelSize'] = [input_metadata['resolution_um']] * 3
sidecar_file_to_dataset_obj['PixelSizeUnits'] = "um"
sidecar_file_to_dataset_obj['AcquisitionParameters'] = keys_to_title_case(input_metadata['scan'])
# `dict.fromkeys` left 'ProcessingParameters' as None, which cannot be
# subscripted; build the nested dict explicitly before storing it.
processing_parameters = {'RefApproach': None}
# Unpacking requires `shape` to have exactly three entries.
# NOTE(review): assumes `shape` is ordered (X, Y, Z) - confirm against the
# metadata producer.
(
    processing_parameters['VolumeX'],
    processing_parameters['VolumeY'],
    processing_parameters['VolumeZ'],
) = input_metadata['shape']
sidecar_file_to_dataset_obj['ProcessingParameters'] = processing_parameters
# Open for writing: `json.dump` cannot write to a handle opened in 'r' mode.
with open(path_sidecar_file_to_dataset, 'w', encoding="utf-8") as sidecar_file_to_dataset_file:
    json.dump(sidecar_file_to_dataset_obj, sidecar_file_to_dataset_file)
"sidecar_file_to_dataset_schema.json"
{
"InstitutionName": {
"Description": "facility where the data was acquired, e.g. ESRF"
},
"StationName": {
"Description": "the name of the instrument within the facility such as the beamline, e.g. BM18 EBS"
},
"BodyPart": {
"Description": "per BIDS convention, see column 'Body Part Examined' from https://dicom.nema.org/medical/dicom/current/output/chtml/part16/chapter_L.html#chapter_L"
},
"SampleEnvironment": {
"Description": "per BIDS convention, how the sample was scanned",
"Levels": {
"ex vivo": "ex vivo",
"in vivo": "in vivo",
"in vitro": "in vitro"
}
},
"SampleFixation": {
"Description": "free-text details of the fixation"
},
"SampleEmbedding": {
"Description": "free-text details of the embedding"
},
"PixelSize": {
"Description": "per BIDS convention, three-value list"
},
"PixelSizeUnits": {
"Description": "per BIDS convention",
"Levels": {
"mm": "millimeters",
"um": "micrometers",
"nm": "nanometers"
}
},
"AcquisitionParameters": {
"Description": "free-content entry"
},
"ProcessingParameters": {
"Description": "free-content entry, should contain VolumeX, VolumeY and VolumeZ in pixels"
}
} "metadata.json" (with dummy data)
{
"name": "",
"organ": "brain",
"organ_context": null,
"scan_type": "",
"roi": "",
"resolution_um": 1,
"shape": [
1000,
1000,
1000
],
"donor": {
"id": "",
"age": 70,
"sex": "",
"weight": 100,
"height": 200,
"cause_of_death": "",
"date_of_death": "1970-01-01",
"medical_history": "",
"diabetes": null,
"hypertension": null,
"smoker": null
},
"scan": {
"date": "1970-01-01",
"energy": 1,
"current_start": null,
"filling_mode": null,
"n_projections": 1,
"n_ref": 1,
"n_dark": 1,
"latency_time": null,
"exposure_time": 1,
"accumulation_n_frames": null,
"scan_type": "",
"scan_range": 1,
"n_scans": 1,
"acquisition": "",
"half_acquisition_value": 1,
"quarter_acquisition_value": 1,
"z_step": null,
"scan_time": null,
"filters": null,
"scintillator": null,
"optic": null,
"distance_sample_detector": 1,
"sensor_name": null,
"sensor_mode": null,
"sensor_magnification": null,
"sensor_roi_x_size": 1,
"sensor_roi_y_size": 1,
"pixel_size": 1,
"xray_magnification": null
},
"proposal": {
"proposal_number": "xx1111",
"title": "",
"proposers": [
""
]
}
} The missing files follow this BIDS structure: https://github.com/chourroutm/bids-xpct-example/tree/main/micr_XPCTzarr |
We should write BIDS-compatible sidecar JSONs for most of these files. But we need data-generating sites to provide us with proper metadata.
See: https://bids-specification.readthedocs.io/en/stable/modality-specific-files/microscopy.html#microscopy-metadata-sidecar-json
We should probably also save info about the data-generating sites themselves, similar to what's in the MRI sidecars
See: https://bids-specification.readthedocs.io/en/stable/modality-specific-files/magnetic-resonance-imaging-data.html#institution-information
The text was updated successfully, but these errors were encountered: