diff --git a/.gitignore b/.gitignore
index 213258b..daff4cb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -134,3 +134,5 @@ dmypy.json
 #misc
 endpoint_schemas/
 tests/
+src/ibl_to_nwb/local/
+.vscode
diff --git a/src/ibl_to_nwb/_scripts/_convert_brainwide_map.py b/src/ibl_to_nwb/_scripts/_convert_brainwide_map.py
new file mode 100644
index 0000000..fab3a20
--- /dev/null
+++ b/src/ibl_to_nwb/_scripts/_convert_brainwide_map.py
@@ -0,0 +1,210 @@
+import os
+import shutil
+import sys
+from datetime import datetime
+from pathlib import Path
+
+import spikeglx
+
+# if running on SDSC, use the OneSdsc, else normal
+if "USE_SDSC_ONE" in os.environ:
+    print("using SDSC ONE")
+    from deploy.iblsdsc import OneSdsc as ONE
+else:
+    print("using regular ONE")
+    from one.api import ONE
+
+from ibl_to_nwb.converters import BrainwideMapConverter, IblSpikeGlxConverter
+from ibl_to_nwb.datainterfaces import (
+    BrainwideMapTrialsInterface,
+    IblPoseEstimationInterface,
+    IblSortingInterface,
+    LickInterface,
+    PupilTrackingInterface,
+    RawVideoInterface,
+    RoiMotionEnergyInterface,
+    WheelInterface,
+)
+
+
+def create_symlinks(source_dir, target_dir, remove_uuid=True):
+    """Replicate the tree under source_dir at target_dir in the form of symlinks."""
+    for root, dirs, files in os.walk(source_dir):
+        for dir in dirs:
+            folder = target_dir / (Path(root) / dir).relative_to(source_dir)
+            folder.mkdir(parents=True, exist_ok=True)
+
+    for root, dirs, files in os.walk(source_dir):
+        for file in files:
+            source_file_path = Path(root) / file
+            target_file_path = target_dir / source_file_path.relative_to(source_dir)
+            if remove_uuid:
+                parent, name = target_file_path.parent, target_file_path.name
+                name_parts = name.split(".")
+                name_parts.remove(name_parts[-2])
+                target_file_path = parent / ".".join(name_parts)
+            if not target_file_path.exists():
+                target_file_path.symlink_to(source_file_path)
+
+
+def get_last_before(eid: str, one: ONE, revision: str):
+    revisions = one.list_revisions(eid, revision="*")
+    revisions = [datetime.strptime(revision, "%Y-%m-%d") for revision in revisions]
+    revision = datetime.strptime(revision, "%Y-%m-%d")
+    revisions = sorted(revisions)
+    ix = sum([not (rev > revision) for rev in revisions]) - 1
+    return revisions[ix].strftime("%Y-%m-%d")
+
+
+def convert(eid: str, one: ONE, data_interfaces: list, revision: str, mode: str):
+    # Run conversion
+    session_converter = BrainwideMapConverter(one=one, session=eid, data_interfaces=data_interfaces, verbose=True)
+    metadata = session_converter.get_metadata()
+    metadata["NWBFile"]["session_id"] = f"{eid}:{revision}"  # FIXME this hack has to go
+    subject_id = metadata["Subject"]["subject_id"]
+
+    subject_folder_path = output_folder / f"sub-{subject_id}"
+    subject_folder_path.mkdir(exist_ok=True)
+    if mode == "raw":
+        fname = f"sub-{subject_id}_ses-{eid}_desc-raw_ecephys+image.nwb"
+    elif mode == "processed":
+        fname = f"sub-{subject_id}_ses-{eid}_desc-processed_behavior+ecephys.nwb"
+    else:
+        raise ValueError(f"mode must be 'raw' or 'processed', got {mode!r}")
+
+    nwbfile_path = subject_folder_path / fname
+    session_converter.run_conversion(
+        nwbfile_path=nwbfile_path,
+        metadata=metadata,
+        overwrite=True,
+    )
+    return nwbfile_path
+
+
+cleanup = False
+
+if __name__ == "__main__":
+    if len(sys.argv) == 1:
+        eid = "caa5dddc-9290-4e27-9f5e-575ba3598614"
+        mode = "raw"
+    else:
+        eid = sys.argv[1]
+        mode = sys.argv[2]  # raw or processed
+
+    print(eid)
+    print(mode)
+
+    # path setup
+    base_path = Path.home() / "ibl_scratch"
+    output_folder = base_path / "nwbfiles"
+    output_folder.mkdir(exist_ok=True, parents=True)
+    local_scratch_folder = base_path / eid
+
+    # common
+    one_kwargs = dict(
+        base_url="https://openalyx.internationalbrainlab.org",
+        password="international",
+        mode="remote",
+    )
+
+    # if not running on SDSC, add the cache folder explicitly
+    if "USE_SDSC_ONE" in os.environ:
+        one_kwargs["cache_rest"] = None  # disables rest caching (write permission errors on popeye)
+    else:
+        # Initialize IBL (ONE) client to download processed data for this session
+        one_cache_folder_path = base_path / "ibl_conversion" / eid / "cache"
+        one_kwargs["cache_dir"] = one_cache_folder_path
+
+    # instantiate one
+    one = ONE(**one_kwargs)
+
+    # correct revision
+    revision = get_last_before(eid=eid, one=one, revision="2024-07-10")
+
+    # Initialize as many of each interface as we need across the streams
+    data_interfaces = []
+
+    if mode == "raw":
+        # ephys
+        session_folder = one.eid2path(eid)
+        spikeglx_source_folder_path = session_folder / "raw_ephys_data"
+
+        # create symlinks at local scratch
+        create_symlinks(spikeglx_source_folder_path, local_scratch_folder)
+
+        # check and decompress
+        cbin_paths = []
+        for root, dirs, files in os.walk(local_scratch_folder):
+            for file in files:
+                if file.endswith(".cbin"):
+                    cbin_paths.append(Path(root) / file)
+
+        for path in cbin_paths:
+            if not path.with_suffix(".bin").exists():
+                print(f"decompressing {path}")
+                spikeglx.Reader(path).decompress_to_scratch()
+
+        # Specify the path to the SpikeGLX files on the server but use ONE API for timestamps
+        spikeglx_subconverter = IblSpikeGlxConverter(folder_path=spikeglx_source_folder_path, one=one, eid=eid)
+        data_interfaces.append(spikeglx_subconverter)
+
+        # video
+        metadata_retrieval = BrainwideMapConverter(one=one, session=eid, data_interfaces=[], verbose=False)
+        subject_id = metadata_retrieval.get_metadata()["Subject"]["subject_id"]
+
+        pose_estimation_files = one.list_datasets(eid=eid, filename="*.dlc*")
+        for pose_estimation_file in pose_estimation_files:
+            camera_name = pose_estimation_file.replace("alf/_ibl_", "").replace(".dlc.pqt", "")
+
+            video_interface = RawVideoInterface(
+                nwbfiles_folder_path=output_folder,
+                subject_id=subject_id,
+                one=one,
+                session=eid,
+                camera_name=camera_name,
+            )
+            data_interfaces.append(video_interface)
+
+    if mode == "processed":
+        # These interfaces should always be present in source data
+        data_interfaces.append(IblSortingInterface(one=one, session=eid, revision=revision))
+        data_interfaces.append(BrainwideMapTrialsInterface(one=one, session=eid, revision=revision))
+        data_interfaces.append(WheelInterface(one=one, session=eid, revision=revision))
+
+        # These interfaces may not be present; check if they are before adding to list
+        pose_estimation_files = one.list_datasets(eid=eid, filename="*.dlc*")
+        for pose_estimation_file in pose_estimation_files:
+            camera_name = pose_estimation_file.replace("alf/_ibl_", "").replace(".dlc.pqt", "")
+            data_interfaces.append(
+                IblPoseEstimationInterface(one=one, session=eid, camera_name=camera_name, revision=revision)
+            )
+
+        pupil_tracking_files = one.list_datasets(eid=eid, filename="*features*")
+        for pupil_tracking_file in pupil_tracking_files:
+            camera_name = pupil_tracking_file.replace("alf/_ibl_", "").replace(".features.pqt", "")
+            data_interfaces.append(
+                PupilTrackingInterface(one=one, session=eid, camera_name=camera_name, revision=revision)
+            )
+
+        roi_motion_energy_files = one.list_datasets(eid=eid, filename="*ROIMotionEnergy.npy*")
+        for roi_motion_energy_file in roi_motion_energy_files:
+            camera_name =
roi_motion_energy_file.replace("alf/", "").replace(".ROIMotionEnergy.npy", "")
+            data_interfaces.append(
+                RoiMotionEnergyInterface(one=one, session=eid, camera_name=camera_name, revision=revision)
+            )
+
+        if one.list_datasets(eid=eid, collection="alf", filename="licks*"):
+            data_interfaces.append(LickInterface(one=one, session=eid, revision=revision))
+
+    # run the conversion
+    nwbfile_path = convert(
+        eid=eid,
+        one=one,
+        data_interfaces=data_interfaces,
+        revision=revision,
+        mode=mode,
+    )
+
+    # cleanup
+    if cleanup:
+        if mode == "raw":
+            os.system(f"find {local_scratch_folder} -type l -exec unlink {{}} \\;")
+            shutil.rmtree(local_scratch_folder)
diff --git a/src/ibl_to_nwb/_scripts/_convert_brainwide_map_processed.py b/src/ibl_to_nwb/_scripts/_convert_brainwide_map_processed.py
new file mode 100644
index 0000000..0853d9b
--- /dev/null
+++ b/src/ibl_to_nwb/_scripts/_convert_brainwide_map_processed.py
@@ -0,0 +1,106 @@
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+
+if "USE_SDSC_ONE" in os.environ:
+    from deploy.iblsdsc import OneSdsc as ONE
+else:
+    from one.api import ONE
+
+from ibl_to_nwb.converters import BrainwideMapConverter
+from ibl_to_nwb.datainterfaces import (
+    BrainwideMapTrialsInterface,
+    IblPoseEstimationInterface,
+    IblSortingInterface,
+    LickInterface,
+    PupilTrackingInterface,
+    RoiMotionEnergyInterface,
+    WheelInterface,
+)
+
+
+def get_last_before(eid: str, one: ONE, revision: str):
+    revisions = one.list_revisions(eid, revision="*")
+    revisions = [datetime.strptime(revision, "%Y-%m-%d") for revision in revisions]
+    revision = datetime.strptime(revision, "%Y-%m-%d")
+    revisions = sorted(revisions)
+    ix = sum([not (rev > revision) for rev in revisions]) - 1
+    return revisions[ix].strftime("%Y-%m-%d")
+
+
+def convert(eid: str, one: ONE, data_interfaces: list, revision: str):
+    # Run conversion
+    session_converter = BrainwideMapConverter(one=one, session=eid, data_interfaces=data_interfaces, verbose=True)
+    metadata = session_converter.get_metadata()
+    metadata["NWBFile"]["session_id"] = f"{eid}:{revision}"  # FIXME this hack has to go
+    subject_id = metadata["Subject"]["subject_id"]
+
+    subject_folder_path = output_folder / f"sub-{subject_id}"
+    subject_folder_path.mkdir(exist_ok=True)
+    fname = f"sub-{subject_id}_ses-{eid}_desc-processed.nwb"
+
+    nwbfile_path = subject_folder_path / fname
+    session_converter.run_conversion(
+        nwbfile_path=nwbfile_path,
+        metadata=metadata,
+        overwrite=True,
+    )
+    return nwbfile_path
+
+
+if __name__ == "__main__":
+    if len(sys.argv) == 1:
+        eid = "caa5dddc-9290-4e27-9f5e-575ba3598614"
+    else:
+        eid = sys.argv[1]
+
+    # path setup
+    base_path = Path.home() / "ibl_scratch"
+    output_folder = base_path / "nwbfiles"
+    output_folder.mkdir(exist_ok=True, parents=True)
+
+    # Initialize IBL (ONE) client to download processed data for this session
+    one_cache_folder_path = base_path / "ibl_conversion" / eid / "cache"
+    one = ONE(
+        base_url="https://openalyx.internationalbrainlab.org",
+        password="international",
+        mode="remote",
+        # silent=True,
+        cache_dir=one_cache_folder_path,
+    )
+
+    revision = get_last_before(eid=eid, one=one, revision="2024-07-10")
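+
+    # Illustrative example of the cutoff semantics above (dates assumed, not
+    # from this session): if the datasets carry revisions ["2024-03-22",
+    # "2024-05-06", "2025-01-08"], get_last_before(..., revision="2024-07-10")
+    # returns "2024-05-06" -- the latest revision not after the cutoff.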
+
+    # Initialize as many of each interface as we need across the streams
+    data_interfaces = list()
+
+    # These interfaces should always be present in source data
+    data_interfaces.append(IblSortingInterface(one=one, session=eid, revision=revision))
+    data_interfaces.append(BrainwideMapTrialsInterface(one=one, session=eid, revision=revision))
+    data_interfaces.append(WheelInterface(one=one, session=eid, revision=revision))
+
+    # These interfaces may not be present; check if they are before adding to list
+    pose_estimation_files = one.list_datasets(eid=eid, filename="*.dlc*")
+    for pose_estimation_file in pose_estimation_files:
+        camera_name = pose_estimation_file.replace("alf/_ibl_", "").replace(".dlc.pqt", "")
+        data_interfaces.append(
+            IblPoseEstimationInterface(one=one, session=eid, camera_name=camera_name, revision=revision)
+        )
+
+    pupil_tracking_files = one.list_datasets(eid=eid, filename="*features*")
+    for pupil_tracking_file in pupil_tracking_files:
+        camera_name = pupil_tracking_file.replace("alf/_ibl_", "").replace(".features.pqt", "")
+        data_interfaces.append(PupilTrackingInterface(one=one, session=eid, camera_name=camera_name, revision=revision))
+
+    roi_motion_energy_files = one.list_datasets(eid=eid, filename="*ROIMotionEnergy.npy*")
+    for roi_motion_energy_file in roi_motion_energy_files:
+        camera_name = roi_motion_energy_file.replace("alf/", "").replace(".ROIMotionEnergy.npy", "")
+        data_interfaces.append(
+            RoiMotionEnergyInterface(one=one, session=eid, camera_name=camera_name, revision=revision)
+        )
+
+    if one.list_datasets(eid=eid, collection="alf", filename="licks*"):
+        data_interfaces.append(LickInterface(one=one, session=eid, revision=revision))
+
+    nwbfile_path = convert(eid=eid, one=one, data_interfaces=data_interfaces, revision=revision)
diff --git a/src/ibl_to_nwb/_scripts/_convert_brainwide_map_raw.py b/src/ibl_to_nwb/_scripts/_convert_brainwide_map_raw.py
new file mode 100644
index 0000000..e16180b
--- /dev/null
+++ b/src/ibl_to_nwb/_scripts/_convert_brainwide_map_raw.py
@@ -0,0 +1,95 @@
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+
+if "USE_SDSC_ONE" in os.environ:
+    from deploy.iblsdsc import OneSdsc as ONE
+else:
+    from one.api import ONE
+
+from ibl_to_nwb.converters import BrainwideMapConverter, IblSpikeGlxConverter
+from ibl_to_nwb.datainterfaces import RawVideoInterface
+
+
+def get_last_before(eid: str, one: ONE, revision: str):
+    revisions = one.list_revisions(eid, revision="*")
+    revisions = [datetime.strptime(revision, "%Y-%m-%d") for revision in revisions]
+    revision = datetime.strptime(revision, "%Y-%m-%d")
+    revisions = sorted(revisions)
+    ix = sum([not (rev > revision) for rev in revisions]) - 1
+    return revisions[ix].strftime("%Y-%m-%d")
+
+
+def convert(eid: str, one: ONE, data_interfaces: list, revision: str):
+    # Run conversion
+    session_converter = BrainwideMapConverter(one=one, session=eid, data_interfaces=data_interfaces, verbose=True)
+    metadata = session_converter.get_metadata()
+    metadata["NWBFile"]["session_id"] = f"{eid}:{revision}"  # FIXME this hack has to go
+    subject_id = metadata["Subject"]["subject_id"]
+
+    subject_folder_path = output_folder / f"sub-{subject_id}"
+    subject_folder_path.mkdir(exist_ok=True)
+    fname = f"sub-{subject_id}_ses-{eid}_desc-raw.nwb"
+
+    nwbfile_path = subject_folder_path / fname
+    session_converter.run_conversion(
+        nwbfile_path=nwbfile_path,
+        metadata=metadata,
+        overwrite=True,
+    )
+    return nwbfile_path
+
+
+if __name__ == "__main__":
+    if len(sys.argv) == 1:
+        eid = "caa5dddc-9290-4e27-9f5e-575ba3598614"
+    else:
+        eid = sys.argv[1]
+
+    # path setup
+    base_path = Path.home() / "ibl_scratch"
+    output_folder = base_path / "nwbfiles"
+    output_folder.mkdir(exist_ok=True, parents=True)
+
+    # Initialize IBL (ONE) client to download data for this session
+    one_cache_folder_path = base_path
/ "ibl_conversion" / eid / "cache"
+    one = ONE(
+        base_url="https://openalyx.internationalbrainlab.org",
+        password="international",
+        mode="remote",
+        # silent=True,
+        cache_dir=one_cache_folder_path,
+    )
+
+    revision = get_last_before(eid=eid, one=one, revision="2024-07-10")
+
+    # Initialize as many of each interface as we need across the streams
+    data_interfaces = list()
+
+    # ephys
+    session_folder = one.eid2path(eid)
+    spikeglx_source_folder_path = session_folder / "raw_ephys_data"
+
+    # Specify the path to the SpikeGLX files on the server but use ONE API for timestamps
+    spikeglx_subconverter = IblSpikeGlxConverter(folder_path=spikeglx_source_folder_path, one=one, eid=eid)
+    data_interfaces.append(spikeglx_subconverter)
+
+    # video
+    metadata_retrieval = BrainwideMapConverter(one=one, session=eid, data_interfaces=[], verbose=False)
+    subject_id = metadata_retrieval.get_metadata()["Subject"]["subject_id"]
+
+    pose_estimation_files = one.list_datasets(eid=eid, filename="*.dlc*")
+    for pose_estimation_file in pose_estimation_files:
+        camera_name = pose_estimation_file.replace("alf/_ibl_", "").replace(".dlc.pqt", "")
+
+        video_interface = RawVideoInterface(
+            nwbfiles_folder_path=output_folder,
+            subject_id=subject_id,
+            one=one,
+            session=eid,
+            camera_name=camera_name,
+        )
+        data_interfaces.append(video_interface)
+
+    nwbfile_path = convert(eid=eid, one=one, data_interfaces=data_interfaces, revision=revision)
diff --git a/src/ibl_to_nwb/_scripts/convert_brainwide_map_processed_only.py b/src/ibl_to_nwb/_scripts/convert_brainwide_map_processed_only.py
index af1ba5e..3fb6c4c 100644
--- a/src/ibl_to_nwb/_scripts/convert_brainwide_map_processed_only.py
+++ b/src/ibl_to_nwb/_scripts/convert_brainwide_map_processed_only.py
@@ -14,13 +14,15 @@
 )
 from ibl_to_nwb.testing import check_written_nwbfile_for_consistency
 
-session_id = "d32876dd-8303-4720-8e7e-20678dc2fd71"
+# session_id = "d32876dd-8303-4720-8e7e-20678dc2fd71"
+session_id = "caa5dddc-9290-4e27-9f5e-575ba3598614"  # a BWM session with dual probe
 
 # Specify the revision of the pose estimation data
 # Setting to 'None' will use whatever the latest released revision is
 revision = None
 
-base_path = Path("E:/IBL")
+# base_path = Path("E:/IBL")
+base_path = Path.home() / "ibl_scratch"  # local directory
 base_path.mkdir(exist_ok=True)
 nwbfiles_folder_path = base_path / "nwbfiles"
 nwbfiles_folder_path.mkdir(exist_ok=True)
diff --git a/src/ibl_to_nwb/_scripts/convert_brainwide_map_processed_only_local_testing.py b/src/ibl_to_nwb/_scripts/convert_brainwide_map_processed_only_local_testing.py
index 8200505..7bac904 100644
--- a/src/ibl_to_nwb/_scripts/convert_brainwide_map_processed_only_local_testing.py
+++ b/src/ibl_to_nwb/_scripts/convert_brainwide_map_processed_only_local_testing.py
@@ -1,30 +1,10 @@
-import os
-
-os.environ["JUPYTER_PLATFORM_DIRS"] = "1"  # Annoying
-
-import os
-
-# import traceback
-# from concurrent.futures import ProcessPoolExecutor, as_completed
 from pathlib import Path
-from shutil import rmtree
-
-# from tempfile import mkdtemp
-# from dandi.download import download as dandi_download
-# from dandi.organize import organize as dandi_organize
-# from dandi.upload import upload as dandi_upload
-# from neuroconv.tools.data_transfers import automatic_dandi_upload
-# from nwbinspector.tools import get_s3_urls_and_dandi_paths
+
 from one.api import ONE
 
-# from pynwb import NWBHDF5IO
-# from pynwb.image import ImageSeries
-# from tqdm import tqdm
-from ibl_to_nwb.brainwide_map import BrainwideMapConverter
-from ibl_to_nwb.brainwide_map.datainterfaces import (
-    BrainwideMapTrialsInterface,
-)
+from ibl_to_nwb.converters import BrainwideMapConverter
 from ibl_to_nwb.datainterfaces import (
+    BrainwideMapTrialsInterface,
     IblPoseEstimationInterface,
     IblSortingInterface,
     LickInterface,
@@ -32,80 +12,78 @@
     RoiMotionEnergyInterface,
     WheelInterface,
 )
+from ibl_to_nwb.testing._consistency_checks import check_nwbfile_for_consistency
 
-base_path = Path.home() / "ibl_scratch"  # local directory
-# session = "d32876dd-8303-4720-8e7e-20678dc2fd71"
-session = "caa5dddc-9290-4e27-9f5e-575ba3598614"  # a BWM session with dual probe
+# select eid
+# -> run download_data_local first with this eid to set up the local folder structure and one cache
+eid = "caa5dddc-9290-4e27-9f5e-575ba3598614"
 
-nwbfile_path = base_path / "nwbfiles" / session / f"{session}.nwb"
-nwbfile_path.parent.mkdir(exist_ok=True)
+# folders
+base_path = Path.home() / "ibl_scratch"
+base_path.mkdir(exist_ok=True)
+nwbfiles_folder_path = base_path / "nwbfiles"
+nwbfiles_folder_path.mkdir(exist_ok=True)
 
 stub_test: bool = False
 cleanup: bool = False
 
 # assert len(os.environ.get("DANDI_API_KEY", "")) > 0, "Run `export DANDI_API_KEY=...`!"
+revision = None
 
-nwbfile_path.parent.mkdir(exist_ok=True)
-
-# Download behavior and spike sorted data for this session
-session_path = base_path / "ibl_conversion" / session
-cache_folder = base_path / "ibl_conversion" / session / "cache"
-session_one = ONE(
+# Initialize IBL (ONE) client to download processed data for this session
+one_cache_folder_path = base_path / "ibl_conversion" / eid / "cache"
+one = ONE(
     base_url="https://openalyx.internationalbrainlab.org",
     password="international",
-    silent=False,
-    cache_dir=cache_folder,
+    silent=True,
+    cache_dir=one_cache_folder_path,
 )
 
 # Initialize as many of each interface as we need across the streams
 data_interfaces = list()
 
 # These interfaces should always be present in source data
-data_interfaces.append(IblSortingInterface(session=session, cache_folder=cache_folder / "sorting"))
-data_interfaces.append(BrainwideMapTrialsInterface(one=session_one, session=session))
-data_interfaces.append(WheelInterface(one=session_one, session=session))
+data_interfaces.append(IblSortingInterface(one=one, session=eid, revision=revision))
+data_interfaces.append(BrainwideMapTrialsInterface(one=one, session=eid, revision=revision))
+data_interfaces.append(WheelInterface(one=one, session=eid, revision=revision))
 
 # These interfaces may not be present; check if they are before adding to list
-pose_estimation_files = session_one.list_datasets(eid=session, filename="*.dlc*")
+pose_estimation_files = one.list_datasets(eid=eid, filename="*.dlc*")
 for pose_estimation_file in pose_estimation_files:
     camera_name = pose_estimation_file.replace("alf/_ibl_", "").replace(".dlc.pqt", "")
-    data_interfaces.append(
-        IblPoseEstimationInterface(
-            one=session_one, session=session, camera_name=camera_name, include_pose=True, include_video=False
-        )
-    )
+    data_interfaces.append(IblPoseEstimationInterface(one=one, session=eid, camera_name=camera_name, revision=revision))
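+
+# Example of the camera_name convention used in these loops (names from the
+# IBL alf collection): "alf/_ibl_leftCamera.dlc.pqt" maps to "leftCamera"; the
+# features and ROIMotionEnergy datasets below follow the same pattern.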
-pupil_tracking_files = session_one.list_datasets(eid=session, filename="*features*")
+pupil_tracking_files = one.list_datasets(eid=eid, filename="*features*")
 for pupil_tracking_file in pupil_tracking_files:
     camera_name = pupil_tracking_file.replace("alf/_ibl_", "").replace(".features.pqt", "")
-    data_interfaces.append(PupilTrackingInterface(one=session_one, session=session, camera_name=camera_name))
+    data_interfaces.append(PupilTrackingInterface(one=one, session=eid, camera_name=camera_name, revision=revision))
 
-roi_motion_energy_files = session_one.list_datasets(eid=session, filename="*ROIMotionEnergy.npy*")
+roi_motion_energy_files = one.list_datasets(eid=eid, filename="*ROIMotionEnergy.npy*")
 for roi_motion_energy_file in roi_motion_energy_files:
     camera_name = roi_motion_energy_file.replace("alf/", "").replace(".ROIMotionEnergy.npy", "")
-    data_interfaces.append(RoiMotionEnergyInterface(one=session_one, session=session, camera_name=camera_name))
+    data_interfaces.append(RoiMotionEnergyInterface(one=one, session=eid, camera_name=camera_name, revision=revision))
 
-if session_one.list_datasets(eid=session, collection="alf", filename="licks*"):
-    data_interfaces.append(LickInterface(one=session_one, session=session))
+if one.list_datasets(eid=eid, collection="alf", filename="licks*"):
+    data_interfaces.append(LickInterface(one=one, session=eid, revision=revision))
 
 # Run conversion
-session_converter = BrainwideMapConverter(
-    one=session_one, session=session, data_interfaces=data_interfaces, verbose=True
-)
+session_converter = BrainwideMapConverter(one=one, session=eid, data_interfaces=data_interfaces, verbose=True)
 
 metadata = session_converter.get_metadata()
-metadata["NWBFile"]["session_id"] = metadata["NWBFile"]["session_id"] + "-processed-only"
+subject_id = metadata["Subject"]["subject_id"]
+
+subject_folder_path = nwbfiles_folder_path / f"sub-{subject_id}"
+subject_folder_path.mkdir(exist_ok=True)
+nwbfile_path = subject_folder_path / f"sub-{subject_id}_ses-{eid}_desc-processed_.nwb"
 
 session_converter.run_conversion(
     nwbfile_path=nwbfile_path,
     metadata=metadata,
     overwrite=True,
 )
-# automatic_dandi_upload(
-#     dandiset_id="000409",
-#     nwb_folder_path=nwbfile_path.parent,
-#     cleanup=cleanup,
-# )
-if cleanup:
-    rmtree(cache_folder)
-    rmtree(nwbfile_path.parent)
+
+# if cleanup:
+#     rmtree(cache_folder)
+#     rmtree(nwbfile_path.parent)
+
+check_nwbfile_for_consistency(one=one, nwbfile_path=nwbfile_path)
diff --git a/src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only.py b/src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only.py
index 09388d1..0c0ebf1 100644
--- a/src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only.py
+++ b/src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only.py
@@ -5,13 +5,16 @@
 from ibl_to_nwb.converters import BrainwideMapConverter, IblSpikeGlxConverter
 from ibl_to_nwb.datainterfaces import RawVideoInterface
 
-session_id = "d32876dd-8303-4720-8e7e-20678dc2fd71"
+# session_id = "d32876dd-8303-4720-8e7e-20678dc2fd71"
+session_id = "caa5dddc-9290-4e27-9f5e-575ba3598614"  # a BWM session with dual probe
+
 # Specify the revision of the pose estimation data
 # Setting to 'None' will use whatever the latest released revision is
 revision = None
 
-base_path = Path("E:/IBL")
+# base_path = Path("E:/IBL")
+base_path = Path.home() / "ibl_scratch"  # local directory
 base_path.mkdir(exist_ok=True)
 nwbfiles_folder_path = base_path / "nwbfiles"
 nwbfiles_folder_path.mkdir(exist_ok=True)
@@ -28,7 +31,9 @@
 # Specify the path to the SpikeGLX files on the server but use ONE API for timestamps
 data_interfaces = []
 
-spikeglx_source_folder_path = Path("D:/example_data/ephy_testing_data/spikeglx/Noise4Sam_g0")
+# spikeglx_source_folder_path = Path("D:/example_data/ephy_testing_data/spikeglx/Noise4Sam_g0")
+session_folder = ibl_client.eid2path(session_id)
+spikeglx_source_folder_path = session_folder / "raw_ephys_data"
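+
+# Note: IblSpikeGlxConverter (updated in this PR) now also requires the session
+# eid so it can realign the raw SpikeGLX timestamps via the ONE API.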
-spikeglx_subconverter = IblSpikeGlxConverter(folder_path=spikeglx_source_folder_path, one=ibl_client)
+spikeglx_subconverter = IblSpikeGlxConverter(folder_path=spikeglx_source_folder_path, one=ibl_client, eid=session_id)
 data_interfaces.append(spikeglx_subconverter)
diff --git a/src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only_local_testing.py b/src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only_local_testing.py
new file mode 100644
index 0000000..085b31b
--- /dev/null
+++ b/src/ibl_to_nwb/_scripts/convert_brainwide_map_raw_only_local_testing.py
@@ -0,0 +1,69 @@
+# %%
+from pathlib import Path
+
+from one.api import ONE
+
+from ibl_to_nwb.converters import BrainwideMapConverter, IblSpikeGlxConverter
+from ibl_to_nwb.datainterfaces import RawVideoInterface
+
+# select eid
+# -> run download_data_local first with this eid to set up the local folder structure and one cache
+eid = "caa5dddc-9290-4e27-9f5e-575ba3598614"
+
+# folders
+base_path = Path.home() / "ibl_scratch"
+base_path.mkdir(exist_ok=True)
+nwbfiles_folder_path = base_path / "nwbfiles"
+nwbfiles_folder_path.mkdir(exist_ok=True)
+
+# Initialize IBL (ONE) client to download processed data for this session
+one_cache_folder_path = base_path / "ibl_conversion" / eid / "cache"
+one = ONE(
+    base_url="https://openalyx.internationalbrainlab.org",
+    password="international",
+    silent=True,
+    cache_dir=one_cache_folder_path,
+)
+
+data_interfaces = []
+
+# %% ephys
+session_folder = one.eid2path(eid)
+spikeglx_source_folder_path = session_folder / "raw_ephys_data"
+
+# Specify the path to the SpikeGLX files on the server but use ONE API for timestamps
+spikeglx_subconverter = IblSpikeGlxConverter(folder_path=spikeglx_source_folder_path, one=one, eid=eid)
+data_interfaces.append(spikeglx_subconverter)
+
+# %% video
+# Raw video takes some special handling
+metadata_retrieval = BrainwideMapConverter(one=one, session=eid, data_interfaces=[], verbose=False)
+subject_id = metadata_retrieval.get_metadata()["Subject"]["subject_id"]
+
+pose_estimation_files = one.list_datasets(eid=eid, filename="*.dlc*")
+for pose_estimation_file in pose_estimation_files:
+    camera_name = pose_estimation_file.replace("alf/_ibl_", "").replace(".dlc.pqt", "")
+
+    video_interface = RawVideoInterface(
+        nwbfiles_folder_path=nwbfiles_folder_path,
+        subject_id=subject_id,
+        one=one,
+        session=eid,
+        camera_name=camera_name,
+    )
+    data_interfaces.append(video_interface)
+
+# Run conversion
+session_converter = BrainwideMapConverter(one=one, session=eid, data_interfaces=data_interfaces, verbose=False)
+
+metadata = session_converter.get_metadata()
+subject_id = metadata["Subject"]["subject_id"]
+
+subject_folder_path = nwbfiles_folder_path / f"sub-{subject_id}"
+subject_folder_path.mkdir(exist_ok=True)
+nwbfile_path = subject_folder_path / f"sub-{subject_id}_ses-{eid}_desc-raw_ecephys+raw_video_.nwb"
+
+session_converter.run_conversion(nwbfile_path=nwbfile_path, metadata=metadata, overwrite=True)
+
+# TODO: add some kind of raw-specific check
+# check_raw_nwbfile_for_consistency(one=one, nwbfile_path=nwbfile_path)
diff --git a/src/ibl_to_nwb/_scripts/download_data.py b/src/ibl_to_nwb/_scripts/download_data.py
new file mode 100644
index 0000000..93fd184
--- /dev/null
+++ b/src/ibl_to_nwb/_scripts/download_data.py
@@ -0,0 +1,45 @@
+# %%
+from pathlib import Path
+
+from one.api import ONE
+
+# %%
+eid = "caa5dddc-9290-4e27-9f5e-575ba3598614"  # a BWM eid with dual probe
+
+base_path = Path.home() / "ibl_scratch"  # local directory
+
+# Download behavior and spike sorted data for this eid
+session_path = base_path / "ibl_conversion" / eid
+cache_folder = base_path / "ibl_conversion" / eid / "cache"
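+# The conversion scripts point ONE at this same cache location
+# (base_path / "ibl_conversion" / eid / "cache"), so everything downloaded
+# below is found again at conversion time without re-downloading.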
+session_one = ONE( + base_url="https://openalyx.internationalbrainlab.org", + password="international", + silent=False, + cache_dir=cache_folder, +) + +# %% latest revision +revisions = session_one.list_revisions(eid) +revision = revisions[-1] + +# %% list all datasets +datasets = session_one.list_datasets(eid) + +# %% list all collections +collections = session_one.list_collections(eid) + +# %% +for dataset in datasets: + session_one.load_dataset(eid, dataset, download_only=True) + +# %% downloads all raw ephys data! +collections = session_one.list_collections(eid, collection="raw_ephys_data/*") +for collection in collections: + datasets = session_one.list_datasets(eid, collection=collection) + for dataset in datasets: + session_one.load_dataset(eid, dataset, download_only=True) + +# %% just the video data +datasets = session_one.list_datasets(eid, collection="raw_video_data") +for dataset in datasets: + session_one.load_dataset(eid, dataset, download_only=True) diff --git a/src/ibl_to_nwb/_scripts/post_conversion_check.py b/src/ibl_to_nwb/_scripts/post_conversion_check.py new file mode 100644 index 0000000..bcdad28 --- /dev/null +++ b/src/ibl_to_nwb/_scripts/post_conversion_check.py @@ -0,0 +1,38 @@ +from pathlib import Path + +from one.api import ONE + +from ibl_to_nwb.testing._consistency_checks import check_nwbfile_for_consistency + +nwbfile_path = "" + +# eid = sys.argv[1] +eid = "caa5dddc-9290-4e27-9f5e-575ba3598614" + +# path setup +base_path = Path.home() / "ibl_scratch" +output_folder = base_path / "nwbfiles" +output_folder.mkdir(exist_ok=True, parents=True) + +# Initialize IBL (ONE) client to download processed data for this session +one_cache_folder_path = base_path / "ibl_conversion" / eid / "cache" +one = ONE( + base_url="https://openalyx.internationalbrainlab.org", + password="international", + # mode="local", + mode="remote", + # silent=True, + cache_dir=one_cache_folder_path, +) + +subject_id = one.eid2ref(eid)["subject"] + +subject_folder_path = output_folder / f"sub-{subject_id}" +subject_folder_path.mkdir(exist_ok=True) +# if raw: +# fname = f"sub-{subject_id}_ses-{eid}_desc-raw.nwb" +# else: +fname = f"sub-{subject_id}_ses-{eid}_desc-processed.nwb" + +nwbfile_path = subject_folder_path / fname +check_nwbfile_for_consistency(one=one, nwbfile_path=nwbfile_path) diff --git a/src/ibl_to_nwb/_scripts/post_conversion_check_nwbfile.py b/src/ibl_to_nwb/_scripts/post_conversion_check_nwbfile.py new file mode 100644 index 0000000..c923f46 --- /dev/null +++ b/src/ibl_to_nwb/_scripts/post_conversion_check_nwbfile.py @@ -0,0 +1,33 @@ +# %% +from pathlib import Path + +from one.api import ONE +from pynwb import NWBHDF5IO + +from ibl_to_nwb.testing._consistency_checks import check_nwbfile_for_consistency + +# path setup +nwbfile_path = Path( + "/home/georg/ibl_scratch/nwbfiles/sub-NR_0031/sub-NR_0031_ses-caa5dddc-9290-4e27-9f5e-575ba3598614_desc-processed-debug.nwb" +) +nwbfile = NWBHDF5IO.read_nwb(nwbfile_path) + +eid, revision = nwbfile.session_id.split(":") # this is the hack that has to be removed eventually + +# path setup +base_path = Path.home() / "ibl_scratch" +output_folder = base_path / "nwbfiles" +output_folder.mkdir(exist_ok=True, parents=True) + +# %% +# Initialize IBL (ONE) client to download processed data for this session +one_cache_folder_path = base_path / "ibl_conversion" / eid / "cache" +one = ONE( + base_url="https://openalyx.internationalbrainlab.org", + password="international", + mode="remote", + cache_dir=one_cache_folder_path, +) + 
+check_nwbfile_for_consistency(one=one, nwbfile_path=nwbfile_path)
+# %%
diff --git a/src/ibl_to_nwb/converters/_brainwide_map_converter.py b/src/ibl_to_nwb/converters/_brainwide_map_converter.py
index a0aa4ca..a212a66 100644
--- a/src/ibl_to_nwb/converters/_brainwide_map_converter.py
+++ b/src/ibl_to_nwb/converters/_brainwide_map_converter.py
@@ -2,7 +2,7 @@
 
 from neuroconv.utils import dict_deep_update, load_dict_from_file
 
-from src.ibl_to_nwb.converters._iblconverter import IblConverter
+from ibl_to_nwb.converters._iblconverter import IblConverter
 
 
 class BrainwideMapConverter(IblConverter):
diff --git a/src/ibl_to_nwb/converters/_ibl_spikeglx_converter.py b/src/ibl_to_nwb/converters/_ibl_spikeglx_converter.py
index 3f9cc9a..8e63c7f 100644
--- a/src/ibl_to_nwb/converters/_ibl_spikeglx_converter.py
+++ b/src/ibl_to_nwb/converters/_ibl_spikeglx_converter.py
@@ -1,3 +1,5 @@
+import numpy as np
+from brainbox.io.one import EphysSessionLoader, SpikeSortingLoader
 from neuroconv.converters import SpikeGLXConverterPipe
 from one.api import ONE
 from pydantic import DirectoryPath
@@ -5,36 +7,30 @@
 
 
 class IblSpikeGlxConverter(SpikeGLXConverterPipe):
-
-    def __init__(self, folder_path: DirectoryPath, one: ONE) -> None:
+    def __init__(self, folder_path: DirectoryPath, one: ONE, eid: str) -> None:
         super().__init__(folder_path=folder_path)
         self.one = one
+        self.eid = eid
 
     def temporally_align_data_interfaces(self) -> None:
         """Align the raw data timestamps to the other data streams using the ONE API."""
-        # This is the syntax for aligning the raw timestamps; I cannot test this without the actual data as stored
-        # on your end, so please work with Heberto if there are any problems after uncommenting
-        # probe_to_imec_map = {
-        #     "probe00": 0,
-        #     "probe01": 1,
-        # }
-        #
-        # ephys_session_loader = EphysSessionLoader(one=self.one, eid=session_id)
-        # probes = ephys_session_loader.probes
-        # for probe_name, pid in ephys_session_loader.probes.items():
-        #     spike_sorting_loader = SpikeSortingLoader(pid=pid, one=ibl_client)
-        #
-        #     probe_index = probe_to_imec_map[probe_name]
-        #     for band in ["ap", "lf"]:
-        #         recording_interface = next(
-        #             interface
-        #             for interface in self.data_interface_objects
-        #             if f"imec{probe_index}.{band}" in interface.source_data["file_path"]
-        #         )
-        #
-        #         band_info = spike_sorting_loader.raw_electrophysiology(band=band, stream=True)
-        #         aligned_timestamps = spike_sorting_loader.samples2times(numpy.arange(0, band_info.ns), direction='forward')
-        #         recording_interface.set_aligned_timestamps(aligned_timestamps=aligned_timestamps)
-        pass
+        probe_to_imec_map = {
+            "probe00": 0,
+            "probe01": 1,
+        }
+
+        ephys_session_loader = EphysSessionLoader(one=self.one, eid=self.eid)
+        for probe_name, pid in ephys_session_loader.probes.items():
+            spike_sorting_loader = SpikeSortingLoader(pid=pid, one=self.one)
+
+            probe_index = probe_to_imec_map[probe_name]
+            for band in ["ap", "lf"]:
+                recording_interface = self.data_interface_objects[f"imec{probe_index}.{band}"]
+                sl = spike_sorting_loader.raw_electrophysiology(band=band, stream=True)
+                aligned_timestamps = spike_sorting_loader.samples2times(np.arange(0, sl.ns), direction="forward")
+                recording_interface.set_aligned_timestamps(aligned_timestamps=aligned_timestamps)
 
     def add_to_nwbfile(self, nwbfile: NWBFile, metadata) -> None:
diff --git a/src/ibl_to_nwb/datainterfaces/_brainwide_map_trials.py b/src/ibl_to_nwb/datainterfaces/_brainwide_map_trials.py
index 723b7d5..ebd3b09 100644
--- a/src/ibl_to_nwb/datainterfaces/_brainwide_map_trials.py
+++ b/src/ibl_to_nwb/datainterfaces/_brainwide_map_trials.py
@@ -1,5 +1,7 @@
 from pathlib import Path
+from typing import Optional
 
+from brainbox.io.one import SessionLoader
 from hdmf.common import VectorData
 from neuroconv.basedatainterface import BaseDataInterface
 from neuroconv.utils import load_dict_from_file
@@ -9,9 +11,10 @@
 
 
 class BrainwideMapTrialsInterface(BaseDataInterface):
-    def __init__(self, one: ONE, session: str):
+    def __init__(self, one: ONE, session: str, revision: Optional[str] = None):
         self.one = one
         self.session = session
+        self.revision = one.list_revisions(session)[-1] if revision is None else revision
 
     def get_metadata(self) -> dict:
         metadata = super().get_metadata()
@@ -20,7 +23,9 @@
         return metadata
 
     def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict):
-        trials = self.one.load_object(id=self.session, obj="trials", collection="alf")
+        session_loader = SessionLoader(one=self.one, eid=self.session, revision=self.revision)
+        session_loader.load_trials()
+        trials = session_loader.trials
 
         column_ordering = [
             "choice",
@@ -40,12 +45,12 @@
             VectorData(
                 name="start_time",
                 description="The beginning of the trial.",
-                data=trials["intervals"][:, 0],
+                data=trials["intervals_0"].values,
             ),
             VectorData(
                 name="stop_time",
                 description="The end of the trial.",
-                data=trials["intervals"][:, 1],
+                data=trials["intervals_1"].values,
             ),
         ]
         for ibl_key in column_ordering:
@@ -53,7 +58,7 @@
                 VectorData(
                     name=metadata["Trials"][ibl_key]["name"],
                     description=metadata["Trials"][ibl_key]["description"],
-                    data=trials[ibl_key],
+                    data=trials[ibl_key].values,
                 )
             )
         nwbfile.add_time_intervals(
diff --git a/src/ibl_to_nwb/datainterfaces/_ibl_sorting_extractor.py b/src/ibl_to_nwb/datainterfaces/_ibl_sorting_extractor.py
index 38cbc7e..2f9749c 100644
--- a/src/ibl_to_nwb/datainterfaces/_ibl_sorting_extractor.py
+++ b/src/ibl_to_nwb/datainterfaces/_ibl_sorting_extractor.py
@@ -5,7 +5,11 @@
 import numpy as np
 import pandas as pd
-from pydantic import DirectoryPath
+from brainbox.io.one import SpikeSortingLoader
+from iblatlas.atlas import AllenAtlas
+from iblatlas.regions import BrainRegions
+from neuroconv.utils import get_json_schema_from_method_signature
+from one.api import ONE
 from spikeinterface import BaseSorting, BaseSortingSegment
 
 
@@ -16,18 +20,27 @@
 class IblSortingExtractor(BaseSorting):
     installation_mesg = ""
     name = "iblsorting"
 
-    def __init__(self, session: str, cache_folder: Optional[DirectoryPath] = None):
-        from brainbox.io.one import SpikeSortingLoader
-        from iblatlas.atlas import AllenAtlas
-        from iblatlas.regions import BrainRegions
-        from one.api import ONE
-
-        one = ONE(
-            base_url="https://openalyx.internationalbrainlab.org",
-            password="international",
-            silent=True,
-            cache_dir=cache_folder,
-        )
+    @classmethod
+    def get_source_schema(cls) -> dict:
+        """
+        Infer the JSON schema for the source_data from the method signature (annotation typing).
+
+        Returns
+        -------
+        dict
+            The JSON schema for the source_data.
+ """ + return get_json_schema_from_method_signature(cls, exclude=["source_data", "one"]) + + # def __init__(self, session: str, cache_folder: Optional[DirectoryPath] = None, revision: Optional[str] = None): + def __init__( + self, + one: ONE, + session: str, + revision: Optional[str] = None, + ): + if revision is None: # latest + revision = one.list_revisions(session)[-1] + atlas = AllenAtlas() brain_regions = BrainRegions() @@ -45,7 +58,7 @@ def __init__(self, session: str, cache_folder: Optional[DirectoryPath] = None): for probe_name in probe_names: sorting_loader = SpikeSortingLoader(eid=session, one=one, pname=probe_name, atlas=atlas) sorting_loaders.update({probe_name: sorting_loader}) - spikes, clusters, channels = sorting_loader.load_spike_sorting() + spikes, clusters, channels = sorting_loader.load_spike_sorting(revision=revision) # cluster_ids.extend(list(np.array(clusters["metrics"]["cluster_id"]) + unit_id_per_probe_shift)) number_of_units = len(np.unique(spikes["clusters"])) cluster_ids.extend(list(np.arange(number_of_units).astype("int32") + unit_id_per_probe_shift)) diff --git a/src/ibl_to_nwb/datainterfaces/_ibl_sorting_interface.py b/src/ibl_to_nwb/datainterfaces/_ibl_sorting_interface.py index 18c478f..296b5b0 100644 --- a/src/ibl_to_nwb/datainterfaces/_ibl_sorting_interface.py +++ b/src/ibl_to_nwb/datainterfaces/_ibl_sorting_interface.py @@ -1,11 +1,13 @@ """The interface for loading spike sorted data via ONE access.""" from pathlib import Path +from typing import Optional from neuroconv.datainterfaces.ecephys.basesortingextractorinterface import ( BaseSortingExtractorInterface, ) from neuroconv.utils import load_dict_from_file +from one.api import ONE from ._ibl_sorting_extractor import IblSortingExtractor @@ -13,6 +15,14 @@ class IblSortingInterface(BaseSortingExtractorInterface): Extractor = IblSortingExtractor + def __init__( + self, + session: str, + one: ONE, + revision: Optional[str] = None, + ): + super().__init__(session=session, one=one, revision=revision) + def get_metadata(self) -> dict: metadata = super().get_metadata() diff --git a/src/ibl_to_nwb/datainterfaces/_ibl_streaming_interface.py b/src/ibl_to_nwb/datainterfaces/_ibl_streaming_interface.py index f8aac0b..5064633 100644 --- a/src/ibl_to_nwb/datainterfaces/_ibl_streaming_interface.py +++ b/src/ibl_to_nwb/datainterfaces/_ibl_streaming_interface.py @@ -81,7 +81,8 @@ def __init__(self, **kwargs): self.recording_extractor.set_property(key="ibl_y", values=ibl_coords[:, 1]) self.recording_extractor.set_property(key="ibl_z", values=ibl_coords[:, 2]) self.recording_extractor.set_property( # SpikeInterface refers to this as 'brain_area' - key="brain_area", values=list(channels["acronym"]) # NeuroConv remaps to 'location', a required field + key="brain_area", + values=list(channels["acronym"]), # NeuroConv remaps to 'location', a required field ) # Acronyms are symmetric, do not differentiate hemisphere self.recording_extractor.set_property( key="beryl_location", diff --git a/src/ibl_to_nwb/datainterfaces/_lick_times.py b/src/ibl_to_nwb/datainterfaces/_lick_times.py index 76f9a9e..e38e310 100644 --- a/src/ibl_to_nwb/datainterfaces/_lick_times.py +++ b/src/ibl_to_nwb/datainterfaces/_lick_times.py @@ -1,3 +1,5 @@ +from typing import Optional + from hdmf.common import VectorData from neuroconv.basedatainterface import BaseDataInterface from neuroconv.tools.nwb_helpers import get_module @@ -7,12 +9,14 @@ class LickInterface(BaseDataInterface): - def __init__(self, one: ONE, session: str): + def __init__(self, one: 
ONE, session: str, revision: Optional[str] = None): self.one = one self.session = session + self.revision = one.list_revisions(session)[-1] if revision is None else revision def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): - licks = self.one.load_object(id=self.session, obj="licks", collection="alf") + # licks = self.one.load_object(id=self.session, obj="licks", collection="alf") + licks = self.one.load_dataset(self.session, "licks.times", collection="alf", revision=self.revision) lick_events_table = DynamicTable( name="LickTimes", @@ -24,7 +28,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict): VectorData( name="lick_time", description="Time stamps of licks as detected from tongue dlc traces", - data=licks["times"], + data=licks, ) ], ) diff --git a/src/ibl_to_nwb/datainterfaces/_pose_estimation.py b/src/ibl_to_nwb/datainterfaces/_pose_estimation.py index abf30d3..4686199 100644 --- a/src/ibl_to_nwb/datainterfaces/_pose_estimation.py +++ b/src/ibl_to_nwb/datainterfaces/_pose_estimation.py @@ -1,4 +1,3 @@ -from datetime import datetime from typing import Optional import numpy as np @@ -37,17 +36,7 @@ def __init__( self.revision = revision if self.revision is None: - session_files = self.one.list_datasets(eid=self.session, filename=f"*{self.camera_name}.dlc*") - revision_datetime_format = "%Y-%m-%d" - revisions = [ - datetime.strptime(session_file.split("#")[1], revision_datetime_format) - for session_file in session_files - if "#" in session_file - ] - - if any(revisions): - most_recent = max(revisions) - self.revision = most_recent.strftime("%Y-%m-%d") + self.revision = one.list_revisions(session)[-1] def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None: camera_data = self.one.load_object( diff --git a/src/ibl_to_nwb/datainterfaces/_pupil_tracking.py b/src/ibl_to_nwb/datainterfaces/_pupil_tracking.py index c307ef6..c0c4972 100644 --- a/src/ibl_to_nwb/datainterfaces/_pupil_tracking.py +++ b/src/ibl_to_nwb/datainterfaces/_pupil_tracking.py @@ -1,6 +1,7 @@ """Data Interface for the pupil tracking.""" from pathlib import Path +from typing import Optional import numpy as np from neuroconv.basedatainterface import BaseDataInterface @@ -12,10 +13,11 @@ class PupilTrackingInterface(BaseDataInterface): - def __init__(self, one: ONE, session: str, camera_name: str): + def __init__(self, one: ONE, session: str, camera_name: str, revision: Optional[str] = None): self.one = one self.session = session self.camera_name = camera_name + self.revision = one.list_revisions(session)[-1] if revision is None else revision def get_metadata(self) -> dict: metadata = super().get_metadata() @@ -28,7 +30,9 @@ def get_metadata(self) -> dict: def add_to_nwbfile(self, nwbfile, metadata: dict): left_or_right = self.camera_name[:5].rstrip("C") - camera_data = self.one.load_object(id=self.session, obj=self.camera_name, collection="alf") + camera_data = self.one.load_object( + id=self.session, obj=self.camera_name, collection="alf", revision=self.revision + ) pupil_time_series = list() for ibl_key in ["pupilDiameter_raw", "pupilDiameter_smooth"]: diff --git a/src/ibl_to_nwb/datainterfaces/_roi_motion_energy.py b/src/ibl_to_nwb/datainterfaces/_roi_motion_energy.py index 4218647..13c5c22 100644 --- a/src/ibl_to_nwb/datainterfaces/_roi_motion_energy.py +++ b/src/ibl_to_nwb/datainterfaces/_roi_motion_energy.py @@ -1,5 +1,7 @@ """Data Interface for the special data type of ROI Motion Energy.""" +from typing import Optional + from neuroconv.basedatainterface import BaseDataInterface from 
neuroconv.tools.nwb_helpers import get_module
 from one.api import ONE
 
@@ -7,15 +9,18 @@
 
 
 class RoiMotionEnergyInterface(BaseDataInterface):
-    def __init__(self, one: ONE, session: str, camera_name: str):
+    def __init__(self, one: ONE, session: str, camera_name: str, revision: Optional[str] = None):
         self.one = one
         self.session = session
         self.camera_name = camera_name
+        self.revision = one.list_revisions(session)[-1] if revision is None else revision
 
     def add_to_nwbfile(self, nwbfile, metadata: dict):
         left_right_or_body = self.camera_name[:5].rstrip("C")
 
-        camera_data = self.one.load_object(id=self.session, obj=self.camera_name, collection="alf")
+        camera_data = self.one.load_object(
+            id=self.session, obj=self.camera_name, collection="alf", revision=self.revision
+        )
         motion_energy_video_region = self.one.load_object(
             id=self.session, obj=f"{left_right_or_body}ROIMotionEnergy", collection="alf"
         )
diff --git a/src/ibl_to_nwb/datainterfaces/_wheel_movement.py b/src/ibl_to_nwb/datainterfaces/_wheel_movement.py
index 234b307..cefa455 100644
--- a/src/ibl_to_nwb/datainterfaces/_wheel_movement.py
+++ b/src/ibl_to_nwb/datainterfaces/_wheel_movement.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+from typing import Optional
 
 from brainbox.behavior import wheel as wheel_methods
 from neuroconv.basedatainterface import BaseDataInterface
@@ -11,9 +12,10 @@
 
 
 class WheelInterface(BaseDataInterface):
-    def __init__(self, one: ONE, session: str):
+    def __init__(self, one: ONE, session: str, revision: Optional[str] = None):
         self.one = one
         self.session = session
+        self.revision = one.list_revisions(session)[-1] if revision is None else revision
 
     def get_metadata(self) -> dict:
         metadata = super().get_metadata()
@@ -23,8 +25,8 @@
         return metadata
 
     def add_to_nwbfile(self, nwbfile, metadata: dict):
-        wheel_moves = self.one.load_object(id=self.session, obj="wheelMoves", collection="alf")
-        wheel = self.one.load_object(id=self.session, obj="wheel", collection="alf")
+        wheel_moves = self.one.load_object(id=self.session, obj="wheelMoves", collection="alf", revision=self.revision)
+        wheel = self.one.load_object(id=self.session, obj="wheel", collection="alf", revision=self.revision)
 
         # Estimate velocity and acceleration
         interpolation_frequency = 1000.0  # Hz
@@ -57,7 +59,7 @@
                 description=metadata["WheelPosition"]["description"],
                 data=wheel["position"],
                 timestamps=wheel["timestamps"],
-                unit="rad",
+                unit="radians",
                 reference_frame="Initial angle at start time is zero.
Counter-clockwise is positive.",
             )
         )
diff --git a/src/ibl_to_nwb/testing/__init__.py b/src/ibl_to_nwb/testing/__init__.py
index 516db43..4b7dc3f 100644
--- a/src/ibl_to_nwb/testing/__init__.py
+++ b/src/ibl_to_nwb/testing/__init__.py
@@ -1 +1 @@
-from ._consistency_checks import check_written_nwbfile_for_consistency
+from ._consistency_checks import check_nwbfile_for_consistency, check_raw_nwbfile_for_consistency
diff --git a/src/ibl_to_nwb/testing/_consistency_checks.py b/src/ibl_to_nwb/testing/_consistency_checks.py
index ec5484a..247fd4f 100644
--- a/src/ibl_to_nwb/testing/_consistency_checks.py
+++ b/src/ibl_to_nwb/testing/_consistency_checks.py
@@ -1,32 +1,39 @@
 from pathlib import Path
 
 import numpy as np
+from brainbox.io.one import SessionLoader, SpikeSortingLoader
 from numpy.testing import assert_array_equal, assert_array_less
 from one.api import ONE
 from pandas.testing import assert_frame_equal
 from pynwb import NWBHDF5IO, NWBFile
 
 
-def check_written_nwbfile_for_consistency(*, one: ONE, nwbfile_path: Path):
-    """
-    Check the processed-only NWB file for consistency with the equivalent calls to the ONE API.
+def check_nwbfile_for_consistency(*, one: ONE, nwbfile_path: Path):
+    with NWBHDF5IO(path=nwbfile_path, mode="r") as io:
+        nwbfile = io.read()
+
+        # run all consistency checks
+        _check_wheel_data(nwbfile=nwbfile, one=one)
+        _check_lick_data(nwbfile=nwbfile, one=one)
+        _check_roi_motion_energy_data(nwbfile=nwbfile, one=one)
+        _check_pose_estimation_data(nwbfile=nwbfile, one=one)
+        _check_trials_data(nwbfile=nwbfile, one=one)
+        _check_pupil_tracking_data(nwbfile=nwbfile, one=one)
+        _check_spike_sorting_data(nwbfile=nwbfile, one=one)
 
-    Parameters
-    ----------
-    one : ONE
-        Initialized ONE client.
-    nwbfile_path : Path
-        Path to the NWB file.
-    """
+
+def check_raw_nwbfile_for_consistency(*, one: ONE, nwbfile_path: Path):
     with NWBHDF5IO(path=nwbfile_path, mode="r") as io:
         nwbfile = io.read()
-        eid = nwbfile.session_id
+        eid, revision = nwbfile.session_id.split(":")
 
-        _check_wheel_data(eid=eid, nwbfile=nwbfile, one=one)
-        # TODO: fill in the rest of the routed calls
+        # run checks for raw files (the ephys check runs once per probe)
+        for pname in one.eid2pid(eid)[1]:
+            _check_raw_ephys_data(one=one, nwbfile=nwbfile, pname=pname)
+        _check_raw_video_data(one=one, nwbfile=nwbfile, nwbfile_path=nwbfile_path)
 
 
-def _check_wheel_data(*, eid: str, one: ONE, nwbfile: NWBFile, revision: str = None):
+def _check_wheel_data(*, one: ONE, nwbfile: NWBFile):
+    eid, revision = nwbfile.session_id.split(":")
     processing_module = nwbfile.processing["wheel"]
     wheel_position_series = processing_module.data_interfaces["CompassDirection"].spatial_series["WheelPositionSeries"]
     wheel_movement_table = processing_module.data_interfaces["WheelMovementIntervals"][:]
@@ -52,17 +59,20 @@
     assert_array_equal(x=data_from_ONE, y=data_from_NWB)
 
 
-def _check_lick_data(*, eid: str, one: ONE, nwbfile: NWBFile):
+def _check_lick_data(*, one: ONE, nwbfile: NWBFile):
+    eid, revision = nwbfile.session_id.split(":")
+
     processing_module = nwbfile.processing["camera"]
     lick_times_table = processing_module.data_interfaces["LickTimes"][:]
 
     data_from_NWB = lick_times_table["lick_time"].values
-    data_from_ONE = one.load_dataset(eid, "licks.times")
+    data_from_ONE = one.load_dataset(eid, "licks.times", revision=revision)
    assert_array_equal(x=data_from_ONE, y=data_from_NWB)
 
 
-def _check_roi_motion_energy_data(*, eid: str, one: ONE, nwbfile: NWBFile):
+def _check_roi_motion_energy_data(*, one: ONE, nwbfile: NWBFile):
     processing_module = nwbfile.processing["camera"]
+    eid, revision = nwbfile.session_id.split(":")
 
     camera_views = ["body", "left", "right"]
     for view in camera_views:
@@ -70,17 +80,18 @@
 
         # data
         data_from_NWB = camera_motion_energy.data[:]
-        data_from_ONE = one.load_dataset(eid, f"{view}Camera.ROIMotionEnergy", collection="alf")
+        data_from_ONE = one.load_dataset(eid, f"{view}Camera.ROIMotionEnergy", collection="alf", revision=revision)
         assert_array_equal(x=data_from_ONE, y=data_from_NWB)
 
         # timestamps
         data_from_NWB = camera_motion_energy.timestamps[:]
-        data_from_ONE = one.load_dataset(eid, f"_ibl_{view}Camera.times", collection="alf")
+        data_from_ONE = one.load_dataset(eid, f"_ibl_{view}Camera.times", collection="alf", revision=revision)
         assert_array_equal(x=data_from_ONE, y=data_from_NWB)
 
 
-def _check_pose_estimation_data(*, eid: str, one: ONE, nwbfile: NWBFile, revision: str = None):
+def _check_pose_estimation_data(*, one: ONE, nwbfile: NWBFile):
     processing_module = nwbfile.processing["camera"]
+    eid, revision = nwbfile.session_id.split(":")
 
     camera_views = ["body", "left", "right"]
     for view in camera_views:
@@ -90,32 +101,42 @@
         for node in nodes:
             # x
             data_from_NWB = pose_estimation_container.pose_estimation_series[node].data[:][:, 0]
-            data_from_ONE = one.load_dataset(eid, f"_ibl_{view}Camera.dlc.pqt", collection="alf")[f"{node}_x"].values
+            data_from_ONE = one.load_dataset(eid, f"_ibl_{view}Camera.dlc.pqt", collection="alf", revision=revision)[
+                f"{node}_x"
+            ].values
             assert_array_equal(x=data_from_ONE, y=data_from_NWB)
 
             # y
             data_from_NWB = pose_estimation_container.pose_estimation_series[node].data[:][:, 1]
- data_from_ONE = one.load_dataset(eid, f"_ibl_{view}Camera.dlc.pqt", collection="alf")[f"{node}_y"].values + data_from_ONE = one.load_dataset(eid, f"_ibl_{view}Camera.dlc.pqt", collection="alf", revision=revision)[ + f"{node}_y" + ].values assert_array_equal(x=data_from_ONE, y=data_from_NWB) # confidence data_from_NWB = pose_estimation_container.pose_estimation_series[node].confidence[:] - data_from_ONE = one.load_dataset(eid, f"_ibl_{view}Camera.dlc.pqt", collection="alf")[ + data_from_ONE = one.load_dataset(eid, f"_ibl_{view}Camera.dlc.pqt", collection="alf", revision=revision)[ f"{node}_likelihood" ].values assert_array_equal(x=data_from_ONE, y=data_from_NWB) # timestamps data_from_NWB = pose_estimation_container.pose_estimation_series[node].timestamps[:] - data_from_ONE = one.load_dataset(eid, f"_ibl_{view}Camera.times", collection="alf") + data_from_ONE = one.load_dataset(eid, f"_ibl_{view}Camera.times", collection="alf", revision=revision) assert_array_equal(x=data_from_ONE, y=data_from_NWB) -def _check_trials_data(*, eid: str, one: ONE, nwbfile: NWBFile): - data_from_NWB = nwbfile.trials[:] - data_from_ONE = one.load_dataset(eid, "_ibl_trials.table", collection="alf") - data_from_ONE["stimOff_times"] = one.load_dataset(eid, "_ibl_trials.stimOff_times", collection="alf") - data_from_ONE.index.name = "id" +def _check_trials_data(*, one: ONE, nwbfile: NWBFile): + eid, revision = nwbfile.session_id.split(":") + + data_from_NWB = nwbfile.trials[:].reset_index(drop=True) + session_loader = SessionLoader(one=one, eid=eid, revision=revision) + session_loader.load_trials() + data_from_ONE = session_loader.trials.reset_index(drop=True) + + # data_from_ONE = one.load_dataset(eid, "_ibl_trials.table", collection="alf") + # data_from_ONE["stimOff_times"] = one.load_dataset(eid, "_ibl_trials.stimOff_times", collection="alf") + # data_from_ONE.index.name = "id" naming_map = { "start_time": "intervals_0", @@ -141,7 +162,9 @@ def _check_trials_data(*, eid: str, one: ONE, nwbfile: NWBFile): assert_frame_equal(left=data_from_NWB, right=data_from_ONE) -def _check_pupil_tracking_data(*, eid: str, one: ONE, nwbfile: NWBFile): +def _check_pupil_tracking_data(*, one: ONE, nwbfile: NWBFile): + eid, revision = nwbfile.session_id.split(":") + processing_module = nwbfile.processing["camera"] camera_views = ["left", "right"] @@ -150,27 +173,34 @@ def _check_pupil_tracking_data(*, eid: str, one: ONE, nwbfile: NWBFile): # raw data_from_NWB = pupil_tracking_container.time_series[f"{view.capitalize()}RawPupilDiameter"].data[:] - data_from_ONE = one.load_dataset(eid, f"_ibl_{view}Camera.features.pqt", collection="alf")[ + data_from_ONE = one.load_dataset(eid, f"_ibl_{view}Camera.features.pqt", collection="alf", revision=revision)[ "pupilDiameter_raw" ].values assert_array_equal(x=data_from_ONE, y=data_from_NWB) # smooth data_from_NWB = pupil_tracking_container.time_series[f"{view.capitalize()}SmoothedPupilDiameter"].data[:] - data_from_ONE = one.load_dataset(eid, f"_ibl_{view}Camera.features.pqt", collection="alf")[ + data_from_ONE = one.load_dataset(eid, f"_ibl_{view}Camera.features.pqt", collection="alf", revision=revision)[ "pupilDiameter_smooth" ].values assert_array_equal(x=data_from_ONE, y=data_from_NWB) -def _check_spike_sorting_data(*, eid: str, one: ONE, nwbfile: NWBFile, revision: str = None): +def _check_spike_sorting_data(*, one: ONE, nwbfile: NWBFile): + eid, revision = nwbfile.session_id.split(":") + + pids, probe_names = one.eid2pid(eid) + pids = dict(zip(probe_names, pids)) + units_table = 
nwbfile.units[:]
 
-    probe_names = units_table["probe_name"].unique()
+    # probe_names = units_table["probe_name"].unique()
 
-    spike_times = {}
-    spike_clusters = {}
-    cluster_uuids = {}
+    # spike_times = {}
+    # spike_clusters = {}
+    # cluster_uuids = {}
+    spikes = {}
+    clusters = {}
 
     # for fast spike extraction
     def get_spikes_for_cluster(spike_clusters, spike_times, cluster):
@@ -178,34 +208,115 @@
         start_ix, stop_ix = np.searchsorted(spike_clusters, [cluster, cluster + 1])
         return np.sort(spike_times[start_ix:stop_ix])
 
-    # get and prep data once
+    # get and prep data
     for probe_name in probe_names:
-
-        # include revision TODO FIXME this will likely change - check back in with Miles
-        if revision is not None:
-            collection = f"alf/{probe_name}/pykilosort/{revision}"
-        else:
-            collection = f"alf/{probe_name}/pykilosort"
-
-        spike_times[probe_name] = one.load_dataset(eid, "spikes.times", collection=collection)
-        spike_clusters[probe_name] = one.load_dataset(eid, "spikes.clusters", collection=collection)
-        cluster_uuids[probe_name] = one.load_dataset(eid, "clusters.uuids", collection=collection)
+        spike_sorting_loader = SpikeSortingLoader(pid=pids[probe_name], one=one)
+        spikes_, clusters_, _ = spike_sorting_loader.load_spike_sorting(revision=revision)
+        spikes[probe_name] = spikes_
+        clusters[probe_name] = clusters_
 
         # pre-sort for fast access
-        sort_ix = np.argsort(spike_clusters[probe_name])
-        spike_clusters[probe_name] = spike_clusters[probe_name][sort_ix]
-        spike_times[probe_name] = spike_times[probe_name][sort_ix]
+        sort_ix = np.argsort(spikes[probe_name]["clusters"])
+        spikes[probe_name]["times"] = spikes[probe_name]["times"][sort_ix]
+        spikes[probe_name]["clusters"] = spikes[probe_name]["clusters"][sort_ix]
 
     for ix in units_table.index:
-        probe_name = units_table.loc[ix, "probe_name"]
-        uuid = units_table.loc[ix, "uuid"]
+        probe_name, uuid = units_table.loc[ix, ["probe_name", "cluster_uuid"]]
+        assert uuid in clusters[probe_name]["uuids"].values
         spike_times_from_NWB = units_table.loc[ix, "spike_times"]
 
-        cluster_id = np.where(cluster_uuids[probe_name] == uuid)[0][0]
-        spike_times_from_ONE = get_spikes_for_cluster(spike_clusters[probe_name], spike_times[probe_name], cluster_id)
+        cluster_id = np.where(clusters[probe_name]["uuids"] == uuid)[0][0]
+        spike_times_from_ONE = get_spikes_for_cluster(
+            spikes[probe_name]["clusters"], spikes[probe_name]["times"], cluster_id
+        )
 
         # more verbose but slower for more than ~20 checks
         # spike_times_from_ONE = spike_times[probe_name][spike_clusters[probe_name] == cluster_id]
 
         # testing
         assert_array_less(np.max((spike_times_from_ONE - spike_times_from_NWB) * 30000), 1)
+
+
+def _check_raw_ephys_data(*, one: ONE, nwbfile: NWBFile, pname: str = None, band: str = "ap"):
+    eid, revision = nwbfile.session_id.split(":")
+
+    # data_one
+    pids, pnames_one = one.eid2pid(eid)
+    pidname_map = dict(zip(pnames_one, pids))
+    pid = pidname_map[pname]
+    spike_sorting_loader = SpikeSortingLoader(pid=pid, one=one)
+    sglx_streamer = spike_sorting_loader.raw_electrophysiology(band=band, stream=True)
+    data_one = sglx_streamer._raw
+
+    pname_to_imec = {
+        "probe00": "Imec0",
+        "probe01": "Imec1",
+    }
+    imec_to_pname = dict(zip(pname_to_imec.values(), pname_to_imec.keys()))
+    imecs = [key.split(band.upper())[1] for key in list(nwbfile.acquisition.keys()) if band.upper() in key]
+    pnames_nwb = [imec_to_pname[imec] for imec in imecs]
+
+    assert set(pnames_one) == set(pnames_nwb)
+
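+    # Key-name example implied by the mapping above (illustrative): for
+    # pname="probe00" and band="ap", the raw traces are expected under
+    # nwbfile.acquisition["ElectricalSeriesAPImec0"].
+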
+    # nwb ephys data
+    imec = pname_to_imec[pname]
+    data_nwb = nwbfile.acquisition[f"ElectricalSeries{band.upper()}{imec}"].data
+
+    # compare number of samples in both
+    n_samples_one = data_one.shape[0]
+    n_samples_nwb = data_nwb.shape[0]
+
+    assert n_samples_nwb == n_samples_one
+
+    # draw a random set of samples and check if they are equal in value
+    n_samples, n_channels = data_nwb.shape
+
+    ix = np.column_stack(
+        [
+            np.random.randint(n_samples, size=10),
+            np.random.randint(n_channels, size=10),
+        ]
+    )
+
+    # tuple-index so this works for both numpy arrays and h5py datasets
+    samples_nwb = np.array([data_nwb[tuple(i)] for i in ix])
+    samples_one = np.array([data_one[tuple(i)] for i in ix])
+    np.testing.assert_array_equal(samples_nwb, samples_one)
+
+    # check the time stamps
+    nwb_timestamps = nwbfile.acquisition[f"ElectricalSeries{band.upper()}{imec}"].timestamps[:]
+
+    # from brainbox.io
+    brainbox_timestamps = spike_sorting_loader.samples2times(np.arange(0, sglx_streamer.ns), direction="forward")
+    np.testing.assert_array_equal(nwb_timestamps, brainbox_timestamps)
+
+
+def _check_raw_video_data(*, one: ONE, nwbfile: NWBFile, nwbfile_path: Path):
+    eid, revision = nwbfile.session_id.split(":")
+
+    # timestamps
+    datasets = one.list_datasets(eid, "*Camera.times*", collection="alf")
+    cameras = [key for key in nwbfile.acquisition.keys() if key.endswith("Camera")]
+    for camera in cameras:
+        timestamps_nwb = nwbfile.acquisition[camera].timestamps[:]
+
+        # exactly one dataset is expected to match each camera
+        (dataset,) = [dataset for dataset in datasets if camera.split("OriginalVideo")[1].lower() in dataset.lower()]
+        timestamps_one = one.load_dataset(eid, dataset)
+        np.testing.assert_array_equal(timestamps_nwb, timestamps_one)
+
+    # values (the first 100 bytes)
+    datasets = one.list_datasets(eid, collection="raw_video_data")
+    cameras = [key for key in nwbfile.acquisition.keys() if key.endswith("Camera")]
+
+    for camera in cameras:
+        cam = camera.split("OriginalVideo")[1].lower()
+        (dataset,) = [dataset for dataset in datasets if cam in dataset.lower()]
+        one_video_path = one.load_dataset(eid, dataset)
+        with open(one_video_path, "rb") as fH:
+            one_video_bytes = fH.read(100)
+
+        nwb_video_path = nwbfile_path.parent / Path(nwbfile.acquisition[camera].external_file[:][0])
+        with open(nwb_video_path, "rb") as fH:
+            nwb_video_bytes = fH.read(100)
+
+        assert one_video_bytes == nwb_video_bytes
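+
+
+# Minimal usage sketch (assumptions: the NWB file was written by one of the
+# conversion scripts above, so its session_id carries the "<eid>:<revision>"
+# tag, and the ONE cache was populated beforehand, e.g. by download_data.py):
+#
+#     from pathlib import Path
+#     from one.api import ONE
+#     from ibl_to_nwb.testing import check_nwbfile_for_consistency
+#
+#     nwbfile_path = Path.home() / "ibl_scratch" / "nwbfiles" / "sub-NR_0031" / (
+#         "sub-NR_0031_ses-caa5dddc-9290-4e27-9f5e-575ba3598614_desc-processed.nwb"
+#     )
+#     one = ONE(
+#         base_url="https://openalyx.internationalbrainlab.org",
+#         password="international",
+#         mode="remote",
+#     )
+#     check_nwbfile_for_consistency(one=one, nwbfile_path=nwbfile_path)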