Skip to content

Commit

Permalink
Loading data from Big Data Viewer (#3)
Browse files Browse the repository at this point in the history
* Add create_pyramid_bdv_h5 function to create downsample pyramid

* Added function for parsing metadata from mesospim

* Added function to check the mesoSPIM directory

* Added function to extract slice attribute from big data viewer xml files

* Basic tile class

* Basic ImageMosaic class

* Formatting in file_utils.py

* Added a data_for_napari method

* Added napari.yaml

* Basic widget with the ability to load big data view h5 files

* Updated pyproject.toml

* Added method for checking and loading mesospim directory

* Changed header text

* Deleted placeholder test file

* Added tests for tile.py

* Added tests for image_mosaic.py

* Added tests for stitching_widget.py

* Add qt setup for linux in test_and_deploy.yml

* Test only calling make_napari_viewer_proxy

* Remove all tests with make_napari_viewer_proxy

* Just the widget init test

* Copied pyproject.toml from working branch

* Added the entire test_stitching_widget.py file from the working branch

* Added tox.ini configs from the refactor branch to the legacy_tox_ini section in pyproject.toml

* Reverted back to tests relating to functionality of this PR

* Apply suggestions from code review

* Only add 1 to the shape if the dimension is of odd shape

* Added docstrings to the stitching widget

* Added comments for test_image_mosaic.py to describe file setup

* Add default_directory as attribute in stitching widget
  • Loading branch information
IgorTatarnikov authored May 7, 2024
1 parent 493594e commit 68f0d78
Show file tree
Hide file tree
Showing 13 changed files with 1,183 additions and 51 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/test_and_deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,16 @@ jobs:
python-version: "3.10"

steps:
# these libraries enable testing on Qt on linux
- uses: pyvista/setup-headless-display-action@v2
with:
qt: true

# Run tests
- uses: neuroinformatics-unit/actions/test@v2
with:
python-version: ${{ matrix.python-version }}
secret-codecov-token: ${{ secrets.CODECOV_TOKEN }}

build_sdist_wheels:
name: Build source distribution
Expand Down
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
include LICENSE
include README.md
include brainglobe_stitch/napari.yaml
exclude .pre-commit-config.yaml

recursive-include brainglobe_stitch *.py
Expand Down
240 changes: 192 additions & 48 deletions brainglobe_stitch/file_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union

import h5py
import numpy as np
import numpy.typing as npt

HEADERS = [
"[POSITION]",
Expand All @@ -8,70 +14,135 @@
]


def write_big_stitcher_tile_config(meta_file_name: Path) -> List[Dict]:
    """
    Write a BigStitcher tile configuration file next to the given mesoSPIM
    metadata file. Tile positions (in pixels) are written relative to the
    first tile. The parsed metadata is returned for reuse by the caller.

    Parameters
    ----------
    meta_file_name: Path
        The path to the h5_meta.txt file.

    Returns
    -------
    List[Dict]
        A list of dictionaries containing the metadata for each tile
        (one entry per tile/channel combination).
    """
    tile_metadata = parse_mesospim_metadata(meta_file_name)

    # Strip the trailing ".h5_meta.txt" (12 characters) so the config file
    # sits next to the metadata file with a "_tile_config.txt" suffix.
    output_file = str(meta_file_name)[:-12] + "_tile_config.txt"

    # Metadata entries are grouped per tile with one entry per channel;
    # the channel count is how many entries pass before the first laser
    # line repeats.
    first_channel = tile_metadata[0]["Laser"]
    num_channels = 1

    for metadata in tile_metadata[1:]:
        if metadata["Laser"] == first_channel:
            break
        else:
            num_channels += 1

    num_tiles = len(tile_metadata) // num_channels
    tile_xy_locations = []

    # One x/y stage position per tile, converted from microns to pixels.
    for i in range(0, len(tile_metadata), num_channels):
        curr_tile_dict = tile_metadata[i]

        x = round(curr_tile_dict["x_pos"] / curr_tile_dict["Pixelsize in um"])
        y = round(curr_tile_dict["y_pos"] / curr_tile_dict["Pixelsize in um"])

        tile_xy_locations.append((x, y))

    # Positions relative to the first tile.
    # NOTE(review): abs() discards the sign of the offset — confirm that
    # BigStitcher expects magnitudes here rather than signed offsets.
    relative_locations = [(0, 0)]

    for abs_tuple in tile_xy_locations[1:]:
        rel_tuple = (
            abs(abs_tuple[0] - tile_xy_locations[0][0]),
            abs(abs_tuple[1] - tile_xy_locations[0][1]),
        )
        relative_locations.append(rel_tuple)

    # One config line per view; views that share a tile share a position,
    # hence the modulo indexing into the per-tile locations.
    with open(output_file, "w") as f:
        f.write("dim=3\n")
        for i in range(len(tile_metadata)):
            f.write(
                f"{i};;"
                f"({relative_locations[i % num_tiles][0]},"
                f"{relative_locations[i % num_tiles][1]},0)\n"
            )

    return tile_metadata


def parse_mesospim_metadata(meta_file_name: Path):
def create_pyramid_bdv_h5(
    input_file: Path,
    yield_progress: bool = False,
    resolutions_array: Optional[npt.NDArray] = None,
    subdivisions_array: Optional[npt.NDArray] = None,
):
    """
    Create a resolution pyramid for a Big Data Viewer HDF5 file. The function
    assumes no pyramid exists and creates a new one in place. By default,
    the function creates a 5 level pyramid with downsampling factors of 1, 2,
    4, 8, and 16 in x and y, with no downsampling in z. Deletes the old
    resolutions and subdivisions datasets and creates new ones.

    NOTE: because the body contains ``yield``, this function is a generator
    and performs no work until iterated — even with ``yield_progress=False``
    the caller must exhaust the returned generator for the pyramid to be
    written.

    Parameters
    ----------
    input_file: Path
        The path to the input HDF5 file.
    yield_progress: bool, optional
        Whether to yield progress. If True, the function will yield the
        progress as a percentage.
    resolutions_array: npt.NDArray, optional
        The downsampling factors to use for each resolution level.
        This is a 2D array where each row represents a resolution level and
        the columns represent the downsampling factors for x, y, and z.
        Defaults to a 5 level 1/2/4/8/16 pyramid in x and y.
    subdivisions_array: npt.NDArray, optional
        The size of the blocks at each resolution level.
        This is a 2D array where each row represents a resolution level and
        the columns represent the size of the blocks for x, y, and z.
        Defaults to (32, 32, 16) blocks at every level.
    """
    # Build the defaults inside the function instead of using mutable
    # (np.array) default arguments, which are shared across calls.
    if resolutions_array is None:
        resolutions_array = np.array(
            [[1, 1, 1], [2, 2, 1], [4, 4, 1], [8, 8, 1], [16, 16, 1]]
        )
    if subdivisions_array is None:
        subdivisions_array = np.array(
            [[32, 32, 16], [32, 32, 16], [32, 32, 16], [32, 32, 16], [32, 32, 16]]
        )

    with h5py.File(input_file, "r+") as f:
        data_group = f["t00000"]
        num_done = 0
        num_slices = len(data_group)

        for curr_slice in data_group:
            # Delete the old resolutions and subdivisions datasets
            del f[f"{curr_slice}/resolutions"]
            del f[f"{curr_slice}/subdivisions"]
            f[f"{curr_slice}/resolutions"] = resolutions_array
            f[f"{curr_slice}/subdivisions"] = subdivisions_array

            grp: h5py.Group = f[f"t00000/{curr_slice}"]
            # Each level is built by striding over the previous level.
            for i in range(1, resolutions_array.shape[0]):
                downsampling_factors = (
                    resolutions_array[i] // resolutions_array[i - 1]
                )
                prev_resolution = grp[f"{i - 1}/cells"]

                grp.require_dataset(
                    f"{i}/cells",
                    dtype=prev_resolution.dtype,
                    # Data is stored in z,y,x, but the downsampling
                    # factors are in x,y,z, so need to reverse
                    # Adding 1 allows to account for dimensions of odd size,
                    # Only add 1 if the downsampling factor is greater than 1
                    shape=(
                        (
                            prev_resolution.shape[0]
                            + (downsampling_factors[2] > 1)
                        )
                        // downsampling_factors[2],
                        (
                            prev_resolution.shape[1]
                            + (downsampling_factors[1] > 1)
                        )
                        // downsampling_factors[1],
                        (
                            prev_resolution.shape[2]
                            + (downsampling_factors[0] > 1)
                        )
                        // downsampling_factors[0],
                    ),
                )
                grp[f"{i}/cells"][...] = prev_resolution[
                    :: downsampling_factors[2],
                    :: downsampling_factors[1],
                    :: downsampling_factors[0],
                ]

            num_done += 1

            if yield_progress:
                yield int(100 * num_done / num_slices)


def parse_mesospim_metadata(
meta_file_name: Path,
) -> List[Dict]:
"""
Parse the metadata from a mesoSPIM .h5_meta.txt file.
Parameters
----------
meta_file_name: Path
The path to the h5_meta.txt file.
Returns
-------
List[Dict]
A list of dictionaries containing the metadata for each tile.
"""
tile_metadata = []
with open(meta_file_name, "r") as f:
lines = f.readlines()
curr_tile_metadata: dict[str, str | int | float] = {}
curr_tile_metadata: Dict[str, Union[str, int, float]] = {}

for line in lines[3:]:
line = line.strip()
# Tile metadata is separated by a line starting with [CFG]
if line.startswith("[CFG"):
tile_metadata.append(curr_tile_metadata)
curr_tile_metadata = {}
# Skip the headers
elif line in HEADERS:
continue
# Skip empty lines
elif not line:
continue
else:
split_line = line.split("]")
value = split_line[1].strip()
# Check if the value is an int or a float
# If it is neither, store it as a string
if value.isdigit():
curr_tile_metadata[split_line[0][1:]] = int(value)
else:
Expand All @@ -82,3 +153,76 @@ def parse_mesospim_metadata(meta_file_name: Path):

tile_metadata.append(curr_tile_metadata)
return tile_metadata


def check_mesospim_directory(
    mesospim_directory: Path,
) -> Tuple[Path, Path, Path]:
    """
    Verify that a mesoSPIM directory contains exactly one bdv.xml file,
    one h5_meta.txt file, and one bdv.h5 file, and return their paths.

    Parameters
    ----------
    mesospim_directory: Path
        The path to the mesoSPIM directory.

    Returns
    -------
    Tuple[Path, Path, Path]
        The paths to the bdv.xml, h5_meta.txt, and bdv.h5 files.

    Raises
    ------
    FileNotFoundError
        If any of the three expected files is missing, or if more than one
        candidate is found for any of them.
    """
    # The "[!.]*" glob prefix skips hidden files (leading period) while
    # requiring the expected file-name suffix.
    found_paths = []
    for pattern, label in (
        ("[!.]*bdv.xml", "bdv.xml"),
        ("[!.]*h5_meta.txt", "h5_meta.txt"),
        ("[!.]*bdv.h5", "h5"),
    ):
        candidates = list(mesospim_directory.glob(pattern))
        # Exactly one match is required for an unambiguous dataset.
        if len(candidates) != 1:
            raise FileNotFoundError(
                f"Expected 1 {label} file, found {len(candidates)}"
            )
        found_paths.append(candidates[0])

    return found_paths[0], found_paths[1], found_paths[2]


def get_slice_attributes(
    xml_path: Path, tile_names: List[str]
) -> Dict[str, Dict]:
    """
    Get the slice attributes from a Big Data Viewer XML file. Attributes
    include the illumination id, channel id, tile id, and angle id.

    Parameters
    ----------
    xml_path: Path
        The path to the XML file.
    tile_names: List[str]
        The names of the tiles.

    Returns
    -------
    Dict[str, Dict]
        A dictionary containing the slice attributes for each tile, keyed
        by tile name; values map attribute tag to attribute text.
    """
    root = ET.parse(xml_path).getroot()
    # One <attributes> element per <ViewSetup>, in document order; pair
    # them with the supplied tile names positionally.
    setups = root.findall(".//ViewSetup//attributes")

    return {
        name: {field.tag: field.text for field in setup}
        for setup, name in zip(setups, tile_names)
    }
Loading

0 comments on commit 68f0d78

Please sign in to comment.